# Quantification of single-cell protein abundance

Calculation of protein abundance by aggregating the cell-wise intensities, first within each replicate and then across replicates, and assessment of the correlation between replicates.

In [1]:
## logging functions
import logging
## system functions
import sys
from os.path import dirname
## data functions
import numpy as np
## visualization functions
import matplotlib.pyplot as plt
## data functions from roux
import roux.lib.dfs as rd # attributes
## system functions from roux
from roux.lib.io import read_table, to_table
## stats functions from roux
from roux.stat.io import perc_label
## visualization functions from roux
from roux.viz.io import to_plot
sys.path.append('../')

In [3]:
## parameters
## paths are placeholders here (presumably overridden when the notebook is executed -- TODO confirm)
input_path = None
output_path = None

## data curation
## mapping: column name in the input -> column name used downstream
rename = {'abundance': 'protein abundance'}

## validations (set True by default)
validate_cells_per_construct = True

In [4]:
## parameters inferred
## fail early with a clear message if the output path was not provided,
## instead of the opaque TypeError that `dirname(None)` would raise
if output_path is None:
    raise ValueError("output_path is not set; provide the path of the final output table.")
## the intermediate tables are saved in the directory of the final output
output_dir_path=dirname(output_path)
logging.info(output_path)

## the value columns are the curated column names, i.e. the values of `rename`
cols_value=list(rename.values())
col_value=cols_value[0] # preferred

## Non-aggregated

In [5]:
## read the input (non-aggregated) table
df01 = read_table(input_path)
## preview the first row
df01.head(1)

In [6]:
## rename
## 1. rename the columns as per `rename`; raises if a column to be renamed is missing
## 2. drop the columns whose names still start with 'abundance' after the renaming
df1 = (
    df01
    .rename(columns=rename, errors='raise')
    .pipe(lambda df: df.drop(columns=df.filter(regex='^abundance.*').columns))
)
## preview the first row
df1.head(1)

In [7]:
## save the renamed table in the output directory
to_table(
    df1,
    f'{output_dir_path}/01_renamed.tsv',
)

## Aggregated by replicates

In [8]:
## aggregate cells
## mean and std of each value column, per construct and replicate.
## NOTE: the aggregations are given by name ('mean', 'std') instead of `np.mean`/`np.std`.
## pandas deprecates passing the numpy callables here, and if `np.std` is called
## directly it uses ddof=0, which returns 0 instead of NaN for the groups with a single
## value. The validation below relies on the NaN.
df2=(df1
    .groupby(['gene symbol query','status partner','pairs','label','replicate'])
    .agg({c:['mean','std'] for c in cols_value})
    .rd.flatten_columns()
    .reset_index()
    )
if validate_cells_per_construct:
    ## a missing std marks a group for which the std could not be calculated (e.g. a single cell)
    assert not df2[f'{col_value} std'].isnull().any(), "-> "+perc_label(df2[f'{col_value} std'].isnull())+" of the constructs have 1 cell."
df2.head(1)

In [9]:
## save the replicate-wise aggregated table in the output directory
to_table(
    df2,
    f'{output_dir_path}/02_aggby_replicates.tsv',
)

## Aggregated by genes

In [10]:
## aggregate replicates
## mean and std of the replicate-wise means, per construct.
## NOTE: the aggregations are given by name ('mean', 'std') instead of `np.mean`/`np.std`.
## pandas deprecates passing the numpy callables here, and if `np.std` is called
## directly it uses ddof=0 instead of the sample std (ddof=1) that pandas computes.
df3=(df2
    .groupby(['gene symbol query','status partner','pairs','label'])
    .agg({c+' mean':['mean','std'] for c in cols_value})
    .rd.flatten_columns()
    ## '<value> mean mean' -> '<value> mean', '<value> mean std' -> '<value> std'
    .rd.renameby_replace({'mean mean':'mean','mean std':'std'})
    .reset_index()
    .assign(
        **{
            ## ratio of the between-replicate std to the mean of the preferred value column
            f'{col_value} std/mean':lambda df : df[f'{col_value} std']/df[f'{col_value} mean'],
        }
    )
)
df3.head(1)

In [11]:
## validate the design of the aggregated table
## the statuses are compared as sets: `unique()` returns the values in the order of
## appearance, which is not guaranteed to be 'DELTA' before 'WT', and an element-wise
## comparison with a list breaks if the number of unique values is not 2
assert set(df3['status partner'].unique()) == {'DELTA','WT'}, f"unexpected 'status partner' values: {df3['status partner'].unique().tolist()}"
## each pair should contain both the statuses
assert (df3.groupby('pairs')['status partner'].nunique()==2).all(), "found pairs without exactly 2 'status partner' values."
## each pair should contain 4 labels
assert (df3.groupby('pairs')['label'].nunique()==4).all(), "found pairs without exactly 4 'label' values."

In [12]:
## save the gene-wise aggregated table as the final output
to_table(
    df3,
    output_path,
)

### Plots

#### Between-replicate correlations

In [13]:
## data for the plot
## wide table: rows are the constructs, columns are the replicates,
## values are the replicate-wise means of the preferred value column
data=(df2
    .pivot(index=['gene symbol query','status partner','pairs','label',],
       columns=['replicate'],
       values=[f"{col_value} mean"],
          )
    ## drop the value-name level, so that the columns are named by the replicates only
    .droplevel(0,axis=1)
)
## log10 of 0 is -inf (presumably 'plog' refers to a pseudo-log transform -- TODO confirm)
assert (data==0).sum().sum()==0, 'plog needed.'
## vectorized log-transformation, instead of the element-wise (and deprecated) `applymap`
data=np.log10(data)
data.head(1)

In [24]:
## plot
## a row of 3 panels, passed to `plot_scatters` together with the 3 replicate columns
fig,axs=plt.subplots(1,3,figsize=[7,4])
axs=axs.flatten()
## project-local helper; the import relies on '../' being appended to `sys.path`
from modules.tools.plot import plot_scatters
plot_scatters(
    data=data,
    axs=axs,
    cols=['replicate1','replicate2','replicate3'],
    )
## NOTE(review): `df2` (not the plotted `data`) is saved here, to a path without a file
## extension, and the value returned by `to_table` is passed on to `to_plot` as its first
## argument -- confirm that this is intended
to_plot(to_table(df2,f'{output_dir_path}/02_aggby_replicates_plots/scatters'),fmts=['pdf','png'])