In [None]:
%load_ext autoreload
%autoreload 2

from IPython.display import display, Markdown

import sys
sys.path.insert(0, '../py_scripts')

import numpy as np
import scipy as sp
import pandas as pd
import numpy.random as rand
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats


import time
import pickle


import noise_models as noise
import model_fitting as fit
import fig_plot as fplot
import thermo_models as thermo



# Global plotting style for every figure in this notebook.
# `sns.set` is only an alias of `sns.set_theme` (the preferred interface, which accepts the
# same arguments), so call the canonical name directly.
sns.set_theme(context='talk', style='ticks', palette='deep', font_scale=1.0, color_codes=True,
              rc={'mathtext.fontset': 'cm', 'xtick.direction': 'in', 'ytick.direction': 'in',
                  'axes.linewidth': 1.5, 'figure.dpi': 100, 'text.usetex': False})

# Instructions

This notebook runs the fitting routines for the thermodynamic push and push-pull models. To make this run, you will need to set up the input files as follows:

1. Make datasets.csv file.
This file lists the different datasets, models, and fit parameters. The columns of this file should be:
dataset: The name of each file containing a different construct.

    model: The type of model you wish to use to fit each dataset. This determines which fit parameters will be necessary to fit each dataset. The options are: 
    
        substrate_only: This model assumes that only the substrate exists and will only fit the background pplation rate.
    
        non-pplatable: This model assumes the substrate cannot be pplated, and so has no fit parameters. However, we can still predict the distribution of observed pplated substrate based on the empty cell noise controls, so it is still useful to include these datasets.
    
        push: Model assumes the existence of substrate and kinase and will fit the background pplation rate, kinase zipper binding affinity, and kinase pplation rate.
    
        pushpull: Model fits the full pushpull construct with background pplation rate, kinase zipper binding affinity, kinase pplation rate, pptase zipper binding affinity, and pptase depplation rate.

    kinase_variant: Name of the kinase variant pplation rate parameter.

    kinase_zipper: Name of the kinase zipper binding affinity parameter.

    pptase_variant: Name of the pptase variant  depplation rate parameter. 

    pptase_zipper: Name of the pptase zipper binding affinity parameter. 

    substrate_col: Name of the column containing the substrate in the data file.

    phospho_col: Name of the column containing the pplated substrate in the data file.

    writer_col: Name of the column containing the kinase in the data file.

    eraser_col:  Name of the column containing the pptase in the data file.

    Note: All parameters are shared across and within datasets/models if they have the same name. This is true even if they correspond to different thermodynamic parameters (e.g., if the pptase and kinase zipper have the same name, then they are the same parameter). To ensure unique parameters, make sure they have unique names. The background pplation rate is also shared by all models by default.

2. Make noise_key.csv file
This file specifies the noise control datasets. The columns should be:

    component: The molecule species (e.g., writer, eraser, substrate, phospho).

    anti_label: The label of the column containing the antibody values for the active cells.

    GFP_label: The label of the column containing the GFP values for the active cells.

    empty_anti_label: The label of the column containing the antibody values for the empty cells.

    empty_GFP_label: The label of the column containing the GFP values for the empty cells.

3. Rename noise dataset files as follows:

    noise_writer.csv

    noise_eraser.csv

    noise_substrate_phospho_combined.csv

    noise_empty_cell.csv

# Load data

In [None]:
# Fixed seed: used both for numpy's global RNG and for the per-dataset resampling below,
# so that re-running the notebook reproduces the same data.
seed = 42

print("Seed:", seed)

rand.seed(seed)


# name of dataset folder
# label = "21_10_15_highgating"
# label = "21_10_15_medhighgating"
label = "22_01_20"


# The key files are parsed with a two-character separator (comma followed by a space),
# which requires the python parsing engine.
df_dataset_key = pd.read_csv("../data/"+label+"/dataset_key.csv", sep=', ', engine='python').set_index("exp_name")
display(df_dataset_key)

df_anti2GFP_key = pd.read_csv("../data/"+label+"/anti_to_GFP_key.csv", sep=', ', engine='python').set_index("component")
display(df_anti2GFP_key)


df_anti_noGFP2GFP_key = pd.read_csv("../data/"+label+"/anti_noGFP_to_GFP_key.csv", sep=', ', engine='python').set_index("component")
display(df_anti_noGFP2GFP_key)

# load datasets

df_list = []
for exp_name, row in df_dataset_key.iterrows():

    df = pd.read_csv("../data/{}/{}.csv".format(label, row['file_name']))
    # Resample the rows with replacement (same number of rows as the file), then renumber them.
    # NOTE(review): replace=True makes this a bootstrap resample rather than a plain shuffle,
    # so some cells appear more than once and others are dropped - confirm this is intended.
    df = df.drop("Unnamed: 0", axis=1, errors='ignore').sample(frac=1.0, replace=True, random_state=seed).reset_index(drop=True)

    # Columns shared by every model, mapped onto the standard names used downstream.
    source_cols = [row['substrate_col'], row['phospho_col'], row['writer_col']]
    standard_cols = ['ST_anti_exp', 'SpT_anti_exp', 'WT_anti_exp']

    if row['model'] in ('push', 'substrate_only', 'non-pplatable'):
        df = df[source_cols].rename(columns=dict(zip(source_cols, standard_cols)))
        # These models have no eraser column; fill in a tiny positive placeholder so the
        # rows survive the "all values > 0" filter applied below.
        df['ET_anti_exp'] = 1e-8
    elif row['model'] == 'pushpull':
        source_cols.append(row['eraser_col'])
        standard_cols.append('ET_anti_exp')
        df = df[source_cols].rename(columns=dict(zip(source_cols, standard_cols)))
    else:
        # Without this guard the raw, un-renamed columns would be concatenated with the other
        # datasets, and the dropna() below would then silently discard every row.
        raise ValueError("Unknown model '{}' for experiment '{}'; expected one of "
                         "'substrate_only', 'non-pplatable', 'push', 'pushpull'".format(row['model'], exp_name))

    df['exp_name'] = exp_name
    df.index.rename('cell_index', inplace=True)
    df_list.append(df)

# dataframe containing all datasets, indexed by (exp_name, cell_index)
df_data = pd.concat(df_list)
df_data = df_data.reset_index().set_index(['exp_name', 'cell_index'])

# Report the row count before filtering, after dropping NaNs, and after dropping non-positive values.
print(len(df_data.index))
df_data = df_data.dropna()
print(len(df_data.index))
# Keep only cells where every measured channel is strictly positive; copy so that the
# column assignment below operates on an independent frame rather than a filtered slice.
df_data = df_data[(df_data > 0.0).all(axis=1)].copy()
print(len(df_data.index))





# record fraction of phospho substrate
df_data['Sp_frac_anti_exp'] = df_data['SpT_anti_exp'] / df_data['ST_anti_exp']


display(df_data)

# Plot raw data for each experiment

In [None]:
# Draw the raw-data summary figure for every experiment, choosing the plotting routine
# by model type: push-pull datasets get the four-component summary, while the models
# without an eraser channel share the push summary.
for exp_name, row in df_dataset_key.iterrows():

    if row['model'] == 'pushpull':
        fplot.plot_pushpull_dataset_summary(df_data, exp_name)
    elif row['model'] in ('push', "substrate_only", "non-pplatable"):
        fplot.plot_push_dataset_summary(df_data, exp_name)
                

    

# Noise models converting antibody measurements to GFP measurements

In [None]:

# antibody to GFP unit conversion

# points per bin
ppbin = 10


def _read_anti_GFP_control(component):
    """Load the control measurements registered for `component` in df_anti2GFP_key.

    Parameters
    ----------
    component : str
        Row label of df_anti2GFP_key (e.g. 'kinase' or 'empty_kinase').

    Returns
    -------
    tuple of numpy.ndarray
        (antibody values, GFP values), read from the columns named by the key's
        'anti_col_name' and 'GFP_col_name' entries of the file given by 'file_name'.
    """
    df_ctrl = pd.read_csv("../data/{}/{}.csv".format(label, df_anti2GFP_key.loc[component, 'file_name']))
    anti_values = df_ctrl[df_anti2GFP_key.loc[component, 'anti_col_name']].values
    GFP_values = df_ctrl[df_anti2GFP_key.loc[component, 'GFP_col_name']].values
    return anti_values, GFP_values


def _fit_and_plot_anti2GFP(component, title):
    """Fit the antibody -> GFP noise models for one component and plot them together.

    Two noise.RandomConditionalNoise models are built (antibody as input, GFP as output):
    one from the 'empty_<component>' control and one from the '<component>' control.

    Parameters
    ----------
    component : str
        Component name as used in df_anti2GFP_key ('kinase', 'pptase', 'substrate', 'phospho').
    title : str
        Title of the diagnostic figure.

    Returns
    -------
    tuple
        (empty-control model, active-control model).
    """
    # Build the empty-cell model first and the active model second (same order as the
    # figures are drawn in).
    empty_anti, empty_GFP = _read_anti_GFP_control('empty_' + component)
    empty_model = noise.RandomConditionalNoise(empty_anti, empty_GFP, ppbin=ppbin, verbose=False)

    active_anti, active_GFP = _read_anti_GFP_control(component)
    active_model = noise.RandomConditionalNoise(active_anti, active_GFP, ppbin=ppbin, verbose=False)

    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    empty_model.plot(ax, color='g')
    active_model.plot(ax)

    ax.set_title(title)

    # Dashed guides at the geometric mean of the empty-control input (vertical)
    # and output (horizontal) data.
    ax.vlines(10**np.mean(np.log10(empty_model.get_in_data())), ymin=1e1, ymax=1e5, color='k', ls='--')
    ax.hlines(10**np.mean(np.log10(empty_model.get_out_data())), xmin=1e1, xmax=1e5, color='k', ls='--')

    ax.set_ylabel('out:GFP')
    ax.set_xlabel('in:antibody')

    # y = x reference line
    ax.plot(np.linspace(1e0, 1e6), np.linspace(1e0, 1e6), 'r--')

    plt.show()

    return empty_model, active_model


################################

empty_kinase_anti2GFP, kinase_anti2GFP = _fit_and_plot_anti2GFP('kinase', "Kinase")

################################

empty_pptase_anti2GFP, pptase_anti2GFP = _fit_and_plot_anti2GFP('pptase', "Pptase")

################################

empty_substrate_anti2GFP, substrate_anti2GFP = _fit_and_plot_anti2GFP('substrate', "Substrate")

################################

empty_phospho_anti2GFP, phospho_anti2GFP = _fit_and_plot_anti2GFP('phospho', "Phospho")

################################

# Inverse phospho models: same control files, but with GFP as the input and antibody as the output.
_empty_phospho_anti, _empty_phospho_GFP = _read_anti_GFP_control('empty_phospho')
empty_phospho_GFP2anti = noise.RandomConditionalNoise(_empty_phospho_GFP, _empty_phospho_anti, ppbin=ppbin, verbose=False)

_phospho_anti, _phospho_GFP = _read_anti_GFP_control('phospho')
phospho_GFP2anti = noise.RandomConditionalNoise(_phospho_GFP, _phospho_anti, ppbin=ppbin, verbose=False)

# phospho_GFP2anti.add_cells(empty_phospho_GFP2anti)


fig, ax = plt.subplots(1, 1, figsize=(6, 4))
empty_phospho_GFP2anti.plot(ax, color='g')
phospho_GFP2anti.plot(ax)

ax.set_title("Inverse Phospho")

ax.set_ylabel('out:antibody')
ax.set_xlabel('in:GFP')

ax.plot(np.linspace(1e0, 1e6), np.linspace(1e0, 1e6), 'r--')

# The guides reuse the forward empty-phospho model with its axes swapped:
# its output (GFP) is this plot's x axis and its input (antibody) is the y axis.
ax.vlines(10**np.mean(np.log10(empty_phospho_anti2GFP.get_out_data())), ymin=1e1, ymax=1e5, color='k', ls='--')

ax.hlines(10**np.mean(np.log10(empty_phospho_anti2GFP.get_in_data())), xmin=1e1, xmax=1e5, color='k', ls='--')


plt.show()


# Noise models converting antibody measurements without GFP to antibody measurements with GFP

In [None]:
# number of percentile bins to consider
nbins = 1000

# Quantile of each model's input data marked with a dashed vertical line on the forward plots.
# Same value as `empty_cell_quantile` in the cutoff section further down; keep the two in sync.
_marked_in_quantile = 0.01


def _read_anti_noGFP_control(component):
    """Return the antibody values (numpy array) of the w/o-GFP control for `component`.

    The file and column are looked up in df_anti_noGFP2GFP_key ('file_name', 'anti_col_name').
    """
    df_ctrl = pd.read_csv("../data/{}/{}.csv".format(label, df_anti_noGFP2GFP_key.loc[component, 'file_name']))
    return df_ctrl[df_anti_noGFP2GFP_key.loc[component, 'anti_col_name']].values


def _plot_percentile_noise(model, title, xlabel, ylabel, marked_quantile=None):
    """Plot a noise.PercentileNoise model on log-log axes with a y = x reference line.

    Parameters
    ----------
    model : noise.PercentileNoise
        Model to draw (via its own `plot` method).
    title, xlabel, ylabel : str
        Figure title and axis labels.
    marked_quantile : float or None
        If given, draw a dashed vertical line at this quantile of the model's input data.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    model.plot(ax)

    ax.set_title(title)

    ax.plot(np.linspace(1e0, 1e5), np.linspace(1e0, 1e5), 'r--')

    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)

    if marked_quantile is not None:
        ax.vlines([np.quantile(model.get_in_data(), marked_quantile)], ymin=1e0, ymax=1e4, color='k', ls='dashed')

    ax.set_xscale('log')
    ax.set_yscale('log')

    plt.show()


def _fit_and_plot_anti_noGFP2GFP(component, title, anti2GFP_model):
    """Fit and plot the model mapping antibody w/o GFP to antibody w/ GFP for one component.

    Parameters
    ----------
    component : str
        Row label of df_anti_noGFP2GFP_key ('kinase', 'pptase', 'substrate', 'phospho').
    title : str
        Title of the diagnostic figure.
    anti2GFP_model
        The antibody -> GFP model of the same component; its input data supplies the
        antibody values measured with GFP.

    Returns
    -------
    noise.PercentileNoise
        Model with the w/o-GFP control as input and the w/-GFP antibody values as output.
    """
    model = noise.PercentileNoise(_read_anti_noGFP_control(component),
                                  anti2GFP_model.get_in_data(), nbins=nbins, verbose=True)
    _plot_percentile_noise(model, title, xlabel="in: antibody w/o GFP", ylabel="out: antibody w GFP",
                           marked_quantile=_marked_in_quantile)
    return model


################################

kinase_anti_noGFP2GFP = _fit_and_plot_anti_noGFP2GFP('kinase', "Kinase", kinase_anti2GFP)

################################

pptase_anti_noGFP2GFP = _fit_and_plot_anti_noGFP2GFP('pptase', "Pptase", pptase_anti2GFP)

################################

substrate_anti_noGFP2GFP = _fit_and_plot_anti_noGFP2GFP('substrate', "Substrate", substrate_anti2GFP)

################################

phospho_anti_noGFP2GFP = _fit_and_plot_anti_noGFP2GFP('phospho', "Phospho", phospho_anti2GFP)

################################

# Inverse phospho model: antibody w/ GFP as input, antibody w/o GFP as output.
phospho_anti_GFP2noGFP = noise.PercentileNoise(phospho_anti2GFP.get_in_data(),
                                               _read_anti_noGFP_control('phospho'), nbins=nbins, verbose=True)

_plot_percentile_noise(phospho_anti_GFP2noGFP, "Phospho",
                       xlabel="in: antibody w GFP", ylabel="out: antibody w/o GFP")




# Choose cutoffs for empty cells

In [None]:
GFP_shift_quantile = 0.5
empty_cell_quantile = 0.01

# GFP_shift_quantile: quantile of the empty-cell GFP distribution (0.5 = median, 50th percentile)
#   considered the zero concentration threshold
# empty_cell_quantile: quantile of the expressing control distributions (0.01 = 1st percentile)
#   used as the empty cell threshold


def _control_cutoffs(anti_noGFP2GFP, anti2GFP, empty_anti2GFP):
    """Compute the four thresholds of one component from its control noise models.

    Parameters
    ----------
    anti_noGFP2GFP
        Model mapping antibody w/o GFP (input) to antibody w/ GFP (output).
    anti2GFP
        Antibody -> GFP model of the expressing control.
    empty_anti2GFP
        Antibody -> GFP model of the empty control.

    Returns
    -------
    tuple of float
        (anti_cutoff, antiwGFP_cutoff, GFP_cutoff, GFP_shift): the `empty_cell_quantile`
        quantile of the w/o-GFP antibody data, of the w/-GFP antibody data and of the
        expressing-control GFP data, followed by the `GFP_shift_quantile` quantile of the
        empty-control GFP data.
    """
    anti_cutoff = np.quantile(anti_noGFP2GFP.get_in_data(), empty_cell_quantile)
    antiwGFP_cutoff = np.quantile(anti_noGFP2GFP.get_out_data(), empty_cell_quantile)
    GFP_cutoff = np.quantile(anti2GFP.get_out_data(), empty_cell_quantile)
    GFP_shift = np.quantile(empty_anti2GFP.get_out_data(), GFP_shift_quantile)
    return anti_cutoff, antiwGFP_cutoff, GFP_cutoff, GFP_shift


(kinase_anti_cutoff, kinase_antiwGFP_cutoff,
 kinase_GFP_cutoff, kinase_GFP_shift) = _control_cutoffs(kinase_anti_noGFP2GFP, kinase_anti2GFP,
                                                         empty_kinase_anti2GFP)

(pptase_anti_cutoff, pptase_antiwGFP_cutoff,
 pptase_GFP_cutoff, pptase_GFP_shift) = _control_cutoffs(pptase_anti_noGFP2GFP, pptase_anti2GFP,
                                                         empty_pptase_anti2GFP)

(substrate_anti_cutoff, substrate_antiwGFP_cutoff,
 substrate_GFP_cutoff, substrate_GFP_shift) = _control_cutoffs(substrate_anti_noGFP2GFP, substrate_anti2GFP,
                                                               empty_substrate_anti2GFP)

(phospho_anti_cutoff, phospho_antiwGFP_cutoff,
 phospho_GFP_cutoff, phospho_GFP_shift) = _control_cutoffs(phospho_anti_noGFP2GFP, phospho_anti2GFP,
                                                           empty_phospho_anti2GFP)


# log10 range covered by the histograms
binrange = (0, 6)


def _plot_control_histograms(title, empty_anti2GFP, anti2GFP, anti_noGFP2GFP,
                             anti_cutoff, antiwGFP_cutoff, GFP_cutoff, GFP_shift):
    """Show the control distributions of one component together with its chosen thresholds.

    Left panel: antibody histograms of the empty control (green), the expressing control
    w/ GFP (red) and the expressing control w/o GFP (blue), with dashed lines at
    `anti_cutoff` (blue) and `antiwGFP_cutoff` (red).
    Right panel: GFP histograms of the empty (green) and expressing (red) controls, with
    dashed lines at `GFP_shift` (green) and `GFP_cutoff` (red).
    """
    # Styling shared by every histogram in the figure.
    hist_style = dict(binrange=binrange, log_scale=True, bins=64,
                      element='step', fill=False, stat='density')

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), squeeze=False)

    fig.suptitle(title, y=1.05)

    # --- antibody panel ---
    ax = axes[0, 0]

    sns.histplot(empty_anti2GFP.get_in_data(), ax=ax, label='empty control', color='g', **hist_style)
    sns.histplot(anti2GFP.get_in_data(), ax=ax, label='activated control w/ GFP', color='r', **hist_style)
    sns.histplot(anti_noGFP2GFP.get_in_data(), ax=ax, label='activated control w/o GFP', color='b', **hist_style)

    ax.set_xlabel("Antibody")

    ax.vlines([anti_cutoff], ymin=0, ymax=1.5, color='b', ls='dashed')
    ax.vlines([antiwGFP_cutoff], ymin=0, ymax=1.5, color='r', ls='dashed')

    ax.legend(loc='lower left', fontsize='xx-small', bbox_to_anchor=(0.0, 1.0), ncol=3)

    # --- GFP panel ---
    ax = axes[0, 1]

    sns.histplot(empty_anti2GFP.get_out_data(), ax=ax, label='empty control', color='g', **hist_style)
    sns.histplot(anti2GFP.get_out_data(), ax=ax, label='activated control w/ GFP', color='r', **hist_style)

    ax.set_xlabel("GFP")

    ax.vlines([GFP_shift], ymin=0, ymax=1.5, color='g', ls='dashed')
    ax.vlines([GFP_cutoff], ymin=0, ymax=1.5, color='r', ls='dashed')

    plt.show()


_plot_control_histograms("Kinase", empty_kinase_anti2GFP, kinase_anti2GFP, kinase_anti_noGFP2GFP,
                         kinase_anti_cutoff, kinase_antiwGFP_cutoff, kinase_GFP_cutoff, kinase_GFP_shift)

#####################################

_plot_control_histograms("Pptase", empty_pptase_anti2GFP, pptase_anti2GFP, pptase_anti_noGFP2GFP,
                         pptase_anti_cutoff, pptase_antiwGFP_cutoff, pptase_GFP_cutoff, pptase_GFP_shift)

#####################################

_plot_control_histograms("Substrate", empty_substrate_anti2GFP, substrate_anti2GFP, substrate_anti_noGFP2GFP,
                         substrate_anti_cutoff, substrate_antiwGFP_cutoff, substrate_GFP_cutoff, substrate_GFP_shift)

#####################################

_plot_control_histograms("Phospho", empty_phospho_anti2GFP, phospho_anti2GFP, phospho_anti_noGFP2GFP,
                         phospho_anti_cutoff, phospho_antiwGFP_cutoff, phospho_GFP_cutoff, phospho_GFP_shift)




# Convert antibody measurements without GFP to antibody measurements with GFP

In [None]:
# Cells whose antibody signal is below the component's cutoff are flagged as empty and keep
# their raw antibody value; cells at or above the cutoff are mapped to "antibody w/ GFP"
# units with the component's noGFP -> GFP model.

# One entry per component, processed in this order for every experiment:
# (column prefix, antibody cutoff, noGFP -> GFP model)
anti_noGFP_conversions = [
    ('WT', kinase_anti_cutoff, kinase_anti_noGFP2GFP),
    ('ST', substrate_anti_cutoff, substrate_anti_noGFP2GFP),
    ('SpT', phospho_anti_cutoff, phospho_anti_noGFP2GFP),
    ('ET', pptase_anti_cutoff, pptase_anti_noGFP2GFP),
]

# (Re)initialise the output columns: first all empty-cell flags, then all converted values.
for flag_prefix in ('WT', 'ST', 'SpT', 'ET'):
    df_data[flag_prefix + '_empty'] = False
for value_prefix in ('WT', 'ST', 'SpT', 'ET'):
    df_data[value_prefix + '_anti_wGFP_infer'] = 0.0

for exp_name, row in df_dataset_key.iterrows():

    # Snapshot of this experiment's rows, taken before any of its columns are updated.
    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    for comp_prefix, comp_cutoff, comp_model in anti_noGFP_conversions:
        raw_col = comp_prefix + '_anti_exp'
        infer_col = comp_prefix + '_anti_wGFP_infer'

        # below the cutoff: mark as empty and pass the raw value through unchanged
        idx = df_tmp.index[df_tmp[raw_col] < comp_cutoff]
        df_data.loc[idx, comp_prefix + '_empty'] = True
        df_data.loc[idx, infer_col] = df_data.loc[idx, raw_col]

        # at or above the cutoff: convert with the noise model
        idx = df_tmp.index[df_tmp[raw_col] >= comp_cutoff]
        df_data.loc[idx, infer_col] = comp_model.transform(df_data.loc[idx, raw_col])


display(df_data)
# Row count with and without NaNs, as a check that the conversion produced no missing values.
print(len(df_data))
print(len(df_data.dropna()))
    
    


# Plot distributions of each component in antibody units and compare to controls

In [None]:

# For every experiment, draw a 2x2 grid with one panel per component. Each panel compares the
# raw and inferred (w/ GFP) antibody distributions of the experiment against the three
# control distributions and marks the component's antibody cutoff.

binrange = (0, 6)

# (axes position, column prefix, x label, control w/ GFP, control w/o GFP, empty control, cutoff)
antibody_panels = [
    ((0, 0), 'ST', "Substrate Antibody",
     substrate_anti2GFP, substrate_anti_noGFP2GFP, empty_substrate_anti2GFP, substrate_anti_cutoff),
    ((0, 1), 'SpT', "Phospho Antibody",
     phospho_anti2GFP, phospho_anti_noGFP2GFP, empty_phospho_anti2GFP, phospho_anti_cutoff),
    ((1, 0), 'WT', "Kinase Antibody",
     kinase_anti2GFP, kinase_anti_noGFP2GFP, empty_kinase_anti2GFP, kinase_anti_cutoff),
    ((1, 1), 'ET', "Pptase Antibody",
     pptase_anti2GFP, pptase_anti_noGFP2GFP, empty_pptase_anti2GFP, pptase_anti_cutoff),
]

for exp_name, row in df_dataset_key.iterrows():

    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    fig, axes = plt.subplots(2, 2, figsize=(12, 10), squeeze=False)

    fig.suptitle(exp_name)

    for panel_pos, panel_prefix, panel_xlabel, ctrl_wGFP, ctrl_noGFP, ctrl_empty, panel_cutoff in antibody_panels:

        # the pptase panel is only drawn for push-pull experiments
        if panel_prefix == 'ET' and row['model'] != 'pushpull':
            continue

        ax = axes[panel_pos]

        # experiment data: unfilled step outlines
        outline_kw = dict(binrange=binrange, log_scale=True, bins=64, ax=ax,
                          element='step', fill=False, stat='density')
        # control data: default filled bars
        filled_kw = dict(binrange=binrange, log_scale=True, bins=64, ax=ax, stat='density')

        sns.histplot(df_tmp[panel_prefix + '_anti_exp'],
                     label="experiment (raw data)", color='k', **outline_kw)
        sns.histplot(df_tmp[panel_prefix + '_anti_wGFP_infer'],
                     label="experiment (inferred antibodies w/ GFP)", color='r', **outline_kw)
        sns.histplot(ctrl_wGFP.get_in_data(),
                     label='activated control w/ GFP', color='r', **filled_kw)
        sns.histplot(ctrl_noGFP.get_in_data(),
                     label='activated control w/o GFP', color='b', **filled_kw)
        sns.histplot(ctrl_empty.get_in_data(),
                     label='empty control', color='g', **filled_kw)

        ax.set_xlabel(panel_xlabel)

        ax.vlines([panel_cutoff], ymin=0, ymax=1.5, color='k', ls='dashed')

        # a single legend, on the first panel, serves the whole figure
        if panel_pos == (0, 0):
            ax.legend(loc='lower left', fontsize='xx-small', bbox_to_anchor=(0.0, 1.0), ncol=1)

    plt.tight_layout()
    plt.show()
        
    

# Convert antibody measurements with GFP to GFP units

In [None]:
# Convert the inferred antibody-with-GFP values of every dataset to GFP units,
# then subtract the matching GFP cutoff (clipping at zero) to obtain the
# inferred concentration columns.

# Every inferred column starts at 0.0; rows that are never converted
# (e.g. rows dropped by dropna() below) keep that value.
for infer_suffix in ('_GFP_infer', '_conc_infer'):
    for species_prefix in ('WT', 'ST', 'SpT', 'ET'):
        df_data[species_prefix + infer_suffix] = 0.0

display(df_data)

for exp_name, row in df_dataset_key.iterrows():

    # only rows without any missing value are converted
    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    print(exp_name)

    n_rows = len(df_tmp.index)
    if n_rows == 0:
        # Nothing to convert: every write below would target an empty index,
        # and the percentage printout would divide by zero.
        print("No complete rows for this dataset; nothing to convert")
        print("#########################")
        continue

    # One entry per species:
    # (column prefix, converter for rows flagged <prefix>_empty,
    #  converter for the remaining rows, GFP cutoff, printed label)
    species_table = [
        ('WT', empty_kinase_anti2GFP, kinase_anti2GFP, kinase_GFP_cutoff, "Empty Kinase:"),
        ('ST', empty_substrate_anti2GFP, substrate_anti2GFP, substrate_GFP_cutoff, "Empty Substrate:"),
        ('SpT', empty_phospho_anti2GFP, phospho_anti2GFP, phospho_GFP_cutoff, "Empty Phospho:"),
    ]
    if row['model'] == 'pushpull':
        # the pptase objects are only touched for pushpull datasets
        species_table.append(
            ('ET', empty_pptase_anti2GFP, pptase_anti2GFP, pptase_GFP_cutoff, "Empty Pptase:"))

    for species_prefix, empty_converter, filled_converter, species_cutoff, empty_msg in species_table:

        anti_col = species_prefix + '_anti_wGFP_infer'
        gfp_col = species_prefix + '_GFP_infer'
        conc_col = species_prefix + '_conc_infer'

        # rows flagged as empty for this species
        idx = df_tmp.query(species_prefix + "_empty").index
        df_data.loc[idx, gfp_col] = empty_converter.transform(df_data.loc[idx, anti_col])
        df_data.loc[idx, conc_col] = np.maximum(df_data.loc[idx, gfp_col] - species_cutoff, 0.0)

        print(empty_msg, 100*len(idx)/n_rows, "%")

        # all remaining rows
        idx = df_tmp.query("not " + species_prefix + "_empty").index
        df_data.loc[idx, gfp_col] = filled_converter.transform(df_data.loc[idx, anti_col])
        df_data.loc[idx, conc_col] = np.maximum(df_data.loc[idx, gfp_col] - species_cutoff, 0.0)

    if row['model'] != 'pushpull':
        # datasets that are not pushpull get explicit zeros in the pptase columns
        df_data.loc[df_tmp.index, 'ET_GFP_infer'] = 0.0
        df_data.loc[df_tmp.index, 'ET_conc_infer'] = 0.0

    print("#########################")


display(df_data)
print(len(df_data))
print(len(df_data.dropna()))

# Fit model

In [None]:
# Run the fitting routine and print how long it took, in seconds.
# NOTE(review): the following cell saves df_dataset_key as the model
# parameters, so fit_push presumably stores the fitted values in
# df_dataset_key -- confirm against model_fitting.fit_push.
start = time.time()
fit_output = fit.fit_push(df_dataset_key, df_data, phospho_GFP_cutoff)
end = time.time()

res, param_dict = fit_output

elapsed_seconds = end - start
print(elapsed_seconds)

# Save model parameters


In [None]:
# Write the dataset key table, including its index, to this label's data folder.
model_params_path = "../data/"+label+"/model_params.csv"
df_dataset_key.to_csv(model_params_path, sep=',', index=True)

# Load model parameters if fitting was performed previously

In [None]:
# Read the saved parameter table back in, indexed by experiment name,
# replacing the in-memory df_dataset_key.
model_params_path = "../data/"+label+"/model_params.csv"
df_dataset_key = pd.read_csv(
    model_params_path,
    sep=',',
    engine='python',
    index_col='exp_name',
)

display(df_dataset_key)

# Compute predicted GFP values

In [None]:
# Predict the SpT concentration of every dataset with the model named in its
# 'model' entry, then shift the prediction into GFP units.
#
# NOTE(review): the inferred concentrations were obtained by subtracting
# phospho_GFP_cutoff from SpT_GFP_infer, while the offset added back below is
# phospho_GFP_shift -- confirm that this name is defined and is the intended
# offset.

df_data['SpT_conc_predict'] = 0.0
df_data['SpT_GFP_predict'] = 0.0

for exp_name, row in df_dataset_key.iterrows():

    # rows with any missing value are left out and keep the 0.0 defaults
    df_tmp = df_data.query("exp_name==@exp_name").dropna()
    predicted_rows = df_tmp.index
    model_type = row['model']

    if model_type == 'pushpull':

        params = [
            row['bg_phospho_rate'],
            row['kinase_phospho_rate'],
            row['kinase_binding_affinity'],
            row['pptase_dephospho_rate'],
            row['pptase_binding_affinity'],
        ]

        df_data.loc[predicted_rows, 'SpT_conc_predict'] = thermo.predict_pushpull(
            df_tmp['WT_conc_infer'].values,
            df_tmp['ET_conc_infer'].values,
            df_tmp['ST_conc_infer'].values,
            *params)

    elif model_type == 'push':

        params = [
            row['bg_phospho_rate'],
            row['kinase_phospho_rate'],
            row['kinase_binding_affinity'],
        ]

        df_data.loc[predicted_rows, 'SpT_conc_predict'] = thermo.predict_push(
            df_tmp['WT_conc_infer'].values,
            df_tmp['ST_conc_infer'].values,
            *params)

    elif model_type == 'non-pplatable':

        df_data.loc[predicted_rows, 'SpT_conc_predict'] = thermo.predict_nonpplatable(
            df_tmp['ST_conc_infer'].values)

    elif model_type == 'substrate_only':

        params = [row['bg_phospho_rate']]

        df_data.loc[predicted_rows, 'SpT_conc_predict'] = thermo.predict_substrate_only(
            df_tmp['ST_conc_infer'].values,
            *params)

    # A model name matching none of the branches above leaves the predicted
    # concentration at its 0.0 default; the shift is still applied.
    df_data.loc[predicted_rows, 'SpT_GFP_predict'] = (
        df_data.loc[predicted_rows, 'SpT_conc_predict'] + phospho_GFP_shift
    )


display(df_data)

print(len(df_data))
print(len(df_data.dropna()))

# Convert predicted GFP values to antibody values in the absence of GFP

In [None]:
# Convert the predicted SpT GFP values to antibody values, both with GFP
# (SpT_anti_wGFP_predict) and without GFP (SpT_anti_predict).
#
# Each row is handled by exactly one of two branches, split at
# phospho_GFP_cutoff (SpT_GFP_predict is in GFP units, so the threshold must be
# the GFP cutoff in both branches, not the antibody cutoff):
#   * below the cutoff: flagged SpT_predict_empty and converted with
#     empty_phospho_GFP2anti; the with-GFP value is reused as the no-GFP value.
#   * at or above the cutoff: converted with phospho_GFP2anti and then mapped
#     to the no-GFP value with phospho_anti_GFP2noGFP.
# Rows with missing values are skipped and keep the defaults set here.
df_data['SpT_predict_empty'] = False
df_data['SpT_anti_wGFP_predict'] = 0.0
df_data['SpT_anti_predict'] = 0.0

for exp_name in df_dataset_key.index:

    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    # predictions below the GFP cutoff
    idx = df_tmp.query("SpT_GFP_predict < @phospho_GFP_cutoff").index
    df_data.loc[idx, 'SpT_predict_empty'] = True
    df_data.loc[idx, 'SpT_anti_wGFP_predict'] = empty_phospho_GFP2anti.transform(df_data.loc[idx, 'SpT_GFP_predict'])
    df_data.loc[idx, 'SpT_anti_predict'] = df_data.loc[idx, 'SpT_anti_wGFP_predict']

    # exact complement of the branch above
    idx = df_tmp.query("SpT_GFP_predict >= @phospho_GFP_cutoff").index
    df_data.loc[idx, 'SpT_anti_wGFP_predict'] = phospho_GFP2anti.transform(df_data.loc[idx, 'SpT_GFP_predict'])
    df_data.loc[idx, 'SpT_anti_predict'] = phospho_anti_GFP2noGFP.transform(df_data.loc[idx, 'SpT_anti_wGFP_predict'])


print(len(df_data))
print(len(df_data.dropna()))
display(df_data)

# Save predictions

In [None]:
# Write the full data table, predictions included, to this label's data folder.
model_predictions_path = "../data/"+label+"/model_predictions.csv"
df_data.to_csv(model_predictions_path, sep=',')