In [None]:
%load_ext autoreload
%autoreload 2

# %config InlineBackend.figure_formats = ['svg']

from IPython.display import display, Markdown

import sys, os
sys.path.insert(0, '../py_scripts')

import numpy as np
import scipy as sp
import pandas as pd
import numpy.random as rand
import scipy.stats as stats


import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import time
import glob
import pickle

import noise_models as noise
import thermo_models as thermo

import fig_plot as fplot
import model_fitting as fit

# Global seaborn/matplotlib styling used by every figure in this notebook.
# Matplotlib rc overrides: Computer Modern math font, inward ticks, thicker
# axes frame, 100 dpi figures, and no external LaTeX rendering.
rc_overrides = {
    'mathtext.fontset': 'cm',
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'axes.linewidth': 1.5,
    'figure.dpi': 100,
    'text.usetex': False,
}

sns.set(
    context='talk',
    style='ticks',
    palette='deep',
    font_scale=1.0,
    color_codes=True,
    rc=rc_overrides,
)

This notebook uses the model parameters obtained from fitting to predict the antibody distributions for the phosphorylated substrate, and compares those predictions with the measured distributions.

In [None]:
# Name of the dataset directory under ../data to analyze. Other datasets that
# have been used with this notebook:
#   "21_10_15_highgating"
#   "21_10_15_highgating_regularize"
#   "22_01_20"
#   "22_01_20_noGFPexpressionshift"
#   "22_05_05_twolayer"
label = "21_10_15_medhighgating"

# Parameter table for the selected dataset, indexed by parameter name.
params_path = "../data/{}/model_params.csv".format(label)
df_params = pd.read_csv(params_path, sep=',', engine='python', index_col='name')

display(df_params)

In [None]:
# Two stacked horizontal bar charts of the parameters in `df_params`:
#   top    - binding strengths, plotted as 1 / 10**val
#   bottom - phosphorylation rates, plotted as 10**val
# Error bars span the 'val_min' .. 'val_max' columns after the same transform.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 8))


# ---- top panel: binding strengths -------------------------------------------
ax = ax1

affinities = ['SH2', 'nb SH2', 'L+A', 'E+A', 'E+E', 'pptase_non-binding', 'kinase_non-binding']

# keep only the parameters that are present for this dataset
labels = [name for name in affinities if name in df_params.index.values]

vals = 1.0 / 10**df_params.loc[labels, 'val']

# Taking the reciprocal flips the ordering, so 'val_max' yields the lower
# end of the bar's error range and 'val_min' the upper end.
strength_lower = 1.0/10**df_params.loc[labels, 'val_max']
strength_upper = 1.0/10**df_params.loc[labels, 'val_min']
y_low = vals-strength_lower
y_high = strength_upper-vals

ax.barh(labels, vals, xerr=(y_low, y_high))

ax.set_ylabel("Zipper")
ax.set_xlabel("Binding Strength\n[Concentration$^{-1}$]")
ax.set_xscale('log')
ax.set_xlim(1e-8, 1e4)


# ---- bottom panel: phosphorylation rates -------------------------------------
ax = ax2

activities = ['kinase2_wt', 'kinase2_dead', 'bg_kinase2dephospho_rate', 'bg_kinase2phospho_rate', 'pptase_wt', 'pptase_dead',
               'kinase1_wt', 'kinase_wt', '419', '375', 'kinase_dead', 'bg_phospho_rate']

labels = [name for name in activities if name in df_params.index.values]

vals = 10**df_params.loc[labels, 'val']

rate_lower = 10**df_params.loc[labels, 'val_min']
rate_upper = 10**df_params.loc[labels, 'val_max']
y_low = vals-rate_lower
y_high = rate_upper-vals

ax.barh(labels, vals, xerr=(y_low, y_high))

ax.set_ylabel("Variant")
ax.set_xlabel("Phospho Rate")
ax.set_xscale('log')
ax.set_xlim(1e-8, 1e2)


plt.tight_layout()

plt.show()

In [None]:
# Load everything the plots below need for the selected `label`:
#   df_data          - contents of model_predictions.csv (first two columns as index)
#   df_dataset_key   - one row per experiment, indexed by exp_name
#   df_anti2GFP_key  - contents of MOCU_key.csv, indexed by component; gives the
#                      file and column names of the control measurements
#   *_anti_vals / *_GFP_vals - strictly positive control measurements

# Regex separator that also swallows whitespace around each comma. It is written
# as a raw string so that `\s` is not treated as a (invalid) string escape, and
# it requires the python parsing engine, which is therefore requested explicitly
# for every read that uses it.
csv_sep = r'\s*,\s*'


def _positive_column_values(df, column):
    """Return the strictly positive entries of `df[column]` as a NumPy array."""
    values = df[column].values
    return values[values > 0.0]


df_data = pd.read_csv("../data/"+label+"/model_predictions.csv", sep=csv_sep, engine='python', index_col=[0, 1])
display(df_data)

df_dataset_key = pd.read_csv("../data/"+label+"/dataset_key.csv", sep=csv_sep, engine='python').set_index("exp_name")
# df_dataset_key = df_dataset_key.query("model!='pushpull'")
display(df_dataset_key)

df_anti2GFP_key = pd.read_csv("../data/"+label+"/MOCU_key.csv", sep=csv_sep, engine='python').set_index("component")
display(df_anti2GFP_key)

# 'empty_phospho' control
df_phospho_empty = pd.read_csv("../data/{}/{}.csv".format(label, df_anti2GFP_key.loc['empty_phospho', 'file_name']))
empty_anti_vals = _positive_column_values(df_phospho_empty, df_anti2GFP_key.loc['empty_phospho', 'anti_col_name'])
empty_GFP_vals = _positive_column_values(df_phospho_empty, df_anti2GFP_key.loc['empty_phospho', 'GFP_col_name'])

# 'phospho' control (labelled "activated"/"active" control in the plots)
df_phospho_nonempty = pd.read_csv("../data/{}/{}.csv".format(label, df_anti2GFP_key.loc['phospho', 'file_name']))
nonempty_anti_vals = _positive_column_values(df_phospho_nonempty, df_anti2GFP_key.loc['phospho', 'anti_col_name'])
nonempty_GFP_vals = _positive_column_values(df_phospho_nonempty, df_anti2GFP_key.loc['phospho', 'GFP_col_name'])


In [None]:

# Measured vs. model-predicted phospho antibody distributions, one panel per
# dataset listed in `ax_dict`, with both control populations drawn for
# reference. The two-sample KS statistic between measurement and prediction is
# written on each panel and stored in df_dataset_key['KS_stat'].

# KS distance between the two control populations, printed for reference
(ks, pval) = stats.ks_2samp(nonempty_anti_vals, empty_anti_vals)
print("KS for empty and activated controls:", ks)

binrange = (0, 6)

# settings shared by every histogram, and the extras for the unfilled outlines
hist_kwargs = dict(binrange=binrange, log_scale=True, bins=64, stat='density')
outline_kwargs = dict(element='step', fill=False)

fig = plt.figure(figsize=(30, 12))

# which data set to put on each axis
ax_dict = {'substrate_only': 1, 'non-pplatable': 2, 'kinase_dead': 3, 'kinase_variant_375': 4, 'kinase_variant_419': 5,
          'kinase_non-binding': 6, 'kinase_zipper_E+A': 7, 'kinase_zipper_E+E': 8, 'kinase_zipper_L+A': 9, 
           'pptase_dead': 11, 'pptase': 12, 'pptase_non-binding': 13}

for exp_name in df_dataset_key.index:

    # datasets without an assigned panel are not shown in this figure
    if exp_name not in ax_dict:
        continue

    df_tmp = df_data.query("exp_name==@exp_name").dropna()
    measured = df_tmp['phospho_anti_exp']
    predicted = df_tmp['phospho_anti_predict']

    ax = fig.add_subplot(3, 5, ax_dict[exp_name])
    ax.set_title(exp_name)

    sns.histplot(measured, ax=ax, label=exp_name, color='k', **hist_kwargs, **outline_kwargs)
    sns.histplot(predicted, ax=ax, label='predicted', color='r', **hist_kwargs, **outline_kwargs)
    sns.histplot(nonempty_anti_vals, ax=ax, label='activated control', color='b', **hist_kwargs)
    sns.histplot(empty_anti_vals, ax=ax, label='empty control', color='g', **hist_kwargs)

    ax.set_xlabel("Phospho Antibody")
    ax.legend(loc='upper right', fontsize='xx-small')

    # agreement between measurement and prediction for this dataset
    (ks, pval) = stats.ks_2samp(measured, predicted)

    ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')

    df_dataset_key.loc[exp_name, 'KS_stat'] = ks

plt.tight_layout()

plt.show()


In [None]:

# Inferred vs. model-predicted phospho distributions in GFP units
# ('phospho_GFP_infer' vs. 'phospho_GFP_noisy_predict'), one panel per dataset
# listed in `ax_dict`, with the GFP values of both controls drawn for reference.
# The two-sample KS statistic between the inferred and predicted values is
# written on each panel and stored in df_dataset_key['KS_stat'].

# KS distance between the two control populations. The GFP values are used
# because those are the control distributions drawn in this figure.
(ks, pval) = stats.ks_2samp(nonempty_GFP_vals, empty_GFP_vals)
print("KS for empty and activated controls:", ks)

binrange = (0, 6)

# settings shared by every histogram, and the extras for the unfilled outlines
hist_kwargs = dict(binrange=binrange, log_scale=True, bins=64, stat='density')
outline_kwargs = dict(element='step', fill=False)

fig = plt.figure(figsize=(30, 12))

# which data set to put on each axis
ax_dict = {'substrate_only': 1, 'non-pplatable': 2, 'kinase_dead': 3, 'kinase_variant_375': 4, 'kinase_variant_419': 5,
          'kinase_non-binding': 6, 'kinase_zipper_E+A': 7, 'kinase_zipper_E+E': 8, 'kinase_zipper_L+A': 9, 
           'pptase_dead': 11, 'pptase': 12, 'pptase_non-binding': 13}

for exp_name, row in df_dataset_key.iterrows():

    # datasets without an assigned panel are not shown in this figure
    if exp_name not in ax_dict:
        continue

    df_tmp = df_data.query("exp_name==@exp_name").dropna()
    inferred = df_tmp['phospho_GFP_infer']
    predicted = df_tmp['phospho_GFP_noisy_predict']

    ax = fig.add_subplot(3, 5, ax_dict[exp_name])
    ax.set_title(exp_name)

    sns.histplot(inferred, ax=ax, label=exp_name, color='k', **hist_kwargs, **outline_kwargs)
    sns.histplot(predicted, ax=ax, label='predicted', color='r', **hist_kwargs, **outline_kwargs)
    sns.histplot(nonempty_GFP_vals, ax=ax, label='activated control', color='b', **hist_kwargs)
    sns.histplot(empty_GFP_vals, ax=ax, label='empty control', color='g', **hist_kwargs)

    # the plotted columns are the *_GFP_* ones, so label the axis accordingly
    ax.set_xlabel("Phospho (GFP units)")
    ax.legend(loc='upper right', fontsize='xx-small')

    # agreement between inferred and predicted values for this dataset
    (ks, pval) = stats.ks_2samp(inferred, predicted)

    ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')

    df_dataset_key.loc[exp_name, 'KS_stat'] = ks

plt.tight_layout()

plt.show()


In [None]:

# Measured vs. model-predicted antibody distributions for the datasets listed
# in `ax_dict`. Each dataset gets two neighbouring panels: kinase2 phospho on
# the left and substrate phospho on the right. The KS statistic of every panel
# is written on it; the phospho KS is stored in df_dataset_key['KS_stat'] and
# the kinase2 KS in df_dataset_key['kinase2_KS_stat'].

# KS distance between the two control populations, printed for reference
(ks, pval) = stats.ks_2samp(nonempty_anti_vals, empty_anti_vals)
print("KS for empty and activated controls:", ks)

binrange = (0, 6)


def _plot_anti_panel(ax, df, exp_col, predict_col, title, xlabel):
    """Draw one measured-vs-predicted histogram panel with the antibody controls.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df : DataFrame holding the columns `exp_col` and `predict_col`.
    exp_col, predict_col : names of the measured and predicted columns.
    title : panel title; also used as the legend label of the measured data.
    xlabel : x-axis label.

    Returns
    -------
    The two-sample KS statistic between `df[exp_col]` and `df[predict_col]`.
    """
    shared = dict(binrange=binrange, log_scale=True, bins=64, ax=ax, stat='density')

    ax.set_title(title)

    sns.histplot(df[exp_col], label=title, element='step', fill=False, color='k', **shared)
    sns.histplot(df[predict_col], label='predicted', element='step', fill=False, color='r', **shared)
    sns.histplot(nonempty_anti_vals, label='activated control', color='b', **shared)
    sns.histplot(empty_anti_vals, label='empty control', color='g', **shared)

    ax.set_xlabel(xlabel)
    ax.legend(loc='upper right', fontsize='xx-small')

    ks_stat, _ = stats.ks_2samp(df[exp_col], df[predict_col])
    ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks_stat), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')

    return ks_stat


fig = plt.figure(figsize=(24, 8))

# which data set to put on each axis
ax_dict = {'no WT': 1, 'nb SH2': 2, 'WT2 dead': 3, 'full': 4}

for exp_name, row in df_dataset_key.iterrows():

    # datasets without an assigned panel are not shown in this figure
    if exp_name not in ax_dict:
        continue

    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    # index of the left panel of this dataset's pair in the 2x4 grid
    first_panel = 2*(ax_dict[exp_name]-1)+1

    ks_phospho = _plot_anti_panel(fig.add_subplot(2, 4, first_panel+1), df_tmp,
                                  'phospho_anti_exp', 'phospho_anti_predict',
                                  exp_name, "Phospho Antibody")

    ks_kinase2 = _plot_anti_panel(fig.add_subplot(2, 4, first_panel), df_tmp,
                                  'kinase2phospho_anti_exp', 'kinase2phospho_anti_predict',
                                  exp_name, "Kinase2 Phospho Antibody")

    # Keep the two statistics apart. Reusing one variable for both meant the
    # kinase2 value silently replaced the phospho value in 'KS_stat', whereas
    # the other histogram cells store the phospho statistic in that column.
    df_dataset_key.loc[exp_name, 'KS_stat'] = ks_phospho
    df_dataset_key.loc[exp_name, 'kinase2_KS_stat'] = ks_kinase2

plt.tight_layout()

plt.show()


In [None]:

# Inferred vs. model-predicted distributions in GFP units for the datasets
# listed in `ax_dict`. Each dataset gets two neighbouring panels: kinase2
# phospho on the left and substrate phospho on the right. The KS statistic of
# every panel is written on it; the phospho KS is stored in
# df_dataset_key['KS_stat'] and the kinase2 KS in
# df_dataset_key['kinase2_KS_stat'].

# KS distance between the two control populations. The GFP values are used
# because those are the control distributions drawn in this figure.
(ks, pval) = stats.ks_2samp(nonempty_GFP_vals, empty_GFP_vals)
print("KS for empty and activated controls:", ks)

binrange = (0, 6)


def _plot_gfp_panel(ax, df, infer_col, predict_col, title, xlabel):
    """Draw one inferred-vs-predicted histogram panel with the GFP controls.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df : DataFrame holding the columns `infer_col` and `predict_col`.
    infer_col, predict_col : names of the inferred and predicted columns.
    title : panel title; also used as the legend label of the inferred data.
    xlabel : x-axis label.

    Returns
    -------
    The two-sample KS statistic between `df[infer_col]` and `df[predict_col]`.
    """
    shared = dict(binrange=binrange, log_scale=True, bins=64, ax=ax, stat='density')

    ax.set_title(title)

    sns.histplot(df[infer_col], label=title, element='step', fill=False, color='k', **shared)
    sns.histplot(df[predict_col], label='predicted', element='step', fill=False, color='r', **shared)
    sns.histplot(nonempty_GFP_vals, label='activated control', color='b', **shared)
    sns.histplot(empty_GFP_vals, label='empty control', color='g', **shared)

    ax.set_xlabel(xlabel)
    ax.legend(loc='upper right', fontsize='xx-small')

    ks_stat, _ = stats.ks_2samp(df[infer_col], df[predict_col])
    ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks_stat), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')

    return ks_stat


fig = plt.figure(figsize=(24, 8))

# which data set to put on each axis
ax_dict = {'no WT': 1, 'nb SH2': 2, 'WT2 dead': 3, 'full': 4}

for exp_name, row in df_dataset_key.iterrows():

    # datasets without an assigned panel are not shown in this figure
    if exp_name not in ax_dict:
        continue

    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    # index of the left panel of this dataset's pair in the 2x4 grid
    first_panel = 2*(ax_dict[exp_name]-1)+1

    # the plotted columns are the *_GFP_* ones, so the axes are labelled in GFP units
    ks_phospho = _plot_gfp_panel(fig.add_subplot(2, 4, first_panel+1), df_tmp,
                                 'phospho_GFP_infer', 'phospho_GFP_predict',
                                 exp_name, "Phospho (GFP units)")

    ks_kinase2 = _plot_gfp_panel(fig.add_subplot(2, 4, first_panel), df_tmp,
                                 'kinase2phospho_GFP_infer', 'kinase2phospho_GFP_predict',
                                 exp_name, "Kinase2 Phospho (GFP units)")

    # Keep the two statistics apart. Reusing one variable for both meant the
    # kinase2 value silently replaced the phospho value in 'KS_stat', whereas
    # the other histogram cells store the phospho statistic in that column.
    df_dataset_key.loc[exp_name, 'KS_stat'] = ks_phospho
    df_dataset_key.loc[exp_name, 'kinase2_KS_stat'] = ks_kinase2

plt.tight_layout()

plt.show()


In [None]:
# Write the dataset key, including the KS statistics added by the histogram
# cells, back to the dataset_key.csv it was loaded from (the file is overwritten).
df_dataset_key.to_csv("../data/{}/dataset_key.csv".format(label), sep=',')

In [None]:

# One figure per dataset: phospho distributions in antibody units (left) and in
# GFP units (right). Each panel shows both controls as filled histograms and
# the measured/inferred and model-predicted values as outlines.

binrange = (0, 6)

# settings shared by every histogram, and the extras for the unfilled outlines
hist_kwargs = dict(binrange=binrange, log_scale=True, bins=64, stat='density')
outline_kwargs = dict(element='step', fill=False)

for exp_name in df_dataset_key.index:

    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    fig, axes = plt.subplots(1, 2, figsize=(12, 4), squeeze=False)

    fig.suptitle(exp_name)

    # ---- antibody units ----
    ax = axes[0, 0]

    sns.histplot(empty_anti_vals, ax=ax, label='empty control', color='g', **hist_kwargs)
    sns.histplot(nonempty_anti_vals, ax=ax, label='active control', color='r', **hist_kwargs)

    # Read the columns straight from df_tmp, as the GFP panel does, instead of
    # looking the filtered index up in df_data again; the lookup could return
    # extra rows if the index contained duplicate entries.
    sns.histplot(df_tmp['phospho_anti_exp'], ax=ax, label='experiment', color='k',
                 **hist_kwargs, **outline_kwargs)
    sns.histplot(df_tmp['phospho_anti_predict'], ax=ax, label='model', color='b',
                 **hist_kwargs, **outline_kwargs)

    ax.set_xlabel("Antibody")
    ax.legend(loc='upper right', fontsize='xx-small')

    # ---- GFP units ----
    ax = axes[0, 1]

    sns.histplot(empty_GFP_vals, ax=ax, label='empty control', color='g', **hist_kwargs)
    sns.histplot(nonempty_GFP_vals, ax=ax, label='active control', color='r', **hist_kwargs)

    sns.histplot(df_tmp["phospho_GFP_infer"], ax=ax, label='inferred', color='k',
                 **hist_kwargs, **outline_kwargs)
    sns.histplot(df_tmp["phospho_GFP_predict"], ax=ax, label='predicted', color='b',
                 **hist_kwargs, **outline_kwargs)

    ax.set_xlabel("GFP")
    ax.legend(loc='upper right', fontsize='xx-small')

    plt.tight_layout()
    plt.show()

    # one figure is created per dataset; release it once it has been displayed
    plt.close(fig)
        
                

    

In [None]:
# Per-dataset 2x2 figures comparing experiment (left column) with the model
# prediction (right column): activation curves on the top row, sensitivity
# curves on the bottom row. The predicted activation panel is annotated with
# the dataset's share of the total loss and its stored KS statistic.

# Sum of squared log10 differences between predicted and inferred phospho
# (GFP units) over all datasets; used to normalize the per-dataset loss.
total_loss = np.sum((np.log10(df_data['phospho_GFP_predict'])-np.log10(df_data['phospho_GFP_infer']))**2)

print("total loss:", total_loss)

# Bin edges passed to every fplot call below, so that the experiment and
# prediction panels always use the same setting.
# ST_bin_edges = [1e2, 10**2.5, 1e3, 10**3.5, 1e4]
ST_bin_edges = None

for exp_name, row in df_dataset_key.iterrows():

    df_tmp = df_data.query("exp_name==@exp_name")

    fig, axes = plt.subplots(2, 2, constrained_layout=True, figsize=(8, 10), squeeze=False)

    fig.suptitle(exp_name)

    # ---- activation curves: experiment ----
    ax = axes[0, 0]

    fplot.plot_activation_curves(df_tmp, fig, ax, 'kinase_anti_exp', 'phospho_anti_exp', 'substrate_anti_exp', 
                             nSTbins=4, ST_bin_edges=ST_bin_edges, xlim=(1e1, 1e5), ylim=(1e1, 1e4), error_bands=True, use_median=True, error_band_range=(0.5, 0.95))

    # ---- activation curves: prediction ----
    ax = axes[0, 1]

    fplot.plot_activation_curves(df_tmp, fig, ax, 'kinase_anti_exp', 'phospho_anti_predict', 'substrate_anti_exp', 
                             nSTbins=4, ST_bin_edges=ST_bin_edges, xlim=(1e1, 1e5), ylim=(1e1, 1e4), error_bands=True, use_median=True, error_band_range=(0.5, 0.95))

    # df_tmp is already restricted to this dataset, so no further filtering is needed
    loss = np.sum((np.log10(df_tmp['phospho_GFP_predict'])-np.log10(df_tmp['phospho_GFP_infer']))**2)
    loss /= total_loss

    # the KS_stat column only exists once the histogram cells have been run;
    # fall back to NaN instead of raising a KeyError
    KS = row.get('KS_stat', np.nan)

    ax.text(0.95, 0.95, "loss: {0:.2f}\nKS-stat: {1:0.2f}".format(loss, KS), transform=ax.transAxes, ha='right', va='top', fontsize='x-small', ma='left')

    # ---- sensitivity curves: experiment ----
    ax = axes[1, 0]

    fplot.plot_sensitivity_curves(df_tmp, fig, ax, 'kinase_anti_exp', 'phospho_anti_exp', 'substrate_anti_exp', 
                             nSTbins=4, ST_bin_edges=ST_bin_edges, xlim=(1e1, 1e5), ylim=(-2, 2), use_median=True, ylog_scale=True)

    # ---- sensitivity curves: prediction ----
    ax = axes[1, 1]

    fplot.plot_sensitivity_curves(df_tmp, fig, ax, 'kinase_anti_exp', 'phospho_anti_predict', 'substrate_anti_exp', 
                             nSTbins=4, ST_bin_edges=ST_bin_edges, xlim=(1e1, 1e5), ylim=(-2, 2), use_median=True, ylog_scale=True)

    plt.show()

    # one figure is created per dataset; release it once it has been displayed
    plt.close(fig)
    

    