In [None]:
%load_ext autoreload
%autoreload 2

# %config InlineBackend.figure_formats = ['svg']

from IPython.display import display, Markdown

import sys, os
sys.path.insert(0, '../py_scripts')

import numpy as np
import scipy as sp
import pandas as pd
import numpy.random as rand
import scipy.stats as stats


import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import time
import glob
import pickle

import push_pull as pp
import noise_models as noise
import thermo_models as thermo

import fig_plot as fplot
import model_fitting as fit

# Global plotting theme for the notebook: seaborn "talk" context with tick
# marks pointing inward, Computer Modern math text and no external LaTeX.
rc_overrides = {
    'mathtext.fontset': 'cm',
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'axes.linewidth': 1.5,
    'figure.dpi': 100,
    'text.usetex': False,
}

sns.set(
    context='talk',
    font_scale=1.0,
    color_codes=True,
    palette='deep',
    style='ticks',
    rc=rc_overrides,
)

This notebook takes the model parameters found from fitting, together with the antibody distributions they predict for the phosphorylated substrate, and compares those predictions with the measured distributions.

In [None]:
# Dataset to analyse; the label is the name of a sub-directory of ../data.
# Other labels that have been used:
#   "21_10_15_highgating"
#   "21_10_15_medhighgating"
label = "22_01_20"

# One row per experiment (indexed by exp_name) holding the fitted model parameters.
model_params_path = "../data/"+label+"/model_params.csv"
df_dataset_key = pd.read_csv(
    model_params_path,
    sep=',',
    engine='python',
    index_col='exp_name',
)

display(df_dataset_key)

In [None]:
def _plot_fitted_param(ax, df_key, model_name, label_col, value_col,
                       xlabel, ylabel, invert=False, renames=None):
    """Draw one log-scale bar panel of a fitted parameter.

    Selects the rows of ``df_key`` whose ``model`` column equals
    ``model_name``, keeps the first row for each distinct ``label_col``
    value, sorts the result by ``value_col`` and draws one bar per row.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    df_key : DataFrame holding ``model``, ``label_col`` and ``value_col``.
    model_name : value of the ``model`` column to select.
    label_col : column providing the bar labels (x axis).
    value_col : column providing the bar heights and the sort order.
    xlabel, ylabel : axis labels.
    invert : if True, plot ``1/value`` instead of ``value``.
    renames : optional ``{old: new}`` mapping applied to the bar labels only.

    Returns
    -------
    The filtered and sorted DataFrame that was plotted.
    """
    df_sub = (df_key.query("model==@model_name")
                    .drop_duplicates(label_col)
                    .sort_values(value_col))

    labels = df_sub[label_col]
    if renames:
        labels = labels.replace(renames)

    heights = 1/df_sub[value_col] if invert else df_sub[value_col]

    ax.bar(labels, heights)
    ax.set_yscale('log')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    return df_sub


fig = plt.figure(figsize=(12, 8))

# Top row: binding strengths (shared y axis); bottom row: catalytic rates (shared y axis).
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2, sharey=ax1)
ax3 = fig.add_subplot(2,2,3)
ax4 = fig.add_subplot(2,2,4, sharey=ax3)

binding_ylabel = "Binding Strength\n[Concentration$^{-1}$]"

# Binding strength is plotted as the reciprocal of the fitted binding affinity.
_plot_fitted_param(ax1, df_dataset_key, 'push', 'kinase_zipper', 'kinase_binding_affinity',
                   "Kinase Zipper", binding_ylabel, invert=True,
                   renames={"kinase_non-binding": 'kinase_nb'})

# The original cell only renamed "kinase_non-binding" here, which looks like a
# copy-paste from the kinase panel. That key is kept so existing output is
# unchanged, and the phosphatase spelling is added.
# NOTE(review): confirm which spelling the pptase_zipper column actually uses.
_plot_fitted_param(ax2, df_dataset_key, 'pushpull', 'pptase_zipper', 'pptase_binding_affinity',
                   "Pptase Zipper", binding_ylabel, invert=True,
                   renames={"pptase_non-binding": 'pptase_nb',
                            "kinase_non-binding": 'pptase_nb'})

_plot_fitted_param(ax3, df_dataset_key, 'push', 'kinase_variant', 'kinase_phospho_rate',
                   "Kinase Variant", "Phospho Rate")

_plot_fitted_param(ax4, df_dataset_key, 'pushpull', 'pptase_variant', 'pptase_dephospho_rate',
                   "Pptase Variant", "Dephospho Rate")

plt.tight_layout()

plt.show()

In [None]:
# Model predictions; the first two CSV columns form the row index.
predictions_path = "../data/"+label+"/model_predictions.csv"
df_data = pd.read_csv(predictions_path, sep=',', index_col=[0, 1])
display(df_data)

# Key tables, one row per "component", giving the file and antibody column to read.
# These files are delimited by a comma followed by a space, hence the python engine.
anti2GFP_path = "../data/"+label+"/anti_to_GFP_key.csv"
df_anti2GFP_key = pd.read_csv(anti2GFP_path, sep=', ', engine='python').set_index("component")
display(df_anti2GFP_key)

anti_noGFP2GFP_path = "../data/"+label+"/anti_noGFP_to_GFP_key.csv"
df_anti_noGFP2GFP_key = pd.read_csv(anti_noGFP2GFP_path, sep=', ', engine='python').set_index("component")
display(df_anti_noGFP2GFP_key)

# Empty control: antibody readings for the 'empty_phospho' component.
empty_file = df_anti2GFP_key.loc['empty_phospho', 'file_name']
empty_col = df_anti2GFP_key.loc['empty_phospho', 'anti_col_name']
df_phospho_empty = pd.read_csv("../data/{}/{}.csv".format(label, empty_file))
empty_raw = df_phospho_empty[empty_col].values
# Keep strictly positive readings only.
empty_vals = empty_raw[empty_raw > 0.0]

# Activated control: antibody readings for the 'phospho' component.
active_file = df_anti_noGFP2GFP_key.loc['phospho', 'file_name']
active_col = df_anti_noGFP2GFP_key.loc['phospho', 'anti_col_name']
df_phospho_active = pd.read_csv("../data/{}/{}.csv".format(label, active_file))
active_raw = df_phospho_active[active_col].values
# Keep strictly positive readings only.
active_vals = active_raw[active_raw > 0.0]


In [None]:

# KS statistic between the two control populations (activated vs. empty).
(ks, pval) = stats.ks_2samp(active_vals, empty_vals)

print("KS for empty and activated controls:", ks)

binrange = (0, 6)

# Histogram settings shared by every curve in every panel.
# NOTE(review): with log_scale=True, binrange is presumably in log10 units - confirm.
hist_kwargs = dict(binrange=binrange, log_scale=True, bins=64, stat='density')

fig = plt.figure(figsize=(30, 12))

# Position of each experiment in the 3x5 subplot grid.
# For datasets that also contain 'pptase_non-binding', add: 'pptase_non-binding': 13
ax_dict = {'substrate_only': 1, 'non-pplatable': 2, 'kinase_dead': 3, 'kinase_variant_375': 4, 'kinase_variant_419': 5,
          'kinase_non-binding': 6, 'kinase_zipper_E+A': 7, 'kinase_zipper_E+E': 8, 'kinase_zipper_L+A': 9, 
           'pptase_dead': 11, 'pptase': 12}

# Iterate over a snapshot of the index because df_dataset_key gains a column inside the loop.
for exp_name in list(df_dataset_key.index):

    # Rows of this experiment, dropping any row that has a missing value.
    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    ax = fig.add_subplot(3, 5, ax_dict[exp_name])

    ax.set_title(exp_name)

    # Measured (black) and predicted (red) antibody distributions as step outlines.
    sns.histplot(df_tmp['SpT_anti_exp'], ax=ax, label=exp_name,
                 element='step', fill=False, color='k', **hist_kwargs)
    sns.histplot(df_tmp['SpT_anti_predict'], ax=ax, label='predicted',
                 element='step', fill=False, color='r', **hist_kwargs)
    # Control distributions, repeated in every panel for reference.
    sns.histplot(active_vals, ax=ax, label='activated control', color='b', **hist_kwargs)
    sns.histplot(empty_vals, ax=ax, label='empty control', color='g', **hist_kwargs)

    ax.set_xlabel("Phospho Antibody")

    ax.legend(loc='upper right', fontsize='xx-small')

    # Per-experiment KS statistic between measured and predicted values.
    # The original cell reused the control-vs-control `ks` computed above, so every
    # panel and every KS_stat entry carried the same number.
    if df_tmp.empty:
        # ks_2samp cannot be evaluated on empty samples
        ks_exp = np.nan
    else:
        (ks_exp, pval_exp) = stats.ks_2samp(df_tmp['SpT_anti_exp'], df_tmp['SpT_anti_predict'])

    # Optional R^2 on log10 GFP-inferred values (disabled):
#     MSE = np.sum((np.log10(df_tmp['SpT_GFP_infer'])-np.log10(df_tmp['SpT_GFP_predict']))**2)
#     var = np.sum((np.log10(df_tmp['SpT_GFP_infer'])-np.log10(df_tmp['SpT_GFP_infer']).mean())**2)
#     R2 = 1 - MSE/var

    ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks_exp), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')

    # Record the statistic next to the fitted parameters of this experiment.
    df_dataset_key.loc[exp_name, 'KS_stat'] = ks_exp
#     df_dataset_key.loc[exp_name, 'R2_stat'] = R2

plt.tight_layout()

plt.show()


In [None]:
# Write the parameter table back to the file it was loaded from.
# This overwrites model_params.csv in place.
model_params_out = "../data/"+label+"/model_params.csv"
df_dataset_key.to_csv(model_params_out, sep=',')