In [None]:
%load_ext autoreload
%autoreload 2

# %config InlineBackend.figure_formats = ['svg']

from IPython.display import display, Markdown

import sys, os
sys.path.insert(0, '../py_scripts')

import numpy as np
import scipy as sp
import pandas as pd
import numpy.random as rand
import scipy.stats as stats


import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import time
import glob
import pickle

import noise_models as noise
import thermo_models as thermo

import fig_plot as fplot
import model_fitting as fit

# Global plotting theme for every figure in this notebook.
sns.set(
    context='talk',
    style='ticks',
    palette='deep',
    font_scale=1.0,
    color_codes=True,
    rc={
        # Computer Modern for math text, rendered by matplotlib itself (no external LaTeX).
        'mathtext.fontset': 'cm',
        'text.usetex': False,
        # Ticks point into the axes on both axes.
        'xtick.direction': 'in',
        'ytick.direction': 'in',
        'axes.linewidth': 1.5,
        'figure.dpi': 100,
    },
)

In [None]:

# Data set to analyse: `label` names the sub-directory of ../data that every later
# cell reads from. Keep exactly one assignment active.
label = "220520_pushpull"
# label = "220520_pushpull_newnoise"

# label = '220810_secondlayer'
# label = '220810_secondlayer_oldnoise'
# label = '220921_secondlayer'
# label = '221123_secondlayer'
# label = '230129_secondlayer'


# Model parameters, indexed by the parameter name.
params_path = "../data/{}/model_params.csv".format(label)
df_params = pd.read_csv(params_path, sep=',', engine='python', index_col='name')

display(df_params)

In [None]:
# Horizontal bar charts of the parameters in df_params, split over two stacked panels:
# the parameters named in `affinities` (top) and every other parameter (bottom).
fig = plt.figure(figsize=(8, 8))
ax1, ax2 = (fig.add_subplot(2, 1, position) for position in (1, 2))


# ---------------------------------------------------------------- top panel
ax = ax1

affinities = ['SH2', 'SH2_mono', 'nb_SH2', 'SH2_nb', 'L+A', 'E+A', 'E+E', 'pptase_non-binding', 'kinase_non-binding']

# Only the affinity parameters present in this parameter table, in the order listed above.
labels = [name for name in affinities if name in df_params.index.values]

# The columns are used as base-10 exponents; this panel plots the reciprocal 1/10**val
# (presumably val is a log10 dissociation constant -- TODO confirm).
vals = 1.0 / 10**df_params.loc[labels, 'val']

# Taking the reciprocal swaps the roles of the bounds: val_max gives the lower end of
# the bar's error range and val_min the upper end.
reciprocal_of_max = 1.0/10**df_params.loc[labels, 'val_max']
reciprocal_of_min = 1.0/10**df_params.loc[labels, 'val_min']
y_low = vals-reciprocal_of_max
y_high = reciprocal_of_min-vals

ax.barh(labels, vals, xerr=(y_low, y_high))

ax.set_xscale('log')
ax.set_ylabel("Zipper")
ax.set_xlabel("Binding Strength\n[Concentration$^{-1}$]")

ax.set_xlim(1e-8, 1e4)


# ------------------------------------------------------------- bottom panel
ax = ax2

# activities = ['kinase2_wt', 'kinase2_dead', 'kin2_bg_phospho_rate', 'pptase1_wt', 'pptase_wt', 'pptase_weak', 'pptase_dead',
#                'kinase1_wt', 'kinase_wt', '419', '375', 'kinase_dead', 'sub_bg_phospho_rate', 'sub2_bg_phospho_rate', 'bg_flouresce']

# labels = [x for x in activities if x in df_params.index.values]

# Everything that is not an affinity, in the order it appears in the parameter table.
labels = [name for name in df_params.index.values if name not in affinities]

# Here 10**val is plotted directly, so val_min / val_max bound the bar from below / above.
vals = 10**df_params.loc[labels, 'val']

power_of_min = 10**df_params.loc[labels, 'val_min']
power_of_max = 10**df_params.loc[labels, 'val_max']
y_low = vals-power_of_min
y_high = power_of_max-vals

ax.barh(labels, vals, xerr=(y_low, y_high))

ax.set_xscale('log')
ax.set_ylabel("Variant")
ax.set_xlabel("Phospho Rate")

ax.set_xlim(1e-8, 1e4)


plt.tight_layout()

plt.show()

In [None]:
# Field separator for the hand-edited CSV files: a comma with optional whitespace on
# either side. It must be a raw string: in a plain literal "\s" is an invalid escape
# sequence, which Python reports with a warning and has announced will become an error.
# The regex text handed to pandas is unchanged.
CSV_SEP = r'\s*,\s*'


def _load_positive_control(component):
    """Load one antibody control listed in ``df_anti2GFP_key``.

    Parameters
    ----------
    component : str
        Row label in ``df_anti2GFP_key``; its ``file_name`` and ``anti_col_name``
        entries select the CSV file under ``../data/<label>/`` and the column to read.

    Returns
    -------
    tuple
        ``(frame, values)``: the full control table, and the selected column as an
        array restricted to strictly positive entries.

    Raises
    ------
    KeyError
        If ``component`` is not a row of ``df_anti2GFP_key``.
    """
    df_control = pd.read_csv("../data/{}/{}.csv".format(label, df_anti2GFP_key.loc[component, 'file_name']))
    anti_vals = df_control[df_anti2GFP_key.loc[component, 'anti_col_name']].values
    # Non-positive readings are dropped (presumably because these values are later
    # histogrammed on a log axis -- confirm).
    return df_control, anti_vals[anti_vals > 0.0]


# Model predictions; the first two columns form the row index.
df_data = pd.read_csv("../data/"+label+"/model_predictions.csv", sep=CSV_SEP, index_col=[0, 1], engine='python')
display(df_data)

# One row per experiment, indexed by experiment name.
df_dataset_key = pd.read_csv("../data/"+label+"/dataset_key.csv", sep=CSV_SEP, engine='python').set_index("exp_name")
display(df_dataset_key)

# Lookup table from control component to the file / column holding its measurements.
df_anti2GFP_key = pd.read_csv("../data/"+label+"/MOCU_key.csv", sep=CSV_SEP, engine='python').set_index("component")
display(df_anti2GFP_key)

# Empty and activated controls for the substrate phospho antibody.
df_phospho_empty, empty_anti_vals = _load_positive_control('empty_phospho')
df_phospho_nonempty, nonempty_anti_vals = _load_positive_control('phospho')

# Same pair of controls for the kinase2 phospho antibody.
# NOTE(review): these two lookups raise KeyError when the MOCU key has no kinase2
# rows -- confirm that every data set selectable through `label` defines them.
df_kinase2_phospho_empty, kinase2_empty_anti_vals = _load_positive_control('empty_kinase2_phospho')
df_kinase2_phospho_nonempty, kinase2_nonempty_anti_vals = _load_positive_control('kinase2_phospho')


In [None]:

# Reference value: KS distance between the activated and the empty control.
(ks, pval) = stats.ks_2samp(nonempty_anti_vals, empty_anti_vals)
print("KS for empty and activated controls:", ks)

binrange = (0, 6)
density_max = 2.0  # common upper y limit of every panel

fig = plt.figure(figsize=(24, 16))

# which data set to put on each axis (position in the 4x4 grid, counted from 1)
ax_dict = {name: position for position, name in enumerate(
    ['substrate_only', 'non-pplatable', 'kinase_dead', 'kinase_variant_375', 'kinase_variant_419',
     'kinase_non-binding', 'kinase_zipper_E+A', 'kinase_zipper_E+E', 'kinase_zipper_L+A', 'L3', "L7",
     'pptase_dead', 'pptase_non-binding', 'pptase_weak', 'pptase_low', 'pptase'],
    start=1)}

# Settings common to all histograms, and the extra ones for the unfilled outlines
# used for the measured / predicted distributions.
hist_kwargs = dict(binrange=binrange, log_scale=True, bins=64, stat='density')
outline_kwargs = dict(element='step', fill=False)

for exp_name in df_dataset_key.index:

    # Experiments without an assigned grid position are not drawn.
    if exp_name not in ax_dict:
        continue

    df_tmp = df_data.query("exp_name==@exp_name")

    ax = fig.add_subplot(4, 4, ax_dict[exp_name])
    ax.set_title(exp_name)

    # (values, legend label, colour, extra histplot options), drawn in this order
    layers = [
        (df_tmp['phospho_anti_exp'], exp_name, 'k', outline_kwargs),
        (df_tmp['phospho_anti_predict'], 'predicted', 'r', outline_kwargs),
        (nonempty_anti_vals, 'activated control', 'b', {}),
        (empty_anti_vals, 'empty control', 'g', {}),
    ]
    for values, legend_label, color, extra_kwargs in layers:
        sns.histplot(values, ax=ax, label=legend_label, color=color, **hist_kwargs, **extra_kwargs)

    # Dashed vertical line at the mean of the empty control.
    ax.vlines(empty_anti_vals.mean(), ymin=0, ymax=density_max, color='k', ls='--')

    ax.set_xlabel("Phospho Antibody")
    ax.set_ylim(0, density_max)
    ax.legend(loc='upper right', fontsize='xx-small')

#     ax.set_yscale('log')

    # KS distance between measurement and prediction: annotated on the panel and
    # stored in the dataset key.
    (ks, pval) = stats.ks_2samp(df_tmp['phospho_anti_exp'], df_tmp['phospho_anti_predict'])
    ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')
    df_dataset_key.loc[exp_name, 'KS_stat'] = ks

plt.tight_layout()

plt.show()


In [None]:

# Reference value: KS distance between the activated and the empty control.
(ks, pval) = stats.ks_2samp(nonempty_anti_vals, empty_anti_vals)
print("KS for empty and activated controls:", ks)

binrange = (0, 6)


# which data set to put on each axis
# ax_dict = {'no WT1': 1, 'nb SH2': 2, 'WT2 dead': 3, 'two_layer': 4, 'mono': 5, 'no pptase': 6, 'med pptase': 7, 'high pptase': 8}

ax_dict = {name: slot for slot, name in enumerate(df_dataset_key.index.values, start=1)}

# Experiments for which no prediction is drawn and no KS statistic is computed.
# exclude = {'mono', '2_noWT1', '3_noWT1', '4_noWT1', 'pptase_weak', 'pptase_strong',
#                                            'WT1_weak_pptase_weak_high_pptase', 'WT1_weak_pptase_strong_high_pptase'}

exclude = {'mono'}

# Four columns wide: each experiment takes two neighbouring panels, two experiments per row.
n_rows = (len(ax_dict)+1)//2
fig = plt.figure(figsize=(4*6, n_rows * 4))

# Settings common to all histograms, and the extra ones for the unfilled outlines
# used for the measured / predicted distributions.
hist_kwargs = dict(binrange=binrange, log_scale=True, bins=64, stat='density')
outline_kwargs = dict(element='step', fill=False)

# The two panels of one experiment, in the order they are created: the phospho panel
# goes into the right-hand slot of the pair (offset 2), the kinase2 panel into the
# left-hand slot (offset 1).
panels = [
    dict(exp_col='phospho_anti_exp', predict_col='phospho_anti_predict',
         activated=nonempty_anti_vals, empty=empty_anti_vals,
         xlabel="Phospho Antibody", ks_col='phospho_KS_stat', offset=2),
    dict(exp_col='kinase2_phospho_anti_exp', predict_col='kinase2_phospho_anti_predict',
         activated=kinase2_nonempty_anti_vals, empty=kinase2_empty_anti_vals,
         xlabel="Kinase2 Phospho Antibody", ks_col='kinase2_phospho_KS_stat', offset=1),
]

for exp_name in df_dataset_key.index:

    if exp_name not in ax_dict:
        continue

    # Rows with any missing value are discarded before plotting and testing.
    df_tmp = df_data.query("exp_name==@exp_name").dropna()

    with_prediction = exp_name not in exclude
    pair_start = 2*(ax_dict[exp_name]-1)

    for panel in panels:

        ax = fig.add_subplot(n_rows, 4, pair_start+panel['offset'])
        ax.set_title(exp_name)

        measured = df_tmp[panel['exp_col']]
        sns.histplot(measured, ax=ax, label=exp_name, color='k', **hist_kwargs, **outline_kwargs)
        if with_prediction:
            sns.histplot(df_tmp[panel['predict_col']], ax=ax, label='predicted', color='r',
                         **hist_kwargs, **outline_kwargs)
        sns.histplot(panel['activated'], ax=ax, label='activated control', color='b', **hist_kwargs)
        sns.histplot(panel['empty'], ax=ax, label='empty control', color='g', **hist_kwargs)

        ax.set_xlabel(panel['xlabel'])
        ax.legend(loc='upper right', fontsize='xx-small')

        if with_prediction:
            # KS distance between measurement and prediction: stored in the dataset
            # key and annotated on the panel.
            (ks, pval) = stats.ks_2samp(measured, df_tmp[panel['predict_col']])
            df_dataset_key.loc[exp_name, panel['ks_col']] = ks
            ax.text(0.05, 0.95, r"$D_{{KS}}={:.2f}$".format(ks), transform=ax.transAxes, fontsize='x-small', ha='left', va='top')

plt.tight_layout()

plt.savefig("../data/"+label+'/phospho_fits.pdf')

plt.show()


In [None]:
# Show the dataset key with whatever KS columns have been added to it so far ...
display(df_dataset_key)

# ... and write it to dataset_key.csv in this data set's directory, replacing any
# existing file at that path.
dataset_key_path = "../data/{}/dataset_key.csv".format(label)
df_dataset_key.to_csv(dataset_key_path, sep=',')

In [None]:
# One figure per experiment with two hexbin panels side by side: measured phospho
# signal on the left, predicted on the right.
# Options passed unchanged to both panels.
hex_kwargs = dict(logscale=True, nbins=20, xlim=(1e1, 1e5), ylim=(1e1, 1e5), show_diagonal=True)

for exp_name in df_dataset_key.index:

    df_tmp = df_data.query("exp_name==@exp_name")

    fig, axes = plt.subplots(1, 2, constrained_layout=True, figsize=(8, 5), squeeze=False)

    ##########################################################

    # The three column names are presumably x, y and the quantity averaged per
    # hexagon -- confirm against fig_plot.plot_2d_avg_hex.
    for ax, phospho_col in zip(axes[0], ('phospho_anti_exp', 'phospho_anti_predict')):
        fplot.plot_2d_avg_hex(df_tmp, fig, ax, 'substrate_anti_exp', phospho_col, 'kinase_anti_exp', **hex_kwargs)

    fig.suptitle(exp_name)

    plt.show()

In [None]:

# Per-experiment 2D histograms on log-log axes. Every experiment gets a three-panel
# figure for the substrate; experiments whose model is one of the two-layer variants
# get a second three-panel figure for kinase2.
two_layer_models = ('two_layer', 'two_layer_mono', 'two_layer_nowriter', 'two_layer_noeraser')

# Options shared by every 2D histogram below.
hist2d_kwargs = dict(log_scale=(True, True), bins=100)

for exp_name, row in df_dataset_key.iterrows():

#     if exp_name != "kinase_zipper_L+A":
#         continue

    df_tmp = df_data.query("exp_name==@exp_name")

    # ------------------------------------------------------------ substrate
    fig, axes = plt.subplots(1,3, constrained_layout=True, figsize=(12, 5), squeeze=False)
    ax_anti, ax_infer, ax_predict = axes[0]

    # Left: measured antibody signals, with the identity line.
    sns.histplot(df_tmp, x='substrate_anti_exp', y='phospho_anti_exp', ax=ax_anti,
                 binrange=[[0, 6], [0, 6]], **hist2d_kwargs)
    ax_anti.plot([1e0, 1e6], [1e0, 1e6], 'k--')
    ax_anti.set_xlim(1e0, 1e6)
    ax_anti.set_ylim(1e0, 1e6)

    # Middle: "infer" columns, then the "denoise" columns drawn over them in green.
    sns.histplot(df_tmp, x='substrate_GFP_infer', y='phospho_GFP_infer', ax=ax_infer,
                 binrange=[[2, 6], [2, 6]], **hist2d_kwargs)
    ax_infer.plot([1e2, 1e6], [1e2, 1e6], 'k--')
    sns.histplot(df_tmp, x='substrate_GFP_denoise', y='phospho_GFP_denoise', ax=ax_infer, color='g',
                 binrange=[[2, 6], [2, 6]], **hist2d_kwargs)

#     ax.plot([1e0, 1e6], [1/3*1e0, 1/3*1e6], 'r--')
    ax_infer.set_xlim(1e2, 1e6)
    ax_infer.set_ylim(1e2, 1e6)

    # Right: "predict" column against the "infer" substrate column.
    ax_predict.plot([1e2, 1e6], [1e2, 1e6], 'k--')
    sns.histplot(df_tmp, x='substrate_GFP_infer', y='phospho_GFP_predict', ax=ax_predict, color='g',
                 binrange=[[2, 6], [2, 6]], **hist2d_kwargs)
    ax_predict.set_xlim(1e2, 1e6)
    ax_predict.set_ylim(1e2, 1e6)

    fig.suptitle(exp_name)

    plt.show()

    if row['model'] not in two_layer_models:
        continue

    # -------------------------------------------------------------- kinase2
    # Same layout as above; each panel additionally gets a red dashed y = 3x line.
    fig, axes = plt.subplots(1,3, constrained_layout=True, figsize=(12, 5), squeeze=False)
    ax_anti, ax_infer, ax_predict = axes[0]

    sns.histplot(df_tmp, x='kinase2_anti_exp', y='kinase2_phospho_anti_exp', ax=ax_anti,
                 binrange=[[0, 6], [0, 6]], **hist2d_kwargs)
    ax_anti.plot([1e0, 1e6], [1e0, 1e6], 'k--')
    ax_anti.plot([1e0, 1e6], [3*1e0, 3*1e6], 'r--')
    ax_anti.set_xlim(1e0, 1e6)
    ax_anti.set_ylim(1e0, 1e6)

    sns.histplot(df_tmp, x='kinase2_GFP_infer', y='kinase2_phospho_GFP_infer', ax=ax_infer,
                 binrange=[[1, 6], [1, 6]], **hist2d_kwargs)
    ax_infer.plot([1e2, 1e6], [1e2, 1e6], 'k--')
    ax_infer.plot([1e0, 1e6], [3*1e0, 3*1e6], 'r--')
    sns.histplot(df_tmp, x='kinase2_GFP_denoise', y='kinase2_phospho_GFP_denoise', ax=ax_infer, color='g',
                 binrange=[[1, 6], [1, 6]], **hist2d_kwargs)
    ax_infer.set_xlim(1e2, 1e6)
    ax_infer.set_ylim(1e2, 1e6)

    ax_predict.plot([1e2, 1e6], [1e2, 1e6], 'k--')
    ax_predict.plot([1e0, 1e6], [3*1e0, 3*1e6], 'r--')
    sns.histplot(df_tmp, x='kinase2_GFP_infer', y='kinase2_phospho_GFP_predict', ax=ax_predict, color='g',
                 binrange=[[1, 6], [1, 6]], **hist2d_kwargs)
    ax_predict.set_xlim(1e2, 1e6)
    ax_predict.set_ylim(1e2, 1e6)

    fig.suptitle(exp_name)

    plt.show()


    