In [None]:
%load_ext autoreload
%autoreload 2

from IPython.display import display, Markdown

import sys
sys.path.insert(0, '../py_scripts')

import numpy as np
import scipy as sp
import pandas as pd
import numpy.random as rand
import numpy.linalg as la
import numpy.ma as ma
import scipy.optimize as opt
import scipy.stats as stats

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import time

import push_pull as pp
import noise_models as noise

# Global figure styling for every plot in this notebook: seaborn "talk"
# context with the "ticks" style, inward-pointing tick marks, Computer Modern
# math text, thicker axes, and LaTeX text rendering switched off.
sns.set(context='talk', font_scale=1.0, color_codes=True, palette='deep', style='ticks', 
        rc={'mathtext.fontset': 'cm', 'xtick.direction': 'in','ytick.direction': 'in',
            'axes.linewidth': 1.5, 'figure.dpi':100, 'text.usetex':False})

In [None]:


# dataset, construct, model
# Each row names a CSV under ../data/push_data/ (the dataset), the construct
# whose parameters it uses, and the model used to fit it. Uncomment rows to
# include more datasets.
s_list = [
#     ['I+E', 'I+E', 'push'],
#     ['S+E', 'S+E', 'push'],
    ['S+R', 'S+R', 'push'],
#     ['RR(E) only', 'RR(E) only', 'push'],
#     ['RR+A', 'RR+A', 'push'],
#     ['Substrate only', 'Substrate only', 'background']
         ]

df_info = pd.DataFrame(s_list, columns=['dataset', 'construct', 'model'])

display(df_info)

# Read every selected dataset and tag its rows with the dataset name.
df_list = []
for index, row in df_info.iterrows():
    df = pd.read_csv("../data/push_data/{}.csv".format(row['dataset']))
    df['dataset'] = row['dataset']
    df_list.append(df)

df = pd.concat(df_list).drop("Unnamed: 0", axis=1, errors='ignore')

# Index rows by (dataset, original row label).
df.set_index("dataset", inplace=True, append=True)
df = df.reorder_levels(df.index.names[::-1])

# Remember the unfiltered size so the report below shows kept / total rows
# (previously the same post-filter count was printed on both sides).
n_rows_total = len(df.index)

# Keep only rows whose checked columns are strictly positive, then rename the
# raw measurement columns to the *_anti names used in the rest of the notebook.
# NOTE(review): df.columns[:-1] leaves the LAST column out of the positivity
# check. 'dataset' has already been moved into the index at this point, so the
# excluded column is a data column -- confirm this is intended.
df = df[(df[df.columns[:-1]] > 0).all(axis=1)].rename(columns={'Kinase': 'WT_anti', 'Substrate': 'ST_anti', 'Phosphorylation': 'SpT_anti'})

df['phospho_frac_anti'] = df['SpT_anti'] / df['ST_anti']

print(len(df.index), "/", n_rows_total)

display(df)

# One 2D histogram per dataset: SpT_anti/ST_anti ratio against WT_anti.
nconstructs = df.groupby("dataset").ngroups
fig, axes = plt.subplots(nconstructs, 1, figsize=(4, 4*nconstructs),
                        sharex=True, sharey=True, squeeze=False)

for i, (construct, group) in enumerate(df.groupby("dataset")):

    sns.histplot(group, x='WT_anti', y='phospho_frac_anti',
                 log_scale=(True, True), ax=axes[i, 0])

    # Dashed reference line at a ratio of 1.
    axes[i, 0].hlines(1e0, xmin=1e0, xmax=1e5, color='k', linestyle='--')
    axes[i, 0].set_title(construct)

plt.show()

In [None]:
# Histogram resolution shared by every empirical noise model built below.
nbins_anti = 100
nbins_GFP = 100


def _empirical_noise(csv_path, antibody_column):
    """Build a verbose EmpiricalNoise pairing `antibody_column` with 'GFP'."""
    return noise.EmpiricalNoise(csv_path,
                                antibody_column, 'GFP',
                                nbins_anti=nbins_anti, nbins_GFP=nbins_GFP,
                                verbose=True)


def _show_noise_pair(primary_noise, empty_noise):
    """Plot a noise model and its empty-cell counterpart (green) on one axis."""
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    primary_noise.plot(ax)
    empty_noise.plot(ax, color='g')
    plt.show()
    return fig, ax


# --- Writer: 'Flag Antibody' column ---------------------------------------
nonempty_writer_noise = _empirical_noise("../data/noise_data/Kinase Noise.csv", 'Flag Antibody')
empty_writer_noise = _empirical_noise("../data/noise_data/Empty Cell.csv", 'Flag Antibody')

mix_writer_noise = noise.MixtureNoise(nonempty_writer_noise, empty_writer_noise)

fig, ax = _show_noise_pair(nonempty_writer_noise, empty_writer_noise)

# Downstream conversions use the non-empty writer model, not the mixture.
writer_noise = nonempty_writer_noise

# --- Substrate: 'Myc Antibody' column -------------------------------------
substrate_noise = _empirical_noise("../data/noise_data/Substrate Noise.csv", 'Myc Antibody')
empty_substrate_noise = _empirical_noise("../data/noise_data/Empty Cell.csv", 'Myc Antibody')

fig, ax = _show_noise_pair(substrate_noise, empty_substrate_noise)

# --- Phospho-substrate ----------------------------------------------------
# NOTE(review): the control file is read with column 'PE Antibody' while the
# empty-cell file is read with column 'Phosphorylation'; the writer and
# substrate pairs use the same column name in both files. Confirm against
# the CSV headers.
phospho_noise = _empirical_noise("../data/noise_data/PE Noise.csv", 'PE Antibody')
empty_phospho_noise = _empirical_noise("../data/noise_data/Empty Cell.csv", 'Phosphorylation')

fig, ax = _show_noise_pair(phospho_noise, empty_phospho_noise)

In [None]:

# For every dataset: compare its writer antibody distribution with the
# non-empty and empty reference distributions, then convert each antibody
# measurement to GFP units with the matching noise model.
for dataset, group in df.groupby("dataset"):

    # NOTE(review): `frac` is not used below; the call is kept in case
    # calc_mixture has side effects on mix_writer_noise -- confirm and remove.
    frac = mix_writer_noise.calc_mixture(group['WT_anti'])

    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    ax.hist(np.log10(group['WT_anti']), histtype='step', bins='auto',
            label='full', density=True, color='r')
    ax.hist(np.log10(mix_writer_noise.nonempty.df['Flag Antibody']), color='b', density=True,
            histtype='step', bins='auto', label='nonempty')
    ax.hist(np.log10(mix_writer_noise.empty.df['Flag Antibody']), color='g', density=True,
            histtype='step', bins='auto', label='empty')

    # The histogrammed values are already log10-transformed, so the axis stays
    # linear. Putting it on a log scale as before applied the log twice and
    # could not show log10 values <= 0 (antibody readings <= 1).
    ax.set_xlabel(r"$\log_{10}$(anti)")
    ax.legend(loc='upper left')
    plt.show()

    # Antibody -> GFP conversion, one noise model per measured channel.
    df.loc[group.index, 'WT_GFP'] = writer_noise.anti_to_GFP(group['WT_anti'], plot=False)

    df.loc[group.index, 'ST_GFP'] = substrate_noise.anti_to_GFP(group['ST_anti'], plot=False)

    df.loc[group.index, 'SpT_GFP'] = phospho_noise.anti_to_GFP(group['SpT_anti'], plot=False)


df['phospho_frac_GFP'] = df['SpT_GFP'] / df['ST_GFP']

# Report the row count before and after discarding rows with any NaN.
print(len(df))
df = df.dropna()
print(len(df))

display(df)

In [None]:
def solve(df, df_info, param_dict, x0, bounds, verbose=False):
    """Fit the model parameters to all datasets by minimizing the mean loss.

    The objective is the sample-weighted mean of the per-dataset losses,
    ``sum_i(N_i * L_i) / sum_i(N_i)``, where ``N_i`` is the number of rows in
    dataset ``i`` and ``L_i`` is the value returned by that dataset's
    ``loss_log``. It is minimized with L-BFGS-B using finite-difference
    gradients.

    Parameters
    ----------
    df : pandas.DataFrame
        Measurements. Must be queryable by ``dataset`` and provide the columns
        ``SpT_GFP`` and ``ST_GFP``, plus ``WT_GFP`` for ``'push'`` datasets.
    df_info : pandas.DataFrame
        One row per dataset with columns ``dataset``, ``construct`` and
        ``model`` (``'push'`` or ``'background'``).
    param_dict : dict
        Maps a construct to the list of positions in the parameter vector it
        uses. Position 0 of that list is the noise-parameter slot (not used by
        the losses evaluated here); the remaining positions select the model
        parameters, which are stored as log10 values.
    x0 : sequence of float
        Initial parameter vector.
    bounds : sequence of (min, max)
        Per-parameter bounds passed to ``scipy.optimize.minimize``.
    verbose : bool, optional
        If True, print the initial and final loss, the elapsed time and the
        optimizer result.

    Returns
    -------
    scipy.optimize.OptimizeResult
        The result object returned by ``scipy.optimize.minimize``.

    Raises
    ------
    ValueError
        If a row of ``df_info`` names an unknown model, or if the selected
        datasets contain no rows at all.
    """

    if verbose:
        start = time.time()

    # Slice the data once per dataset instead of re-running the query on every
    # objective evaluation; the slices do not depend on the parameters.
    subsets = []
    for _, row in df_info.iterrows():
        model_name = row['model']
        if model_name not in ('push', 'background'):
            raise ValueError("Unknown model '{}' for dataset '{}'".format(
                model_name, row['dataset']))
        df_data = df.query("dataset=='{}'".format(row['dataset']))
        subsets.append((model_name, row['construct'], df_data))

    # Total number of data points; constant across objective evaluations.
    norm = sum(len(df_data.index) for _, _, df_data in subsets)
    if norm == 0:
        raise ValueError("No data to fit: the selected datasets are empty")

    def func(x):
        x = np.asarray(x)
        loss = 0.0

        for model_name, construct, df_data in subsets:
            N_data = len(df_data.index)

            # Model parameters are optimized in log10 space.
            model_params = 10**x[param_dict[construct][1:]]

            if model_name == 'background':
                model = pp.Background()
                L = model.loss_log(df_data['SpT_GFP'].values,
                                   df_data[['ST_GFP']].values,
                                   model_params)
            else:  # 'push'
                model = pp.PushAmp()
                L = model.loss_log(df_data['SpT_GFP'].values,
                                   df_data[['WT_GFP', 'ST_GFP']].values,
                                   model_params)

            # Weight each dataset exactly once by its size. The push branch
            # used to apply N_data twice, and the running total was divided by
            # a partial norm inside this loop.
            loss += N_data * L

        return loss / norm


    if verbose:
        print("Initial Loss:", func(x0))


    res = opt.minimize(func, x0, method='L-BFGS-B',
                       jac='2-point', bounds=bounds,
                       options={'iprint':101, 'eps': 1e-8,
                                'gtol': 1e-8, 'ftol':1e-12,
                               'finite_diff_rel_step': 1e-4})


    if verbose:
        print("Final Loss:", res.fun)

        end = time.time()

        print("Time Elapsed", end-start, "seconds")

        print(res)


    return res

In [None]:
# Parameters shared by every construct occupy slots 0-2 of the parameter
# vector; each distinct 'push' construct appends one slot of its own.
param_labels = [r"$\rho$", r"$\log_{10}(v_{bg}^p)$", r"$\log_{10}(v_{WS}^p)$"]
x0 = [0.0, 0.0, 1.0]
bounds = [(0.0, 1.0e-6), (None, None), (None, None)]

# construct -> positions of its parameters in the vector
param_dict = {}
# number of distinct constructs registered per model type
model_count = dict(push=0, background=0)

for construct, model in zip(df_info['construct'], df_info['model']):

    # A construct shared by several datasets is registered only once.
    if construct in param_dict:
        continue

    if model == 'push':
        # Shared slots plus the next free per-construct slot.
        param_dict[construct] = [0, 1, 2, 3 + model_count['push']]
        model_count['push'] += 1

        param_labels.append(construct + ": " + r"$\log_{10}(\alpha_{WS})$")
        x0.append(3.0)
        bounds.append((None, None))

    elif model == 'background':
        param_dict[construct] = [0, 1]
        model_count['background'] += 1


for summary in (param_labels, param_dict, model_count):
    print(summary)

for vector in (x0, bounds):
    print(vector)


res = solve(df, df_info, param_dict, x0, bounds, verbose=True)


In [None]:
# Hessian estimate: the inverse of the inverse-Hessian approximation that
# L-BFGS-B returns with the fit result.
hess = la.inv(res.hess_inv.todense())

print("Model parameters:")
for i in range(len(res.x)):
    display(Markdown(param_labels[i] + " = " + str(res.x[i])))


# Long-form table of log10(|H_ij|). The 1e-4 offset keeps the logarithm
# finite when an entry is exactly zero.
s_list = []

for i, labeli in enumerate(param_labels):
    for j, labelj in enumerate(param_labels):
        s_list.append([labeli, labelj, np.log10(np.abs(hess[i, j])+1e-4)])

df_hess = pd.DataFrame(s_list, columns=['param1', 'param2', 'hess'])

# DataFrame.pivot takes keyword-only arguments from pandas 2.0 on; the
# positional form used before raises TypeError there.
sns.heatmap(df_hess.pivot(index="param1", columns="param2", values="hess"),
            cbar_kws={'label': r"$\log_{10}(|H_{ij}|)$"},
            cmap='cividis', center=0)

plt.show()

# Eigen-decomposition of the Hessian estimate, largest eigenvalue first
# (eigh returns eigenvalues in ascending order).
evals, evecs = la.eigh(hess)

evals = evals[::-1]
evecs = evecs[:, ::-1]

# Absolute weight of every parameter in every eigenvector.
s_list = []
for i, labeli in enumerate(param_labels):
    for j in range(len(evals)):
        s_list.append([labeli, "{0:}: {1:07.4f}".format(j, evals[j]), np.abs(evecs[i, j])])


df_evecs = pd.DataFrame(s_list, columns=['param', "PC (eigenval)", 'val'])

sns.heatmap(df_evecs.pivot(index="PC (eigenval)", columns="param", values="val"),
            cbar_kws={'label': "weight"},
            cmap='RdBu', center=0)

plt.show()

In [None]:
# Dense inverse-Hessian estimate from the fit; its diagonal sets the error bars.
hess_inv = res.hess_inv.todense()

# construct -> (fitted log10(alpha_WS), half-width of its error bar in log10 units)
zippers = {}

push_constructs = df_info.loc[df_info['model'] == 'push', 'construct']
for construct in push_constructs:

    # Several datasets may share a construct; record it only once.
    if construct in zippers:
        continue

    # Slot 3 of a push construct's index list is its own alpha_WS parameter.
    idx = param_dict[construct][3]

    # NOTE(review): the 0.01 prefactor on sqrt(hess_inv[idx, idx]) is not
    # explained anywhere in this notebook -- confirm its origin.
    zippers[construct] = (res.x[idx], 0.01*np.sqrt(hess_inv[idx, idx]))


print(zippers)

fig, ax = plt.subplots(1, 1, figsize=(8, 6))

log_alpha = np.array([entry[0] for entry in zippers.values()])
log_err = np.array([entry[1] for entry in zippers.values()])

# Bars are drawn in linear units; the log-space interval maps to asymmetric
# lower / upper error bars.
y = 10**log_alpha
y_low = y - 10**(log_alpha - log_err)
y_up = 10**(log_alpha + log_err) - y

ax.bar(zippers.keys(), y, yerr=(y_low, y_up))

ax.set_yscale('log')
ax.set_xlabel("Zipper")
ax.set_ylabel("Inverse Binding Strength\n" + r"$\alpha_{WS} = k^{off}/k^{on}$ [GFP]")
plt.xticks(rotation=45)

plt.show()

In [None]:


# Evaluate the fitted model on every measured row and store the prediction
# next to the data.
for dataset, construct, model in df_info[['dataset', 'construct', 'model']].itertuples(index=False, name=None):

    df_data = df.query("dataset=='{}'".format(dataset))

    if model == 'background':

        # Background datasets get an all-zero prediction.
        SpT_GFP_predict = np.zeros(len(df_data.index))

    elif model == 'push':

        fitted = np.array(res.x)
        slots = param_dict[construct]

        # Slot 0 is the noise parameter (not used in this cell); the remaining
        # slots are log10 model parameters.
        noise_params = fitted[slots[0:1]]
        model_params = 10**fitted[slots[1:]]

        amp = pp.PushAmp()
        SpT_GFP_predict = amp.predict_all(df_data[['WT_GFP', 'ST_GFP']].values, model_params)

    df.loc[df_data.index, 'SpT_GFP_predict'] = SpT_GFP_predict


# Map the GFP-unit prediction back to antibody units, then form both ratios.
df['SpT_anti_predict'] = phospho_noise.GFP_to_anti(df['SpT_GFP_predict'])

df['phospho_frac_GFP_predict'] = df['SpT_GFP_predict'] / df['ST_GFP']
df['phospho_frac_anti_predict'] = df['SpT_anti_predict'] / df['ST_anti']

display(df)

In [None]:
# One panel per dataset: predicted SpT_GFP/ST_GFP ratio against WT_GFP, with
# the model curve evaluated at the dataset's mean ST_GFP drawn on top.
n_panels = len(df_info.index)
fig, axes = plt.subplots(n_panels, 1, figsize=(4, 4*n_panels),
                        sharex=True, sharey=True, squeeze=False)


for i, dataset, construct, model in df_info[['dataset', 'construct', 'model']].itertuples(index=True, name=None):

    panel = axes[i, 0]

    df_data = df.query("dataset=='{}'".format(dataset))

    sns.histplot(df_data, x='WT_GFP', y='phospho_frac_GFP_predict',
                 log_scale=(True, False), ax=panel, bins=32)

    panel.set_ylabel("phospho_frac_GFP_predict", fontsize='small')

    panel.set_title("{0:} {1:}".format(dataset, "theoretical GFP"), fontsize='small')
    panel.set_ylim(0, 1.0)

    # Only 'push' datasets get a model curve.
    if model != 'push':
        continue

    params = 10**np.array(res.x)[param_dict[construct][1:]]

    ST_GFP_mean = df_data['ST_GFP'].mean()
    WT_GFP = np.logspace(0, 6, base=10)

    # Two-column (WT_GFP, ST_GFP) input with ST_GFP held at the dataset mean.
    grid = np.asfortranarray(np.c_[WT_GFP, ST_GFP_mean*np.ones_like(WT_GFP)])

    amp = pp.PushAmp()
    SpT_GFP = amp.predict_all(grid, params)

    panel.plot(WT_GFP, SpT_GFP/ST_GFP_mean, 'k--')


plt.show()

In [None]:
# Two panels per dataset in antibody units: measured ratio (left) and
# predicted ratio (right), each overlaid with the same model curve.
n_panels = len(df_info.index)
fig, axes = plt.subplots(n_panels, 2, figsize=(8, 4*n_panels),
                        sharex=True, sharey=True, squeeze=False)

for i, dataset, construct, model in df_info[['dataset', 'construct', 'model']].itertuples(index=True, name=None):

    panels = (axes[i, 0], axes[i, 1])

    df_data = df.query("dataset=='{}'".format(dataset))

    for panel, y_column in zip(panels, ('phospho_frac_anti', 'phospho_frac_anti_predict')):
        sns.histplot(df_data, x='WT_anti', y=y_column,
                     log_scale=(True, True), ax=panel)

    # Dashed reference line at a ratio of 1.
    for panel in panels:
        panel.hlines(1e0, xmin=1e0, xmax=1e5, color='k', linestyle='--')

    for panel, tag in zip(panels, ("exp", "theory")):
        panel.set_title("{0:} {1:}".format(dataset, tag))

    for panel in panels:
        panel.set_ylim(1e-3, 1e2)

    # Only 'push' datasets get a model curve.
    if model != 'push':
        continue

    fitted = np.array(res.x)
    slots = param_dict[construct]

    # Slot 0 is the noise parameter (not used in this cell); the remaining
    # slots are log10 model parameters.
    noise_params = fitted[slots[0:1]]
    model_params = 10**fitted[slots[1:]]

    ST_GFP_mean = df_data['ST_GFP'].mean()
    WT_GFP = np.logspace(0, 6, base=10)
    ST_GFP_flat = ST_GFP_mean*np.ones_like(WT_GFP)

    amp = pp.PushAmp()
    SpT_GFP = amp.predict_all(np.asfortranarray(np.c_[WT_GFP, ST_GFP_flat]), model_params)

    # Convert the GFP-unit curve to antibody units channel by channel.
    WT_anti = writer_noise.GFP_to_anti_max(WT_GFP)
    ST_anti = substrate_noise.GFP_to_anti_max(ST_GFP_flat)
    SpT_anti = phospho_noise.GFP_to_anti_max(SpT_GFP)

    for panel in panels:
        panel.plot(WT_anti, SpT_anti/ST_anti, 'k-')


plt.show()