In [None]:
%load_ext autoreload
%autoreload 2

from IPython.display import display


import numpy as np
import scipy as sp
import pandas as pd
import numpy.random as rand
import numpy.linalg as la
import numpy.ma as ma
import scipy.optimize as opt
import scipy.stats as stats

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import time

import push_pull_amp as ppamp

# Global plot styling for the whole notebook (seaborn "talk" context, inward ticks,
# Computer Modern math text, no external LaTeX).
sns.set(
    context='talk',
    style='ticks',
    palette='deep',
    font_scale=1.0,
    color_codes=True,
    rc={
        'mathtext.fontset': 'cm',
        'xtick.direction': 'in',
        'ytick.direction': 'in',
        'axes.linewidth': 1.5,
        'figure.dpi': 100,
        'text.usetex': False,
    },
)

# Synthetic Data

## Generate Data

In [None]:
# Ground-truth parameters used to generate the synthetic data set.
vWS, kWSp, kSp, kSu = 2.0, 0.5, 1e-2, 2e-2

# The model is parameterised by rates relative to kSu.
vWSp = kWSp/kSu
vSp = kSp/kSu

print(vWS, vWSp, vSp)

n_samples = 10000

# Seed the global NumPy RNG so that Restart & Run All reproduces the same synthetic data.
# The seed is deliberately NOT 0: a later cell reseeds with 0 before drawing the same
# number of normal deviates, which would replay the deviates used for WT below and
# correlate the simulated noise with WT.
rand.seed(1234)

# Log-normally distributed total writer (WT) and substrate (ST) amounts.
WT = 10**np.random.normal(0.1, 0.7, size=n_samples)
ST = 10**np.random.normal(1.0, 0.1, size=n_samples)

push = ppamp.Push()
# Third argument is left empty here; the fitting cell passes the noisy SpT values in
# that slot, so it is presumably the observed SpT -- TODO confirm in push_pull_amp.
push.set_data(WT, ST, np.array([]))

# Noise-free model output at the ground-truth parameters.
SpT_true = push.predict_all(np.array([vWS, vWSp, vSp]))

# Means, variances and covariance (in log10 units) of the two readouts.
mu_GFP = 4.0
sigma2_GFP = 0.4
mu_anti = 2.5
sigma2_anti = 0.3
sigma_GFP_anti = 0.2

# Conditional (anti | GFP) distribution of a bivariate normal:
# slope A, intercept B and residual variance Sigma2.
A = sigma_GFP_anti / sigma2_GFP
B = mu_anti - A*mu_GFP
Sigma2 = (sigma2_anti*sigma2_GFP - sigma_GFP_anti**2) / sigma2_GFP

print(Sigma2, A, B)

# Noisy observation: log10(SpT_noise) ~ Normal(A*log10(SpT_true) + B, Sigma2)
SpT_noise = 10**np.random.normal(A*np.log10(SpT_true)+B, np.sqrt(Sigma2))

df_syn = pd.DataFrame(np.c_[WT, ST, SpT_true, SpT_noise], columns=['WT', 'ST', 'SpT_true', 'SpT_noise'])

df_syn['SpT_true/ST'] = df_syn['SpT_true'] / df_syn['ST']
df_syn['SpT_noise/ST'] = df_syn['SpT_noise'] / df_syn['ST']

display(df_syn)

# Covariance between log10(WT) and log10(SpT_noise/ST)
print(np.cov(np.log10(df_syn[['WT', 'SpT_noise/ST']].values.T)))


In [None]:
# Noisy vs. noise-free SpT, with the noise-model mean (log10 y = A*log10 x + B) overlaid.
ax = sns.histplot(data=df_syn, x='SpT_true', y='SpT_noise', log_scale=(True, True))

SpT_true_range = df_syn['SpT_true'].agg(['min', 'max'])
t = np.linspace(np.log10(SpT_true_range['min']), np.log10(SpT_true_range['max']))
ax.plot(10**t, 10**(A*t+B), 'k--')
plt.show()

# Response (SpT/ST) as a function of WT: first noise-free, then with measurement noise.
for ratio_col in ('SpT_true/ST', 'SpT_noise/ST'):
    sns.histplot(data=df_syn, x='WT', y=ratio_col, log_scale=(True, True))
    plt.show()

## Fit Model

In [None]:
# def solve(push, verbose=False):

#     if verbose:
#         start = time.time()

   

#     def cpp_loss(x, args):
        
# #         print(x)

#         (vWS, vWSp, vSp, Sigma2, A, B) = x
#         (push) = args

#         loss = push.loss(np.array([vWS, vWSp, vSp]), np.array([Sigma2, A, B]))

#         return loss

#     x0 = (1.0, 1.0, 1.0, 1.0, 1.0, 1.0)
#     # x0 = (1.0, 1.0, 1e-3)
#     bounds = [(1e-6, None), (1e-6, None), (0.0, None), (1e-6, None), (0.0, None), (None, None)]

#     if verbose:
#         print("Initial Loss:", cpp_loss(x0, (push)))


#     res = opt.minimize(cpp_loss, x0, args=(push,), method='L-BFGS-B', jac='2-point', bounds=bounds, options={'iprint':101, 'eps': 1e-8, 'gtol': 1e-8, 'ftol':1e-8})

#     params = res.x[:3]
#     noise_params = res.x[3:]

#     if verbose:
#         print("Final Loss:", res.fun)

#         end = time.time()

#         print("Time Elapsed", end-start, "seconds")

#         print(res)
    
#     return noise_params, params

def solve(push, verbose=False, x0=None, bounds=None):
    """Fit model and noise parameters by bounded L-BFGS-B minimisation.

    The optimisation vector is ``x = (vWS, vWSp, vSp, Sigma2, A, B)``; the first three
    entries are passed to ``push.loss_grad`` as the model parameters and the last three
    as the noise parameters.

    Parameters
    ----------
    push : object
        Must provide ``loss_grad(params, noise_params) -> (loss, grad)``. ``grad`` is
        handed to SciPy as the analytic gradient (``jac=True``), so it is assumed to be
        ordered like ``x`` -- TODO confirm against push_pull_amp.
    verbose : bool, optional
        If true, print the initial loss, final loss, wall time and the SciPy result.
    x0 : sequence of 6 floats, optional
        Starting point. Defaults to all ones.
    bounds : sequence of 6 ``(min, max)`` pairs, optional
        Box constraints, ``None`` meaning unbounded on that side. Defaults to positive
        rates and variance, non-negative ``vSp`` and ``A``, and free ``B``.

    Returns
    -------
    noise_params : numpy.ndarray
        Fitted ``(Sigma2, A, B)``.
    params : numpy.ndarray
        Fitted ``(vWS, vWSp, vSp)``.
        Note the order: noise parameters are returned first.
    """

    if x0 is None:
        x0 = (1.0, 1.0, 1.0, 1.0, 1.0, 1.0)
    if bounds is None:
        bounds = [(1e-6, None), (1e-6, None), (0.0, None), (1e-6, None), (0.0, None), (None, None)]

    # L-BFGS-B starts from the projection of x0 onto the box; do that projection
    # explicitly so the initial loss reported below is evaluated at the real start point.
    lower = np.array([-np.inf if lo is None else lo for lo, _ in bounds], dtype=float)
    upper = np.array([np.inf if hi is None else hi for _, hi in bounds], dtype=float)
    x0 = np.clip(np.asarray(x0, dtype=float), lower, upper)

    if verbose:
        start = time.time()

    def cpp_loss(x, push):
        # Split the flat optimisation vector into model and noise parameters.
        (vWS, vWSp, vSp, Sigma2, A, B) = x

        loss, grad = push.loss_grad(np.array([vWS, vWSp, vSp]), np.array([Sigma2, A, B]))

        return loss, grad

    if verbose:
        initial_loss, _ = cpp_loss(x0, push)
        print("Initial Loss:", initial_loss)

    # jac=True: cpp_loss returns (loss, gradient). 'eps' only matters for finite-difference
    # gradients and is kept unchanged from the original option set.
    res = opt.minimize(cpp_loss, x0, args=(push,), method='L-BFGS-B', jac=True, bounds=bounds,
                       options={'iprint': 101, 'eps': 1e-8, 'gtol': 1e-8, 'ftol': 1e-8})

    params = res.x[:3]
    noise_params = res.x[3:]

    if verbose:
        print("Final Loss:", res.fun)

        end = time.time()

        print("Time Elapsed", end-start, "seconds")

        print(res)

    return noise_params, params


# noise_params = np.array([sigma2, a, b])

# Fit the model to the noisy synthetic measurements and compare against the
# values that generated them.
push = ppamp.Push()

# Pass copies of the WT, ST and noisy SpT columns to the model.
push.set_data(*(df_syn[col].values.copy() for col in ('WT', 'ST', 'SpT_noise')))
# push.set_noise_params(noise_params)

noise_params, params = solve(push, verbose=True)

print("True values:", *(vWS, vWSp, vSp, Sigma2, A, B))

In [None]:
# Noise-free model prediction at the fitted parameters, and its ratio to ST.
df_syn['SpT_predict'] = push.predict_all(params)
df_syn['SpT_predict/ST'] = df_syn['SpT_predict'].div(df_syn['ST'])

# Marginal distribution of SpT: ground truth vs. prediction (green).
ax = sns.histplot(data=df_syn, x='SpT_true', log_scale=True, element="step", fill=False)
sns.histplot(data=df_syn, x='SpT_predict', log_scale=True, ax=ax, color='g', element="step", fill=False)
plt.show()

# Side-by-side response curves (linear y-axis, log x-axis) on shared axes.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), sharex=True, sharey=True)

sns.histplot(data=df_syn, x='WT', y='SpT_true/ST', log_scale=(True, False), ax=ax1)
sns.histplot(data=df_syn, x='WT', y='SpT_predict/ST', log_scale=(True, False), color='g', ax=ax2)

# sharey=True, so this limit applies to both panels
ax1.set_ylim(0, 1.0)

ax1.set_title("Denoised Experimental Data")
ax2.set_title("Theoretical Prediction")

plt.show()



In [None]:
rand.seed(0)

# Keep the fitted noise parameters under their own names. Unpacking them into
# Sigma2/A/B would overwrite the ground-truth values defined when the synthetic data
# was generated, so re-running the earlier cells that plot the true noise-model line
# or print the "True values" would silently show fitted numbers instead.
(Sigma2_fit, A_fit, B_fit) = noise_params

# Push the noise-free prediction through the fitted noise model:
# log10(SpT_predict_noise) ~ Normal(A_fit*log10(SpT_predict) + B_fit, Sigma2_fit)
df_syn['SpT_predict_noise'] = 10**np.random.normal(A_fit*np.log10(df_syn['SpT_predict'])+B_fit, np.sqrt(Sigma2_fit))

df_syn['SpT_predict_noise/ST'] = df_syn['SpT_predict_noise'] / df_syn['ST']


# Marginal distribution of the noisy readout: data vs. simulated prediction (green).
ax = sns.histplot(df_syn, x='SpT_noise', log_scale=True, element="step", fill=False, bins=100, label="Exp")
sns.histplot(df_syn, x='SpT_predict_noise', log_scale=True, ax=ax, color='g', element="step", fill=False, bins=100, label="Predict")

ax.legend()
plt.show()


# Same comparison for the ratio to ST.
ax = sns.histplot(df_syn, x='SpT_noise/ST', log_scale=True, element="step", fill=False, bins=100, label="Exp")
sns.histplot(df_syn, x='SpT_predict_noise/ST', log_scale=True, ax=ax, color='g', element="step", fill=False, bins=100, label="Predict")

ax.legend()
plt.show()

# Joint distributions against WT, data (left) vs. simulated prediction (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)

sns.histplot(df_syn, x='WT', y='SpT_noise/ST', log_scale=(True, True), ax=ax1)
sns.histplot(df_syn, x='WT', y='SpT_predict_noise/ST', log_scale=(True, True), color='g', ax=ax2)

ax1.set_title("Experimental Data")
ax2.set_title("Theoretical Prediction")


plt.show()





# Real Data

## Load Data

In [None]:
# Data set selection: exactly one read_csv line is active; the others are alternatives.
# df = pd.read_csv("../data/push/RR.csv")
# df = pd.read_csv("../data/push/EE(E).csv")
# df = pd.read_csv("../data/push/EE(I).csv")
df = pd.read_csv("../data/push/EE(L).csv")
# df = pd.read_csv("../data/push/EE(S).csv")

# Drop every row containing a negative entry, then switch to the *_anti column names
# used in the rest of the notebook.
df = df.loc[df.ge(0).all(axis=1)].rename(
    columns={'Kinase': 'WT_anti', 'Substrate': 'ST_anti', 'Phosphorylation': 'SpT_anti'}
)

# frac=1.0 keeps every row; the call only shuffles them with a fixed random_state.
df_sample = df.sample(frac=1.0, random_state=776)
df_sample['SpT_anti/ST_anti'] = df_sample['SpT_anti'].div(df_sample['ST_anti'])

print(len(df_sample.index), "/", len(df.index))

display(df_sample)

# Covariance between log10(WT_anti) and log10(SpT_anti/ST_anti)
print(np.cov(np.log10(df_sample[['WT_anti', 'SpT_anti/ST_anti']].to_numpy().T)))

fig, ax = plt.subplots(figsize=(4, 4))
sns.histplot(data=df_sample, x='WT_anti', y='SpT_anti/ST_anti', log_scale=(True, True), ax=ax)
plt.show()

fig, ax = plt.subplots(figsize=(4, 4))
ax = sns.histplot(data=df_sample, x='ST_anti', y='SpT_anti', log_scale=(True, True), ax=ax)
# Dashed identity line SpT_anti = ST_anti
t = np.logspace(1, 4)
ax.plot(t, t, 'k--')
plt.show()



## Resample from Noise Models

In [None]:
# Number of histogram bins along the antibody and GFP axes (log10 space).
nbins_anti = 100
nbins_gfp = 100

# Writer

df_writer = pd.read_csv("../data/Kinase Noise.csv")
# Keep only rows with no negative entries.
df_writer = df_writer.loc[df_writer.ge(0).all(axis=1)]

display(df_writer)

ax = sns.histplot(data=df_writer, x='GFP - Area', y='Flag Antibody',
                  bins=(nbins_gfp, nbins_anti), log_scale=(True, True), cbar=True)
# Dashed reference curve: antibody = 0.02*GFP + 100
t = np.logspace(0, 6)
ax.plot(t, 0.02*t+100, 'k--')
plt.show()

# 2D histogram in log10 space; first axis (x) is antibody, second axis (y) is GFP.
hist_writer, xedges_writer, yedges_writer = np.histogram2d(
    np.log10(df_writer['Flag Antibody']),
    np.log10(df_writer['GFP - Area']),
    bins=(nbins_anti, nbins_gfp),
)

# print(hist_writer)

# Integer antibody-bin label for every sample, using the calibration bin edges.
df_sample['WT_anti_bin'] = pd.cut(df_sample['WT_anti'], bins=10**xedges_writer, labels=False)

# display(df_writer)

# Substrate

df_substrate = pd.read_csv("../data/Substrate Noise.csv")
# Keep only rows with no negative entries.
df_substrate = df_substrate.loc[df_substrate.ge(0).all(axis=1)]

display(df_substrate)

ax = sns.histplot(data=df_substrate, x='GFP - Area', y='Myc Antibody',
                  bins=(nbins_gfp, nbins_anti), log_scale=(True, True), cbar=True)
# Dashed reference line: antibody = 0.1*GFP
t = np.logspace(0, 6)
ax.plot(t, 0.1*t, 'k--')
plt.show()

# Same layout as the writer histogram: x = antibody, y = GFP, both in log10.
hist_substrate, xedges_substrate, yedges_substrate = np.histogram2d(
    np.log10(df_substrate['Myc Antibody']),
    np.log10(df_substrate['GFP - Area']),
    bins=(nbins_anti, nbins_gfp),
)

# print(hist_writer)

df_sample['ST_anti_bin'] = pd.cut(df_sample['ST_anti'], bins=10**xedges_substrate, labels=False)


In [None]:
def _resample_gfp(frame, bin_col, hist, gfp_log_edges):
    """Draw one GFP value per row of ``frame`` from the empirical GFP distribution of its antibody bin.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``bin_col`` and have a unique index.
    bin_col : str
        Column holding the antibody-bin label of each row (NaN where the value fell
        outside the calibration bin edges).
    hist : numpy.ndarray
        2D counts; ``hist[i]`` are the GFP-bin counts for antibody bin ``i``.
    gfp_log_edges : numpy.ndarray
        log10 edges of the GFP bins (one more entry than GFP bins).

    Returns
    -------
    pandas.Series
        Sampled GFP values aligned with ``frame.index``. Rows without a bin, or whose
        antibody bin has no calibration counts, keep the sentinel ``-1.0``.
    """
    # Centres of the GFP bins in log10 space, mapped back to linear units.
    centers = 10**((gfp_log_edges[:-1] + gfp_log_edges[1:]) / 2.0)

    # Float sentinel so that assigning float samples never changes the dtype.
    gfp = pd.Series(-1.0, index=frame.index)

    # Group by a scalar key (not a one-element list) so the group key is a scalar on
    # every pandas version; groupby drops rows whose bin is NaN.
    for bin_idx, group in frame.groupby(bin_col):
        # Bin labels become floats once any row is NaN, so cast before indexing.
        counts = hist[int(bin_idx)]
        norm = np.sum(counts)
        if norm > 0.0:
            gfp.loc[group.index] = rand.choice(centers, size=len(group.index), p=counts / norm)

    return gfp


# Ten stacked copies of the sample, so each measurement gets ten independent GFP draws.
df_resample = pd.concat([df_sample for i in range(10)]).reset_index(drop=True)

display(df_resample)

df_resample['WT_GFP'] = _resample_gfp(df_resample, 'WT_anti_bin', hist_writer, yedges_writer)
df_resample['ST_GFP'] = _resample_gfp(df_resample, 'ST_anti_bin', hist_substrate, yedges_substrate)

# Drop rows that kept the -1 sentinel. Copy so that later cells can add columns
# without operating on a slice of the unfiltered frame.
df_resample = df_resample[(df_resample['WT_GFP'] > 0) & (df_resample['ST_GFP'] > 0)].copy()


sns.histplot(df_resample, x='WT_GFP', y='WT_anti', bins=(yedges_writer, xedges_writer),
             log_scale=(True, True), cbar=True)

plt.show()

sns.histplot(df_resample, x='ST_GFP', y='ST_anti', bins=(yedges_substrate, xedges_substrate),
             log_scale=(True, True), cbar=True)


plt.show()


In [None]:
# For each species, overlay the antibody readout with the resampled GFP values (green).
for anti_col, gfp_col in (('WT_anti', 'WT_GFP'), ('ST_anti', 'ST_GFP')):
    sns.histplot(data=df_resample, x=anti_col, log_scale=True, label=anti_col, bins=40)
    ax = sns.histplot(data=df_resample, x=gfp_col, log_scale=True, color='g', label=gfp_col, bins=40)
    ax.legend()
    plt.show()

In [None]:
def solve(push, verbose=False, x0=None, bounds=None):
    """Fit model and noise parameters by bounded L-BFGS-B minimisation (real-data defaults).

    This rebinds the name ``solve`` defined in an earlier cell; it differs only in the
    default starting point and bounds.

    The optimisation vector is ``x = (vWS, vWSp, vSp, Sigma2, A, B)``; the first three
    entries are passed to ``push.loss_grad`` as the model parameters and the last three
    as the noise parameters.

    Parameters
    ----------
    push : object
        Must provide ``loss_grad(params, noise_params) -> (loss, grad)``. ``grad`` is
        handed to SciPy as the analytic gradient (``jac=True``), so it is assumed to be
        ordered like ``x`` -- TODO confirm against push_pull_amp.
    verbose : bool, optional
        If true, print the initial loss, final loss, wall time and the SciPy result.
    x0 : sequence of 6 floats, optional
        Starting point. Defaults to ``(1.0, 10.0, 0.0, 0.5, 1.0, 0.0)``. It is projected
        onto ``bounds`` before use, so with the default bounds the fit actually starts
        at ``Sigma2 = 0.2`` and ``B = -1.0``.
    bounds : sequence of 6 ``(min, max)`` pairs, optional
        Box constraints, ``None`` meaning unbounded on that side. By default ``Sigma2``
        is capped at 0.2 and ``B`` at -1.0.

    Returns
    -------
    noise_params : numpy.ndarray
        Fitted ``(Sigma2, A, B)``.
    params : numpy.ndarray
        Fitted ``(vWS, vWSp, vSp)``.
        Note the order: noise parameters are returned first.
    """

    if x0 is None:
        x0 = (1.0, 10.0, 0.0, 0.5, 1.0, 0.0)
    if bounds is None:
        bounds = [(1e-6, None), (1e-6, None), (0.0, None), (1e-6, 0.2), (0.0, None), (None, -1.0)]

    # The default x0 violates the Sigma2 and B bounds. L-BFGS-B starts from the
    # projection of x0 onto the box, so project explicitly to make the reported
    # initial loss correspond to the point the optimiser really starts from.
    lower = np.array([-np.inf if lo is None else lo for lo, _ in bounds], dtype=float)
    upper = np.array([np.inf if hi is None else hi for _, hi in bounds], dtype=float)
    x0 = np.clip(np.asarray(x0, dtype=float), lower, upper)

    if verbose:
        start = time.time()

    def cpp_loss(x, push):
        # Split the flat optimisation vector into model and noise parameters.
        (vWS, vWSp, vSp, Sigma2, A, B) = x

        loss, grad = push.loss_grad(np.array([vWS, vWSp, vSp]), np.array([Sigma2, A, B]))

        return loss, grad

    if verbose:
        initial_loss, _ = cpp_loss(x0, push)
        print("Initial Loss:", initial_loss)

    # jac=True: cpp_loss returns (loss, gradient). 'eps' only matters for finite-difference
    # gradients and is kept unchanged from the original option set.
    res = opt.minimize(cpp_loss, x0, args=(push,), method='L-BFGS-B', jac=True, bounds=bounds,
                       options={'iprint': 101, 'eps': 1e-8, 'gtol': 1e-8, 'ftol': 1e-8})

    params = res.x[:3]
    noise_params = res.x[3:]

    if verbose:
        print("Final Loss:", res.fun)

        end = time.time()

        print("Time Elapsed", end-start, "seconds")

        print(res)

    return noise_params, params


# Fit the model to the resampled real data: the resampled WT_GFP / ST_GFP columns
# together with the measured SpT_anti column (copies are passed to the model).
push = ppamp.Push()
push.set_data(*(df_resample[col].values.copy() for col in ('WT_GFP', 'ST_GFP', 'SpT_anti')))

noise_params, params = solve(push, verbose=True)



In [None]:
# params = np.array([1.0, 100.0, 0.00000000e+00, 2.81397600e-01, 0.7, 0.0])
# noise_params = params[3:]

# Noise-free model prediction at the fitted parameters, and its ratio to ST_GFP.
df_resample['SpT_GFP_predict'] = push.predict_all(params)
df_resample['SpT_GFP_predict/ST_GFP'] = df_resample['SpT_GFP_predict'] / df_resample['ST_GFP']

rand.seed(0)

# Keep the fitted noise parameters under their own names. Unpacking them into
# Sigma2/A/B would overwrite the ground-truth values of the synthetic-data section,
# so re-running those cells afterwards would silently use the real-data fit instead.
(Sigma2_fit, A_fit, B_fit) = noise_params

# Push the prediction through the fitted noise model:
# log10(SpT_anti_predict) ~ Normal(A_fit*log10(SpT_GFP_predict) + B_fit, Sigma2_fit)
df_resample['SpT_anti_predict'] = 10**np.random.normal(A_fit*np.log10(df_resample['SpT_GFP_predict'])+B_fit, np.sqrt(Sigma2_fit))

df_resample['SpT_anti_predict/ST_anti'] = df_resample['SpT_anti_predict'] / df_resample['ST_anti']


# Predicted SpT before (GFP) and after (anti, green) applying the noise model.
ax = sns.histplot(df_resample, x='SpT_GFP_predict', log_scale=True, label="GFP", bins=40)

# Drawn on the same axes as the histogram above (made explicit with ax=ax).
sns.histplot(df_resample, x='SpT_anti_predict', log_scale=True, color='g', label="anti", bins=40, ax=ax)


ax.legend()
plt.show()

fig, ax = plt.subplots(1, 1, figsize=(4, 4))

sns.histplot(df_resample, x='WT_GFP', y='SpT_GFP_predict/ST_GFP', log_scale=(True, False), color='g', ax=ax, bins=40)

# ax.set_ylim(0, 1.0)

ax.set_title("Denoised Experimental Data")

plt.show()



In [None]:



# Marginal comparisons of measurement ("Exp") and simulated prediction ("Predict", green):
# first the raw SpT antibody readout, then its ratio to ST_anti.
for exp_col, pred_col in (
    ('SpT_anti', 'SpT_anti_predict'),
    ('SpT_anti/ST_anti', 'SpT_anti_predict/ST_anti'),
):
    ax = sns.histplot(data=df_resample, x=exp_col, log_scale=True, element="step", fill=False, bins=100, label="Exp")
    sns.histplot(data=df_resample, x=pred_col, log_scale=True, ax=ax, color='g', element="step", fill=False, bins=100, label="Predict")
    ax.legend()
    plt.show()

# Joint distributions against WT_anti on shared axes: measurement (left) vs. prediction (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), sharex=True, sharey=True)

sns.histplot(data=df_resample, x='WT_anti', y='SpT_anti/ST_anti', log_scale=(True, True), ax=ax1)
sns.histplot(data=df_resample, x='WT_anti', y='SpT_anti_predict/ST_anti', log_scale=(True, True), color='g', ax=ax2)

# Dashed guide at SpT_anti/ST_anti = 1 on the experimental panel
ax1.hlines(1.0, xmin=1e1, xmax=1e4, color='k', linestyle='--')

ax1.set_title("Experimental Data")
ax2.set_title("Theoretical Prediction")

plt.show()



