In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.chdir('/Users/zhenyamordan/PyCharmProjects/Kinome-Regularization/')

In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
from scipy.optimize import curve_fit
import plotly.graph_objects as go
import subprocess

## Load data

In [4]:
!ls ./fof/data/data_06_10_23

KIR3KinaseCategories.xlsx         good_doses_mask.csv
drug_phenotype_data.csv           kinase_data.csv
final_mask_1_point_on_plateau.csv y_plateau_preprocessed.csv


In [5]:
y = pd.read_csv('./fof/data/data_06_10_23/drug_phenotype_data.csv')

In [6]:
X = pd.read_csv('./fof/data/data_06_10_23/kinase_data.csv')

In [7]:
data = pd.merge(y, X, left_index=True, right_index=True)

In [8]:
data

Unnamed: 0,drugNames,Raw Y (closest to 70),Raw Y (interpolation to 70),dose (label),dose (uM),drugShortNames,realNames,AKT1,AKT2,AKT3,...,TESK2,TNIK,TNK1,TRKB,TRKC,TXK,TYK1_LTK,TYRO3_SKY,ULK1,YES_YES1
0,NCGC00344999_A1,27.000000,27.654545,3,1.11,A1,NCGC00344999,0.825562,0.958012,0.929195,...,0.854304,0.027721,0.302221,0.000411,0.000200,0.025107,0.343913,0.228633,0.273383,0.001528
1,NCGC00344999_A1,50.433333,52.624242,4,0.37,A1,NCGC00344999,0.877345,0.984761,0.979264,...,0.952670,0.068760,0.577477,0.000973,0.000201,0.064854,0.566269,0.428022,0.534281,0.004563
2,NCGC00344999_A1,76.533333,78.551515,5,0.12,A1,NCGC00344999,0.915214,0.994564,0.994147,...,0.985187,0.160831,0.811382,0.003018,0.000210,0.157508,0.764517,0.653849,0.777644,0.014427
3,NCGC00344999_A1,73.566667,75.393939,6,0.04,A1,NCGC00344999,0.942042,0.998072,0.998365,...,0.994927,0.332354,0.930753,0.010412,0.000296,0.335096,0.889445,0.826574,0.914221,0.045557
4,NCGC00344999_A1,75.866667,77.103030,7,0.01,A1,NCGC00344999,0.960575,0.999315,0.999543,...,0.997752,0.563155,0.976166,0.036337,0.001109,0.575069,0.951738,0.922977,0.970018,0.134958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
402,NCGC00346549_H7,20.460000,19.076551,1,10.00,H7,AT-7867,0.004845,0.043636,0.005906,...,0.919266,0.568308,0.144326,0.481946,0.159750,0.978787,0.627206,0.565679,1.000000,0.749658
403,NCGC00346549_H7,40.060000,38.445977,2,3.30,H7,AT-7867,0.013398,0.107004,0.016757,...,0.952849,0.789877,0.344824,0.688925,0.386833,0.999763,0.734279,0.813559,1.000000,0.909249
404,NCGC00346549_H7,96.820000,93.884014,5,0.12,H7,AT-7867,0.239343,0.684571,0.298176,...,0.990581,0.987751,0.941170,0.966763,0.958432,1.000000,0.923971,0.992938,1.000000,0.996335
405,NCGC00346549_H7,90.820000,89.774728,6,0.04,H7,AT-7867,0.473748,0.850279,0.554458,...,0.994239,0.994974,0.980351,0.985151,0.987090,1.000000,0.951929,0.997117,1.000000,0.998138


In [9]:
mask = pd.read_csv('./fof/data/good_doses_mask.csv')

In [10]:
data = data[mask['if_used_in_final']].reset_index(drop=True)

In [11]:
data.shape

(347, 376)

## Fit sigmoid

In [12]:
def sigmoid(x, L ,x0, k, b):
    y = L / (1 + np.exp(-k*(x-x0))) + b
    return (y)

In [13]:
def plot_sigmoid(x, y, resolution, x_column, y_column, drug):
    fig = go.Figure()
    
    fig.add_trace(go.Scatter(
        x=x, 
        y=y,
        name='Real data',
        mode='markers',
        marker_color='rgba(152, 0, 0, .8)'
    ))
    
    p0 = [max(y), np.median(x),1,min(y)]
    
    fig.update_layout(
        title=f"{x_column} vs {y_column} for {drug}",
        xaxis_title=x_column,
        yaxis_title=y_column
    )
    
    try:
        params, covariance = curve_fit(sigmoid, x, y, p0=p0, method='dogbox')
        delta = (x.max()-x.min())/resolution
        x_new = np.arange(x.min(), x.max() + delta / 2, delta)  # New x values for extrapolation
        y_new = sigmoid(x_new, *params)
        line = px.line(x=x_new, y=y_new)
        
        fig.add_trace(go.Scatter(
            x=x_new, 
            y=y_new,
            name='Fitted Sigmoid',
            mode='lines',
            marker_color='blue'
        ))
    except RuntimeError as e:
        print(f'Can not fit sigmoid')

    fig.show()
    
def plot_kinase_sigmoid(data, drug, x_column, y_columns, z_column, resolution = 100):
    x = data[data['drugNames'] == drug][x_column].to_numpy()
    z = data[data['drugNames'] == drug][z_column].to_numpy()
    for y_column in y_columns:
        y = data[data['drugNames'] == drug][y_column].to_numpy()
        plot_sigmoid(x, y, resolution, x_column, y_column, drug)
    plot_sigmoid(x, z, resolution, x_column, z_column, drug)
    
    

In [14]:
kinases = data.columns[7:].to_numpy()
print(kinases[:5])
drugs = data['drugNames'].unique()
print(drugs[:5])

['AKT1' 'AKT2' 'AKT3' 'ALK1_ACVRL1' 'ALK2_ACVR1']
['NCGC00344999_A1' 'NCGC00241102_E1' 'NCGC00249389_H1' 'NCGC00345784_C2'
 'NCGC00346673_G2']


In [18]:
i = 11
j = 12
drug = drugs[i] 
kinase_columns = kinases[[1,2,3]]
dose_column = 'dose (uM)'
response_column = 'Raw Y (interpolation to 70)'
plot_kinase_sigmoid(data, drug, dose_column, kinase_columns, response_column)

Can not fit sigmoid


Can not fit sigmoid


Can not fit sigmoid


## Run R

### Transform data

In [22]:
data = data[data['dose (uM)'] != 0.]

In [23]:
number_drug_dict = dict(zip(range(len(data)), data['drugNames'].unique()))
drug_number_dict = {v: k for k, v in number_drug_dict.items()}

In [24]:
number_kinase_dict = dict(zip(range(1, len(kinases)+1), kinases))
kinase_number_dict = {v: k for k, v in number_kinase_dict.items()}

In [25]:
y = data[response_column].to_numpy().reshape(-1,1)
X = data[kinases].values
t = data[dose_column].to_numpy().reshape(-1,1)
drug = data['drugNames'].apply(lambda x: drug_number_dict[x]).to_numpy().reshape(-1,1)

In [26]:
# X = (X - X.mean(axis=0)) / X.var(axis=0)

### Save

In [27]:
r_data = np.concatenate([drug, y, t, X], axis=1)
r_columns = ['subj', 'Y', 'time'] + [f'Cov_{i+1}' for i in range(X.shape[1])]

In [28]:
r_df = pd.DataFrame(data=r_data, columns=r_columns)
r_df

Unnamed: 0,subj,Y,time,Cov_1,Cov_2,Cov_3,Cov_4,Cov_5,Cov_6,Cov_7,...,Cov_360,Cov_361,Cov_362,Cov_363,Cov_364,Cov_365,Cov_366,Cov_367,Cov_368,Cov_369
0,0.0,27.654545,1.11,0.825562,0.958012,0.929195,0.873394,0.999992,0.978198,1.000000,...,0.854304,0.027721,0.302221,0.000411,0.000200,0.025107,0.343913,0.228633,0.273383,0.001528
1,0.0,52.624242,0.37,0.877345,0.984761,0.979264,0.959281,1.000000,0.991725,1.000000,...,0.952670,0.068760,0.577477,0.000973,0.000201,0.064854,0.566269,0.428022,0.534281,0.004563
2,0.0,78.551515,0.12,0.915214,0.994564,0.994147,0.987177,1.000000,0.996885,1.000000,...,0.985187,0.160831,0.811382,0.003018,0.000210,0.157508,0.764517,0.653849,0.777644,0.014427
3,0.0,75.393939,0.04,0.942042,0.998072,0.998365,0.995500,1.000000,0.998830,1.000000,...,0.994927,0.332354,0.930753,0.010412,0.000296,0.335096,0.889445,0.826574,0.914221,0.045557
4,0.0,77.103030,0.01,0.960575,0.999315,0.999543,0.997916,1.000000,0.999560,1.000000,...,0.997752,0.563155,0.976166,0.036337,0.001109,0.575069,0.951738,0.922977,0.970018,0.134958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,46.0,7.427293,1.11,0.960433,0.996855,0.928235,0.915098,1.000000,0.998901,0.998901,...,0.721582,0.002261,0.020236,0.348805,0.300735,0.001050,0.165440,0.479133,1.000000,0.001413
296,46.0,25.145200,0.37,0.975974,0.999325,0.951123,0.958103,1.000000,0.998901,0.998901,...,0.878859,0.007330,0.066630,0.618058,0.529689,0.006249,0.368352,0.722819,1.000000,0.005303
297,46.0,58.041145,0.12,0.985499,0.999855,0.966849,0.979502,1.000000,0.998901,0.998901,...,0.952637,0.024551,0.198193,0.830142,0.746503,0.038751,0.631744,0.880451,1.000000,0.020475
298,46.0,58.872451,0.04,0.991281,0.999969,0.977512,0.989781,1.000000,0.998901,0.998901,...,0.981940,0.079986,0.461122,0.936530,0.884694,0.207089,0.834582,0.953693,1.000000,0.076450


In [30]:
r_df.to_csv('./fof/data/r/prepared_data.csv', index=False)

### Run R code

r/run_algorithm.R -> data/r/result_coefficients.csv

### Analyse coefficients


In [22]:
target_kinases_dict = {
    264: 'FYN',
    255: 'EPHB2',
    281: 'LATS2',
    285: 'LYN',
    294: 'MINK_MINK1',
    361: 'YES_YES1',
}

In [23]:
c_df = pd.read_csv('./fof/data/r/result_coefficients.csv')

### Var

In [24]:
fig = px.histogram(x=c_df.var())
fig.show()

In [25]:
top_covariates = c_df.var().sort_values(ascending=False).index.tolist()
top_covariates

['V321',
 'V28',
 'V254',
 'V262',
 'V196',
 'V277',
 'V133',
 'V129',
 'V343',
 'V26',
 'V141',
 'V304',
 'V51',
 'V170',
 'V320',
 'V185',
 'V238',
 'V250',
 'V177',
 'V168',
 'V131',
 'V53',
 'V96',
 'V180',
 'V40',
 'V16',
 'V302',
 'V43',
 'V362',
 'V317',
 'V255',
 'V81',
 'V76',
 'V228',
 'V48',
 'V299',
 'V154',
 'V292',
 'V98',
 'V59',
 'V83',
 'V218',
 'V91',
 'V193',
 'V294',
 'V140',
 'V205',
 'V71',
 'V52',
 'V108',
 'V15',
 'V233',
 'V333',
 'V84',
 'V239',
 'V72',
 'V165',
 'V137',
 'V369',
 'V93',
 'V221',
 'V179',
 'V152',
 'V194',
 'V215',
 'V11',
 'V203',
 'V34',
 'V41',
 'V138',
 'V284',
 'V243',
 'V30',
 'V251',
 'V316',
 'V282',
 'V112',
 'V227',
 'V319',
 'V278',
 'V56',
 'V103',
 'V50',
 'V269',
 'V130',
 'V260',
 'V202',
 'V252',
 'V341',
 'V256',
 'V100',
 'V10',
 'V191',
 'V184',
 'V367',
 'V62',
 'V310',
 'V229',
 'V115',
 'V305',
 'V125',
 'V57',
 'V274',
 'V353',
 'V296',
 'V17',
 'V136',
 'V2',
 'V249',
 'V327',
 'V234',
 'V245',
 'V169',
 'V173',
 'V271'

In [26]:
top_covariates_indexes = [i[1:] for i in top_covariates[:10]]
top_covariates_indexes

['321', '28', '254', '262', '196', '277', '133', '129', '343', '26']

In [27]:
top_kinases = [number_kinase_dict[int(i)] for i in top_covariates_indexes]

In [28]:
top_kinases

['PIM3',
 'CDC7_DBF4',
 'EPHB4',
 'FMS',
 'WNK3',
 'IRAK4',
 'NEK8',
 'NEK3',
 'RIPK4',
 'CAMK4']

### Max T

In [29]:
c_df

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V360,V361,V362,V363,V364,V365,V366,V367,V368,V369
0,-2.678863,-0.883362,0.225737,-2.652871,-1.186284,3.450929,-0.717789,-0.024731,0.883328,4.761487,...,-3.660269,-0.933872,-3.19788,0.387937,2.6759,0.164686,2.03239,-2.793127,-0.322112,-4.517528
1,-5.31201,-4.360014,-3.991687,3.085771,-5.803061,3.765544,2.433703,-1.370032,-0.67051,3.016013,...,-1.555093,-0.065157,6.405555,3.462205,2.661568,2.204485,1.995197,5.034056,2.562234,-6.162925
2,-0.80159,1.316221,1.045942,1.428443,0.941715,0.697388,-2.097859,-1.176334,0.100263,-2.07102,...,-2.273897,-4.417024,-4.566388,-1.420682,-2.098362,4.383374,0.847301,0.088088,-1.187041,0.865133
3,0.36751,2.679381,0.132478,-1.961813,-1.24199,0.464821,-1.484845,-0.611118,-0.553962,5.025526,...,-0.089216,-4.932624,0.792862,0.947849,1.165891,-0.625673,-0.141959,-0.848033,-2.170994,1.508003
4,0.559021,1.133619,0.053923,-0.592566,-0.154671,-1.080477,0.149677,0.16206,-0.035643,1.25716,...,0.887789,-0.844424,0.375952,0.444107,0.522316,0.333128,-0.135998,-0.282442,-0.570537,-0.278346


In [30]:
top_covariates = c_df.max().sort_values(ascending=False).index
top_covariates_indexes = [i[1:] for i in top_covariates[:10]]
top_covariates_indexes

['196', '133', '185', '28', '131', '254', '317', '343', '320', '250']

In [31]:
top_kinases = [number_kinase_dict[int(i)] for i in top_covariates_indexes]

In [32]:
top_kinases

['WNK3',
 'NEK8',
 'TSSK3_STK22C',
 'CDC7_DBF4',
 'NEK6',
 'EPHB4',
 'PHKG1',
 'RIPK4',
 'PIM2',
 'EPHA8']

### Analyse Beta

In [33]:
b_df = pd.read_csv('./fof/data/r/result_beta.csv')

In [34]:
fig = px.line(b_df, x='time', y='Cov_1')
fig.show()

### Max T

In [35]:
b_df_no_time = b_df.drop(columns=['time'])

In [36]:
top_covariates = b_df_no_time.abs().max().sort_values(ascending=False).index
top_covariates_indexes = [i[4:] for i in top_covariates[:10]]
top_covariates_indexes

['321', '262', '196', '133', '185', '129', '81', '28', '254', '131']

In [37]:
top_kinases = [number_kinase_dict[int(i)] for i in top_covariates_indexes]

In [38]:
top_kinases

['PIM3',
 'FMS',
 'WNK3',
 'NEK8',
 'TSSK3_STK22C',
 'NEK3',
 'HIPK4',
 'CDC7_DBF4',
 'EPHB4',
 'NEK6']

### Var

In [39]:
fig = px.histogram(x=b_df_no_time.var())
fig.show()

In [40]:
top_covariates = b_df_no_time.var().sort_values(ascending=False).index.tolist()
top_covariates

['Cov_321',
 'Cov_254',
 'Cov_262',
 'Cov_304',
 'Cov_28',
 'Cov_51',
 'Cov_238',
 'Cov_196',
 'Cov_141',
 'Cov_277',
 'Cov_250',
 'Cov_168',
 'Cov_228',
 'Cov_255',
 'Cov_133',
 'Cov_317',
 'Cov_320',
 'Cov_185',
 'Cov_302',
 'Cov_96',
 'Cov_193',
 'Cov_91',
 'Cov_52',
 'Cov_15',
 'Cov_59',
 'Cov_76',
 'Cov_294',
 'Cov_72',
 'Cov_343',
 'Cov_170',
 'Cov_299',
 'Cov_129',
 'Cov_239',
 'Cov_165',
 'Cov_71',
 'Cov_205',
 'Cov_83',
 'Cov_362',
 'Cov_131',
 'Cov_177',
 'Cov_221',
 'Cov_292',
 'Cov_40',
 'Cov_137',
 'Cov_333',
 'Cov_43',
 'Cov_180',
 'Cov_81',
 'Cov_26',
 'Cov_34',
 'Cov_152',
 'Cov_98',
 'Cov_203',
 'Cov_218',
 'Cov_251',
 'Cov_84',
 'Cov_243',
 'Cov_284',
 'Cov_30',
 'Cov_252',
 'Cov_367',
 'Cov_191',
 'Cov_215',
 'Cov_227',
 'Cov_179',
 'Cov_271',
 'Cov_118',
 'Cov_316',
 'Cov_256',
 'Cov_108',
 'Cov_53',
 'Cov_229',
 'Cov_296',
 'Cov_93',
 'Cov_353',
 'Cov_138',
 'Cov_154',
 'Cov_57',
 'Cov_27',
 'Cov_103',
 'Cov_245',
 'Cov_173',
 'Cov_130',
 'Cov_202',
 'Cov_327',
 'C

In [41]:
top_covariates_indexes = [i[4:] for i in top_covariates[:10]]
top_covariates_indexes

['321', '254', '262', '304', '28', '51', '238', '196', '141', '277']

In [42]:
top_kinases = [number_kinase_dict[int(i)] for i in top_covariates_indexes]

In [43]:
top_kinases

['PIM3',
 'EPHB4',
 'FMS',
 'P38D_MAPK13',
 'CDC7_DBF4',
 'DAPK2',
 'DCAMKL2',
 'WNK3',
 'PDGFRA',
 'IRAK4']

In [44]:
for k, v in target_kinases_dict.items():
    print(f'{v} - {top_covariates.index(f"Cov_{k}")}')

FYN - 206
EPHB2 - 13
LATS2 - 223
LYN - 144
MINK_MINK1 - 26
YES_YES1 - 274


### Error

In [45]:
top_covariates = np.sqrt(b_df_no_time.pow(2).sum(axis=0)).sort_values(ascending=False).index.tolist()
top_covariates

['Cov_321',
 'Cov_185',
 'Cov_262',
 'Cov_133',
 'Cov_196',
 'Cov_131',
 'Cov_81',
 'Cov_254',
 'Cov_129',
 'Cov_28',
 'Cov_302',
 'Cov_317',
 'Cov_343',
 'Cov_294',
 'Cov_228',
 'Cov_277',
 'Cov_96',
 'Cov_40',
 'Cov_320',
 'Cov_205',
 'Cov_305',
 'Cov_141',
 'Cov_170',
 'Cov_180',
 'Cov_304',
 'Cov_282',
 'Cov_238',
 'Cov_353',
 'Cov_51',
 'Cov_43',
 'Cov_369',
 'Cov_193',
 'Cov_76',
 'Cov_11',
 'Cov_179',
 'Cov_199',
 'Cov_333',
 'Cov_327',
 'Cov_284',
 'Cov_250',
 'Cov_136',
 'Cov_125',
 'Cov_91',
 'Cov_168',
 'Cov_15',
 'Cov_108',
 'Cov_26',
 'Cov_355',
 'Cov_299',
 'Cov_255',
 'Cov_98',
 'Cov_341',
 'Cov_247',
 'Cov_83',
 'Cov_251',
 'Cov_187',
 'Cov_32',
 'Cov_56',
 'Cov_137',
 'Cov_248',
 'Cov_1',
 'Cov_50',
 'Cov_52',
 'Cov_316',
 'Cov_256',
 'Cov_192',
 'Cov_310',
 'Cov_34',
 'Cov_5',
 'Cov_234',
 'Cov_260',
 'Cov_362',
 'Cov_339',
 'Cov_72',
 'Cov_71',
 'Cov_165',
 'Cov_90',
 'Cov_292',
 'Cov_124',
 'Cov_59',
 'Cov_319',
 'Cov_49',
 'Cov_53',
 'Cov_30',
 'Cov_93',
 'Cov_84',

In [46]:
top_covariates_indexes = [i[4:] for i in top_covariates[:10]]
top_covariates_indexes

['321', '185', '262', '133', '196', '131', '81', '254', '129', '28']

In [47]:
top_kinases = [number_kinase_dict[int(i)] for i in top_covariates_indexes]
top_kinases

['PIM3',
 'TSSK3_STK22C',
 'FMS',
 'NEK8',
 'WNK3',
 'NEK6',
 'HIPK4',
 'EPHB4',
 'NEK3',
 'CDC7_DBF4']

In [54]:
for k, v in target_kinases_dict.items():
    print(f'{v} (Cov_{k}) - {top_covariates.index(f"Cov_{k}")}')

FYN (Cov_264) - 173
EPHB2 (Cov_255) - 49
LATS2 (Cov_281) - 167
LYN (Cov_285) - 152
MINK_MINK1 (Cov_294) - 13
YES_YES1 (Cov_361) - 281


### targers

In [49]:
kinase_number_dict['YES_YES1']

369

In [50]:
data

Unnamed: 0,drugNames,Raw Y (closest to 70),Raw Y (interpolation to 70),dose (label),dose (uM),drugShortNames,realNames,AKT1,AKT2,AKT3,...,TESK2,TNIK,TNK1,TRKB,TRKC,TXK,TYK1_LTK,TYRO3_SKY,ULK1,YES_YES1
0,NCGC00344999_A1,27.000000,27.654545,3,1.11,A1,NCGC00344999,0.825562,0.958012,0.929195,...,0.854304,0.027721,0.302221,0.000411,0.000200,0.025107,0.343913,0.228633,0.273383,0.001528
1,NCGC00344999_A1,50.433333,52.624242,4,0.37,A1,NCGC00344999,0.877345,0.984761,0.979264,...,0.952670,0.068760,0.577477,0.000973,0.000201,0.064854,0.566269,0.428022,0.534281,0.004563
2,NCGC00344999_A1,76.533333,78.551515,5,0.12,A1,NCGC00344999,0.915214,0.994564,0.994147,...,0.985187,0.160831,0.811382,0.003018,0.000210,0.157508,0.764517,0.653849,0.777644,0.014427
3,NCGC00344999_A1,73.566667,75.393939,6,0.04,A1,NCGC00344999,0.942042,0.998072,0.998365,...,0.994927,0.332354,0.930753,0.010412,0.000296,0.335096,0.889445,0.826574,0.914221,0.045557
4,NCGC00344999_A1,75.866667,77.103030,7,0.01,A1,NCGC00344999,0.960575,0.999315,0.999543,...,0.997752,0.563155,0.976166,0.036337,0.001109,0.575069,0.951738,0.922977,0.970018,0.134958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
402,NCGC00346549_H7,20.460000,19.076551,1,10.00,H7,AT-7867,0.004845,0.043636,0.005906,...,0.919266,0.568308,0.144326,0.481946,0.159750,0.978787,0.627206,0.565679,1.000000,0.749658
403,NCGC00346549_H7,40.060000,38.445977,2,3.30,H7,AT-7867,0.013398,0.107004,0.016757,...,0.952849,0.789877,0.344824,0.688925,0.386833,0.999763,0.734279,0.813559,1.000000,0.909249
404,NCGC00346549_H7,96.820000,93.884014,5,0.12,H7,AT-7867,0.239343,0.684571,0.298176,...,0.990581,0.987751,0.941170,0.966763,0.958432,1.000000,0.923971,0.992938,1.000000,0.996335
405,NCGC00346549_H7,90.820000,89.774728,6,0.04,H7,AT-7867,0.473748,0.850279,0.554458,...,0.994239,0.994974,0.980351,0.985151,0.987090,1.000000,0.951929,0.997117,1.000000,0.998138


In [51]:
b_df

Unnamed: 0,time,Cov_1,Cov_2,Cov_3,Cov_4,Cov_5,Cov_6,Cov_7,Cov_8,Cov_9,...,Cov_360,Cov_361,Cov_362,Cov_363,Cov_364,Cov_365,Cov_366,Cov_367,Cov_368,Cov_369
0,0.0,-2.678863,-0.883362,0.225737,-2.652871,-1.186284,3.450929,-0.717789,-0.024731,0.883328,...,-3.660269,-0.933872,-3.19788,0.387937,2.6759,0.164686,2.03239,-2.793127,-0.322112,-4.517528
1,0.01,-3.28272,-1.680727,-0.741973,-1.334847,-2.245328,3.522465,0.005151,-0.333761,0.526528,...,-3.176789,-0.735346,-0.9942,1.09305,2.671451,0.63385,2.023566,-0.99607,0.339642,-4.893842
2,0.04,-4.515799,-3.309736,-2.7242,1.379531,-4.411193,3.661431,1.483843,-0.970734,-0.207381,...,-2.181389,-0.338007,3.521169,2.533988,2.648933,1.607809,2.002136,2.697327,1.694297,-5.650445
3,0.12,-5.204241,-4.224405,-3.871641,3.045746,-5.642472,3.692317,2.325687,-1.36533,-0.652217,...,-1.571926,-0.16905,6.14458,3.34606,2.548485,2.255744,1.96767,4.91593,2.47265,-5.995171
4,0.37,-4.875195,-3.810635,-3.510226,2.916741,-5.161197,3.470008,2.000031,-1.349774,-0.598322,...,-1.617844,-0.48478,5.366653,2.999805,2.213568,2.399423,1.882032,4.556071,2.199126,-5.485724
5,1.11,-3.972488,-2.679427,-2.564981,2.502839,-3.921586,2.870447,1.145397,-1.295703,-0.467704,...,-1.693732,-1.334226,3.402518,2.124917,1.387546,2.678912,1.632808,3.576732,1.450152,-4.114504
6,3.3,-1.87659,-0.123186,-0.718096,1.13284,-1.620022,1.521839,-0.510948,-1.068513,-0.280301,...,-1.477487,-3.08456,0.096732,0.639834,0.14352,2.502435,0.950144,1.379042,-0.237806,-1.173505
7,10.0,0.559021,1.133619,0.053923,-0.592566,-0.154671,-1.080477,0.149677,0.16206,-0.035643,...,0.887789,-0.844424,0.375952,0.444107,0.522316,0.333128,-0.135998,-0.282442,-0.570537,-0.278346


In [52]:
i = kinase_number_dict["PLK2"]
fig = px.line(b_df, x='time', y=f'Cov_{i}')
fig.show()

### True vs Predicted value

In [83]:
y_predicted = pd.read_csv('./fof/data/r/result_y.csv')['x'].to_numpy().reshape(-1,1)
y_true = pd.read_csv('./fof/data/r/result_y_true.csv')['x'].to_numpy().reshape(-1,1)

In [86]:
data['y_predicted'] = y_predicted
data['y_standardized'] = y_true

In [87]:
data

Unnamed: 0,drugNames,Raw Y (closest to 70),Raw Y (interpolation to 70),dose (label),dose (uM),drugShortNames,realNames,AKT1,AKT2,AKT3,...,TRKC,TXK,TYK1_LTK,TYRO3_SKY,ULK1,YES_YES1,Y Predicted,y_predicted,Y_standardized,y_standardized
0,NCGC00344999_A1,27.000000,27.654545,3,1.11,A1,NCGC00344999,0.825562,0.958012,0.929195,...,0.000200,0.025107,0.343913,0.228633,0.273383,0.001528,-31.704154,-31.704154,-31.703909,-31.703909
1,NCGC00344999_A1,50.433333,52.624242,4,0.37,A1,NCGC00344999,0.877345,0.984761,0.979264,...,0.000201,0.064854,0.566269,0.428022,0.534281,0.004563,-10.792637,-10.792637,-10.793398,-10.793398
2,NCGC00344999_A1,76.533333,78.551515,5,0.12,A1,NCGC00344999,0.915214,0.994564,0.994147,...,0.000210,0.157508,0.764517,0.653849,0.777644,0.014427,10.680329,10.680329,10.680730,10.680730
3,NCGC00344999_A1,73.566667,75.393939,6,0.04,A1,NCGC00344999,0.942042,0.998072,0.998365,...,0.000296,0.335096,0.889445,0.826574,0.914221,0.045557,7.167360,7.167360,7.167576,7.167576
4,NCGC00344999_A1,75.866667,77.103030,7,0.01,A1,NCGC00344999,0.960575,0.999315,0.999543,...,0.001109,0.575069,0.951738,0.922977,0.970018,0.134958,8.229161,8.229161,8.229162,8.229162
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
402,NCGC00346549_H7,20.460000,19.076551,1,10.00,H7,AT-7867,0.004845,0.043636,0.005906,...,0.159750,0.978787,0.627206,0.565679,1.000000,0.749658,-29.770503,-29.770503,-29.770534,-29.770534
403,NCGC00346549_H7,40.060000,38.445977,2,3.30,H7,AT-7867,0.013398,0.107004,0.016757,...,0.386833,0.999763,0.734279,0.813559,1.000000,0.909249,-16.360166,-16.360166,-16.360355,-16.360355
404,NCGC00346549_H7,96.820000,93.884014,5,0.12,H7,AT-7867,0.239343,0.684571,0.298176,...,0.958432,1.000000,0.923971,0.992938,1.000000,0.996335,26.013233,26.013233,26.013229,26.013229
405,NCGC00346549_H7,90.820000,89.774728,6,0.04,H7,AT-7867,0.473748,0.850279,0.554458,...,0.987090,1.000000,0.951929,0.997117,1.000000,0.998138,21.548150,21.548150,21.548364,21.548364


In [93]:
np.sqrt(np.power((data['y_predicted'] - data['y_standardized']), 2).sum())

0.08705722435159739