# 02 BMRA preparation

Calculating global responses for modules using predicted pathway activity coefficients.

In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import bmra_prep
import bmra_prep.pathway_activity.prediction

In [2]:
# Input and output folders for this notebook (paths are relative to the
# working directory).
data_dir, out_dir = "01_outputs_2020", "02_outputs_2020"

# Create the output folder on first run; a no-op when it already exists.
os.makedirs(out_dir, exist_ok=True)

## Load Data

In [3]:
# Load the metadata dictionary and pull out the entries used in this notebook.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open(os.path.join(data_dir, "metadata.pickle"), "rb") as f:
    all_metadata = pickle.load(f)

# Size entries.
n_modules, n_genes, n_experiments = (
    all_metadata[key] for key in ("n_modules", "n_genes", "n_experiments")
)

# Label entries.
modules, genes, exp_ids = (
    all_metadata[key] for key in ("modules", "genes", "exp_ids")
)


In [4]:
# Read the data matrix; the first CSV column is used as the row index.
Data_df = pd.read_csv(os.path.join(data_dir, "DATA.csv"), index_col=0)
display(Data_df)

Unnamed: 0,LJP007_HUVEC_24H:B19,LJP007_HUVEC_24H:B20,LJP007_HUVEC_24H:B21,LJP007_HUVEC_24H:B22,LJP007_HUVEC_24H:B23,LJP007_HUVEC_24H:B24,LJP007_HUVEC_24H:C01,LJP007_HUVEC_24H:C02,LJP007_HUVEC_24H:C03,LJP007_HUVEC_24H:C04,...,REP.B011_HUVEC.A_24H:O09_2,REP.B011_HUVEC.A_24H:O10_2,REP.B011_HUVEC.A_24H:O11_2,REP.B011_HUVEC.A_24H:O12_2,siCDK2_STAT_vs_CTL_STAT,siCDK2_STAT_vs_CTL_STAT.1,siCDK2_STAT_vs_CTL_STAT.2,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.1,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.2
AARS,0.006899,-0.036417,-0.002468,-0.192234,0.193749,-0.987884,-0.107851,-0.026385,0.125566,-0.088968,...,0.328879,-0.603046,0.142453,-0.611021,0.0,0.0,0.0,0.000000,0.000000,0.000000
ABCB6,-0.153516,0.090083,0.100917,-0.050100,0.074066,-0.659458,0.085049,0.124600,-0.207017,-0.206216,...,-0.276004,0.646096,0.440571,-0.243154,0.0,0.0,0.0,-0.018243,-0.018243,-0.018243
ABCC5,0.430885,0.988469,-0.617515,-0.851815,-0.762765,-0.806991,-0.460982,-0.416399,-0.401315,-0.600982,...,-0.612189,1.197562,-0.605189,-0.966489,0.0,0.0,0.0,0.695906,0.695906,0.695906
ABCF1,0.869589,0.250189,0.483856,0.259573,0.420089,0.115573,0.727923,0.141889,-0.109810,0.062656,...,0.028747,0.226997,0.001048,0.286047,0.0,0.0,0.0,0.000000,0.000000,0.000000
ABCF3,-0.049203,-0.110871,0.241329,-0.277354,-0.815970,0.044405,-0.292220,-0.077737,-0.243837,-0.102671,...,-0.238903,-0.174853,-0.239252,-0.088803,0.0,0.0,0.0,0.404739,0.404739,0.404739
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF395,1.322245,0.406061,-0.584673,1.496211,2.218262,-0.601197,0.650295,-0.629755,0.837345,-0.014839,...,0.903966,0.226066,0.694516,0.293466,0.0,0.0,0.0,0.000000,0.000000,0.000000
ZNF451,-0.331278,-0.115078,0.114506,0.072972,-0.202944,-0.456144,-0.127895,0.128838,0.231789,0.071722,...,0.123271,-0.477355,0.139670,-0.051455,0.0,0.0,0.0,-0.327347,-0.327347,-0.327347
ZNF586,-0.805566,-0.388083,-0.484466,-0.304266,-0.467732,-0.605591,-0.890166,-0.414350,-0.539833,-0.558366,...,0.339700,0.567650,0.171450,0.292250,0.0,0.0,0.0,-0.556124,-0.556124,-0.556124
ZNF589,-0.300999,-0.058116,-0.237699,0.468434,0.532901,-0.904574,0.014768,0.084234,0.139318,0.111168,...,0.872654,-0.904821,-0.011846,0.653454,0.0,0.0,0.0,0.406653,0.406653,0.406653


In [5]:
# Remove the CDK2 row from the data matrix ("_no_cdk2" variant).
# The membership guard makes this cell safe to re-run: Data_df is reassigned
# below, so on a second execution the row is already gone and an unguarded
# .loc / .drop would raise KeyError.
if "CDK2" in Data_df.index:
    # Show the row that is about to be removed.
    display(Data_df.loc["CDK2", :])
    Data_df = Data_df.drop("CDK2", axis=0)
display(Data_df)

# Replace the gene labels loaded from the metadata with the labels of the
# remaining rows. Note that n_genes from the metadata is not updated here.
genes = Data_df.index
print(len(genes), genes)

LJP007_HUVEC_24H:B19                               -0.321407
LJP007_HUVEC_24H:B20                               -0.082840
LJP007_HUVEC_24H:B21                                0.235610
LJP007_HUVEC_24H:B22                                0.507526
LJP007_HUVEC_24H:B23                                0.122760
                                                      ...   
siCDK2_STAT_vs_CTL_STAT.1                          -3.466803
siCDK2_STAT_vs_CTL_STAT.2                          -3.466803
siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT     -3.979420
siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.1   -3.979420
siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.2   -3.979420
Name: CDK2, Length: 435, dtype: float64

Unnamed: 0,LJP007_HUVEC_24H:B19,LJP007_HUVEC_24H:B20,LJP007_HUVEC_24H:B21,LJP007_HUVEC_24H:B22,LJP007_HUVEC_24H:B23,LJP007_HUVEC_24H:B24,LJP007_HUVEC_24H:C01,LJP007_HUVEC_24H:C02,LJP007_HUVEC_24H:C03,LJP007_HUVEC_24H:C04,...,REP.B011_HUVEC.A_24H:O09_2,REP.B011_HUVEC.A_24H:O10_2,REP.B011_HUVEC.A_24H:O11_2,REP.B011_HUVEC.A_24H:O12_2,siCDK2_STAT_vs_CTL_STAT,siCDK2_STAT_vs_CTL_STAT.1,siCDK2_STAT_vs_CTL_STAT.2,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.1,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.2
AARS,0.006899,-0.036417,-0.002468,-0.192234,0.193749,-0.987884,-0.107851,-0.026385,0.125566,-0.088968,...,0.328879,-0.603046,0.142453,-0.611021,0.0,0.0,0.0,0.000000,0.000000,0.000000
ABCB6,-0.153516,0.090083,0.100917,-0.050100,0.074066,-0.659458,0.085049,0.124600,-0.207017,-0.206216,...,-0.276004,0.646096,0.440571,-0.243154,0.0,0.0,0.0,-0.018243,-0.018243,-0.018243
ABCC5,0.430885,0.988469,-0.617515,-0.851815,-0.762765,-0.806991,-0.460982,-0.416399,-0.401315,-0.600982,...,-0.612189,1.197562,-0.605189,-0.966489,0.0,0.0,0.0,0.695906,0.695906,0.695906
ABCF1,0.869589,0.250189,0.483856,0.259573,0.420089,0.115573,0.727923,0.141889,-0.109810,0.062656,...,0.028747,0.226997,0.001048,0.286047,0.0,0.0,0.0,0.000000,0.000000,0.000000
ABCF3,-0.049203,-0.110871,0.241329,-0.277354,-0.815970,0.044405,-0.292220,-0.077737,-0.243837,-0.102671,...,-0.238903,-0.174853,-0.239252,-0.088803,0.0,0.0,0.0,0.404739,0.404739,0.404739
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF395,1.322245,0.406061,-0.584673,1.496211,2.218262,-0.601197,0.650295,-0.629755,0.837345,-0.014839,...,0.903966,0.226066,0.694516,0.293466,0.0,0.0,0.0,0.000000,0.000000,0.000000
ZNF451,-0.331278,-0.115078,0.114506,0.072972,-0.202944,-0.456144,-0.127895,0.128838,0.231789,0.071722,...,0.123271,-0.477355,0.139670,-0.051455,0.0,0.0,0.0,-0.327347,-0.327347,-0.327347
ZNF586,-0.805566,-0.388083,-0.484466,-0.304266,-0.467732,-0.605591,-0.890166,-0.414350,-0.539833,-0.558366,...,0.339700,0.567650,0.171450,0.292250,0.0,0.0,0.0,-0.556124,-0.556124,-0.556124
ZNF589,-0.300999,-0.058116,-0.237699,0.468434,0.532901,-0.904574,0.014768,0.084234,0.139318,0.111168,...,0.872654,-0.904821,-0.011846,0.653454,0.0,0.0,0.0,0.406653,0.406653,0.406653


977 Index(['AARS', 'ABCB6', 'ABCC5', 'ABCF1', 'ABCF3', 'ABHD4', 'ABHD6', 'ABL1',
       'ACAA1', 'ACAT2',
       ...
       'ZMIZ1', 'ZMYM2', 'ZNF131', 'ZNF274', 'ZNF318', 'ZNF395', 'ZNF451',
       'ZNF586', 'ZNF589', 'ZW10'],
      dtype='object', length=977)


In [6]:
# Plain NumPy view of the data matrix (rows and columns as in Data_df).
x = Data_df.to_numpy()
x.shape

(977, 435)

In [7]:
# Load the inhibitor concentrations, the IC50 values and the perturbation
# matrix as plain NumPy arrays. Each CSV is read the same way: the first
# column is the row index and is dropped by taking .values.
inhib_conc_matrix, ic50_matrix, pert_matrix = (
    pd.read_csv(os.path.join(data_dir, csv_name), index_col=0).values
    for csv_name in (
        "inhib_conc_annotated.csv",
        "ic50_annotated.csv",
        "pert_annotated.csv",
    )
)

In [8]:
# Target values computed element-wise as 1 / (1 + concentration / IC50).
y_true = 1 / (1 + inhib_conc_matrix / ic50_matrix)

# Overwrite row 1 for the six additional siCDK2 experiments (columns 429-434).
# NOTE(review): row 1 is presumably the CDK2 module, and the exponents appear
# to be the CDK2 values shown for the siCDK2 columns of DATA.csv -- confirm.
y_true[1, [429, 430, 431]] = 2 ** -3.46680304838829
y_true[1, [432, 433, 434]] = 2 ** -3.9794195625335

y_true_df = pd.DataFrame(y_true)
display(y_true_df)

display(y_true.shape)
y_true

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,425,426,427,428,429,430,431,432,433,434
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.888889,0.961538,0.985222,0.995025,0.090446,0.090446,0.090446,0.063398,0.063398,0.063398
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.047619,0.130548,0.310559,0.574713,0.806452,0.925926,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0,1.0,0.000999,0.002994,0.008929,0.026316,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


(10, 435)

array([[1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 0.06339797, 0.06339797,
        0.06339797],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        1.        ]])

## Run models

In [9]:
# Predict the coefficient matrix from the data, the targets and the
# perturbation matrix.
# NOTE(review): the five trailing positional constants are not explained in
# this notebook -- confirm their meaning against predict_coeffs' signature.
a_coeffs = bmra_prep.pathway_activity.prediction.predict_coeffs(
    x,
    y_true,
    pert_matrix,
    200_000,
    10,
    10,
    10,
    100,
)

In [10]:
# Label the coefficient matrix (rows: modules, columns: genes) and save it.
a_coeffs_df = pd.DataFrame(data=a_coeffs, index=modules, columns=genes)
a_coeffs_df.to_csv(os.path.join(out_dir, "a_coeffs.csv"))

# Number of non-zero coefficients per module, followed by the full table.
display(a_coeffs_df.astype(bool).sum(axis=1))
display(a_coeffs_df)

CDK1      977
CDK2      977
PAK       977
PI3K      977
ERK       977
BET       977
Aurora    977
TGFbR     977
VEGFR     977
PKC       977
dtype: int64

Unnamed: 0,AARS,ABCB6,ABCC5,ABCF1,ABCF3,ABHD4,ABHD6,ABL1,ACAA1,ACAT2,...,ZMIZ1,ZMYM2,ZNF131,ZNF274,ZNF318,ZNF395,ZNF451,ZNF586,ZNF589,ZW10
CDK1,3e-06,-1e-06,1.650118e-08,-9.753538e-07,1.596752e-06,8.424633e-07,-4.711912e-07,1.846161e-06,-3e-06,4.799547e-06,...,-4.741706e-06,1.576231e-07,-9.302753e-07,-1.977347e-06,3e-06,-1e-06,-5.975309e-08,9e-06,2.226739e-06,2.030874e-07
CDK2,1e-05,-8e-06,-6.827543e-06,-3.594457e-06,3.277888e-05,-2.671207e-05,4.039766e-06,8.676694e-05,-5e-06,0.0001296559,...,-1.614161e-06,2.424064e-05,1.714305e-05,-1.612732e-05,-1e-05,-0.000152,1.519752e-05,-1.9e-05,-7.539275e-05,-5.670155e-05
PAK,5.8e-05,-1.6e-05,2.419525e-05,7.245152e-05,1.072848e-05,-5.086661e-06,-1.304858e-05,5.131686e-05,-3e-06,5.799592e-05,...,6.460272e-06,5.811955e-06,-5.850897e-05,3.159624e-05,-2e-06,-0.000102,4.812807e-05,0.000187,8.720961e-07,9.25074e-05
PI3K,2e-05,8e-06,9.192569e-06,-9.344267e-06,-7.684031e-06,-1.377765e-05,7.067576e-06,-3.064829e-06,-1.4e-05,-1.122273e-05,...,-1.272188e-06,1.755408e-06,2.088218e-06,2.361379e-06,2e-06,-0.022952,5.670486e-06,6e-06,1.458751e-05,-9.659409e-07
ERK,1.8e-05,-4.4e-05,7.732725e-06,-4.038513e-05,1.751656e-05,-4.307593e-05,-1.432547e-06,0.01248449,0.000456,2.537973e-05,...,0.0001982442,-4.852546e-05,2.648105e-05,-8.680478e-05,5e-06,-4.1e-05,1.436406e-05,0.000187,-2.104862e-05,3.130586e-05
BET,-3e-06,-4e-06,-3.111687e-06,2.606648e-05,-6.092715e-07,4.804615e-08,-6.644645e-07,-1.76867e-07,3e-06,0.000981117,...,4.456806e-07,-1.168603e-05,-1.831635e-05,4.756291e-06,-6e-06,2.4e-05,-1.334466e-06,-7e-06,-4.658228e-06,6.325307e-06
Aurora,-9e-06,6e-06,-9.209875e-06,8.056622e-06,4.030957e-06,-0.0001518841,0.0002793493,0.01622645,-1.9e-05,0.0002556361,...,0.05603362,-1.159328e-05,8.850427e-06,-4.775656e-07,-9e-06,-2.2e-05,2.373604e-05,0.000406,2.085541e-05,-4.401755e-05
TGFbR,-1.5e-05,1.4e-05,1.128903e-05,1.141992e-05,-5.250191e-06,1.708113e-05,-1.113325e-05,1.295713e-05,6e-06,7.640237e-07,...,-1.869105e-05,-1.263048e-05,-1.952758e-05,-4.294147e-06,-1e-05,1.4e-05,-1.495913e-07,1.3e-05,7.473158e-06,-9.665407e-06
VEGFR,-1e-06,-2e-05,0.0001764608,-0.0001403436,2.240661e-05,7.762068e-05,-4.272717e-05,3.214363e-05,4e-06,0.0002672383,...,-3.227249e-05,5.265293e-05,0.0007128896,-8.730072e-06,-4e-05,-6e-05,-5.88498e-06,0.018685,0.0001586983,-6.936117e-05
PKC,-3e-06,-4e-06,2.660349e-06,-5.137403e-06,-4.862074e-06,2.290939e-05,1.439884e-05,-1.709836e-05,8e-06,-1.831236e-05,...,1.303858e-05,1.164412e-06,-7.41615e-06,-1.04935e-05,-4.7e-05,-7e-06,-8.238154e-06,0.006489,-4.047508e-05,-1.057441e-05


In [11]:
# Step 1: pathway activity from the data matrix and the predicted coefficients.
module_activity = bmra_prep.pathway_activity.calc_pathway_activity(x, a_coeffs)

# Step 2: global responses, labelled with the module names and the data columns.
R_global = bmra_prep.pathway_activity.calc_global_response_from_pathway_activity(
    module_activity, modules, Data_df.columns
)
R_global_df = R_global.dataframe

# Save the annotated table and show it.
R_global_df.to_csv(os.path.join(out_dir, "R_global_annotated.csv"))
display(R_global_df)

Unnamed: 0,LJP007_HUVEC_24H:B19,LJP007_HUVEC_24H:B20,LJP007_HUVEC_24H:B21,LJP007_HUVEC_24H:B22,LJP007_HUVEC_24H:B23,LJP007_HUVEC_24H:B24,LJP007_HUVEC_24H:C01,LJP007_HUVEC_24H:C02,LJP007_HUVEC_24H:C03,LJP007_HUVEC_24H:C04,...,REP.B011_HUVEC.A_24H:O09_2,REP.B011_HUVEC.A_24H:O10_2,REP.B011_HUVEC.A_24H:O11_2,REP.B011_HUVEC.A_24H:O12_2,siCDK2_STAT_vs_CTL_STAT,siCDK2_STAT_vs_CTL_STAT.1,siCDK2_STAT_vs_CTL_STAT.2,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.1,siCDK2_PSS_vs_CTL_STAT_ADJ_CTL_PSS_vs_CTL_STAT.2
CDK1,-0.241295,-0.049477,-0.118508,-0.064121,-0.059576,-0.085875,-0.034793,0.063252,0.001644,-0.064251,...,0.071982,-0.000142,-0.066307,-0.405803,-0.020968,-0.020968,-0.020968,-0.035646,-0.035646,-0.035646
CDK2,0.086828,0.034993,0.000712,-0.136315,-0.037666,0.006159,-0.139798,-0.084356,-0.065997,0.148656,...,-0.079839,-0.065996,-0.010269,-0.00283,-1.3835,-1.3835,-1.3835,-1.692456,-1.692456,-1.692456
PAK,-0.204719,0.053047,-0.012458,-0.235998,-0.010762,0.074992,-0.605308,-0.008513,0.148121,-0.03117,...,-0.048521,0.014415,-0.082089,0.115057,-0.042846,-0.042846,-0.042846,-0.081844,-0.081844,-0.081844
PI3K,-1.698457,-1.133142,-0.800476,-0.439644,-0.241075,-0.051966,-0.684202,-0.026276,-0.009121,-0.090468,...,-0.353995,0.063328,-0.43755,-0.038424,-0.013391,-0.013391,-0.013391,-0.28683,-0.28683,-0.28683
ERK,-0.173479,0.240846,-0.067897,-0.044627,-0.114116,0.103609,-1.98319,-0.149826,-0.041433,0.102618,...,-0.241006,0.176456,0.295115,0.127864,-0.199221,-0.199221,-0.199221,-0.325842,-0.325842,-0.325842
BET,-0.201354,-0.009742,-0.359697,-0.08766,0.423612,0.091018,0.334342,-0.014117,-0.171522,0.027898,...,-0.150613,0.537878,0.402017,0.372687,0.145809,0.145809,0.145809,0.51654,0.51654,0.51654
Aurora,-0.174423,0.115198,0.103587,0.379479,0.236328,-0.329926,-0.370444,-0.110243,-0.154645,-0.312533,...,-0.423675,0.058188,0.479511,-0.215884,0.00608,0.00608,0.00608,0.005779,0.005779,0.005779
TGFbR,0.048681,-0.100195,0.085931,0.108692,0.182509,0.090444,0.473756,0.222745,-0.086762,-0.10911,...,0.099206,0.064085,0.075046,0.090124,0.207455,0.207455,0.207455,0.189289,0.189289,0.189289
VEGFR,-1.102487,-0.044102,-0.567581,-0.286735,-0.374989,0.31545,-1.971104,-1.261839,-1.493498,-1.69093,...,0.04771,0.08788,0.342904,0.034504,-0.179015,-0.179015,-0.179015,-0.798205,-0.798205,-0.798205
PKC,0.043441,-0.023725,0.010902,0.015498,0.006574,0.017204,-0.007446,-0.012668,-0.004032,0.025198,...,0.003333,0.008737,0.001421,0.021584,5.2e-05,5.2e-05,5.2e-05,-0.070781,-0.070781,-0.070781
