In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_rel
from qgrid import show_grid
from sklearn.preprocessing import MinMaxScaler

In [2]:
def get_dl_test_score(scores_df, metric='r2', higher_is_better=True):
    """Return per-drug test scores at the epoch with the best median `metric`.

    Rows with ``run == 'test'`` are grouped by ``epoch`` and the median of
    ``metric`` is taken per epoch. The epoch with the best median is selected
    (the lowest epoch wins ties), printed, and the test rows of that epoch are
    returned.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Must contain the columns ``run``, ``epoch``, ``drug_id``, ``mae``,
        ``rmse``, ``corr``, ``r2`` and the column named by ``metric``.
    metric : str, default 'r2'
        Column used to rank epochs.
    higher_is_better : bool, default True
        If True the epoch with the largest median is chosen (the original
        behaviour); pass False when ranking by an error such as ``mae``.

    Returns
    -------
    pandas.DataFrame
        Columns ``drug_id``, ``test_mae``, ``test_rmse``, ``test_corr``,
        ``test_r2`` and a constant ``model`` column equal to ``'DL'``, with a
        fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no test row has a non-missing ``metric`` value.
    """
    is_test = scores_df['run'] == 'test'
    # Reduce only the ranking column: a frame-wide median() also visits the
    # non-numeric columns (run, drug_id), which raises TypeError on
    # pandas >= 2.0.
    median_by_epoch = (
        scores_df.loc[is_test].groupby('epoch')[metric].median().dropna())
    if median_by_epoch.empty:
        raise ValueError(
            f"no 'test' rows with a valid {metric!r} value in scores_df")
    # groupby sorts by epoch, so idxmax/idxmin return the lowest epoch on ties.
    if higher_is_better:
        epoch = median_by_epoch.idxmax()
    else:
        epoch = median_by_epoch.idxmin()
    print(epoch)
    tmp_test = (
        scores_df.loc[is_test & (scores_df['epoch'] == epoch),
                      ['drug_id', 'mae', 'rmse', 'corr', 'r2']]
        .rename(columns={
            'mae': 'test_mae',
            'corr': 'test_corr',
            'rmse': 'test_rmse',
            'r2': 'test_r2',
        })
        .reset_index(drop=True)
        .assign(model='DL'))
    return tmp_test

In [3]:
def get_dl_test_score_cls(scores_df, metric='auc', higher_is_better=True):
    """Return per-drug classification test scores at the best epoch.

    Rows with ``run == 'test'`` are grouped by ``epoch`` and the median of
    ``metric`` is taken per epoch. The epoch with the best median is selected
    (the lowest epoch wins ties) and the test rows of that epoch are returned.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Must contain the columns ``run``, ``epoch``, ``drug_id``,
        ``accuracy``, ``auc`` and the column named by ``metric``.
    metric : str, default 'auc'
        Column used to rank epochs.
    higher_is_better : bool, default True
        If True the epoch with the largest median is chosen (the original
        behaviour); pass False to choose the smallest instead.

    Returns
    -------
    pandas.DataFrame
        Columns ``drug_id``, ``test_acc``, ``test_auc`` and a constant
        ``model`` column equal to ``'DL'``, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no test row has a non-missing ``metric`` value.
    """
    is_test = scores_df['run'] == 'test'
    # Reduce only the ranking column: a frame-wide median() also visits the
    # non-numeric columns (run, drug_id), which raises TypeError on
    # pandas >= 2.0.
    median_by_epoch = (
        scores_df.loc[is_test].groupby('epoch')[metric].median().dropna())
    if median_by_epoch.empty:
        raise ValueError(
            f"no 'test' rows with a valid {metric!r} value in scores_df")
    # groupby sorts by epoch, so idxmax/idxmin return the lowest epoch on ties.
    if higher_is_better:
        epoch = median_by_epoch.idxmax()
    else:
        epoch = median_by_epoch.idxmin()
    tmp_test = (
        scores_df.loc[is_test & (scores_df['epoch'] == epoch),
                      ['drug_id', 'accuracy', 'auc']]
        .rename(columns={
            'accuracy': 'test_acc',
            'auc': 'test_auc',
        })
        .reset_index(drop=True)
        .assign(model='DL'))
    return tmp_test

# DL

## mutation

In [6]:
# Read the DeepMultiOmicNet score file stored under the `mutation` work directory.
dl_mutation = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation/scores_202107061708_DOIT_LRP.csv.gz"
)

In [15]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_mutation.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.01372
dtype: float64
corr    0.151319
dtype: float64
mae    1.044603
dtype: float64


## cnv

In [16]:
# Read the DeepMultiOmicNet score file stored under the `cnv` work directory.
dl_cnv = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/cnv/scores_202107061709_DOIT_LRP.csv.gz"
)

In [17]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_cnv.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.072722
dtype: float64
corr    0.29056
dtype: float64
mae    1.011733
dtype: float64


## rna

In [18]:
# Read the DeepMultiOmicNet score file stored under the `rna` work directory.
dl_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/rna/scores_202107061710_DOIT_LRP.csv.gz"
)

In [19]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_rna.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.216283
dtype: float64
corr    0.481545
dtype: float64
mae    0.91897
dtype: float64


## mutation cnv

In [30]:
# Read the DeepMultiOmicNet score file stored under the `mutation_cnv` work directory.
dl_mutation_cnv = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation_cnv/scores_202107071113_DOIT_LRP.csv.gz"
)

In [31]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_mutation_cnv.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.080649
dtype: float64
corr    0.306296
dtype: float64
mae    1.006256
dtype: float64


## mutation rna

In [32]:
# Read the DeepMultiOmicNet score file stored under the `mutation_rna` work directory.
dl_mutation_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation_rna/scores_202107071113_DOIT_LRP.csv.gz"
)

In [33]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_mutation_rna.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.222617
dtype: float64
corr    0.488953
dtype: float64
mae    0.914403
dtype: float64


In [59]:
# Read the ec_rf score file stored under the `mutation_rna` work directory.
rf_mutation_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_rf/mutation_rna/scores_202107101028_ec_rf_lrp_drug_mutation_rna.csv"
)

In [60]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = rf_mutation_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.20586930699746028
0.47464646051493437
0.9275357736091712


## cnv rna

In [28]:
# Read the DeepMultiOmicNet score file stored under the `cnv_rna` work directory.
dl_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/cnv_rna/scores_202107071123_DOIT_LRP.csv.gz"
)

In [29]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_cnv_rna.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.220573
dtype: float64
corr    0.486118
dtype: float64
mae    0.915394
dtype: float64


## mutation cnv rna

### DL

In [40]:
# Read the DeepMultiOmicNet score file stored under the `mutation_cnv_rna` work directory.
dl_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation_cnv_rna/scores_202107081045_DOIT_LRP.csv.gz"
)

In [41]:
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
# NOTE(review): if `run` labels data splits (get_dl_test_score filters
# run == 'test'), this averages across splits — confirm that is intended.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_mutation_cnv_rna.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.221218
dtype: float64
corr    0.490467
dtype: float64
mae    0.915229
dtype: float64


In [24]:
# NOTE(review): this cell repeats the summary of `dl_mutation_cnv_rna` from the
# cell directly above it. Its execution count (24) is lower than that cell's
# (41) and its recorded output differs, so it was presumably run against a
# different load of `dl_mutation_cnv_rna` — confirm which result is current
# and drop the stale cell.
# For each metric: best value per (drug_id, run) group (max for r2/corr, min
# for mae), averaged per drug, then the median across drugs.
# Only the metric column is aggregated; reducing the whole frame also sends the
# non-numeric `run` column through mean(), which raises TypeError on
# pandas >= 2.0.
for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min')):
    best_per_run = dl_mutation_cnv_rna.groupby(['drug_id', 'run'])[[metric]].agg(best)
    print(best_per_run.groupby(level='drug_id').mean().median())

r2    0.223548
dtype: float64
corr    0.488805
dtype: float64
mae    0.915843
dtype: float64


### RF

In [61]:
# Read the ec_rf score file (the `..._ec_rf_lrp_drug_...` run) stored under the
# `mutation_cnv_rna` work directory.
rf_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_rf/mutation_cnv_rna/scores_202107101239_ec_rf_lrp_drug_mutation_cnv_rna.csv"
)

In [63]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = rf_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.20550677398248285
0.47485134869361156
0.9294312362792185


In [71]:
# Read the ec_rf score file (the `..._ec_rf_drug_...` run) stored under the
# `mutation_cnv_rna` work directory.
# NOTE(review): this rebinds `rf_mutation_cnv_rna`, replacing the frame read
# from scores_202107101239_ec_rf_lrp_drug_mutation_cnv_rna.csv earlier in the
# notebook; consider a distinct name so both results stay available.
rf_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_rf/mutation_cnv_rna/scores_202107121639_ec_rf_drug_mutation_cnv_rna.csv"
)

In [72]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = rf_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.20471526066506052
0.4735326781295234
0.9288508839850542


### EN

In [64]:
# Read the ec_en score file stored under the `mutation_cnv_rna` work directory.
en_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_en/mutation_cnv_rna/scores_202107110149_ec_en_lrp_drug_mutation_cnv_rna.csv"
)

In [65]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = en_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.17117615136314104
0.43910758231574215
0.9477204437076046


### SVML

In [66]:
# Read the ec_svml score file stored under the `mutation_cnv_rna` work directory.
svml_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_svml/mutation_cnv_rna/scores_202107110205_ec_svml_lrp_drug_mutation_cnv_rna.csv"
)

In [67]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = svml_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

-0.09942953007242837
0.3824298132459269
1.0914120870999893


### mixomics

In [68]:
# Read the mixOmics score file for the sanger mutation_cnv_rna / ctd2 run.
mixomics_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/mixOmics/sanger_mutation_cnv_rna_ctd2.csv"
)

In [70]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = mixomics_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.08115820467385244
0.35061272507510033
1.0113337753428384


### pca_rf

In [73]:
# Read the pca_rf score file stored under the `mutation_cnv_rna` work directory.
pca_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/pca_rf/mutation_cnv_rna/scores_202107121818_pca_rf_lrp_drug_mutation_cnv_rna.csv"
)

In [74]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = pca_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.13294702383675439
0.4122781361428156
0.9800546307137779


### moCluster_rf

In [75]:
# Read the moCluster_rf score file stored under the `mutation_cnv_rna` work directory.
moCluster_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/moCluster_rf/mutation_cnv_rna/scores_202107121932_moCluster_rf_lrp_drug_mutation_cnv_rna.csv"
)

In [76]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae.
for metric in ('r2', 'corr', 'mae'):
    per_drug_mean = moCluster_mutation_cnv_rna.groupby('drug_id')[metric].mean()
    print(per_drug_mean.median())

0.12960323626430884
0.3959262454767463
0.9786557913847929
