In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_rel
from qgrid import show_grid
from sklearn.preprocessing import MinMaxScaler

In [2]:
def get_dl_test_score(scores_df, metric='r2', higher_is_better=None):
    """Return the test-run scores at the epoch whose median `metric` is best.

    Rows with ``run == 'test'`` are grouped by ``epoch`` and the median of
    `metric` is computed per epoch. The best epoch is the one with the
    highest median, or the lowest for the error metrics ``'mae'`` and
    ``'rmse'``; ties go to the smallest epoch. The test rows of that epoch
    are returned with their score columns prefixed by ``test_``.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Must contain the columns ``run``, ``epoch``, ``drug_id``, ``mae``,
        ``rmse``, ``corr``, ``r2`` and the column named by `metric`.
    metric : str, default 'r2'
        Column used to choose the epoch.
    higher_is_better : bool, optional
        Direction of `metric`. When omitted it is inferred: ``False`` for
        ``'mae'`` and ``'rmse'``, ``True`` for anything else.

    Returns
    -------
    pandas.DataFrame
        Columns ``drug_id``, ``test_mae``, ``test_rmse``, ``test_corr``,
        ``test_r2`` and a constant ``model`` column equal to ``'DL'``,
        indexed 0..n-1.

    Raises
    ------
    IndexError
        If there are no test rows, or every per-epoch median is NaN.
    """
    if higher_is_better is None:
        higher_is_better = metric not in ('mae', 'rmse')

    test_rows = scores_df[scores_df['run'] == 'test']
    # Pick the metric column before aggregating: a whole-frame median() also
    # hits the string `run` column, which raises TypeError on pandas >= 2.0.
    epoch_median = test_rows.groupby('epoch')[metric].median()
    best_value = epoch_median.max() if higher_is_better else epoch_median.min()
    # The group index is sorted by epoch, so [0] is the earliest best epoch.
    epoch = epoch_median[epoch_median == best_value].index[0]

    tmp_test = (
        test_rows.loc[test_rows['epoch'] == epoch,
                      ['drug_id', 'mae', 'rmse', 'corr', 'r2']]
        .rename(columns={
            'mae': 'test_mae',
            'corr': 'test_corr',
            'rmse': 'test_rmse',
            'r2': 'test_r2',
        })
        .reset_index(drop=True)
    )
    tmp_test['model'] = 'DL'
    return tmp_test

In [3]:
def get_dl_test_score_cls(scores_df, metric='auc'):
    """Return the test-run classification scores at the best-median epoch.

    Rows with ``run == 'test'`` are grouped by ``epoch`` and the median of
    `metric` is computed per epoch. The epoch with the highest median is
    chosen (the smallest such epoch on ties) and its test rows are returned.

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Must contain the columns ``run``, ``epoch``, ``drug_id``,
        ``accuracy``, ``auc`` and the column named by `metric`.
    metric : str, default 'auc'
        Column used to choose the epoch; higher is treated as better.

    Returns
    -------
    pandas.DataFrame
        Columns ``drug_id``, ``test_acc``, ``test_auc`` and a constant
        ``model`` column equal to ``'DL'``, indexed 0..n-1.

    Raises
    ------
    IndexError
        If there are no test rows, or every per-epoch median is NaN.
    """
    test_rows = scores_df[scores_df['run'] == 'test']
    # Pick the metric column before aggregating: a whole-frame median() also
    # hits the string `run` column, which raises TypeError on pandas >= 2.0.
    epoch_median = test_rows.groupby('epoch')[metric].median()
    # The group index is sorted by epoch, so [0] is the earliest best epoch.
    epoch = epoch_median[epoch_median == epoch_median.max()].index[0]

    tmp_test = (
        test_rows.loc[test_rows['epoch'] == epoch,
                      ['drug_id', 'accuracy', 'auc']]
        .rename(columns={
            'accuracy': 'test_acc',
            'auc': 'test_auc',
        })
        .reset_index(drop=True)
    )
    tmp_test['model'] = 'DL'
    return tmp_test

# DL

## mutation

In [6]:
# Load the DeepMultiOmicNet results from the `mutation` work directory.
# NOTE(review): this path ends in ".../mutation/sc" with no file extension,
# unlike the other DeepMultiOmicNet loads in this notebook -- it looks
# truncated; confirm the intended scores file.
dl_mutation = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation/sc"
)

In [15]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_mutation.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.01372
dtype: float64
corr    0.151319
dtype: float64
mae    1.044603
dtype: float64


## cnv

In [16]:
# Load the DeepMultiOmicNet score file from the `cnv` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
dl_cnv = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/cnv/scores_202107061709_DOIT_LRP.csv.gz"
)

In [17]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_cnv.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.072722
dtype: float64
corr    0.29056
dtype: float64
mae    1.011733
dtype: float64


## rna

In [18]:
# Load the DeepMultiOmicNet score file from the `rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
dl_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/rna/scores_202107061710_DOIT_LRP.csv.gz"
)

In [19]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_rna.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.216283
dtype: float64
corr    0.481545
dtype: float64
mae    0.91897
dtype: float64


## mutation cnv

In [30]:
# Load the DeepMultiOmicNet score file from the `mutation_cnv` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
dl_mutation_cnv = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation_cnv/scores_202107071113_DOIT_LRP.csv.gz"
)

In [31]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_mutation_cnv.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.080649
dtype: float64
corr    0.306296
dtype: float64
mae    1.006256
dtype: float64


## mutation rna

In [34]:
# Load the DeepMultiOmicNet score file from the `mutation_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
dl_mutation_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation_rna/scores_202107151210_DOIT_LRP.csv.gz"
)

In [35]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_mutation_rna.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.163933
dtype: float64
corr    0.421864
dtype: float64
mae    0.880644
dtype: float64


## cnv rna

In [28]:
# Load the DeepMultiOmicNet score file from the `cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
dl_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/cnv_rna/scores_202107071123_DOIT_LRP.csv.gz"
)

In [29]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_cnv_rna.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.220573
dtype: float64
corr    0.486118
dtype: float64
mae    0.915394
dtype: float64


## mutation cnv rna

### DL

In [23]:
# Load the DeepMultiOmicNet score file from the `mutation_cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
dl_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/DeepMultiOmicNet/mutation_cnv_rna/scores_202107142106_DOIT_LRP.csv.gz"
)

In [24]:
# For each metric: take the best value per (drug_id, run) -- max for r2/corr,
# min for mae -- average those per drug, then report the median across drugs.
# The metric column is selected explicitly at both stages so the string `run`
# column is never averaged (that raises TypeError on pandas >= 2.0).
# NOTE(review): the per-drug mean covers every `run` value present -- confirm
# that is the intended set of runs.
print(*(
    dl_mutation_cnv_rna.groupby(['drug_id', 'run'])[metric].agg(best)
    .reset_index()
    .groupby('drug_id')[[metric]].mean()
    .median()
    for metric, best in (('r2', 'max'), ('corr', 'max'), ('mae', 'min'))
), sep='\n')

r2    0.158727
dtype: float64
corr    0.414859
dtype: float64
mae    0.88423
dtype: float64


### RF

In [10]:
# Load the score file from the `ec_rf/mutation_cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
rf_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_rf/mutation_cnv_rna/scores_202107131442_ec_rf_lrp_drug_mutation_cnv_rna.csv"
)

In [5]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae
# (one value per line, in that order).
print(*(
    rf_mutation_cnv_rna.groupby(['drug_id'])[score_col].mean().median()
    for score_col in ('r2', 'corr', 'mae')
), sep='\n')

0.10742289371921449
0.36278892579705785
0.8699715028691898


### EN

In [20]:
# Load the score file from the `ec_en/mutation_cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
en_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_en/mutation_cnv_rna/scores_202107131309_ec_en_lrp_drug_mutation_cnv_rna.csv"
)

In [21]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae
# (one value per line, in that order).
print(*(
    en_mutation_cnv_rna.groupby(['drug_id'])[score_col].mean().median()
    for score_col in ('r2', 'corr', 'mae')
), sep='\n')

0.09379873899421615
0.3444979183491435
0.8807976081624196


### SVML

In [25]:
# Load the score file from the `ec_svml/mutation_cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
svml_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/ec_svml/mutation_cnv_rna/scores_202107131721_ec_svml_lrp_drug_mutation_cnv_rna.csv"
)

In [26]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae
# (one value per line, in that order).
print(*(
    svml_mutation_cnv_rna.groupby(['drug_id'])[score_col].mean().median()
    for score_col in ('r2', 'corr', 'mae')
), sep='\n')

-0.17870422146055567
0.26419208871528427
1.0305523418313904


### mixOmics

In [28]:
# Load the score file from the `mixOmics` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
mixomics_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/mixOmics/sanger_mutation_cnv_rna_ctd2_50_comp.csv"
)

In [29]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae
# (one value per line, in that order).
print(*(
    mixomics_mutation_cnv_rna.groupby(['drug_id'])[score_col].mean().median()
    for score_col in ('r2', 'corr', 'mae')
), sep='\n')

0.07189429343095705
0.2940086328484131
0.9580974571832607


### pca_rf

In [36]:
# Load the score file from the `pca_rf/mutation_cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
pca_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/pca_rf/mutation_cnv_rna/scores_202107131920_pca_rf_lrp_drug_mutation_cnv_rna.csv"
)

In [37]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae
# (one value per line, in that order).
print(*(
    pca_mutation_cnv_rna.groupby(['drug_id'])[score_col].mean().median()
    for score_col in ('r2', 'corr', 'mae')
), sep='\n')

0.04277511339905521
0.2596523140673153
0.9256506429336924


### moCluster_rf

In [32]:
# Load the score file from the `moCluster_rf/mutation_cnv_rna` work directory.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
moCluster_mutation_cnv_rna = pd.read_csv(
    "../../work_dirs/sanger_ctd2/moCluster_rf/mutation_cnv_rna/scores_202107131722_moCluster_rf_lrp_drug_mutation_cnv_rna.csv"
)

In [33]:
# Median across drugs of each drug's mean score, printed for r2, corr and mae
# (one value per line, in that order).
print(*(
    moCluster_mutation_cnv_rna.groupby(['drug_id'])[score_col].mean().median()
    for score_col in ('r2', 'corr', 'mae')
), sep='\n')

0.042669287612932136
0.25527278426222344
0.9252425867286256
