In [1]:
import os
import pandas as pd
import numpy as np
np.set_printoptions(precision=2)

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from collections import Counter

sns.set_style('ticks')

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 300
mpl.rc("savefig", dpi=300)

from scipy.special import xlogy

##### Read files and select drugs

In [2]:
# log2_median_ic50, log2_median_ic50_9f, log2_median_ic50_hn, log2_median_ic50_9f_hn, log2_median_ic50_3f_hn, log2_max_conc
ref_type = 'log2_median_ic50_hn' # log2_median_ic50_3f_hn | log2_median_ic50_hn
model_name = 'hn_drug_cw_dw10_100000_model' # hn_drug_cw_dw10_100000_model | hn_drug_cw_dw1_100000_model | hn_drug_cw_dwsim10_100000_model

dosage_shifted = False

In [3]:
norm_type = 'patient_TPM'

current_dir = '../result/HN_model/{}/'.format(norm_type)

In [4]:
drug_info_df = pd.read_csv('../preprocessed_data/GDSC/hn_drug_stat.csv', index_col=0)
drug_info_df.index = drug_info_df.index.astype(str)

drug_id_name_dict = dict(zip(drug_info_df.index, drug_info_df['Drug Name'].values))

drug_info_df.head()

Unnamed: 0_level_0,Drug Name,Synonyms,Target,Target Pathway,Selleckchem Cat#,CAS number,PubCHEM,Others,entropy,max_conc,...,median_ic50_9f,log2_median_ic50_9f,log2_median_ic50_hn,median_ic50_hn,median_ic50_3f_hn,log2_median_ic50_3f_hn,median_ic50_9f_hn,log2_median_ic50_9f_hn,num_sensitive,num_sensitive_hn
Drug ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,AICA Ribonucleotide,"AICAR, N1-(b-D-Ribofuranosyl)-5-aminoimidazole...",AMPK agonist,Metabolism,S1802,2627-69-2,65110,,6.034272,2000.0,...,206.74838,7.691732,9.939784,982.139588,327.379863,8.354822,109.126621,6.769859,476,27
1003,Camptothecin,"7-Ethyl-10-Hydroxy-Camptothecin, SN-38, Irinot...",TOP1,DNA replication,S1288,7689-03-4,104842,"(SN-38, S4908, 86639-52-3) (Irinotecan, S1198,...",4.60953,0.1,...,0.002003,-8.963413,-7.587491,0.005199,0.001733,-9.172454,0.000578,-10.757416,688,30
1004,Vinblastine,Velban,Microtubule destabiliser,Mitosis,S1248,143-67-9,6710780,,4.297122,0.1,...,0.001599,-9.289051,-7.150982,0.007036,0.002345,-8.735945,0.000782,-10.320907,753,33
1006,Cytarabine,"Ara-Cytidine, Arabinosyl Cytosine, U-19920",Antimetabolite,DNA replication,S1648,147-94-4,6253,,6.646594,2.0,...,0.163032,-2.616771,-1.342632,0.394301,0.131434,-2.927594,0.043811,-4.512557,508,25
1007,Docetaxel,"RP-56976, Taxotere",Microtubule stabiliser,Mitosis,S1148,114977-28-5,148124,,4.220984,0.0125,...,0.000761,-10.358915,-9.792998,0.001127,0.000376,-11.37796,0.000125,-12.962923,584,32


In [5]:
tested_drug_list = [1032, 1007, 133, 201, 1010] + [182, 301, 302] + [1012]
[drug_id_name_dict[str(d)] for d in tested_drug_list]

['Afatinib',
 'Docetaxel',
 'Doxorubicin',
 'Epothilone B',
 'Gefitinib',
 'Obatoclax Mesylate',
 'PHA-793887',
 'PI-103',
 'Vorinostat']

In [10]:
if dosage_shifted:
    single_drug_pred_df = pd.read_csv(current_dir + 'pred_drug_kill_{}_{}_shifted.csv'.format(ref_type, model_name))
else:
    single_drug_pred_df = pd.read_csv(current_dir + 'pred_drug_kill_{}_{}.csv'.format(ref_type, model_name))


single_drug_pred_df.loc[:, 'drug_id'] = single_drug_pred_df.loc[:, 'drug_id'].values.astype(str)
single_drug_pred_df.loc[:, 'drug_name'] = [drug_id_name_dict[d] for d in single_drug_pred_df.loc[:, 'drug_id'].values]

patient_list = sorted(list(set(single_drug_pred_df['patient'])))
# sel_drug_id_list = sorted(list(set(single_drug_pred_df['drug_id'])))

single_drug_pred_df.loc[:, 'cluster_p'] = 1
single_drug_pred_df.loc[:, 'cluster_kill'] = single_drug_pred_df['kill']

single_drug_pred_df.head()

Unnamed: 0,patient,drug_id,kill,drug_name,cluster_p,cluster_kill
0,HN120,1001,39.049925,AICA Ribonucleotide,1,39.049925
1,HN120,1003,39.842087,Camptothecin,1,39.842087
2,HN120,1004,35.492878,Vinblastine,1,35.492878
3,HN120,1006,38.690576,Cytarabine,1,38.690576
4,HN120,1007,10.27645,Docetaxel,1,10.27645


##### List all drug pairs

In [11]:
drug_combi_list = []
n_drugs = len(tested_drug_list)

for p in patient_list:
    for x in range(0, n_drugs-1):
        for y in range(x+1, n_drugs):
            drug_x = str(tested_drug_list[x])
            drug_y = str(tested_drug_list[y])

            drug_combi_list += [[p, drug_x, drug_y]]

drug_combi_df = pd.DataFrame(drug_combi_list, columns=['patient', 'A', 'B'])

print (drug_combi_df.shape)
drug_combi_df.head()

(216, 3)


Unnamed: 0,patient,A,B
0,HN120,1032,1007
1,HN120,1032,133
2,HN120,1032,201
3,HN120,1032,1010
4,HN120,1032,182


##### Get pred and info for each drug

In [13]:
merge_df = pd.merge(drug_combi_df, single_drug_pred_df, how='left', left_on=['patient', 'A'], right_on=['patient', 'drug_id'])
drug_combi_pred_df = pd.merge(merge_df, single_drug_pred_df[['patient', 'drug_id', 'drug_name', 'cluster_kill', 'kill']], how='left', left_on=['patient', 'B'], right_on=['patient', 'drug_id'], suffixes=['_A', '_B'])

In [14]:
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,kill_A,drug_name_A,cluster_p,cluster_kill_A,drug_id_B,drug_name_B,cluster_kill_B,kill_B
0,HN120,1032,1007,1032,10.981192,Afatinib,1,10.981192,1007,Docetaxel,10.27645,10.27645
1,HN120,1032,133,1032,10.981192,Afatinib,1,10.981192,133,Doxorubicin,86.277729,86.277729
2,HN120,1032,201,1032,10.981192,Afatinib,1,10.981192,201,Epothilone B,77.297971,77.297971
3,HN120,1032,1010,1032,10.981192,Afatinib,1,10.981192,1010,Gefitinib,16.60154,16.60154
4,HN120,1032,182,1032,10.981192,Afatinib,1,10.981192,182,Obatoclax Mesylate,75.530122,75.530122


In [41]:
rows = []
for _, data in drug_combi_pred_df.iterrows():

    
    cluster_kill_A = data['cluster_kill_A']
    cluster_kill_B = data['cluster_kill_B']
    cluster_kill_C = cluster_kill_A + cluster_kill_B - np.multiply(cluster_kill_A/100, cluster_kill_B/100)*100
    kill_C = cluster_kill_C
    
    best_kill = np.max([data['kill_A'], data['kill_B']])
    improve = kill_C - best_kill
    improve_p = (kill_C - best_kill) / best_kill
    
    sum_kill_dif = np.sum(np.abs(cluster_kill_A - cluster_kill_B))
    
    ##### save output #####
    
    rows += [[kill_C, improve, improve_p, sum_kill_dif]]

In [42]:
drug_combi_pred_df = pd.concat([drug_combi_pred_df, pd.DataFrame(rows, columns=['kill_C', 'improve', 'improve_p', 'sum_kill_dif'])], axis=1)
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,kill_A,drug_name_A,cluster_p,cluster_kill_A,drug_id_B,drug_name_B,cluster_kill_B,kill_B,kill_C,improve,improve_p,sum_kill_dif,kill_C.1,improve.1,improve_p.1,sum_kill_dif.1
0,HN120,1032,1007,1032,10.981192,Afatinib,1,10.981192,1007,Docetaxel,10.27645,10.27645,20.129166,9.147974,0.833058,0.704742,20.129166,9.147974,0.833058,0.704742
1,HN120,1032,133,1032,10.981192,Afatinib,1,10.981192,133,Doxorubicin,86.277729,86.277729,87.784598,1.506869,0.017465,75.296537,87.784598,1.506869,0.017465,75.296537
2,HN120,1032,201,1032,10.981192,Afatinib,1,10.981192,201,Epothilone B,77.297971,77.297971,79.790924,2.492953,0.032251,66.316779,79.790924,2.492953,0.032251,66.316779
3,HN120,1032,1010,1032,10.981192,Afatinib,1,10.981192,1010,Gefitinib,16.60154,16.60154,25.759685,9.158145,0.551644,5.620348,25.759685,9.158145,0.551644,5.620348
4,HN120,1032,182,1032,10.981192,Afatinib,1,10.981192,182,Obatoclax Mesylate,75.530122,75.530122,78.217207,2.687084,0.035576,64.54893,78.217207,2.687084,0.035576,64.54893


In [44]:
drug_combi_pred_df = drug_combi_pred_df[['patient', 'drug_id_A', 'drug_name_A', 'drug_id_B', 'drug_name_B', 'cluster_p', 'cluster_kill_A', 'cluster_kill_B', 'kill_A', 'kill_B', 'kill_C', 'improve', 'improve_p', 'sum_kill_dif']]

drug_combi_pred_df.head()

Unnamed: 0,patient,drug_id_A,drug_name_A,drug_id_B,drug_name_B,cluster_p,cluster_kill_A,cluster_kill_B,kill_A,kill_B,kill_C,kill_C.1,improve,improve.1,improve_p,improve_p.1,sum_kill_dif,sum_kill_dif.1
0,HN120,1032,Afatinib,1007,Docetaxel,1,10.981192,10.27645,10.981192,10.27645,20.129166,20.129166,9.147974,9.147974,0.833058,0.833058,0.704742,0.704742
1,HN120,1032,Afatinib,133,Doxorubicin,1,10.981192,86.277729,10.981192,86.277729,87.784598,87.784598,1.506869,1.506869,0.017465,0.017465,75.296537,75.296537
2,HN120,1032,Afatinib,201,Epothilone B,1,10.981192,77.297971,10.981192,77.297971,79.790924,79.790924,2.492953,2.492953,0.032251,0.032251,66.316779,66.316779
3,HN120,1032,Afatinib,1010,Gefitinib,1,10.981192,16.60154,10.981192,16.60154,25.759685,25.759685,9.158145,9.158145,0.551644,0.551644,5.620348,5.620348
4,HN120,1032,Afatinib,182,Obatoclax Mesylate,1,10.981192,75.530122,10.981192,75.530122,78.217207,78.217207,2.687084,2.687084,0.035576,0.035576,64.54893,64.54893


In [46]:
if dosage_shifted:
    drug_combi_pred_df.to_csv(current_dir + 'pred_combi_kill_{}_{}_shifted.csv'.format(ref_type, model_name), index=False)
else:
    drug_combi_pred_df.to_csv(current_dir + 'pred_combi_kill_{}_{}.csv'.format(ref_type, model_name), index=False)