In [66]:
# --- Notebook setup: imports plus global display / warning settings ---
import os
import pandas as pd
import numpy as np
# Show numpy arrays with two digits after the decimal point when printed.
np.set_printoptions(precision=2)

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from collections import Counter

# seaborn 'ticks' theme for all figures.
sns.set_style('ticks')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import warnings
# NOTE: this silences every warning for the rest of the session, including
# library deprecation warnings that would otherwise flag outdated calls.
warnings.filterwarnings('ignore')
import matplotlib as mpl
# 300 dpi both for figures shown in the notebook and for figures saved to disk.
mpl.rcParams['figure.dpi']= 300
mpl.rc("savefig", dpi=300)

from scipy.special import xlogy

##### Read files and select drugs

In [67]:
# Run configuration. `ref_type` and `model_name` are substituted into the names
# of the prediction file that is read and the combination file that is written
# ('pred_*_kill_{ref_type}_{model_name}[_shifted].csv').
#
# Known `ref_type` values:
# log2_median_ic50, log2_median_ic50_9f, log2_median_ic50_hn, log2_median_ic50_9f_hn, log2_median_ic50_3f_hn, log2_max_conc
ref_type = 'log2_median_ic50_hn' # log2_median_ic50_3f_hn | log2_median_ic50_hn
# Known `model_name` values are listed in the trailing comment.
model_name = 'hn_drug_cw_dw10_100000_model' # hn_drug_cw_dw10_100000_model | hn_drug_cw_dw1_100000_model | hn_drug_cw_dwsim10_100000_model

# When True, the '_shifted' variants of the input and output CSV files are used.
dosage_shifted = False

In [68]:
# Name of the result sub-folder to work in (presumably the expression
# normalisation the model was run on -- confirm against the pipeline).
norm_type = 'TPM'

# Folder that holds the single-drug predictions and receives the combination output.
current_dir = '../result/HN_model/' + norm_type + '/'

In [69]:
# Per-drug statistics table; the first CSV column is used as the index.
drug_stat_path = '../preprocessed_data/GDSC/hn_drug_stat.csv'
drug_info_df = pd.read_csv(drug_stat_path, index_col=0)
# Index labels are converted to str so lookups can use string drug ids.
drug_info_df.index = drug_info_df.index.astype(str)

# Lookup table: drug id (str) -> value of the 'Drug Name' column.
drug_id_name_dict = {
    drug_id: drug_name
    for drug_id, drug_name in zip(drug_info_df.index, drug_info_df['Drug Name'].values)
}

drug_info_df.head()

Unnamed: 0_level_0,Drug Name,Synonyms,Target,Target Pathway,Selleckchem Cat#,CAS number,PubCHEM,Others,entropy,max_conc,...,median_ic50_9f,log2_median_ic50_9f,log2_median_ic50_hn,median_ic50_hn,median_ic50_3f_hn,log2_median_ic50_3f_hn,median_ic50_9f_hn,log2_median_ic50_9f_hn,num_sensitive,num_sensitive_hn
Drug ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,AICA Ribonucleotide,"AICAR, N1-(b-D-Ribofuranosyl)-5-aminoimidazole...",AMPK agonist,Metabolism,S1802,2627-69-2,65110,,6.034272,2000.0,...,206.74838,7.691732,9.939784,982.139588,327.379863,8.354822,109.126621,6.769859,476,27
1003,Camptothecin,"7-Ethyl-10-Hydroxy-Camptothecin, SN-38, Irinot...",TOP1,DNA replication,S1288,7689-03-4,104842,"(SN-38, S4908, 86639-52-3) (Irinotecan, S1198,...",4.60953,0.1,...,0.002003,-8.963413,-7.587491,0.005199,0.001733,-9.172454,0.000578,-10.757416,688,30
1004,Vinblastine,Velban,Microtubule destabiliser,Mitosis,S1248,143-67-9,6710780,,4.297122,0.1,...,0.001599,-9.289051,-7.150982,0.007036,0.002345,-8.735945,0.000782,-10.320907,753,33
1006,Cytarabine,"Ara-Cytidine, Arabinosyl Cytosine, U-19920",Antimetabolite,DNA replication,S1648,147-94-4,6253,,6.646594,2.0,...,0.163032,-2.616771,-1.342632,0.394301,0.131434,-2.927594,0.043811,-4.512557,508,25
1007,Docetaxel,"RP-56976, Taxotere",Microtubule stabiliser,Mitosis,S1148,114977-28-5,148124,,4.220984,0.0125,...,0.000761,-10.358915,-9.792998,0.001127,0.000376,-11.37796,0.000125,-12.962923,584,32


In [70]:
# Integer drug ids to evaluate; assembled from the same three groups, in the same order.
tested_drug_list = [1032, 1007, 133, 201, 1010]
tested_drug_list.extend([182, 301, 302])
tested_drug_list.append(1012)

# Show the drug name for each selected id (ids are looked up in their str form).
[drug_id_name_dict[str(drug_id)] for drug_id in tested_drug_list]

['Afatinib',
 'Docetaxel',
 'Doxorubicin',
 'Epothilone B',
 'Gefitinib',
 'Obatoclax Mesylate',
 'PHA-793887',
 'PI-103',
 'Vorinostat']

In [71]:
# Single-drug prediction file for the configured reference type and model;
# the '_shifted' variant is read when `dosage_shifted` is set.
pred_file_template = 'pred_drug_kill_{}_{}_shifted.csv' if dosage_shifted else 'pred_drug_kill_{}_{}.csv'
single_drug_pred_df = pd.read_csv(current_dir + pred_file_template.format(ref_type, model_name))

# Convert drug ids to str by replacing the whole column. Assigning the strings
# through `.loc[:, 'drug_id'] = ...` writes them into the existing column, which
# for a numeric column relies on silent upcasting -- deprecated since pandas 2.1
# and slated to become an error. Whole-column assignment has no such dependency.
single_drug_pred_df['drug_id'] = single_drug_pred_df['drug_id'].astype(str)

# Strict lookup: an id missing from `drug_id_name_dict` raises KeyError rather
# than producing a silent NaN name.
single_drug_pred_df['drug_name'] = [drug_id_name_dict[d] for d in single_drug_pred_df['drug_id']]

# Distinct patient labels, sorted.
patient_list = sorted(set(single_drug_pred_df['patient']))
# sel_drug_id_list = sorted(list(set(single_drug_pred_df['drug_id'])))

single_drug_pred_df.head()

Unnamed: 0,patient,drug_id,cluster,cluster_p,cluster_delta,delta,cluster_kill,kill,drug_name
0,HN120,1001,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,1.1490924101634|1.0228224713013|0.807486458736...,0.908454,31.07768069322|32.982724341963|36.361526571858...,32.378064,AICA Ribonucleotide
1,HN120,1003,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.82259227449208|0.80678235428939|0.6355808967...,0.66398,36.119586501242|36.372820715572|39.16096594173...,36.265572,Camptothecin
2,HN120,1004,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,1.1329744265808|0.94920159776164|0.94811144743...,0.937689,31.317486231647|34.120332036415|34.13731950223...,31.90644,Vinblastine
3,HN120,1006,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.64345405665561|0.73736230277016|1.0642299051...,0.782631,39.031023104429|37.493555437562|32.35143074316...,34.372665,Cytarabine
4,HN120,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,3.6652578613737|3.5146840869921|4.191433189523...,3.646273,7.3063024840794|8.0454082266986|5.189316535910...,6.310247,Docetaxel


##### List all drug pairs

In [72]:
# Enumerate every unordered pair of tested drugs once per patient.
# Pairs keep the order of `tested_drug_list`: A always precedes B in that list.
n_drugs = len(tested_drug_list)
drug_id_strs = [str(drug_id) for drug_id in tested_drug_list]

drug_combi_list = [
    [patient, first_drug, second_drug]
    for patient in patient_list
    for pos, first_drug in enumerate(drug_id_strs)
    for second_drug in drug_id_strs[pos + 1:]
]

# One row per (patient, drug A, drug B); drug ids are stored as str.
drug_combi_df = pd.DataFrame(drug_combi_list, columns=['patient', 'A', 'B'])

print(drug_combi_df.shape)
drug_combi_df.head()

(216, 3)


Unnamed: 0,patient,A,B
0,HN120,1032,1007
1,HN120,1032,133
2,HN120,1032,201
3,HN120,1032,1010
4,HN120,1032,182


##### Get predictions and drug info for each drug in a pair

In [73]:
# Step 1: attach drug A's predictions -- all columns of the single-drug table,
# matched on (patient, A). A left join keeps every pair row.
merge_df = drug_combi_df.merge(
    single_drug_pred_df,
    how='left',
    left_on=['patient', 'A'],
    right_on=['patient', 'drug_id'],
)

# Step 2: attach drug B's predictions, matched on (patient, B). Only the
# drug-specific columns are taken from the right side; column names present on
# both sides receive the '_A' / '_B' suffixes.
drug_b_cols = ['patient', 'drug_id', 'drug_name', 'cluster_delta', 'delta', 'cluster_kill', 'kill']
drug_combi_pred_df = merge_df.merge(
    single_drug_pred_df[drug_b_cols],
    how='left',
    left_on=['patient', 'B'],
    right_on=['patient', 'drug_id'],
    suffixes=['_A', '_B'],
)

In [74]:
# Preview the merged pair table: drug A and drug B columns sit side by side,
# distinguished by their '_A' / '_B' suffixes.
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,drug_name_A,drug_id_B,drug_name_B,cluster_delta_B,delta_B,cluster_kill_B,kill_B
0,HN120,1032,1007,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,Afatinib,1007,Docetaxel,3.6652578613737|3.5146840869921|4.191433189523...,3.646273,7.3063024840794|8.0454082266986|5.189316535910...,6.310247
1,HN120,1032,133,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,Afatinib,133,Doxorubicin,-2.2488107673639|-1.5744259519154|-0.701068604...,-1.362472,82.617489566863|74.862811767297|61.91504134721...,68.514219
2,HN120,1032,201,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,Afatinib,201,Epothilone B,-1.1345482908374|-0.50348795527066|1.789914949...,0.318228,68.705974268278|58.637293992614|22.43186009135...,43.029539
3,HN120,1032,1010,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,Afatinib,1010,Gefitinib,1.6079293707945|1.607251437639|1.5826831158258...,1.51187,24.702700340069|24.711441876835|25.02963574733...,23.696096
4,HN120,1032,182,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,Afatinib,182,Obatoclax Mesylate,-0.085939217579996|0.30168298492716|1.03796021...,0.453389,51.48877295384|44.791198983324|32.751208206546...,39.943988


In [75]:
def _split_floats(packed):
    """Parse a '|'-separated string of numbers into a float array."""
    return np.array([float(token) for token in packed.split('|')])


# One output record per drug pair, in the row order of `drug_combi_pred_df`.
rows = []
for _, data in drug_combi_pred_df.iterrows():

    # Per-cluster values are stored as '|'-joined strings in the table.
    cluster_p = _split_floats(data['cluster_p'])
    cluster_kill_A = _split_floats(data['cluster_kill_A'])
    cluster_kill_B = _split_floats(data['cluster_kill_B'])

    # Combined kill per cluster on the 0-100 scale: A + B - A*B/100,
    # i.e. 1 - (1 - a)(1 - b) when a and b are expressed as fractions.
    joint_term = np.multiply(cluster_kill_A/100, cluster_kill_B/100)*100
    cluster_kill_C = cluster_kill_A + cluster_kill_B - joint_term
    # Overall combined kill: cluster values weighted by `cluster_p`.
    kill_C = np.sum(cluster_p * cluster_kill_C)

    # Gain of the combination over the better single drug, absolute and relative.
    best_kill = np.max([data['kill_A'], data['kill_B']])
    improve = kill_C - best_kill
    improve_p = (kill_C - best_kill) / best_kill

    ##### specificity (entropy) #####

    # Total cluster weight where A kills strictly more, and where B kills at least as much.
    weight_A_better = np.sum(cluster_p[cluster_kill_A > cluster_kill_B])
    weight_B_better = np.sum(cluster_p[cluster_kill_A <= cluster_kill_B])
    # Two-way entropy (log base 2) of that split; defined as 0 when one side is empty.
    # Presumably `cluster_p` sums to 1 so the two weights form a distribution -- TODO confirm.
    entropy = (
        -(weight_A_better * np.log2(weight_A_better) + weight_B_better * np.log2(weight_B_better))
        if weight_A_better != 0 and weight_B_better != 0
        else 0
    )

    # Summed absolute per-cluster difference between the two drugs' kill values.
    sum_kill_dif = np.sum(np.abs(cluster_kill_A - cluster_kill_B))

    ##### save output #####

    # The per-cluster combined kill is re-packed into the same '|'-joined string form.
    packed_kill_C = '|'.join("{:.14}".format(k) for k in cluster_kill_C)
    rows.append([packed_kill_C, kill_C, improve, improve_p, entropy, sum_kill_dif])

In [76]:
# Column names for the per-pair values collected in `rows`, in the order each record lists them.
combi_stat_cols = ['cluster_kill_C', 'kill_C', 'improve', 'improve_p', 'kill_entropy', 'sum_kill_dif']

# Build the result frame on the pair table's own index: the column-wise concat
# below aligns on index labels, and a length mismatch between `rows` and the
# table now raises instead of being padded with NaN.
combi_stat_df = pd.DataFrame(rows, columns=combi_stat_cols, index=drug_combi_pred_df.index)

# This cell reassigns `drug_combi_pred_df` from itself. Dropping any result
# columns left by a previous run keeps it idempotent: re-running no longer
# appends a second copy of the same columns. On a first run nothing is dropped.
drug_combi_pred_df = pd.concat(
    [drug_combi_pred_df.drop(columns=combi_stat_cols, errors='ignore'), combi_stat_df],
    axis=1,
)
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,...,cluster_delta_B,delta_B,cluster_kill_B,kill_B,cluster_kill_C,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1032,1007,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,...,3.6652578613737|3.5146840869921|4.191433189523...,3.646273,7.3063024840794|8.0454082266986|5.189316535910...,6.310247,24.870042122951|25.506575599271|23.34290590166...,23.250498,5.106453,0.28144,0.0,49.024381
1,HN120,1032,133,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,...,-2.2488107673639|-1.5744259519154|-0.701068604...,-1.362472,82.617489566863|74.862811767297|61.91504134721...,68.514219,85.911153491157|79.636087821735|69.20724381990...,73.588564,5.074345,0.074063,0.0,212.249633
2,HN120,1032,201,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,...,-1.1345482908374|-0.50348795527066|1.789914949...,0.318228,68.705974268278|58.637293992614|22.43186009135...,43.029539,74.635612797475|66.491617726202|37.28398548817...,52.983812,9.954272,0.231336,0.0,102.716624
3,HN120,1032,1010,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,...,1.6079293707945|1.607251437639|1.5826831158258...,1.51187,24.702700340069|24.711441876835|25.02963574733...,23.696096,38.970144645114|39.007912442103|39.38435989358...,37.316629,13.620534,0.574801,0.0,23.47914
4,HN120,1032,182,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.0967882398606|2.0929630691675|2.078164480668...,1.980695,18.948148697871|18.988902061161|19.14719808199...,18.144044,...,-0.085939217579996|0.30168298492716|1.03796021...,0.453389,51.48877295384|44.791198983324|32.751208206546...,39.943988,60.680752389708|55.274744137522|45.62746757898...,50.471037,10.527049,0.263545,0.0,92.112932


In [77]:
# Keep only the columns that go into the output file, in their final order:
# pair identity, cluster info, per-cluster kills, overall kills, summary metrics.
output_cols = [
    'patient',
    'drug_id_A', 'drug_name_A',
    'drug_id_B', 'drug_name_B',
    'cluster', 'cluster_p',
    'cluster_kill_A', 'cluster_kill_B', 'cluster_kill_C',
    'kill_A', 'kill_B', 'kill_C',
    'improve', 'improve_p', 'kill_entropy', 'sum_kill_dif',
]
drug_combi_pred_df = drug_combi_pred_df[output_cols]

drug_combi_pred_df.head()

Unnamed: 0,patient,drug_id_A,drug_name_A,drug_id_B,drug_name_B,cluster,cluster_p,cluster_kill_A,cluster_kill_B,cluster_kill_C,kill_A,kill_B,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1032,Afatinib,1007,Docetaxel,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,18.948148697871|18.988902061161|19.14719808199...,7.3063024840794|8.0454082266986|5.189316535910...,24.870042122951|25.506575599271|23.34290590166...,18.144044,6.310247,23.250498,5.106453,0.28144,0.0,49.024381
1,HN120,1032,Afatinib,133,Doxorubicin,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,18.948148697871|18.988902061161|19.14719808199...,82.617489566863|74.862811767297|61.91504134721...,85.911153491157|79.636087821735|69.20724381990...,18.144044,68.514219,73.588564,5.074345,0.074063,0.0,212.249633
2,HN120,1032,Afatinib,201,Epothilone B,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,18.948148697871|18.988902061161|19.14719808199...,68.705974268278|58.637293992614|22.43186009135...,74.635612797475|66.491617726202|37.28398548817...,18.144044,43.029539,52.983812,9.954272,0.231336,0.0,102.716624
3,HN120,1032,Afatinib,1010,Gefitinib,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,18.948148697871|18.988902061161|19.14719808199...,24.702700340069|24.711441876835|25.02963574733...,38.970144645114|39.007912442103|39.38435989358...,18.144044,23.696096,37.316629,13.620534,0.574801,0.0,23.47914
4,HN120,1032,Afatinib,182,Obatoclax Mesylate,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,18.948148697871|18.988902061161|19.14719808199...,51.48877295384|44.791198983324|32.751208206546...,60.680752389708|55.274744137522|45.62746757898...,18.144044,39.943988,50.471037,10.527049,0.263545,0.0,92.112932


In [78]:
# Write the combination table next to the single-drug predictions; the file name
# carries the reference type and model name, plus '_shifted' when `dosage_shifted` is set.
combi_file_template = 'pred_combi_kill_{}_{}_shifted.csv' if dosage_shifted else 'pred_combi_kill_{}_{}.csv'
drug_combi_pred_df.to_csv(current_dir + combi_file_template.format(ref_type, model_name), index=False)