In [39]:
import os
import pandas as pd
import numpy as np
np.set_printoptions(precision=2)

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from collections import Counter

# Use seaborn's 'ticks' style for the figures drawn in this notebook.
sns.set_style('ticks')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import warnings
# NOTE(review): this suppresses every Python warning for the rest of the
# session (including pandas/numpy deprecation and runtime warnings) —
# confirm that hiding them is intended.
warnings.filterwarnings('ignore')
import matplotlib as mpl
# Display and save figures at 300 dpi.
mpl.rcParams['figure.dpi']= 300
mpl.rc("savefig", dpi=300)

from scipy.special import xlogy

##### Read files and select drugs

In [40]:
# Run configuration. `ref_type` and `model_name` are interpolated into the
# names of the prediction file that is read and the combination file that is
# written. Values listed by the author for `ref_type`:
# log2_median_ic50, log2_median_ic50_9f, log2_median_ic50_hn, log2_median_ic50_9f_hn, log2_median_ic50_3f_hn, log2_max_conc
ref_type = 'log2_median_ic50_3f_hn'
model_name = 'hn_drug_cw_dw10_100000_model'

# When True, the '*_shifted.csv' variants of the input and output files are used.
dosage_shifted = True

In [41]:
# Per-drug statistics table; the first CSV column becomes the index.
drug_info_df = pd.read_csv(
    '../preprocessed_data/GDSC/hn_drug_stat.csv',
    index_col=0,
)
# Index labels are converted to strings so they can be used as string keys below.
drug_info_df.index = drug_info_df.index.astype(str)

# Lookup table: drug ID (str) -> value of the 'Drug Name' column.
drug_id_name_dict = {
    drug_id: drug_name
    for drug_id, drug_name in zip(drug_info_df.index, drug_info_df['Drug Name'].values)
}

drug_info_df.head()

Unnamed: 0_level_0,Drug Name,Synonyms,Target,Target Pathway,Selleckchem Cat#,CAS number,PubCHEM,Others,entropy,max_conc,...,median_ic50_9f,log2_median_ic50_9f,log2_median_ic50_hn,median_ic50_hn,median_ic50_3f_hn,log2_median_ic50_3f_hn,median_ic50_9f_hn,log2_median_ic50_9f_hn,num_sensitive,num_sensitive_hn
Drug ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,AICA Ribonucleotide,"AICAR, N1-(b-D-Ribofuranosyl)-5-aminoimidazole...",AMPK agonist,Metabolism,S1802,2627-69-2,65110,,6.034272,2000.0,...,206.74838,7.691732,9.939784,982.139588,327.379863,8.354822,109.126621,6.769859,476,27
1003,Camptothecin,"7-Ethyl-10-Hydroxy-Camptothecin, SN-38, Irinot...",TOP1,DNA replication,S1288,7689-03-4,104842,"(SN-38, S4908, 86639-52-3) (Irinotecan, S1198,...",4.60953,0.1,...,0.002003,-8.963413,-7.587491,0.005199,0.001733,-9.172454,0.000578,-10.757416,688,30
1004,Vinblastine,Velban,Microtubule destabiliser,Mitosis,S1248,143-67-9,6710780,,4.297122,0.1,...,0.001599,-9.289051,-7.150982,0.007036,0.002345,-8.735945,0.000782,-10.320907,753,33
1006,Cytarabine,"Ara-Cytidine, Arabinosyl Cytosine, U-19920",Antimetabolite,DNA replication,S1648,147-94-4,6253,,6.646594,2.0,...,0.163032,-2.616771,-1.342632,0.394301,0.131434,-2.927594,0.043811,-4.512557,508,25
1007,Docetaxel,"RP-56976, Taxotere",Microtubule stabiliser,Mitosis,S1148,114977-28-5,148124,,4.220984,0.0125,...,0.000761,-10.358915,-9.792998,0.001127,0.000376,-11.37796,0.000125,-12.962923,584,32


In [42]:
# Drug IDs to combine, written as the same three groups the list was built from.
tested_drug_list = [
    1032, 1007, 133, 201, 1010,
    182, 301, 302,
    1012,
]
# Show the drug name for each selected ID (the lookup is keyed by string IDs).
[drug_id_name_dict[str(drug_id)] for drug_id in tested_drug_list]

['Afatinib',
 'Docetaxel',
 'Doxorubicin',
 'Epothilone B',
 'Gefitinib',
 'Obatoclax Mesylate',
 'PHA-793887',
 'PI-103',
 'Vorinostat']

In [43]:
# Load the single-drug kill predictions for the configured reference dosage
# and model; the '_shifted' file is used when `dosage_shifted` is set.
if dosage_shifted:
    single_drug_pred_path = '../result/HN_model/pred_drug_kill_{}_{}_shifted.csv'.format(ref_type, model_name)
else:
    single_drug_pred_path = '../result/HN_model/pred_drug_kill_{}_{}.csv'.format(ref_type, model_name)
single_drug_pred_df = pd.read_csv(single_drug_pred_path)

# Replace the column outright with plain assignment. Writing the string values
# through `.loc[:, 'drug_id'] = ...` sets them *into* the existing column in
# place, which recent pandas deprecates when the dtype has to change.
single_drug_pred_df['drug_id'] = single_drug_pred_df['drug_id'].astype(str)
# Direct dict indexing (not `.map`) so an unknown drug ID fails loudly with a
# KeyError instead of silently producing a missing name.
single_drug_pred_df['drug_name'] = [drug_id_name_dict[drug_id] for drug_id in single_drug_pred_df['drug_id']]

# Distinct patient labels in sorted order.
patient_list = sorted(set(single_drug_pred_df['patient']))

single_drug_pred_df.head()

Unnamed: 0,patient,drug_id,cluster,cluster_p,cluster_delta,delta,cluster_kill,kill,drug_name
0,HN120,1001,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,-1.2991015777105|-1.1638956239544|-1.292777424...,-1.208002,71.104578969666|69.141672394507|71.014431055831,67.991019,AICA Ribonucleotide
1,HN120,1003,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,-0.78861207024316|-1.0406025152884|-1.50794919...,-0.957514,63.335203321631|67.289117456436|73.985801765811,64.13998,Camptothecin
2,HN120,1004,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,-0.58469351140377|-0.81619966009906|-1.4011502...,-0.768665,59.995525136093|63.778112248989|72.535928733901,61.148613,Vinblastine
3,HN120,1006,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,-0.43746034342087|-1.2751652027675|-1.47224412...,-0.86454,57.523055924516|70.76250201914|73.506645025849,62.480461,Cytarabine
4,HN120,1007,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.7611698644563|1.0573505059397|0.38271304027375,1.250823,22.78045044477|32.455877004255|43.406708149367,28.443744,Docetaxel


##### List all drug pairs

In [44]:
n_drugs = len(tested_drug_list)

# Drug IDs as strings, the same form the prediction table's `drug_id` column uses.
drug_id_strs = [str(drug_id) for drug_id in tested_drug_list]

# For every patient, every unordered drug pair: the first drug always precedes
# the second in `tested_drug_list`, so no pair appears twice or with itself.
drug_combi_list = [
    [patient, drug_id_strs[first], drug_id_strs[second]]
    for patient in patient_list
    for first in range(n_drugs - 1)
    for second in range(first + 1, n_drugs)
]

drug_combi_df = pd.DataFrame(drug_combi_list, columns=['patient', 'A', 'B'])

print (drug_combi_df.shape)
drug_combi_df.head()

(216, 3)


Unnamed: 0,patient,A,B
0,HN120,1032,1007
1,HN120,1032,133
2,HN120,1032,201
3,HN120,1032,1010
4,HN120,1032,182


##### Get predictions and info for each drug in every pair

In [45]:
# Columns taken from the single-drug table for drug B. `cluster` and
# `cluster_p` are left out here; they arrive once, via the drug A merge.
drug_b_columns = ['patient', 'drug_id', 'drug_name', 'cluster_delta', 'delta', 'cluster_kill', 'kill']

# Step 1: attach the prediction row of drug A to each (patient, A, B) pair.
merge_df = drug_combi_df.merge(
    single_drug_pred_df,
    how='left',
    left_on=['patient', 'A'],
    right_on=['patient', 'drug_id'],
)

# Step 2: attach drug B's prediction row. Column names present on both sides
# get the '_A' (left, drug A) and '_B' (right, drug B) suffixes.
drug_combi_pred_df = merge_df.merge(
    single_drug_pred_df[drug_b_columns],
    how='left',
    left_on=['patient', 'B'],
    right_on=['patient', 'drug_id'],
    suffixes=['_A', '_B'],
)

In [46]:
# Preview the merged pair table (first five rows).
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,drug_name_A,drug_id_B,drug_name_B,cluster_delta_B,delta_B,cluster_kill_B,kill_B
0,HN120,1032,1007,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,Afatinib,1007,Docetaxel,1.7611698644563|1.0573505059397|0.38271304027375,1.250823,22.78045044477|32.455877004255|43.406708149367,28.443744
1,HN120,1032,133,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,Afatinib,133,Doxorubicin,-3.1186731202366|-4.6563108365294|-4.008918840587,-3.667503,89.675699916349|96.185645671284|94.151779845606,89.486481
2,HN120,1032,201,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,Afatinib,201,Epothilone B,-1.4317857153812|-4.1661723310812|-2.896131883...,-2.530503,72.956924968619|94.723857265114|88.157660962992,80.139367
3,HN120,1032,1010,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,Afatinib,1010,Gefitinib,0.61471842708116|0.21531805694042|0.1652704098...,0.389644,39.506030659477|46.275732909702|47.13920992054,41.611585
4,HN120,1032,182,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,Afatinib,182,Obatoclax Mesylate,-2.3556308484611|-3.5632517344402|-2.799391492...,-2.750473,83.655269675006|92.200154592723|87.439501796093,84.251789


In [47]:
def _split_floats(packed):
    """Parse a '|'-separated string of numbers into a 1-D float array."""
    return np.array([float(token) for token in packed.split('|')])


def _combination_row(packed_weights, packed_kill_a, packed_kill_b, kill_a, kill_b):
    """Compute the derived metrics for one (patient, drug A, drug B) row.

    Parameters
    ----------
    packed_weights : str
        '|'-separated per-cluster weights (the `cluster_p` column).
    packed_kill_a, packed_kill_b : str
        '|'-separated per-cluster kill values of drug A and drug B.
    kill_a, kill_b : float
        Overall kill values of drug A and drug B (`kill_A` / `kill_B` columns).

    Returns
    -------
    list
        [cluster_kill_C (packed str), kill_C, improve, improve_p,
         kill_entropy, sum_kill_dif], in that order.
    """
    weights = _split_floats(packed_weights)
    per_cluster_a = _split_floats(packed_kill_a)
    per_cluster_b = _split_floats(packed_kill_b)

    # Combined per-cluster kill on a 0-100 scale: A + B - A*B/100.
    overlap = np.multiply(per_cluster_a / 100, per_cluster_b / 100) * 100
    per_cluster_c = per_cluster_a + per_cluster_b - overlap
    combined_kill = np.sum(weights * per_cluster_c)

    # Gain of the combination over the better of the two single drugs,
    # absolute and relative to that better single-drug value.
    best_single = np.max([kill_a, kill_b])
    gain = combined_kill - best_single
    relative_gain = gain / best_single

    ##### specificity (entropy) #####
    # Total weight of the clusters where A beats B versus the rest (ties count
    # towards B), then the two-term entropy of those two weights.
    # NOTE(review): the weights are used as given; they only form a probability
    # distribution if `cluster_p` sums to 1 — confirm whether normalisation is wanted.
    weight_a_better = np.sum(weights[per_cluster_a > per_cluster_b])
    weight_b_better = np.sum(weights[per_cluster_a <= per_cluster_b])
    if weight_a_better != 0 and weight_b_better != 0:
        split_entropy = -(weight_a_better * np.log2(weight_a_better) + weight_b_better * np.log2(weight_b_better))
    else:
        # One drug is at least as good in every cluster: no split.
        split_entropy = 0

    # Unweighted sum of per-cluster absolute differences between A and B.
    total_abs_gap = np.sum(np.abs(per_cluster_a - per_cluster_b))

    ##### save output #####
    packed_kill_c = '|'.join("{:.14}".format(value) for value in per_cluster_c)
    return [packed_kill_c, combined_kill, gain, relative_gain, split_entropy, total_abs_gap]


# One metrics row per drug pair, in the same order as `drug_combi_pred_df`.
rows = [
    _combination_row(
        pair['cluster_p'],
        pair['cluster_kill_A'],
        pair['cluster_kill_B'],
        pair['kill_A'],
        pair['kill_B'],
    )
    for _, pair in drug_combi_pred_df.iterrows()
]

In [48]:
# Names of the derived per-pair metrics, in the order each entry of `rows` holds them.
combi_metric_columns = ['cluster_kill_C', 'kill_C', 'improve', 'improve_p', 'kill_entropy', 'sum_kill_dif']

# Build the metric frame on the pair table's own index: the column-wise concat
# aligns on index labels, and a `rows` list of the wrong length now raises
# instead of silently producing NaN-padded rows.
combi_metric_df = pd.DataFrame(rows, columns=combi_metric_columns, index=drug_combi_pred_df.index)

# Drop metric columns left over from an earlier run of this cell so that
# re-running it replaces them rather than appending duplicate columns.
drug_combi_pred_df = pd.concat(
    [drug_combi_pred_df.drop(columns=combi_metric_columns, errors='ignore'), combi_metric_df],
    axis=1,
)
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,...,cluster_delta_B,delta_B,cluster_kill_B,kill_B,cluster_kill_C,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1032,1007,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,...,1.7611698644563|1.0573505059397|0.38271304027375,1.250823,22.78045044477|32.455877004255|43.406708149367,28.443744,46.299854565226|55.127697468644|62.042944722818,50.117609,19.274729,0.624933,0.660621,19.264042
1,HN120,1032,133,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,...,-3.1186731202366|-4.6563108365294|-4.008918840587,-3.667503,89.675699916349|96.185645671284|94.151779845606,89.486481,92.820258351704|97.465969594265|96.077605517994,91.70668,2.220199,0.02481,0.0,183.059232
2,HN120,1032,201,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,...,-1.4317857153812|-4.1661723310812|-2.896131883...,-2.530503,72.956924968619|94.723857265114|88.157660962992,80.139367,81.193660536054|96.494844221853|92.057356928005,85.244676,5.105309,0.063705,0.0,158.88455
3,HN120,1032,1010,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,...,0.61471842708116|0.21531805694042|0.1652704098...,0.389644,39.506030659477|46.275732909702|47.13920992054,41.611585,57.931184910767|64.308788696492|64.546329336396,59.115759,17.504173,0.420656,0.0,35.96708
4,HN120,1032,182,1032,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,1.19107448642|0.9849249760157|1.026257522231,1.055307,30.457836462299|33.565940986186|32.930115856554,30.84288,...,-2.3556308484611|-3.5632517344402|-2.799391492...,-2.750473,83.655269675006|92.200154592723|87.439501796093,84.251789,88.633520907596|94.818246099143|91.5756884068,88.133728,3.881939,0.046075,0.0,166.341033


In [49]:
# Columns kept in the final table, in output order: pair identifiers, cluster
# labels and weights, per-cluster kills (A, B, combined C), overall kills,
# then the derived metrics.
output_columns = [
    'patient',
    'drug_id_A', 'drug_name_A',
    'drug_id_B', 'drug_name_B',
    'cluster', 'cluster_p',
    'cluster_kill_A', 'cluster_kill_B', 'cluster_kill_C',
    'kill_A', 'kill_B', 'kill_C',
    'improve', 'improve_p',
    'kill_entropy', 'sum_kill_dif',
]
drug_combi_pred_df = drug_combi_pred_df[output_columns]

drug_combi_pred_df.head()

Unnamed: 0,patient,drug_id_A,drug_name_A,drug_id_B,drug_name_B,cluster,cluster_p,cluster_kill_A,cluster_kill_B,cluster_kill_C,kill_A,kill_B,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1032,Afatinib,1007,Docetaxel,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,30.457836462299|33.565940986186|32.930115856554,22.78045044477|32.455877004255|43.406708149367,46.299854565226|55.127697468644|62.042944722818,30.84288,28.443744,50.117609,19.274729,0.624933,0.660621,19.264042
1,HN120,1032,Afatinib,133,Doxorubicin,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,30.457836462299|33.565940986186|32.930115856554,89.675699916349|96.185645671284|94.151779845606,92.820258351704|97.465969594265|96.077605517994,30.84288,89.486481,91.70668,2.220199,0.02481,0.0,183.059232
2,HN120,1032,Afatinib,201,Epothilone B,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,30.457836462299|33.565940986186|32.930115856554,72.956924968619|94.723857265114|88.157660962992,81.193660536054|96.494844221853|92.057356928005,30.84288,80.139367,85.244676,5.105309,0.063705,0.0,158.88455
3,HN120,1032,Afatinib,1010,Gefitinib,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,30.457836462299|33.565940986186|32.930115856554,39.506030659477|46.275732909702|47.13920992054,57.931184910767|64.308788696492|64.546329336396,30.84288,41.611585,59.115759,17.504173,0.420656,0.0,35.96708
4,HN120,1032,Afatinib,182,Obatoclax Mesylate,A2|E1|E2,0.47428571428571|0.33714285714286|0.1542857142...,30.457836462299|33.565940986186|32.930115856554,83.655269675006|92.200154592723|87.439501796093,88.633520907596|94.818246099143|91.5756884068,30.84288,84.251789,88.133728,3.881939,0.046075,0.0,166.341033


In [50]:
# Write the combination table next to the single-drug predictions, using the
# '_shifted' file name when `dosage_shifted` is set. The index is not written.
output_path_template = (
    '../result/HN_model/pred_combi_kill_{}_{}_shifted.csv'
    if dosage_shifted
    else '../result/HN_model/pred_combi_kill_{}_{}.csv'
)
drug_combi_pred_df.to_csv(output_path_template.format(ref_type, model_name), index=False)