In [106]:
import os
import pandas as pd
import numpy as np
np.set_printoptions(precision=2)

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from collections import Counter

sns.set_style('ticks')

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 300
mpl.rc("savefig", dpi=300)

from scipy.special import xlogy

##### Read files and select drugs

In [107]:
# Run configuration. ref_type and model_name are interpolated into the names of the
# prediction CSV files that this notebook reads and writes.
# Candidate ref_type values listed by the author:
# log2_median_ic50, log2_median_ic50_9f, log2_median_ic50_hn, log2_median_ic50_9f_hn, log2_median_ic50_3f_hn, log2_max_conc
ref_type = 'log2_median_ic50_hn' # log2_median_ic50_3f_hn | log2_median_ic50_hn
model_name = 'hn_drug_cw_dw10_100000_model'

# When True, the '*_shifted.csv' variants of the input and output files are used.
dosage_shifted = False

In [108]:
# Directory that holds the prediction CSVs read and written by this notebook.
# The value must end with '/': file paths are built by plain string concatenation.
# Leave exactly one of the alternatives below uncommented.
current_dir = '../result/HN_model/TMM/'
# current_dir = '../result/HN_model/TMM_p95/'
# current_dir = '../result/HN_model/mat_norm/'
# current_dir = '../result/HN_model/mat_norm_p95/'
# current_dir = '../result/HN_model/mat_norm_log2_p95/'

In [109]:
# Per-drug statistics table; the first CSV column becomes the index and is
# converted to str so drug IDs can be matched as strings in later cells.
drug_info_df = pd.read_csv('../preprocessed_data/GDSC/hn_drug_stat.csv', index_col=0)
drug_info_df.index = drug_info_df.index.astype(str)

# Lookup table: drug ID (str) -> value of the 'Drug Name' column.
drug_id_name_dict = {
    drug_id: drug_name
    for drug_id, drug_name in zip(drug_info_df.index, drug_info_df['Drug Name'].values)
}

drug_info_df.head()

Unnamed: 0_level_0,Drug Name,Synonyms,Target,Target Pathway,Selleckchem Cat#,CAS number,PubCHEM,Others,entropy,max_conc,...,median_ic50_9f,log2_median_ic50_9f,log2_median_ic50_hn,median_ic50_hn,median_ic50_3f_hn,log2_median_ic50_3f_hn,median_ic50_9f_hn,log2_median_ic50_9f_hn,num_sensitive,num_sensitive_hn
Drug ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,AICA Ribonucleotide,"AICAR, N1-(b-D-Ribofuranosyl)-5-aminoimidazole...",AMPK agonist,Metabolism,S1802,2627-69-2,65110,,6.034272,2000.0,...,206.74838,7.691732,9.939784,982.139588,327.379863,8.354822,109.126621,6.769859,476,27
1003,Camptothecin,"7-Ethyl-10-Hydroxy-Camptothecin, SN-38, Irinot...",TOP1,DNA replication,S1288,7689-03-4,104842,"(SN-38, S4908, 86639-52-3) (Irinotecan, S1198,...",4.60953,0.1,...,0.002003,-8.963413,-7.587491,0.005199,0.001733,-9.172454,0.000578,-10.757416,688,30
1004,Vinblastine,Velban,Microtubule destabiliser,Mitosis,S1248,143-67-9,6710780,,4.297122,0.1,...,0.001599,-9.289051,-7.150982,0.007036,0.002345,-8.735945,0.000782,-10.320907,753,33
1006,Cytarabine,"Ara-Cytidine, Arabinosyl Cytosine, U-19920",Antimetabolite,DNA replication,S1648,147-94-4,6253,,6.646594,2.0,...,0.163032,-2.616771,-1.342632,0.394301,0.131434,-2.927594,0.043811,-4.512557,508,25
1007,Docetaxel,"RP-56976, Taxotere",Microtubule stabiliser,Mitosis,S1148,114977-28-5,148124,,4.220984,0.0125,...,0.000761,-10.358915,-9.792998,0.001127,0.000376,-11.37796,0.000125,-12.962923,584,32


In [110]:
# Drug IDs to pair up below (kept in the author's original three groups).
tested_drug_list = [
    1032, 1007, 133, 201, 1010,
    182, 301, 302,
    1012,
]
# Show the corresponding drug names; the lookup dict is keyed by str IDs.
[drug_id_name_dict[drug_id] for drug_id in map(str, tested_drug_list)]

['Afatinib',
 'Docetaxel',
 'Doxorubicin',
 'Epothilone B',
 'Gefitinib',
 'Obatoclax Mesylate',
 'PHA-793887',
 'PI-103',
 'Vorinostat']

In [111]:
# Load the single-drug predictions; the file name depends on ref_type, model_name
# and on whether the dosage-shifted variant was requested.
if dosage_shifted:
    single_pred_file = 'pred_drug_kill_{}_{}_shifted.csv'.format(ref_type, model_name)
else:
    single_pred_file = 'pred_drug_kill_{}_{}.csv'.format(ref_type, model_name)
single_drug_pred_df = pd.read_csv(current_dir + single_pred_file)

# Replace the columns with plain assignment instead of `.loc[:, col] = ...`.
# The `.loc` form writes into the existing column in place, and storing strings
# in a numeric column that way is deprecated silent upcasting in recent pandas
# (the warning is hidden here because warnings are globally suppressed).
single_drug_pred_df['drug_id'] = single_drug_pred_df['drug_id'].astype(str)
# Strict lookup on purpose: an ID missing from the drug table raises KeyError.
single_drug_pred_df['drug_name'] = [drug_id_name_dict[d] for d in single_drug_pred_df['drug_id']]

patient_list = sorted(set(single_drug_pred_df['patient']))
# sel_drug_id_list = sorted(list(set(single_drug_pred_df['drug_id'])))

single_drug_pred_df.head()

Unnamed: 0,patient,drug_id,cluster,cluster_p,cluster_delta,delta,cluster_kill,kill,drug_name
0,HN120,1001,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.92135645703131|0.80215122911062|0.7541206387...,0.778419,34.555502207301|36.447143381563|37.22172227549...,34.397431,AICA Ribonucleotide
1,HN120,1003,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,1.0331982865939|1.1136227466817|1.545528432576...,1.198257,32.82394722215|31.606727585492|25.51599915092|...,28.089133,Camptothecin
2,HN120,1004,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,1.2259174280292|1.0336259642601|1.569763346432...,1.279129,29.948714769591|32.817411024087|25.19805576509...,26.92619,Vinblastine
3,HN120,1006,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.80494890047428|0.93639930344007|1.9116774819...,1.241399,36.402237164711|34.320082354212|20.99753745300...,27.852111,Cytarabine
4,HN120,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,3.0211910963965|2.6922616957096|4.045923540528...,3.252627,10.966865579688|13.398973100733|5.708576760594...,8.564893,Docetaxel


##### List all drug pairs

In [112]:
# Enumerate every unordered pair of tested drugs (A listed before B in
# tested_drug_list) for every patient, with drug IDs as strings.
n_drugs = len(tested_drug_list)

drug_combi_list = [
    [patient, str(tested_drug_list[idx_a]), str(tested_drug_list[idx_b])]
    for patient in patient_list
    for idx_a in range(n_drugs - 1)
    for idx_b in range(idx_a + 1, n_drugs)
]

drug_combi_df = pd.DataFrame(drug_combi_list, columns=['patient', 'A', 'B'])

print(drug_combi_df.shape)
drug_combi_df.head()

(216, 3)


Unnamed: 0,patient,A,B
0,HN120,1032,1007
1,HN120,1032,133
2,HN120,1032,201
3,HN120,1032,1010
4,HN120,1032,182


##### Get predictions and drug info for both drugs in each pair

In [113]:
# First attach drug A's full prediction record to each (patient, A, B) pair ...
merge_df = drug_combi_df.merge(
    single_drug_pred_df,
    how='left',
    left_on=['patient', 'A'],
    right_on=['patient', 'drug_id'],
)
# ... then attach drug B's record; columns present on both sides get the
# '_A' / '_B' suffixes. Left joins keep every pair even if a prediction is missing.
drug_combi_pred_df = merge_df.merge(
    single_drug_pred_df[['patient', 'drug_id', 'drug_name', 'cluster_delta', 'delta', 'cluster_kill', 'kill']],
    how='left',
    left_on=['patient', 'B'],
    right_on=['patient', 'drug_id'],
    suffixes=['_A', '_B'],
)

In [114]:
# Preview the merged table: one row per (patient, A, B) pair, with drug A's and
# drug B's prediction columns carrying the '_A' / '_B' suffixes.
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,drug_name_A,drug_id_B,drug_name_B,cluster_delta_B,delta_B,cluster_kill_B,kill_B
0,HN120,1032,1007,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,Afatinib,1007,Docetaxel,3.0211910963965|2.6922616957096|4.045923540528...,3.252627,10.966865579688|13.398973100733|5.708576760594...,8.564893
1,HN120,1032,133,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,Afatinib,133,Doxorubicin,-2.7687641637171|-2.4038647234774|-1.348018671...,-1.971728,87.20448532958|84.107280954653|71.796210371005...,76.009638
2,HN120,1032,201,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,Afatinib,201,Epothilone B,-2.266143477672|-1.6956751071344|0.32833731970...,-0.943108,82.789348726345|76.411112770076|44.33477944134...,61.321592
3,HN120,1032,1010,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,Afatinib,1010,Gefitinib,2.2120065715556|2.054458223014|2.7066706527924...,2.280762,17.751923626333|19.402871920459|13.28350439875...,15.290017
4,HN120,1032,182,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,Afatinib,182,Obatoclax Mesylate,-1.807312490317|-1.1619505525366|-0.6407362704...,-1.100298,77.777269427724|69.112899383857|60.92413858820...,65.180902


In [115]:
def _parse_pipe_floats(field, column, row_label):
    """Turn a '|'-separated string of numbers into a float array.

    Parameters
    ----------
    field : cell value; expected to be a str such as '1.5|2.0|3.25'.
    column : column name, used only in the error message.
    row_label : row index label, used only in the error message.

    Raises
    ------
    ValueError
        If `field` is not a string, e.g. a NaN left by a left merge that found
        no matching single-drug prediction.
    """
    if not isinstance(field, str):
        raise ValueError(
            "row {}: column '{}' is not a '|'-separated string (got {!r}); "
            "is a single-drug prediction missing for this pair?".format(row_label, column, field)
        )
    return np.array([float(v) for v in field.split('|')])


def _score_drug_pair(cluster_p, cluster_kill_A, cluster_kill_B, kill_A, kill_B):
    """Score one drug pair from per-cluster kill values.

    Parameters
    ----------
    cluster_p : 1-D float array of per-cluster weights (presumably proportions
        summing to 1 -- TODO confirm).
    cluster_kill_A, cluster_kill_B : 1-D float arrays, same length as
        `cluster_p`, with the kill of drug A / drug B in each cluster. They are
        treated as percentages (divided by 100 before being multiplied).
    kill_A, kill_B : overall kill of each drug on its own.

    Returns
    -------
    list
        [cluster_kill_C as a '|'-joined string, kill_C, improve, improve_p,
        entropy, sum_kill_dif].

    Raises
    ------
    ValueError
        If the three per-cluster arrays do not have the same shape.
    """
    if not (cluster_p.shape == cluster_kill_A.shape == cluster_kill_B.shape):
        raise ValueError(
            "per-cluster arrays differ in shape: cluster_p {}, cluster_kill_A {}, cluster_kill_B {}".format(
                cluster_p.shape, cluster_kill_A.shape, cluster_kill_B.shape
            )
        )

    # Combined kill per cluster: A + B - A*B/100, i.e. the two kills are
    # combined as if they were independent probabilities expressed in percent.
    cluster_kill_C = cluster_kill_A + cluster_kill_B - np.multiply(cluster_kill_A/100, cluster_kill_B/100)*100
    kill_C = np.sum(cluster_p * cluster_kill_C)

    # Improvement of the combination over the better single drug.
    best_kill = np.max([kill_A, kill_B])
    improve = kill_C - best_kill
    # The relative improvement is undefined when the best single kill is 0.
    # Return NaN explicitly instead of relying on a silenced numpy warning.
    improve_p = improve / best_kill if best_kill != 0 else np.nan

    ##### specificity (entropy) #####

    # Split the cluster weight between clusters where A kills more and the rest
    # (ties go to B), then take the base-2 entropy of that two-way split.
    weight_A = np.sum(cluster_p[cluster_kill_A > cluster_kill_B])
    weight_B = np.sum(cluster_p[cluster_kill_A <= cluster_kill_B])
    if weight_A == 0 or weight_B == 0:
        entropy = 0.0  # float, so the resulting column dtype never depends on the data
    else:
        entropy = -(weight_A * np.log2(weight_A) + weight_B * np.log2(weight_B))

    sum_kill_dif = np.sum(np.abs(cluster_kill_A - cluster_kill_B))

    cluster_kill_C_text = '|'.join("{:.14}".format(k) for k in cluster_kill_C)
    return [cluster_kill_C_text, kill_C, improve, improve_p, entropy, sum_kill_dif]


# One result row per drug pair, in the same order as drug_combi_pred_df.
rows = []
for row_label, data in drug_combi_pred_df.iterrows():

    cluster_p = _parse_pipe_floats(data['cluster_p'], 'cluster_p', row_label)
    cluster_kill_A = _parse_pipe_floats(data['cluster_kill_A'], 'cluster_kill_A', row_label)
    cluster_kill_B = _parse_pipe_floats(data['cluster_kill_B'], 'cluster_kill_B', row_label)

    ##### save output #####

    rows.append(_score_drug_pair(cluster_p, cluster_kill_A, cluster_kill_B, data['kill_A'], data['kill_B']))

In [116]:
# Append the per-pair scores as new columns.
combi_result_columns = ['cluster_kill_C', 'kill_C', 'improve', 'improve_p', 'kill_entropy', 'sum_kill_dif']
# Reuse the pair table's index so the column-wise concat aligns row for row;
# pandas raises if the number of score rows does not match the number of pairs.
combi_result_df = pd.DataFrame(rows, columns=combi_result_columns, index=drug_combi_pred_df.index)
# Drop any score columns left over from a previous run of this cell, so that
# re-running it does not produce duplicated column names.
drug_combi_pred_df = pd.concat(
    [drug_combi_pred_df.drop(columns=combi_result_columns, errors='ignore'), combi_result_df],
    axis=1,
)
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,...,cluster_delta_B,delta_B,cluster_kill_B,kill_B,cluster_kill_C,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1032,1007,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,...,3.0211910963965|2.6922616957096|4.045923540528...,3.252627,10.966865579688|13.398973100733|5.708576760594...,8.564893,21.273450195084|24.387467007429|14.87089524639...,17.993416,7.594844,0.730374,0.726218,8.655712
1,HN120,1032,133,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,...,-2.7687641637171|-2.4038647234774|-1.348018671...,-1.971728,87.20448532958|84.107280954653|71.796210371005...,76.009638,88.6857097693|86.123851111422|74.536778857614|...,78.030314,2.020676,0.026584,0.0,277.130336
2,HN120,1032,201,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,...,-2.266143477672|-1.6956751071344|0.32833731970...,-0.943108,82.789348726345|76.411112770076|44.33477944134...,61.321592,84.7816747833|79.404222123078|49.74378125528|6...,64.847745,3.526153,0.057503,0.0,216.072352
3,HN120,1032,1010,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,...,2.2120065715556|2.054458223014|2.7066706527924...,2.280762,17.751923626333|19.402871920459|13.28350439875...,15.290017,27.273061617464|29.629552625174|21.70976548778...,23.989611,8.699594,0.568972,0.0,21.104018
4,HN120,1032,182,1032,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,2.9332839202109|2.7826317223779|3.215867841156...,2.882937,11.57612239814|12.688641578671|9.7170221543259...,10.398572,...,-1.807312490317|-1.1619505525366|-0.6407362704...,-1.100298,77.777269427724|69.112899383857|60.92413858820...,65.180902,80.34979991898|73.032052875083|64.72114869858|...,68.394327,3.213426,0.0493,0.0,229.128029


In [117]:
# Keep only the columns that go into the output file, in their final order.
drug_combi_pred_df = drug_combi_pred_df.loc[:, [
    'patient',
    'drug_id_A', 'drug_name_A',
    'drug_id_B', 'drug_name_B',
    'cluster', 'cluster_p',
    'cluster_kill_A', 'cluster_kill_B', 'cluster_kill_C',
    'kill_A', 'kill_B', 'kill_C',
    'improve', 'improve_p',
    'kill_entropy', 'sum_kill_dif',
]]

drug_combi_pred_df.head()

Unnamed: 0,patient,drug_id_A,drug_name_A,drug_id_B,drug_name_B,cluster,cluster_p,cluster_kill_A,cluster_kill_B,cluster_kill_C,kill_A,kill_B,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1032,Afatinib,1007,Docetaxel,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,11.57612239814|12.688641578671|9.7170221543259...,10.966865579688|13.398973100733|5.708576760594...,21.273450195084|24.387467007429|14.87089524639...,10.398572,8.564893,17.993416,7.594844,0.730374,0.726218,8.655712
1,HN120,1032,Afatinib,133,Doxorubicin,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,11.57612239814|12.688641578671|9.7170221543259...,87.20448532958|84.107280954653|71.796210371005...,88.6857097693|86.123851111422|74.536778857614|...,10.398572,76.009638,78.030314,2.020676,0.026584,0.0,277.130336
2,HN120,1032,Afatinib,201,Epothilone B,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,11.57612239814|12.688641578671|9.7170221543259...,82.789348726345|76.411112770076|44.33477944134...,84.7816747833|79.404222123078|49.74378125528|6...,10.398572,61.321592,64.847745,3.526153,0.057503,0.0,216.072352
3,HN120,1032,Afatinib,1010,Gefitinib,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,11.57612239814|12.688641578671|9.7170221543259...,17.751923626333|19.402871920459|13.28350439875...,27.273061617464|29.629552625174|21.70976548778...,10.398572,15.290017,23.989611,8.699594,0.568972,0.0,21.104018
4,HN120,1032,Afatinib,182,Obatoclax Mesylate,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,11.57612239814|12.688641578671|9.7170221543259...,77.777269427724|69.112899383857|60.92413858820...,80.34979991898|73.032052875083|64.72114869858|...,10.398572,65.180902,68.394327,3.213426,0.0493,0.0,229.128029


In [118]:
# Write the pair predictions next to the single-drug predictions; the
# '_shifted' file name is used when the dosage-shifted inputs were loaded.
combi_file_template = (
    'pred_combi_kill_{}_{}_shifted.csv' if dosage_shifted else 'pred_combi_kill_{}_{}.csv'
)
drug_combi_pred_df.to_csv(current_dir + combi_file_template.format(ref_type, model_name), index=False)