In [11]:
# Standard-library, numerical and plotting dependencies for this notebook.
import os
import pandas as pd
import numpy as np
# Print NumPy arrays with two decimal places.
np.set_printoptions(precision=2)

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from collections import Counter

# Seaborn 'ticks' theme for any figures drawn below.
sns.set_style('ticks')

# Render matplotlib figures inline in the notebook.
%matplotlib inline
import warnings
# NOTE(review): this silences every warning, including pandas deprecation
# warnings raised by later cells -- consider narrowing the filter.
warnings.filterwarnings('ignore')
import matplotlib as mpl
# Use 300 dpi both for displayed figures and for figures written by savefig.
mpl.rcParams['figure.dpi']= 300
mpl.rc("savefig", dpi=300)

from scipy.special import xlogy

##### Read files and select drugs

In [12]:
# Run configuration. In this notebook these values are only used to build the
# names of the prediction files that are read and written.

# Reference-IC50 variant; the other option is 'log2_median_ic50_3f_hn'.
ref_type = 'log2_median_ic50_hn'

# Tag of the model whose predictions are combined.
model_name = 'RWEN'

# True -> use the '*_shifted.csv' variants of the input and output files.
dosage_shifted = False

In [13]:
# Expression normalisation tag; it selects the result sub-directory.
norm_type = 'TPM'

# Directory (with trailing slash) that holds the prediction CSVs for this
# normalisation; file names are appended to it by plain string concatenation.
result_dir_template = '../result/HN_model/{}/'
current_dir = result_dir_template.format(norm_type)

In [14]:
# Per-drug statistics table; the first CSV column becomes the index.
drug_info_df = pd.read_csv('../preprocessed_data/GDSC/hn_drug_stat.csv', index_col=0)
# Index labels are converted to strings so they can be looked up with the
# str(...) drug IDs used in the cells below.
drug_info_df.index = drug_info_df.index.astype(str)

# Lookup table: drug ID (str) -> value of the 'Drug Name' column.
drug_id_name_dict = {
    drug_id: drug_name
    for drug_id, drug_name in zip(drug_info_df.index, drug_info_df['Drug Name'].values)
}

drug_info_df.head()

Unnamed: 0_level_0,Drug Name,Synonyms,Target,Target Pathway,Selleckchem Cat#,CAS number,PubCHEM,Others,entropy,max_conc,...,median_ic50_9f,log2_median_ic50_9f,log2_median_ic50_hn,median_ic50_hn,median_ic50_3f_hn,log2_median_ic50_3f_hn,median_ic50_9f_hn,log2_median_ic50_9f_hn,num_sensitive,num_sensitive_hn
Drug ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,AICA Ribonucleotide,"AICAR, N1-(b-D-Ribofuranosyl)-5-aminoimidazole...",AMPK agonist,Metabolism,S1802,2627-69-2,65110,,6.034272,2000.0,...,206.74838,7.691732,9.939784,982.139588,327.379863,8.354822,109.126621,6.769859,476,27
1003,Camptothecin,"7-Ethyl-10-Hydroxy-Camptothecin, SN-38, Irinot...",TOP1,DNA replication,S1288,7689-03-4,104842,"(SN-38, S4908, 86639-52-3) (Irinotecan, S1198,...",4.60953,0.1,...,0.002003,-8.963413,-7.587491,0.005199,0.001733,-9.172454,0.000578,-10.757416,688,30
1004,Vinblastine,Velban,Microtubule destabiliser,Mitosis,S1248,143-67-9,6710780,,4.297122,0.1,...,0.001599,-9.289051,-7.150982,0.007036,0.002345,-8.735945,0.000782,-10.320907,753,33
1006,Cytarabine,"Ara-Cytidine, Arabinosyl Cytosine, U-19920",Antimetabolite,DNA replication,S1648,147-94-4,6253,,6.646594,2.0,...,0.163032,-2.616771,-1.342632,0.394301,0.131434,-2.927594,0.043811,-4.512557,508,25
1007,Docetaxel,"RP-56976, Taxotere",Microtubule stabiliser,Mitosis,S1148,114977-28-5,148124,,4.220984,0.0125,...,0.000761,-10.358915,-9.792998,0.001127,0.000376,-11.37796,0.000125,-12.962923,584,32


In [21]:
# Drug IDs to combine pairwise (kept as ints here; converted with str() where
# they are used as lookup keys).
tested_drug_list = [1007, 133, 201, 1010, 182, 301, 302, 1012]

# Sanity check: show the name of every selected drug, in list order.
[drug_id_name_dict[str(drug_id)] for drug_id in tested_drug_list]

['Docetaxel',
 'Doxorubicin',
 'Epothilone B',
 'Gefitinib',
 'Obatoclax Mesylate',
 'PHA-793887',
 'PI-103',
 'Vorinostat']

In [22]:
# Load the single-drug kill predictions; the '_shifted' file is used when
# dosage_shifted is set.
pred_csv_template = ('pred_drug_kill_{}_{}_shifted.csv' if dosage_shifted
                     else 'pred_drug_kill_{}_{}.csv')
single_drug_pred_df = pd.read_csv(current_dir + pred_csv_template.format(ref_type, model_name))

# Replace the drug_id column with its string form. Assigning the whole column
# (instead of writing strings through .loc[:, 'drug_id'] into the existing
# column) avoids pandas' incompatible-dtype setitem path, which is deprecated
# in recent releases and whose warning is hidden by the global warning filter.
single_drug_pred_df['drug_id'] = single_drug_pred_df['drug_id'].astype(str)

# Attach the drug name; an ID missing from drug_id_name_dict raises KeyError.
single_drug_pred_df['drug_name'] = [drug_id_name_dict[d] for d in single_drug_pred_df['drug_id']]

# Sorted unique patient identifiers present in the prediction table.
patient_list = sorted(set(single_drug_pred_df['patient']))

single_drug_pred_df.head()

Unnamed: 0,patient,drug_id,cluster,cluster_p,cluster_delta,delta,cluster_kill,kill,drug_name
0,HN120,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel
1,HN120,133,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-2.8334341716821|-2.6720860912117|-3.778959838...,-3.028104,87.69637799287|86.437921582584|93.209818273866...,85.297489,Doxorubicin
2,HN120,201,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-4.0213711434516|-3.6385077224033|-3.038971912...,-3.319798,94.199124702299|92.56712528151|89.152897129961...,87.040929,Epothilone B
3,HN120,1010,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,1.8712695534725|1.0992860185533|3.087297545393...,2.176924,21.465935078167|31.821936069363|10.52739493194...,17.366424,Gefitinib
4,HN120,182,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-1.690692592809|-1.3450796341807|-0.8269725475...,-1.173359,76.348806194752|71.754940580896|63.95043861070...,66.444651,Obatoclax Mesylate


In [23]:
# Show only the first rows instead of dumping the whole prediction table into
# the notebook output.
single_drug_pred_df.head(10)

Unnamed: 0,patient,drug_id,cluster,cluster_p,cluster_delta,delta,cluster_kill,kill,drug_name
0,HN120,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel
1,HN120,133,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-2.8334341716821|-2.6720860912117|-3.778959838...,-3.028104,87.69637799287|86.437921582584|93.209818273866...,85.297489,Doxorubicin
2,HN120,201,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-4.0213711434516|-3.6385077224033|-3.038971912...,-3.319798,94.199124702299|92.56712528151|89.152897129961...,87.040929,Epothilone B
3,HN120,1010,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,1.8712695534725|1.0992860185533|3.087297545393...,2.176924,21.465935078167|31.821936069363|10.52739493194...,17.366424,Gefitinib
4,HN120,182,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-1.690692592809|-1.3450796341807|-0.8269725475...,-1.173359,76.348806194752|71.754940580896|63.95043861070...,66.444651,Obatoclax Mesylate
5,HN120,301,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-1.5438814859345|-3.2564531480835|-3.508445283...,-2.726141,74.462298740991|90.526989849554|91.92254116809...,81.894659,PHA-793887
6,HN120,302,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-2.9431734242158|-5.9272591669807|-5.722303936...,-4.37904,88.493859956725|98.383266503135|98.14105898102...,89.751533,PI-103
7,HN120,1012,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,-1.5582956426338|-1.3535012950427|-0.684705475...,-1.075217,74.651824912916|71.873099117706|61.64723300190...,65.000026,Vorinostat
8,HN137,1007,E1|E2|E3|F1|F2|F3,0.34090909090909|0.085227272727273|0.073863636...,0.79370397506051|1.2332333929925|1.36138879750...,0.556959,36.582875652674|29.8424353741|28.016510025631|...,39.870749,Docetaxel
9,HN137,133,E1|E2|E3|F1|F2|F3,0.34090909090909|0.085227272727273|0.073863636...,-2.3211293510138|-3.0366930917662|-1.619429529...,-2.528622,83.325642371615|89.137612549776|75.44527042929...,83.764686,Doxorubicin


##### List all drug pairs

In [24]:
# Build one [patient, drug A, drug B] record for every unordered pair of tested
# drugs (A precedes B in tested_drug_list), repeated for every patient.
n_drugs = len(tested_drug_list)
drug_id_strs = [str(drug_id) for drug_id in tested_drug_list]

drug_combi_list = [
    [patient, drug_id_strs[first], drug_id_strs[second]]
    for patient in patient_list
    for first in range(0, n_drugs - 1)
    for second in range(first + 1, n_drugs)
]

drug_combi_df = pd.DataFrame(drug_combi_list, columns=['patient', 'A', 'B'])

print(drug_combi_df.shape)
drug_combi_df.head()

(168, 3)


Unnamed: 0,patient,A,B
0,HN120,1007,133
1,HN120,1007,201
2,HN120,1007,1010
3,HN120,1007,182
4,HN120,1007,301


##### Get pred and info for each drug

In [25]:
# Columns pulled in for drug B; the cluster / cluster_p columns are not repeated
# because the drug A merge already brings them in.
drug_b_columns = ['patient', 'drug_id', 'drug_name', 'cluster_delta', 'delta', 'cluster_kill', 'kill']

# Step 1: attach the single-drug predictions of drug A to every pair.
merge_df = drug_combi_df.merge(
    single_drug_pred_df,
    how='left',
    left_on=['patient', 'A'],
    right_on=['patient', 'drug_id'],
)

# Step 2: attach the predictions of drug B; column names shared by both sides
# receive the '_A' / '_B' suffixes.
drug_combi_pred_df = merge_df.merge(
    single_drug_pred_df[drug_b_columns],
    how='left',
    left_on=['patient', 'B'],
    right_on=['patient', 'drug_id'],
    suffixes=['_A', '_B'],
)

In [26]:
# Preview the merged table: one row per (patient, drug A, drug B) with the
# single-drug columns suffixed '_A' and '_B'.
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,drug_name_A,drug_id_B,drug_name_B,cluster_delta_B,delta_B,cluster_kill_B,kill_B
0,HN120,1007,133,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel,133,Doxorubicin,-2.8334341716821|-2.6720860912117|-3.778959838...,-3.028104,87.69637799287|86.437921582584|93.209818273866...,85.297489
1,HN120,1007,201,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel,201,Epothilone B,-4.0213711434516|-3.6385077224033|-3.038971912...,-3.319798,94.199124702299|92.56712528151|89.152897129961...,87.040929
2,HN120,1007,1010,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel,1010,Gefitinib,1.8712695534725|1.0992860185533|3.087297545393...,2.176924,21.465935078167|31.821936069363|10.52739493194...,17.366424
3,HN120,1007,182,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel,182,Obatoclax Mesylate,-1.690692592809|-1.3450796341807|-0.8269725475...,-1.173359,76.348806194752|71.754940580896|63.95043861070...,66.444651
4,HN120,1007,301,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,Docetaxel,301,PHA-793887,-1.5438814859345|-3.2564531480835|-3.508445283...,-2.726141,74.462298740991|90.526989849554|91.92254116809...,81.894659


In [27]:
# One record per row of drug_combi_pred_df, in the same order:
# [cluster_kill_C string, kill_C, improve, improve_p, entropy, sum_kill_dif].
rows = []
format_kill = "{:.14}".format  # per-cluster values are written with precision 14

for _, pair in drug_combi_pred_df.iterrows():

    # '|'-separated strings -> float arrays, one entry per cluster.
    weights = np.fromiter(map(float, pair['cluster_p'].split('|')), dtype=float)
    kill_a = np.fromiter(map(float, pair['cluster_kill_A'].split('|')), dtype=float)
    kill_b = np.fromiter(map(float, pair['cluster_kill_B'].split('|')), dtype=float)

    # Per-cluster combined kill on the percent scale: A + B - A*B/100, i.e. the
    # two kill fractions are combined as if the drugs acted independently.
    kill_combo = kill_a + kill_b - (kill_a / 100) * (kill_b / 100) * 100
    # Patient-level combined kill: cluster values weighted by cluster_p.
    kill_C = np.sum(weights * kill_combo)

    # Gain of the combination over the better single drug (absolute, relative).
    best_single = np.max([pair['kill_A'], pair['kill_B']])
    improve = kill_C - best_single
    improve_p = improve / best_single

    ##### specificity (entropy) #####

    # Total cluster weight where A kills strictly more vs. where B kills at
    # least as much; the entropy (in bits) of that two-way split is 0 when one
    # side has no weight.
    weight_a = weights[kill_a > kill_b].sum()
    weight_b = weights[kill_a <= kill_b].sum()
    if weight_a != 0 and weight_b != 0:
        entropy = -(weight_a * np.log2(weight_a) + weight_b * np.log2(weight_b))
    else:
        entropy = 0

    # Summed absolute per-cluster difference between the two single-drug kills.
    sum_kill_dif = np.abs(kill_a - kill_b).sum()

    ##### save output #####

    rows.append([
        '|'.join(format_kill(k) for k in kill_combo),
        kill_C,
        improve,
        improve_p,
        entropy,
        sum_kill_dif,
    ])

In [28]:
# Append the per-pair combination statistics computed in the previous cell.
combi_stat_columns = ['cluster_kill_C', 'kill_C', 'improve', 'improve_p', 'kill_entropy', 'sum_kill_dif']

# Building the stats frame on the pair table's own index makes the row
# alignment explicit: pandas raises if `rows` does not have exactly one record
# per pair, instead of silently padding with NaN.
combi_stat_df = pd.DataFrame(rows, columns=combi_stat_columns, index=drug_combi_pred_df.index)

# Drop stat columns left over from an earlier run of this cell so that
# re-running it replaces them rather than appending duplicate columns.
drug_combi_pred_df = pd.concat(
    [drug_combi_pred_df.drop(columns=combi_stat_columns, errors='ignore'), combi_stat_df],
    axis=1,
)
drug_combi_pred_df.head()

Unnamed: 0,patient,A,B,drug_id_A,cluster,cluster_p,cluster_delta_A,delta_A,cluster_kill_A,kill_A,...,cluster_delta_B,delta_B,cluster_kill_B,kill_B,cluster_kill_C,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1007,133,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,...,-2.8334341716821|-2.6720860912117|-3.778959838...,-3.028104,87.69637799287|86.437921582584|93.209818273866...,85.297489,93.726706155852|94.500965518187|95.98514681760...,90.008341,4.710851,0.055228,0.0,169.970362
1,HN120,1007,201,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,...,-4.0213711434516|-3.6385077224033|-3.038971912...,-3.319798,94.199124702299|92.56712528151|89.152897129961...,87.040929,97.042285980937|96.986182123569|93.58639764972...,90.673498,3.632569,0.041734,0.0,178.638303
2,HN120,1007,1010,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,...,1.8712695534725|1.0992860185533|3.087297545393...,2.176924,21.465935078167|31.821936069363|10.52739493194...,17.366424,59.957542116904|72.355747185743|47.09723720467...,53.049766,8.722693,0.19678,0.0,111.481073
3,HN120,1007,182,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,...,-1.690692592809|-1.3450796341807|-0.8269725475...,-1.173359,76.348806194752|71.754940580896|63.95043861070...,66.444651,87.940877213293|88.547437132715|78.68485673796...,79.552638,13.107986,0.197277,0.0,91.931882
4,HN120,1007,301,1007,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,0.056986626834624|-0.5521462503121|0.532688663...,0.187198,49.012625383709|59.452863251762|40.87266962319...,44.327073,...,-1.5438814859345|-3.2564531480835|-3.508445283...,-2.726141,74.462298740991|90.526989849554|91.92254116809...,81.894659,86.97899659068|96.158965620124|95.224014230406...,88.213137,6.318478,0.077154,0.0,163.355724


In [29]:
# Keep only the columns that go into the saved file, in output order:
# identifiers, per-cluster values, patient-level kills, then the derived stats.
output_columns = [
    'patient',
    'drug_id_A', 'drug_name_A',
    'drug_id_B', 'drug_name_B',
    'cluster', 'cluster_p',
    'cluster_kill_A', 'cluster_kill_B', 'cluster_kill_C',
    'kill_A', 'kill_B', 'kill_C',
    'improve', 'improve_p', 'kill_entropy', 'sum_kill_dif',
]
drug_combi_pred_df = drug_combi_pred_df[output_columns]

drug_combi_pred_df.head()

Unnamed: 0,patient,drug_id_A,drug_name_A,drug_id_B,drug_name_B,cluster,cluster_p,cluster_kill_A,cluster_kill_B,cluster_kill_C,kill_A,kill_B,kill_C,improve,improve_p,kill_entropy,sum_kill_dif
0,HN120,1007,Docetaxel,133,Doxorubicin,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,49.012625383709|59.452863251762|40.87266962319...,87.69637799287|86.437921582584|93.209818273866...,93.726706155852|94.500965518187|95.98514681760...,44.327073,85.297489,90.008341,4.710851,0.055228,0.0,169.970362
1,HN120,1007,Docetaxel,201,Epothilone B,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,49.012625383709|59.452863251762|40.87266962319...,94.199124702299|92.56712528151|89.152897129961...,97.042285980937|96.986182123569|93.58639764972...,44.327073,87.040929,90.673498,3.632569,0.041734,0.0,178.638303
2,HN120,1007,Docetaxel,1010,Gefitinib,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,49.012625383709|59.452863251762|40.87266962319...,21.465935078167|31.821936069363|10.52739493194...,59.957542116904|72.355747185743|47.09723720467...,44.327073,17.366424,53.049766,8.722693,0.19678,0.0,111.481073
3,HN120,1007,Docetaxel,182,Obatoclax Mesylate,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,49.012625383709|59.452863251762|40.87266962319...,76.348806194752|71.754940580896|63.95043861070...,87.940877213293|88.547437132715|78.68485673796...,44.327073,66.444651,79.552638,13.107986,0.197277,0.0,91.931882
4,HN120,1007,Docetaxel,301,PHA-793887,D1|D2|G1|G2,0.31318681318681|0.17582417582418|0.3406593406...,49.012625383709|59.452863251762|40.87266962319...,74.462298740991|90.526989849554|91.92254116809...,86.97899659068|96.158965620124|95.224014230406...,44.327073,81.894659,88.213137,6.318478,0.077154,0.0,163.355724


In [30]:
# Write the combination table next to the single-drug predictions; the
# '_shifted' file name is used when dosage_shifted is set. The index is not
# written.
combi_csv_template = ('pred_combi_kill_{}_{}_shifted.csv' if dosage_shifted
                      else 'pred_combi_kill_{}_{}.csv')
drug_combi_pred_df.to_csv(current_dir + combi_csv_template.format(ref_type, model_name), index=False)