In [1]:
# Imports and global display settings for the whole notebook.
import os
import pandas as pd
import numpy as np
# Print numpy arrays with two decimal places.
np.set_printoptions(precision=2)

import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from collections import Counter

sns.set_style('ticks')

%matplotlib inline
import warnings
# NOTE(review): this silences every warning, including pandas/numpy
# deprecation and numerical warnings; consider narrowing the filter.
warnings.filterwarnings('ignore')
import matplotlib as mpl
# Render inline figures and saved figures at 300 dpi.
mpl.rcParams['figure.dpi']= 300
mpl.rc("savefig", dpi=300)

##### Read files and select drugs

In [2]:
# Run configuration.
# Reference-dosage column of the drug table to compare predictions against.
# Listed options: log2_median_ic50, log2_median_ic50_9f, log2_median_ic50_hn,
# log2_median_ic50_3f_hn, log2_median_ic50_9f_hn, log2_max_conc
ref_type = 'log2_median_ic50_hn' # alternatives: log2_median_ic50_3f_hn | log2_median_ic50_hn
# Model identifier; it is embedded in the input and output file names.
model_name = 'hn_drug_cw_dw10_100000_model'

# For each patient, a cell cluster that makes up less than 5% of the cells is not considered.
freq_cutoff = 0.05

# Whether to shift the predicted dosage, because the GDSC experiment (Syto60) is less sensitive.
dosage_shifted = False

In [3]:
# Load the per-drug statistics table; the first CSV column becomes the index.
drug_stat_path = '../preprocessed_data/GDSC/hn_drug_stat.csv'
drug_info_df = pd.read_csv(drug_stat_path, index_col=0)

# Keep drug IDs as strings so they can be used as column labels later on.
drug_info_df.index = drug_info_df.index.astype(str)

# Lookup table: drug ID (str) -> drug name.
drug_id_name_dict = {
    drug_id: drug_name
    for drug_id, drug_name in zip(drug_info_df.index, drug_info_df['Drug Name'])
}
print(drug_info_df.shape)

(81, 27)


In [4]:
# Drug IDs of interest; the cell output lists any that are missing from the
# drug table (an empty list means all of them are available).
tested_drug_list = [1032, 1007, 133, 201, 1010] + [182, 301, 302]

# Convert the string index to integers once instead of once per tested drug.
known_drug_ids = set(drug_info_df.index.astype(int))
[d for d in tested_drug_list if d not in known_drug_ids]

[]

##### Read predicted IC50

In [5]:
# Name of the sub-directory under ../result/HN_model/ that holds the
# predictions to read and receives the outputs of this notebook.
norm_type = 'patient_TPM'

In [6]:
# Build the result directory once and reuse it for both reading and writing,
# so the input and output locations cannot drift apart.
out_dir = '../result/HN_model/{}/'.format(norm_type)

# Predicted values per patient (rows) and drug ID (columns); the first CSV
# column is used as the index.
cadrres_patient_df = pd.read_csv(
    out_dir + 'pred_gdsc_no_bias_{}.csv'.format(model_name),
    index_col=0,
)

In [7]:
# Preview the first rows of the prediction table (index = patients, columns = drug IDs).
cadrres_patient_df.head()

Unnamed: 0,1001,1003,1004,1006,1007,1010,1012,1014,1015,1016,...,299,301,302,303,305,306,308,328,331,346
HN120,10.582092,-6.99303,-6.289053,-0.678505,-6.666853,0.953735,0.099626,1.763727,2.467532,-1.884118,...,0.199231,2.577249,0.565079,2.664446,3.350329,2.539095,-1.974207,-2.879822,1.058784,-5.215218
HN137,10.342202,-7.301405,-7.577541,-1.305941,-8.006421,0.501566,0.019054,1.248602,2.418054,-1.999318,...,1.659174,2.934579,1.547792,3.141412,3.411621,2.536889,-2.161168,-3.057182,1.876328,-4.953847
HN148,10.686618,-5.308124,-4.918613,0.726391,-6.392437,-0.025692,0.962065,1.00888,2.257967,-1.227807,...,2.837512,3.486805,2.4872,4.079376,4.380871,3.243483,-0.153279,-0.816008,2.892805,-2.348328
HN159,11.391786,-5.094658,-5.172881,1.001772,-6.563778,1.594693,2.003104,3.03163,4.057887,-0.314321,...,0.28767,1.870677,0.331834,2.989429,3.922742,2.686959,0.606889,-1.700831,0.812231,-4.132381
HN160,10.757305,-5.201873,-6.008071,1.281516,-6.505566,1.036287,1.609897,3.500886,4.339465,-2.149608,...,0.01826,2.102799,-0.371211,3.355045,3.796809,2.410083,1.121997,-0.320819,1.262494,-2.120933


In [8]:
# Drugs are taken from the drug table, patients from the prediction table.
drug_list, patient_list = drug_info_df.index, cadrres_patient_df.index
print('{} {}'.format(len(drug_list), len(patient_list)))

# Put both tables in the same drug order; selecting the prediction columns
# raises a KeyError if a drug in the drug table has no prediction column.
cadrres_patient_df = cadrres_patient_df[drug_list]
drug_info_df = drug_info_df.loc[drug_list]

81 6


In [9]:
if dosage_shifted:
    # Subtract 2 from every predicted value. If the predictions are log2
    # values (as the ref_type column names suggest), this is a 4-fold
    # (2**2) reduction rather than an absolute shift.
    # NOTE(review): the original comment read "Shift by 4 uM" - confirm the intended units.
    # NOTE(review): not idempotent - re-running this cell shifts the values again.
    cadrres_patient_df = cadrres_patient_df - 2

##### List all pairs of patient and drug

In [10]:
# The subtraction below works on raw arrays, so it is purely positional.
# Fail loudly if the two tables are not in the same drug order instead of
# silently producing numbers for the wrong drugs.
assert list(cadrres_patient_df.columns) == list(drug_list), \
    'prediction columns are not aligned with drug_list'
assert list(drug_info_df.index) == list(drug_list), \
    'drug_info_df rows are not aligned with drug_list'

# delta = predicted value - reference dosage (column `ref_type`), per patient and drug.
pred_delta_df = pd.DataFrame(cadrres_patient_df.values - drug_info_df[ref_type].values, columns=drug_list, index=patient_list)
# Base-2 logistic curve: 50 when delta == 0, approaching 100 as delta grows
# (presumably the percentage of viable cells - TODO confirm).
pred_cv_df = 100 / (1 + (np.power(2, -pred_delta_df)))
# Complement of the value above, on the same 0-100 scale.
pred_kill_df = 100 - pred_cv_df

pred_kill_df.head()

Drug ID,1001,1003,1004,1006,1007,1010,1012,1014,1015,1016,...,299,301,302,303,305,306,308,328,331,346
HN120,39.049925,39.842087,35.492878,38.690576,10.27645,16.60154,60.440672,33.595293,49.59354,49.670575,...,97.186756,67.116298,84.901506,79.616156,77.146049,62.379669,81.139437,78.550861,78.60093,89.517887
HN137,43.071491,45.058686,57.338329,49.364222,22.472146,21.404472,61.767905,41.962207,50.450896,51.666222,...,92.624159,61.438141,73.995441,73.727766,76.388385,62.415545,83.04314,80.549491,67.576091,87.692203
HN148,37.339828,17.08044,17.546785,19.245484,8.650394,28.186133,45.662509,46.054172,53.220541,38.506675,...,84.729989,52.073647,59.73857,59.428795,62.299004,50.436243,54.907273,46.69321,50.744693,53.931666
HN159,26.767503,15.085639,20.243978,16.45141,9.636134,11.32051,28.996999,17.361429,24.626937,24.950061,...,97.014215,76.909501,86.859241,75.716769,69.419594,59.945694,41.824479,61.794804,81.335118,80.126241
HN160,36.200743,16.062505,31.169527,13.956344,9.290461,15.824357,34.910416,13.175963,21.185432,54.260814,...,97.510153,73.929604,91.496969,70.760664,71.240627,64.453888,33.469443,38.32672,76.130277,49.999347


In [11]:
# Reshape the patient x drug matrix into one row per (patient, drug) pair.
single_drug_pred_df = pred_kill_df.stack().reset_index()
single_drug_pred_df.columns = ['patient', 'drug_id', 'kill']

# Attach the drug name that belongs to each drug ID.
drug_names = [drug_id_name_dict[drug_id] for drug_id in single_drug_pred_df['drug_id'].values]
single_drug_pred_df.loc[:, 'drug_name'] = drug_names

single_drug_pred_df.head()

Unnamed: 0,patient,drug_id,kill,drug_name
0,HN120,1001,39.049925,AICA Ribonucleotide
1,HN120,1003,39.842087,Camptothecin
2,HN120,1004,35.492878,Vinblastine
3,HN120,1006,38.690576,Cytarabine
4,HN120,1007,10.27645,Docetaxel


Save results

In [12]:
# Pick the file-name template once (shifted runs get a "_shifted" suffix),
# then write the long-format table without the row index.
file_template = 'pred_drug_kill_{}_{}_shifted.csv' if dosage_shifted else 'pred_drug_kill_{}_{}.csv'
single_drug_pred_df.to_csv(out_dir + file_template.format(ref_type, model_name), index=False)