In [9]:
## Return predictive performance of a pathway
import time, os, math, random
from collections import defaultdict
import pandas as pd
import numpy as np
import scipy.stats as stat
from sklearn.preprocessing import StandardScaler # zscore standardization
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import lifelines; from lifelines.statistics import logrank_test; from lifelines import KaplanMeierFitter

## Initialize
#----------------------------------------------------------------------------------------------------------------------
# Run configuration. Everything below reads these module-level settings.

# { cancer type : drug } pairs to analyze; the drug name is used as the key into the
# response, proximal-pathway and patient-treatment dictionaries.
test_types = {'coad':'IRINOTECAN'}
# Regression models to fit; the analysis loop handles exactly these three names.
ML_algorithm_list = ['Ridge', 'SVR', 'LinearRegression']
# Pathway database; only 'reactome' (case-insensitive) is handled by the visible code.
pathway_source = 'reactome'
# NOTE(review): not referenced anywhere in the visible code -- confirm before removing.
n_jobs = 10
# Passed to return_proximal_pathways(), which currently does not use it.
network = 'STRING_700'
# Pathways whose z-score is <= this value are kept as "proximal" to a drug.
# NOTE(review): -1.2816 looks like the 10th percentile of a standard normal -- confirm.
zscore_cutoff = -1.2816
# Upper bound on how many top-ranked pathways are combined for patient prediction.
testing_pathway_num = 10
# Root output directory (result tables plus one sub-directory per ML algorithm).
tmp_dir = 'results/multi_pathway_predictions'

# Wall-clock start, reported again when the run finishes.
start_time = time.ctime()



## Parse proximal pathways
#--------------------------------------------------------------------------------------------------------------------
def return_proximal_pathways(network, pathway_source, zscore_cutoff):
	'''
	Return the pathways considered proximal to each drug.

	Reads the tab-separated z-score table
	'../data/coad_blca_organoid_drugs_zscore_result_reactome.txt' (path is relative to
	the current working directory). The table has one 'Pathway' column holding pathway
	names and one column per drug; a column header may name several drugs joined by '_',
	in which case every named drug receives that column's pathways.

	network        : accepted for interface compatibility; not used by this function
	pathway_source : pathway database name; only 'reactome' (case-insensitive) is supported
	zscore_cutoff  : a pathway is kept when its z-score is <= this value; entries that
	                 cannot be parsed as numbers are ignored

	output = { drug (upper case) : [ proximal pathways ] }
	A defaultdict(list) is returned, so unknown drugs (and unsupported pathway sources)
	yield an empty list. If a drug appears in several column headers, the last column wins.
	'''
	output = defaultdict(list)
	if pathway_source.lower() == 'reactome':
		df = pd.read_csv('../data/coad_blca_organoid_drugs_zscore_result_reactome.txt', sep='\t')
		for tmp_drug in df.columns:
			# the label column is not a drug; skipping it avoids a bogus 'PATHWAY' entry
			if tmp_drug == 'Pathway':
				continue
			# the filtering depends on the column only, so do it once per column
			z = pd.to_numeric(df[tmp_drug], errors='coerce')
			pathway_list = list(df.loc[z <= zscore_cutoff, 'Pathway'])
			for drug in tmp_drug.split('_'):
				# copy so that drugs sharing a column do not share one mutable list
				output[drug.upper()] = list(pathway_list)
	return output


# DISABLED CODE: the function below is wrapped in a string literal, so it is never defined
# or executed here. The analysis further down still calls
# return_COAD_organoid_drug_response_IC50(); presumably the live definition comes from one
# of the scripts exec'd from ../utilities -- verify before deleting this copy.
# NOTE(review), should this copy ever be re-enabled:
#   * values are converted with float() before the `IC50==''` test, so that test can never
#     be true and an empty field would raise ValueError instead of being skipped;
#   * it returns the list of replicate IC50 values per drug, not the median its docstring
#     describes.
"""
#Parse Drug IC50: 
#--------------------------------------------------------------------------------------------------------------------
def return_COAD_organoid_drug_response_IC50():
	'''
	output = { sample : { drug : IC50 } }
	output_drugList = [list of drugs tested ]
	---
	median IC50 values from di/triplicates are returned
	'''
	output = {} # { sample : { drug : IC50 } }
	drug_list =set() # [drugs]

	tmp = {} # { sample : { drug : [list of IC50]}}
	f = open('../data/Drug_IC50_67.txt', 'r')
	for line in f:
		line = line.strip().split('\t')
		if 'cell_line' in line[0]:
			sample_list=line[1:]
		else:
			drug, IC50_list = line[0].strip().upper(), list(map(float, line[1:]))
			for index, IC50 in enumerate(IC50_list):
				if IC50=='':
					pass
				else:
					sample = sample_list[index]
					if not sample in output:
						output[sample] = defaultdict(list)
					output[sample][drug.upper()].append(IC50)
			drug_list.add(drug.upper())
	f.close()
	return output, list(drug_list)
"""
## Output files
# Two separate tables are produced. They must not share a path: opening the same file
# twice in 'w' mode makes the two handles overwrite each other's content.

# make sure the output directory exists before opening files inside it
os.makedirs(tmp_dir, exist_ok=True)

# survival summary table (one row per cancer / drug / ML / pathway rank)
# NOTE(review): the file name is chosen here only to keep it distinct from the coefficient
# table; the name `f` is also rebound later in this script.
f=open('%s/multi_pathway_survival.txt'%(tmp_dir), 'w')
s='\t'.join(['cancer_type', 'drug', 'ML', 'pathway_rank', 'pathway', '5yr_responder', '5yr_nonresponder', 'pvalue'])
print(s, file=f)

# regression coefficient table; its header goes to the same handle the rows are written to
fr=open('%s/multi_pathway_coefficients.txt'%(tmp_dir), 'w')
m='\t'.join(['cancer', 'drug','num_samples','ML', 'pathway', 'reg_coef', 'abs_reg_coef'])
print(m, file=fr)

## Analyze
# For every ML algorithm and every (cancer type, drug) pair in test_types:
#   1. load organoid and TCGA data through the helper scripts in ../utilities,
#   2. regress organoid drug response on the NES scores of the drug's proximal pathways,
#   3. rank pathways by absolute regression coefficient (coefficients are written to `fr`),
#   4. score drug-treated TCGA patients with the top-ranked pathways, split them at the
#      median predicted response and compare survival (log-rank test + Kaplan-Meier plot).
for ML in ML_algorithm_list:
	# set directory (one sub-directory of tmp_dir per ML algorithm)
	fo_dir = '%s/%s'%(tmp_dir, ML)
	os.makedirs(fo_dir, exist_ok=True)

	# cancer specific analysis
	for cancer_type in test_types:
		drug = test_types[cancer_type]
		print ('\n\n-----------------\ntesting for %s / %s / %s,  '%(cancer_type, drug, ML), time.ctime())


		## Import data
		# The helper scripts are exec'd into this module's globals, which is where the
		# parse_* / reactome_genes_uniprot helpers used below are expected to come from.
		# Loading runs with ../utilities as working directory (relative data paths are
		# resolved from there); try/finally restores the original directory even on failure.
		c_dir = os.getcwd()
		os.chdir('../utilities')
		try:
			for utility_script in ("Patient_clinical_Data.py", "Preclinical_Model_data.py", "LUAD_Organoid.py", "GSEA.py", "parse_Drugbank.py"):
				with open(utility_script) as f:
					code = compile(f.read(), utility_script, 'exec')
					exec(code, globals())

			nes_dic = parse_ssGSEA_NES(cancer_type, 'organoid', pathway_source)
			response_dic, drugList = return_COAD_organoid_drug_response_IC50()
			network_dic = return_proximal_pathways(network, pathway_source, zscore_cutoff)# { drug : [ pathways ] }

			drugPat, patDrug = parse_TCGA_drug_treatment_data(cancer_type)
			surDic = parse_TCGA_survival_data_boolean_format(cancer_type)
			patNES = parse_ssGSEA_NES(cancer_type, 'TCGA', pathway_source)

			if pathway_source.lower() == 'reactome':
				feature_list = reactome_genes_uniprot()
				feature_list = feature_list.keys()
			else:
				# fail with a clear message instead of a NameError on feature_list
				raise ValueError('unsupported pathway_source: %s'%pathway_source)
		finally:
			os.chdir(c_dir)

		## feature_list // proximal pathways only
		# keep pathways scored in every organoid sample and every patient with survival
		# data, and proximal to the drug
		candidate_features = set(feature_list)
		for sample in nes_dic:
			candidate_features &= set(nes_dic[sample].keys())
		survival_patients = list(set(surDic.keys())&set(patNES.keys()))
		for pat in survival_patients:
			candidate_features &= set(patNES[pat].keys())
		candidate_features &= set(network_dic[drug])
		# sorted: set iteration order of strings changes between interpreter runs
		feature_list = sorted(candidate_features)
		print(len(feature_list))

		# number of top-ranked pathways to combine; at least two are required
		testing_pathways = np.arange(2, np.min([testing_pathway_num, len(feature_list)])+1)
		if len(testing_pathways) == 0:
			print('fewer than 2 usable pathways for %s / %s / %s, skipping'%(cancer_type, drug, ML))
			continue

		# organoid training data
		# sorted: RidgeCV(cv=3) builds its folds from the row order without shuffling, so
		# an arbitrary set order would make the fit differ from run to run
		samples = sorted(sample for sample in set(nes_dic.keys())&set(response_dic.keys()) if drug in response_dic[sample])
		responses = [response_dic[sample][drug] for sample in samples]
		# feature_list already contains proximal pathways only, no further filtering needed
		expList = [[float(nes_dic[sample][feature]) for feature in feature_list] for sample in samples]

		scaler = StandardScaler()
		scaled_expList = np.array(scaler.fit_transform(expList)) # scaled expression

		# regression (organoid)
		if ML == 'Ridge':
			regr = RidgeCV(cv=3, alphas=np.arange(0.1,1,0.1)).fit(scaled_expList, responses)
		elif ML == 'SVR':
			regr = SVR(kernel='linear').fit(scaled_expList, responses)
		elif ML == 'LinearRegression':
			regr = LinearRegression().fit(scaled_expList, responses)
		else:
			raise ValueError('unsupported ML algorithm: %s'%ML)
		feature_importance = list(regr.coef_)
		if ML == 'SVR':
			# a linear SVR exposes coef_ as one row of shape (1, n_features)
			feature_importance = feature_importance[0]

		# feature ranks
		coef_dic = {} # { feature : coefficient }
		abs_coef_dic = {} # { feature : absolute coefficient }
		for feature, coef in zip(feature_list, feature_importance):
			# print regression coefficients
			pr='\t'.join(map(str, [cancer_type, drug, len(samples), ML, feature, coef, np.abs(coef)]))
			print(pr, file=fr)
			# coefficents
			coef_dic[feature] = coef
			abs_coef_dic[feature] = np.abs(coef)
		# dense ranking: rank 1 = largest absolute coefficient, equal values share a rank
		r = {key: rank for rank, key in enumerate(sorted(set(abs_coef_dic.values()), reverse=True), 1)}
		feature_rank_dic = {k: r[v] for k,v in abs_coef_dic.items()}
		feature_dict = {value:key for key, value in feature_rank_dic.items()} # { rank : feature }
		# show the (up to) ten best-ranked pathways; fewer ranks may exist
		for rank in range(1, 11):
			if rank in feature_dict:
				print(feature_dict[rank])


		# scale expressions (patient)
		pat_samples = list(survival_patients)
		pat_expList = [[patNES[pat][feature] for feature in feature_list] for pat in pat_samples]

		scaler = StandardScaler()
		scaled_pat_expList = scaler.fit_transform(pat_expList)

		pat_expDic = {} # { pat : { feature : scaled expression } }
		for p_index, pat in enumerate(pat_samples):
			pat_expDic[pat] = dict(zip(feature_list, scaled_pat_expList[p_index]))

		# multi-pathway prediction
		# NOTE(review): the patient analysis below is run once, with the largest pathway
		# count in testing_pathways (this is what the previous loop's final iteration
		# produced). Wrap the rest of this body in `for pathway_num in testing_pathways:`
		# if one result per pathway count is wanted.
		pathway_num = testing_pathways[-1]

		# features: every pathway ranked 1..pathway_num, best rank first
		features_used = []; coef_used = []
		for rank in np.arange(1, pathway_num+1):
			for feature, coef in zip(feature_list, feature_importance):
				if rank == feature_rank_dic[feature]:
					features_used.append(feature); coef_used.append(coef)
		print(features_used)
		print(coef_used)


		# predicted drug response (patient)
		pred_response = {} # { pat : predicted drug response }
		month_dic, status_dic = defaultdict(list), defaultdict(list)
		fiveYear_dic = {} # { predicted response : 5 year survival }

		# only patients treated with the drug that have expression and survival data
		for pat in list(set(pat_expDic.keys())&set(drugPat[drug])&set(surDic.keys())):
			print(pat)
			pred_r = 0
			for feature, coef in zip(features_used, coef_used):
				pred_r += coef * pat_expDic[pat][feature]
			pred_response[pat] = pred_r


		# classify patients: predicted response at or below the median -> Responder
		response_cutoff = np.median(list(pred_response.values()))
		print(response_cutoff)
		resic=[]
		resicsam=[]
		nonresic=[]
		nonresicsam=[]
		for pat in pred_response:
			if pred_response[pat] <= response_cutoff:
				cls = 'Responder'
				resic.append(pred_response[pat])
				resicsam.append(pat)
			else:
				cls = 'Nonresponder'
				nonresic.append(pred_response[pat])
				nonresicsam.append(pat)

			month_dic[cls].append(surDic[pat]['months'])
			status_dic[cls].append(surDic[pat]['status'])


		print('Rseponder IC50:')
		print(resicsam)
		for sam,binny,sur in zip(resicsam, resic,month_dic['Responder']):
			print(sam,binny,sur)


		print('Nonrseponder IC50:')
		print(nonresicsam)
		# pair the non-responder samples and scores with the non-responder survival times
		for sam1,binny1,sur1 in zip(nonresicsam, nonresic,month_dic['Nonresponder']):
			print(sam1,binny1,sur1)


		# logrank Test
		results = logrank_test(month_dic['Responder'], month_dic['Nonresponder'], event_observed_A=status_dic['Responder'], event_observed_B=status_dic['Nonresponder'])
		pvalue = results.p_value

		# estimated survival at 60 months per class
		# NOTE(review): computed but not written to any output in this script
		for cls in month_dic:
			kmf = KaplanMeierFitter()
			kmf.fit(month_dic[cls], status_dic[cls])
			fiveYear_dic[cls] = kmf.predict(60)

		# draw survival plot
		fig = plt.figure(figsize=(4,4))
		ax = fig.add_subplot(1,1,1)
		plt.title('%s / %s / %s / %s\npvalue=%.4f\n'%(cancer_type, drug, ML, pathway_num, pvalue), fontsize=8)

		c1 = KaplanMeierFitter()
		ax = c1.fit(month_dic['Responder'], status_dic['Responder'], label='Responder (n=%s)'%len(month_dic['Responder'])).plot(ax=ax, ci_show=True, c='b')

		# each curve gets its own fitter so c1 keeps the responder fit
		c2 = KaplanMeierFitter()
		ax = c2.fit(month_dic['Nonresponder'], status_dic['Nonresponder'], label='Nonresponder (n=%s)'%len(month_dic['Nonresponder'])).plot(ax=ax, ci_show=True, c='r')

		plt.xlabel('Survival (months)')
		plt.ylabel('Percent survival')
		ymin, ymax = 0, 1.1
		plt.ylim(ymin, ymax)
		# dashed vertical marker at 5 years
		plt.plot([60, 60], [ymin, ymax], c='k', linestyle='--')
		plt.tight_layout()
		plt.savefig('%s/%s_%s_rank_%s.jpg'%(fo_dir, cancer_type, drug, pathway_num), format='jpg')
		plt.savefig('%s/%s_%s_rank_%s.eps'%(fo_dir, cancer_type, drug, pathway_num), format='eps', dpi=300)
		plt.close()


# make sure the buffered coefficient rows reach the disk
fr.flush()

print('process complete, start time: %s - end time: %s '%(start_time, time.ctime()))



-----------------
testing for coad / IRINOTECAN / Ridge,   Tue Apr 19 13:30:22 2022
76
REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION
REACTOME_REGULATION_OF_THE_FANCONI_ANEMIA_PATHWAY
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_LATE_ENDOSOME_MEMBRANE
REACTOME_P53_INDEPENDENT_G1_S_DNA_DAMAGE_CHECKPOINT
REACTOME_INTERFERON_GAMMA_SIGNALING
REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_EARLY_ENDOSOME_MEMBRANE
REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING
REACTOME_TELOMERE_MAINTENANCE
REACTOME_FANCONI_ANEMIA_PATHWAY
['REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION', 'REACTOME_REGULATION_OF_THE_FANCONI_ANEMIA_PATHWAY', 'REACTOME_SYNTHESIS_OF_PIPS_AT_THE_LATE_ENDOSOME_MEMBRANE', 'REACTOME_P53_INDEPENDENT_G1_S_DNA_DAMAGE_CHECKPOINT', 'REACTOME_INTERFERON_GAMMA_SIGNALING', 'REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY', 'REACTOME_SYNTHESIS_OF_PIPS_AT_THE_EARLY_ENDOSOME_MEMBRANE', 'REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SI

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.




-----------------
testing for coad / IRINOTECAN / SVR,   Tue Apr 19 13:30:28 2022


The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


76
REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION
REACTOME_REGULATION_OF_THE_FANCONI_ANEMIA_PATHWAY
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_LATE_ENDOSOME_MEMBRANE
REACTOME_P53_INDEPENDENT_G1_S_DNA_DAMAGE_CHECKPOINT
REACTOME_INTERFERON_GAMMA_SIGNALING
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_EARLY_ENDOSOME_MEMBRANE
REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY
REACTOME_TELOMERE_MAINTENANCE
REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING
REACTOME_FANCONI_ANEMIA_PATHWAY
['REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION', 'REACTOME_REGULATION_OF_THE_FANCONI_ANEMIA_PATHWAY', 'REACTOME_SYNTHESIS_OF_PIPS_AT_THE_LATE_ENDOSOME_MEMBRANE', 'REACTOME_P53_INDEPENDENT_G1_S_DNA_DAMAGE_CHECKPOINT', 'REACTOME_INTERFERON_GAMMA_SIGNALING', 'REACTOME_SYNTHESIS_OF_PIPS_AT_THE_EARLY_ENDOSOME_MEMBRANE', 'REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY', 'REACTOME_TELOMERE_MAINTENANCE', 'REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING', 'REACTOME_FANCONI_ANEMIA_PATHWAY']
[0.84502

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.




-----------------
testing for coad / IRINOTECAN / LinearRegression,   Tue Apr 19 13:30:35 2022


The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


76
REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION
REACTOME_REGULATION_OF_THE_FANCONI_ANEMIA_PATHWAY
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_LATE_ENDOSOME_MEMBRANE
REACTOME_INTERFERON_GAMMA_SIGNALING
REACTOME_P53_INDEPENDENT_G1_S_DNA_DAMAGE_CHECKPOINT
REACTOME_SYNTHESIS_OF_PIPS_AT_THE_EARLY_ENDOSOME_MEMBRANE
REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY
REACTOME_TELOMERE_MAINTENANCE
REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING
REACTOME_FANCONI_ANEMIA_PATHWAY
['REACTOME_HYALURONAN_UPTAKE_AND_DEGRADATION', 'REACTOME_REGULATION_OF_THE_FANCONI_ANEMIA_PATHWAY', 'REACTOME_SYNTHESIS_OF_PIPS_AT_THE_LATE_ENDOSOME_MEMBRANE', 'REACTOME_INTERFERON_GAMMA_SIGNALING', 'REACTOME_P53_INDEPENDENT_G1_S_DNA_DAMAGE_CHECKPOINT', 'REACTOME_SYNTHESIS_OF_PIPS_AT_THE_EARLY_ENDOSOME_MEMBRANE', 'REACTOME_DOWNREGULATION_OF_SMAD2_3_SMAD4_TRANSCRIPTIONAL_ACTIVITY', 'REACTOME_TELOMERE_MAINTENANCE', 'REACTOME_DOWNREGULATION_OF_TGF_BETA_RECEPTOR_SIGNALING', 'REACTOME_FANCONI_ANEMIA_PATHWAY']
[0.90515

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


process complete, start time: Tue Apr 19 13:30:22 2022 - end time: Tue Apr 19 13:30:41 2022 
