In [None]:
import sys, os, pickle
import pandas as pd
import numpy as np
np.set_printoptions(precision=2)
from collections import Counter

import importlib
from ipywidgets import widgets
import warnings
warnings.filterwarnings('ignore')

# Directory to add to the import path before importing cadrres_sc below.
# An empty string makes os.path.abspath() resolve to the current working directory.
scriptpath = ''
sys.path.append(os.path.abspath(scriptpath))

from cadrres_sc import pp, model, evaluation, utility

In [None]:
# --- Load training inputs -------------------------------------------------

# Observed drug response table; the first CSV column becomes the index.
# (Per the file name these are IC50 values for CTRPv2 -- confirm against the data.)
cell_line_obs_df = pd.read_csv('DRUGS_CELL_LINE_IC50_CTRPV2.csv', index_col=0)
# Force string labels so later .loc lookups use str sample ids.
cell_line_obs_df.index = cell_line_obs_df.index.astype(str)

# cell lines list which your model will be trained on
# (the index was already cast to str above, so it is reused directly)
cell_line_sample_list = cell_line_obs_df.index

# Expression matrix used for training; the first CSV column becomes the index.
gene_exp_df = pd.read_csv('Training_data_CCLE_CTRPV2_CaDRReS-Sc.csv', sep=',', index_col=0)
print("Dataframe shape:", gene_exp_df.shape, "\n")

# Normalise expression with the cadrres_sc helper; it returns two objects, kept
# here as the normalised matrix and the per-gene mean used for it.
# NOTE(review): presumably log2 fold-change vs. the mean -- confirm in pp.gexp.
cell_line_log2_mean_fc_exp_df, cell_line_mean_exp_df = pp.gexp.normalize_log2_mean_fc(gene_exp_df)

# Gene list read from 'essential_genes.txt'; passed to the kernel feature computation later.
ess_gene_list = utility.get_gene_list('essential_genes.txt')

# Per-drug table; its 'log2_max_conc' column is read when computing sample weights
# in the training cell.
dataset_drug_df = pd.read_csv('Drug_conc_CTRPV2.csv', index_col=0)


In [None]:
# Kernel features computed from the training expression matrix against itself,
# then restricted to the rows of the cell lines that have observed drug response.
kernel_feature_df = (
    pp.gexp.calculate_kernel_feature(
        cell_line_log2_mean_fc_exp_df,
        cell_line_log2_mean_fc_exp_df,
        ess_gene_list,
    )
    .loc[cell_line_sample_list]
)


In [None]:
# Model training

# Kernel features restricted to the training samples (both rows and columns).
X_train = kernel_feature_df.loc[cell_line_sample_list, cell_line_sample_list]
# Observed drug response for the same samples.
Y_train = cell_line_obs_df.loc[cell_line_sample_list]

# Objective function: cadrres-wo-sample-bias | cadrres-wo-sample-bias-weight
# NOTE: the dropdown is not displayed in this cell and its value is read on the
# very next line, so model_spec_name is always the widget's initial selection.
# To train the other variant, set obj_function.value explicitly before reading it.
obj_function = widgets.Dropdown(options=['cadrres-wo-sample-bias', 'cadrres-wo-sample-bias-weight'], description='Objetice function')
model_spec_name = obj_function.value

indication_specific_degree = 1  # multiply by 1 = no indication-specific
# indication_specific_degree = 10

# Prefix for every file written by this cell ('' = current working directory).
output_dir = ''

# Inputs for the weighted objective: per-sample weights derived from the
# 'log2_max_conc' column, and an all-ones indication weight (same shape as Y_train).
sample_weights_logistic_x0_df = model.get_sample_weights_logistic_x0(dataset_drug_df, 'log2_max_conc', X_train.index)
indication_weight_df = pd.DataFrame(np.ones(Y_train.shape), index=Y_train.index, columns=Y_train.columns)

# No held-out split exists in this notebook, so the training data is also passed
# in the "test" argument slots of both training functions.
# The positional hyper-parameters (10, 0.0, 100000, 0.01) are forwarded as-is;
# see cadrres_sc.model for their meaning.
if model_spec_name in ['cadrres', 'cadrres-wo-sample-bias']:
    cadrres_model_dict, cadrres_output_dict = model.train_model(
        Y_train, X_train, Y_train, X_train,
        10, 0.0, 100000, 0.01,
        model_spec_name=model_spec_name, save_interval=5000, output_dir=output_dir)
elif model_spec_name in ['cadrres-wo-sample-bias-weight']:
    # Previously referenced undefined Y_test / X_test here, which raised NameError.
    cadrres_model_dict, cadrres_output_dict = model.train_model_logistic_weight(
        Y_train, X_train, Y_train, X_train,
        sample_weights_logistic_x0_df, indication_weight_df,
        10, 0.0, 100000, 0.01,
        model_spec_name=model_spec_name, save_interval=5000, output_dir=output_dir)
else:
    # Fail here rather than with a NameError on cadrres_model_dict further down.
    raise ValueError("Unsupported model_spec_name: {!r}".format(model_spec_name))

# Persist the trained parameters and the training output; the context managers
# make sure both files are flushed and closed.
param_dict_path = output_dir + '{}_param_dict.pickle'.format(model_spec_name)
print("Saving " + param_dict_path)
with open(param_dict_path, 'wb') as param_file:
    pickle.dump(cadrres_model_dict, param_file)

output_dict_path = output_dir + '{}_output_dict.pickle'.format(model_spec_name)
print("Saving " + output_dict_path)
with open(output_dict_path, 'wb') as output_file:
    pickle.dump(cadrres_output_dict, output_file)

In [None]:
## testing

# Expression matrix of the samples to predict; the first CSV column becomes the index.
gene_exp_df1 = pd.read_csv('Test_data_CCLE_CTRP2_CaDRReS.csv', sep=',', index_col=0)
# Collapse rows that share the same index label by averaging them.
gene_exp_df1 = gene_exp_df1.groupby(gene_exp_df1.index).mean()
print("Dataframe shape:", gene_exp_df1.shape, "\n")

# Same normalisation helper as used for the training expression matrix.
cell_line_log2_mean_fc_exp_df1, cell_line_mean_exp_df1 = pp.gexp.normalize_log2_mean_fc(gene_exp_df1)

# Kernel features of the test samples computed against the training expression matrix.
# NOTE(review): presumably rows = test samples, columns = training samples -- confirm
# against pp.gexp.calculate_kernel_feature.
kernel_df = pp.gexp.calculate_kernel_feature(cell_line_log2_mean_fc_exp_df1, cell_line_log2_mean_fc_exp_df, ess_gene_list)

In [None]:
# Prefix of the directory holding the pickled model ('' = current working directory).
model_dir = ''

# Choose which model you have trained previously.
obj_function = widgets.Dropdown(
    options=['cadrres-wo-sample-bias', 'cadrres-wo-sample-bias-weight'],
    description='Objetice function',
)
display(obj_function)

# NOTE(review): the value is read as soon as the cell runs, i.e. before anyone can
# interact with the dropdown, so it reflects the widget's initial selection.
model_spec_name = obj_function.value

# Parameter file written by the training cell for this model spec.
model_file = model_dir + '{}_param_dict.pickle'.format(model_spec_name)
cadrres_model = model.load_model(model_file)


In [None]:
# Run the loaded model on the test kernel features; the call returns two objects,
# of which only the first (pred_df) is written to disk.
pred_df, P_test_df = model.predict_from_model(
    cadrres_model,
    kernel_df,
    model_spec_name,
)
pred_df.to_csv("Test_predictions_CTRPV2.csv")