In [1]:
#Use the L1000_CTRP_24h model to predict the cell viability value of NCI60_L1000_24h
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData():
    #1.Reading the data
    sig_viability_info = pd.read_table(r"LINCS_NCI60_matched\L1000_NCI60.txt",sep="	",dtype=str)
    sig_viability_info_filter = sig_viability_info[['sig_id','pert_id','pert_iname','pert_type','cell_id','pert_idose','pert_itime','inchi_key','pubchem_cid','SID','NSC','CONCUNIT','LCONC','PANEL','PANELNBR','CELLNBR','NLOGGI50','INDN','TOTN','STDDEV','Delta','IsEffective']]
    sig_GE_info = pd.read_csv(r"LINCS_NCI60_matched\L1000_NCI60_gene_expression.csv",sep=",",dtype=str)
    sig_GE_info_filter = sig_GE_info[:]

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter,sig_GE_info_filter,left_on='sig_id',right_on='cid')
    select_sig_viavility2 = pd.DataFrame(sig_viability).loc[:,'5720':]
    select_sig_viavility_arr = select_sig_viavility2.copy()
    
    #3.Getting the cell viability value
    meta_data = pd.DataFrame(sig_viability).loc[:,'sig_id':'IsEffective']
    
    #4.Return the gene expression matrix and cell viability value
    return select_sig_viavility_arr,meta_data
    
#Retrieving the data
gene_expression,meta_data = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter)

#Loading model
loaded_model = pickle.load(open("model_adjust\CTRP-L1000-24h_XGBoost_1.dat","rb"))
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)

#Create a new file to record the predicted cell viability values
cell_viability_file = open(r"cell_viability_pred\L1000_CTRP_24h_clpred_all.txt","w",encoding="utf-8")
cell_viability_file.write("sig_id\tpert_id\tpert_iname\tpert_type\tcell_id\tpert_idose\tpert_itime\tinchi_key\tpubchem_cid\tSID\tNSC\tCONCUNIT\tLCONC\tPANEL\tPANELNBR\tCELLNBR\tNLOGGI50\tINDN\tTOTN\tSTDDEV\tDelta\tIsEffective\tL1000_CTRP_24h_pred\n")
for index in range(len(meta_data)):
    cell_viability_file.write(str(meta_data.at[index,'sig_id'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'pert_id'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'pert_iname'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'pert_type'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'cell_id'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'pert_idose'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'pert_itime'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'inchi_key'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'pubchem_cid'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'SID'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'NSC'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'CONCUNIT'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'LCONC'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'PANEL'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'PANELNBR'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'CELLNBR'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'NLOGGI50'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'INDN'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'TOTN'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'STDDEV'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'Delta'])+"\t")
    cell_viability_file.write(str(meta_data.at[index,'IsEffective'])+"\t")
    cell_viability_file.write(str(cl_prediction[index])+"\n")

cell_viability_file.close()

Model Loaded Successfully!


In [2]:
#Obtain the lowest cell viability value and output to the file: L1000_CTRP_24h_clpred_min.txt
import pandas as pd

predcv_file = pd.read_table(r"cell_viability_pred\L1000_CTRP_24h_clpred_all.txt")
min_predcv_file = open(r"cell_viability_pred\L1000_CTRP_24h_clpred_min.txt","w",encoding="utf-8")
min_predcv_file.write("sig_id\tpert_id\tpert_iname\tpert_type\tcell_id\tpert_idose\tpert_itime\tinchi_key\tpubchem_cid\tSID\tNSC\tCONCUNIT\tLCONC\tPANEL\tPANELNBR\tCELLNBR\tNLOGGI50\tINDN\tTOTN\tSTDDEV\tDelta\tIsEffective\tL1000_CTRP_24h_minpred\n")
for drug in predcv_file['pert_id'].unique():
    for cell_line in predcv_file[predcv_file['pert_id'] == drug]['cell_id'].unique():
        selected_info_filter = predcv_file[predcv_file['pert_id'] == drug][predcv_file['cell_id'] == cell_line]
        selected_info = selected_info_filter.reset_index(drop=False)
        min_cvpred = selected_info['L1000_CTRP_24h_pred'].min()
        min_cvpred_index = selected_info['L1000_CTRP_24h_pred'].argmin()
        if len(selected_info[selected_info['L1000_CTRP_24h_pred'] == min_cvpred]['IsEffective'].unique()) == 1:
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'sig_id'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'pert_id'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'pert_iname'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'pert_type'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'cell_id'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'pert_idose'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'pert_itime'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'inchi_key'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'pubchem_cid'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'SID'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'NSC'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'CONCUNIT'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'LCONC'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'PANEL'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'PANELNBR'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'CELLNBR'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'NLOGGI50'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'INDN'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'TOTN'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'STDDEV'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'Delta'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'IsEffective'])+"\t")
            min_predcv_file.write(str(selected_info.at[min_cvpred_index,'L1000_CTRP_24h_pred'])+"\n")
        
min_predcv_file.close()

  if __name__ == '__main__':
will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  if sys.path[0] == '':


1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


In [7]:
selected_info

Unnamed: 0,sig_id,pert_id,pert_iname,pert_type,cell_id,pert_idose,pert_itime,...,NLOGGI50,INDN,TOTN,STDDEV,Delta,IsEffective,L1000_CTRP_24h_pred
0,REP.A028_MCF7_24H:L19,BRD-A96292436,eflornithine,trt_cp,MCF7,10.0 um,24 h,...,2.301,2,3,0,-0.001,1,0.977796
1,REP.A028_MCF7_24H:L20,BRD-A96292436,eflornithine,trt_cp,MCF7,3.33 um,24 h,...,2.301,2,3,0,-0.001,1,0.947096
2,REP.A028_MCF7_24H:L21,BRD-A96292436,eflornithine,trt_cp,MCF7,1.11 um,24 h,...,2.301,2,3,0,-0.001,1,0.992117
3,REP.A028_MCF7_24H:L22,BRD-A96292436,eflornithine,trt_cp,MCF7,0.37 um,24 h,...,2.301,2,3,0,-0.001,1,0.97259
4,REP.A028_MCF7_24H:L23,BRD-A96292436,eflornithine,trt_cp,MCF7,0.12 um,24 h,...,2.301,2,3,0,-0.001,1,0.947762
5,REP.A028_MCF7_24H:L24,BRD-A96292436,eflornithine,trt_cp,MCF7,0.04 um,24 h,...,2.301,2,3,0,-0.001,1,1.027409
