In [1]:
#Testing the CTRP-L1000-3h data by using the CTRP-L1000-24h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"LINCS_CTRP_withoutdose\LINCS_CTRP_withoutdose_3h.txt",
            expression_path=r"LINCS_CTRP_withoutdose\LINCS_gene_expression_3h.csv",
            first_gene_column='780'):
    """Load a viability table and a gene-expression table and join them per signature.

    The defaults reproduce the original hard-coded CTRP-L1000-3h inputs, so
    ``getData()`` behaves as before; passing other paths lets the same function
    serve the other time points instead of being copy-pasted per cell.

    Parameters
    ----------
    viability_path : str
        Tab-separated file that contains at least the columns ``signature``,
        ``cell_line``, ``drug``, ``dose``, ``time`` and ``cell_viability``.
    expression_path : str
        Comma-separated file that contains a ``cid`` column (matched against
        ``signature``) followed by the expression columns.
    first_gene_column : str
        Label of the first expression column to keep; that column and every
        column to its right in the merged frame are returned.
        NOTE(review): presumably the first gene (Entrez ID) column of the CSV -
        this depends on the column order of the file, confirm against its header.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The expression columns and the ``cell_viability`` column of the merged
        rows. Both still hold strings because the files are read with
        ``dtype=str``; the caller converts them to numbers.

    Raises
    ------
    KeyError
        If a required metadata column or ``first_gene_column`` is missing.
    """
    metadata_columns = ['signature','cell_line','drug','dose','time','cell_viability']

    #1.Reading the data (everything as text so identifiers are never reinterpreted)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets; pd.merge defaults to an inner join, so
    #  signatures without an expression profile are dropped
    merged = pd.merge(viability_table[metadata_columns],expression_table,
                      left_on='signature',right_on='cid')
    if first_gene_column not in merged.columns:
        raise KeyError(f"first_gene_column {first_gene_column!r} not found in {expression_path!r}")
    gene_matrix = merged.loc[:,first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_matrix,viability_value
    
#Retrieving the data: getData() returns the merged gene expression matrix and
#the matching cell viability values (one entry per merged signature row)
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 
144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
#scipy sub-packages are not guaranteed to be loaded by "import scipy as sc";
#importing scipy.stats explicitly makes sc.stats below safe on every SciPy version
import scipy.stats

#Keep only the selected genes, in the key order of selected_gene, and convert the
#string values to float32 (presumably the feature order the model expects - TODO confirm)
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE(review): pickle.load can execute arbitrary code - only load a trusted model file.
#The raw string keeps the backslash literal ("\C" is an invalid escape sequence) and
#the with-block closes the file handle that pickle.load(open(...)) used to leak.
with open(r"model_adjust\CTRP-L1000-24h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',sc.stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error takes (y_true, y_pred); the value is symmetric, so the result is unchanged
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.4965149, 2.2118678941024386e-71)
MSE_Test: 0.060


In [2]:
#Testing the CTRP-L1000-6h data by using the CTRP-L1000-24h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"LINCS_CTRP_withoutdose\LINCS_CTRP_withoutdose_6h.txt",
            expression_path=r"LINCS_CTRP_withoutdose\LINCS_gene_expression_6h.csv",
            first_gene_column='5720'):
    """Load a viability table and a gene-expression table and join them per signature.

    The defaults reproduce the original hard-coded CTRP-L1000-6h inputs, so
    ``getData()`` behaves as before; passing other paths lets the same function
    serve the other time points instead of being copy-pasted per cell.

    Parameters
    ----------
    viability_path : str
        Tab-separated file that contains at least the columns ``signature``,
        ``cell_line``, ``drug``, ``dose``, ``time`` and ``cell_viability``.
    expression_path : str
        Comma-separated file that contains a ``cid`` column (matched against
        ``signature``) followed by the expression columns.
    first_gene_column : str
        Label of the first expression column to keep; that column and every
        column to its right in the merged frame are returned.
        NOTE(review): presumably the first gene (Entrez ID) column of the CSV -
        this depends on the column order of the file, confirm against its header.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The expression columns and the ``cell_viability`` column of the merged
        rows. Both still hold strings because the files are read with
        ``dtype=str``; the caller converts them to numbers.

    Raises
    ------
    KeyError
        If a required metadata column or ``first_gene_column`` is missing.
    """
    metadata_columns = ['signature','cell_line','drug','dose','time','cell_viability']

    #1.Reading the data (everything as text so identifiers are never reinterpreted)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets; pd.merge defaults to an inner join, so
    #  signatures without an expression profile are dropped
    merged = pd.merge(viability_table[metadata_columns],expression_table,
                      left_on='signature',right_on='cid')
    if first_gene_column not in merged.columns:
        raise KeyError(f"first_gene_column {first_gene_column!r} not found in {expression_path!r}")
    gene_matrix = merged.loc[:,first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_matrix,viability_value
    
#Retrieving the data: getData() returns the merged gene expression matrix and
#the matching cell viability values (one entry per merged signature row)
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 
144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
#scipy sub-packages are not guaranteed to be loaded by "import scipy as sc";
#importing scipy.stats explicitly makes sc.stats below safe on every SciPy version
import scipy.stats

#Keep only the selected genes, in the key order of selected_gene, and convert the
#string values to float32 (presumably the feature order the model expects - TODO confirm)
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE(review): pickle.load can execute arbitrary code - only load a trusted model file.
#The raw string keeps the backslash literal ("\C" is an invalid escape sequence) and
#the with-block closes the file handle that pickle.load(open(...)) used to leak.
with open(r"model_adjust\CTRP-L1000-24h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',sc.stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error takes (y_true, y_pred); the value is symmetric, so the result is unchanged
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.503472, 0.0)
MSE_Test: 0.093


In [4]:
#Testing the Achilles-L1000-96h data by using the CTRP-L1000-24h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_96h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_96h.csv",
            first_gene_column='5720'):
    """Load a viability table and a gene-expression table and join them per signature.

    The defaults reproduce the original hard-coded Achilles-L1000-96h inputs, so
    ``getData()`` behaves as before; passing other paths lets the same function
    serve the other time points instead of being copy-pasted per cell.

    Parameters
    ----------
    viability_path : str
        Tab-separated file that contains at least the columns ``signature``,
        ``cell_line``, ``drug``, ``pert_type``, ``time`` and ``cell_viability``.
    expression_path : str
        Comma-separated file that contains a ``cid`` column (matched against
        ``signature``) followed by the expression columns.
    first_gene_column : str
        Label of the first expression column to keep; that column and every
        column to its right in the merged frame are returned.
        NOTE(review): presumably the first gene (Entrez ID) column of the CSV -
        this depends on the column order of the file, confirm against its header.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The expression columns and the ``cell_viability`` column of the merged
        rows. Both still hold strings because the files are read with
        ``dtype=str``; the caller converts them to numbers.

    Raises
    ------
    KeyError
        If a required metadata column or ``first_gene_column`` is missing.
    """
    metadata_columns = ['signature','cell_line','drug','pert_type','time','cell_viability']

    #1.Reading the data (everything as text so identifiers are never reinterpreted)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets; pd.merge defaults to an inner join, so
    #  signatures without an expression profile are dropped
    merged = pd.merge(viability_table[metadata_columns],expression_table,
                      left_on='signature',right_on='cid')
    if first_gene_column not in merged.columns:
        raise KeyError(f"first_gene_column {first_gene_column!r} not found in {expression_path!r}")
    gene_matrix = merged.loc[:,first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_matrix,viability_value
    
#Retrieving the data: getData() returns the merged gene expression matrix and
#the matching cell viability values (one entry per merged signature row)
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 
144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
#scipy sub-packages are not guaranteed to be loaded by "import scipy as sc";
#importing scipy.stats explicitly makes sc.stats below safe on every SciPy version
import scipy.stats

#Keep only the selected genes, in the key order of selected_gene, and convert the
#string values to float32 (presumably the feature order the model expects - TODO confirm)
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE(review): pickle.load can execute arbitrary code - only load a trusted model file.
#The raw string keeps the backslash literal ("\C" is an invalid escape sequence) and
#the with-block closes the file handle that pickle.load(open(...)) used to leak.
with open(r"model_adjust\CTRP-L1000-24h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',sc.stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error takes (y_true, y_pred); the value is symmetric, so the result is unchanged
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.33191863, 0.0)
MSE_Test: 4.608


In [5]:
#Testing the Achilles-L1000-120h data by using the CTRP-L1000-24h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_120h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_120h.csv",
            first_gene_column='5720'):
    """Load a viability table and a gene-expression table and join them per signature.

    The defaults reproduce the original hard-coded Achilles-L1000-120h inputs, so
    ``getData()`` behaves as before; passing other paths lets the same function
    serve the other time points instead of being copy-pasted per cell.

    Parameters
    ----------
    viability_path : str
        Tab-separated file that contains at least the columns ``signature``,
        ``cell_line``, ``drug``, ``pert_type``, ``time`` and ``cell_viability``.
    expression_path : str
        Comma-separated file that contains a ``cid`` column (matched against
        ``signature``) followed by the expression columns.
    first_gene_column : str
        Label of the first expression column to keep; that column and every
        column to its right in the merged frame are returned.
        NOTE(review): presumably the first gene (Entrez ID) column of the CSV -
        this depends on the column order of the file, confirm against its header.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The expression columns and the ``cell_viability`` column of the merged
        rows. Both still hold strings because the files are read with
        ``dtype=str``; the caller converts them to numbers.

    Raises
    ------
    KeyError
        If a required metadata column or ``first_gene_column`` is missing.
    """
    metadata_columns = ['signature','cell_line','drug','pert_type','time','cell_viability']

    #1.Reading the data (everything as text so identifiers are never reinterpreted)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets; pd.merge defaults to an inner join, so
    #  signatures without an expression profile are dropped
    merged = pd.merge(viability_table[metadata_columns],expression_table,
                      left_on='signature',right_on='cid')
    if first_gene_column not in merged.columns:
        raise KeyError(f"first_gene_column {first_gene_column!r} not found in {expression_path!r}")
    gene_matrix = merged.loc[:,first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_matrix,viability_value
    
#Retrieving the data: getData() returns the merged gene expression matrix and
#the matching cell viability values (one entry per merged signature row)
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 
144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
#scipy sub-packages are not guaranteed to be loaded by "import scipy as sc";
#importing scipy.stats explicitly makes sc.stats below safe on every SciPy version
import scipy.stats

#Keep only the selected genes, in the key order of selected_gene, and convert the
#string values to float32 (presumably the feature order the model expects - TODO confirm)
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE(review): pickle.load can execute arbitrary code - only load a trusted model file.
#The raw string keeps the backslash literal ("\C" is an invalid escape sequence) and
#the with-block closes the file handle that pickle.load(open(...)) used to leak.
with open(r"model_adjust\CTRP-L1000-24h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',sc.stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error takes (y_true, y_pred); the value is symmetric, so the result is unchanged
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.28972447, 1.3938022536323912e-279)
MSE_Test: 3.898


In [6]:
#Testing the Achilles-L1000-144h data by using the CTRP-L1000-24h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_144h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_144h.csv",
            first_gene_column='5720'):
    """Load a viability table and a gene-expression table and join them per signature.

    The defaults reproduce the original hard-coded Achilles-L1000-144h inputs, so
    ``getData()`` behaves as before; passing other paths lets the same function
    serve the other time points instead of being copy-pasted per cell.

    Parameters
    ----------
    viability_path : str
        Tab-separated file that contains at least the columns ``signature``,
        ``cell_line``, ``drug``, ``pert_type``, ``time`` and ``cell_viability``.
    expression_path : str
        Comma-separated file that contains a ``cid`` column (matched against
        ``signature``) followed by the expression columns.
    first_gene_column : str
        Label of the first expression column to keep; that column and every
        column to its right in the merged frame are returned.
        NOTE(review): presumably the first gene (Entrez ID) column of the CSV -
        this depends on the column order of the file, confirm against its header.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The expression columns and the ``cell_viability`` column of the merged
        rows. Both still hold strings because the files are read with
        ``dtype=str``; the caller converts them to numbers.

    Raises
    ------
    KeyError
        If a required metadata column or ``first_gene_column`` is missing.
    """
    metadata_columns = ['signature','cell_line','drug','pert_type','time','cell_viability']

    #1.Reading the data (everything as text so identifiers are never reinterpreted)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets; pd.merge defaults to an inner join, so
    #  signatures without an expression profile are dropped
    merged = pd.merge(viability_table[metadata_columns],expression_table,
                      left_on='signature',right_on='cid')
    if first_gene_column not in merged.columns:
        raise KeyError(f"first_gene_column {first_gene_column!r} not found in {expression_path!r}")
    gene_matrix = merged.loc[:,first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_matrix,viability_value
    
#Retrieving the data: getData() returns the merged gene expression matrix and
#the matching cell viability values (one entry per merged signature row)
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 
144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
#scipy sub-packages are not guaranteed to be loaded by "import scipy as sc";
#importing scipy.stats explicitly makes sc.stats below safe on every SciPy version
import scipy.stats

#Keep only the selected genes, in the key order of selected_gene, and convert the
#string values to float32 (presumably the feature order the model expects - TODO confirm)
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE(review): pickle.load can execute arbitrary code - only load a trusted model file.
#The raw string keeps the backslash literal ("\C" is an invalid escape sequence) and
#the with-block closes the file handle that pickle.load(open(...)) used to leak.
with open(r"model_adjust\CTRP-L1000-24h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',sc.stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error takes (y_true, y_pred); the value is symmetric, so the result is unchanged
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.16607329, 1.026515000939485e-65)
MSE_Test: 4.070
