In [1]:
#Testing the CTRP-L1000-3h data by using the CTRP-L1000-6h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"LINCS_CTRP_withoutdose\LINCS_CTRP_withoutdose_3h.txt",
            expression_path=r"LINCS_CTRP_withoutdose\LINCS_gene_expression_3h.csv",
            first_gene_column='780'):
    """Load the viability table, join it with the gene-expression table and split the result.

    The defaults reproduce the original hard-coded (Windows-style, relative)
    file locations, so ``getData()`` behaves as before; pass other paths to
    reuse the function for another dataset.

    Args:
        viability_path: Tab-separated file that must contain the columns
            'signature', 'cell_line', 'drug', 'dose', 'time' and
            'cell_viability'.
        expression_path: Comma-separated file with a 'cid' column (matched
            against 'signature') followed by the expression columns.
        first_gene_column: Label of the first expression column to keep; every
            column from this label through the last column of the merged
            table is returned.

    Returns:
        A tuple ``(gene_expression, cell_viability)``: a DataFrame holding the
        selected expression columns and a Series holding the
        'cell_viability' column. Both come from the same merged table, so
        their rows line up, and both hold strings because the files are read
        with ``dtype=str``.

    Raises:
        KeyError: If a required column is missing from either file.
    """
    required_columns = ['signature','cell_line','drug','dose','time','cell_viability']

    #1.Reading the data (all values are kept as text)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)[required_columns]
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets (default inner join: only signatures found in both files remain)
    merged = pd.merge(viability_table,expression_table,left_on='signature',right_on='cid')

    #3.Getting the gene expression block and the cell viability value from the same rows
    expression_block = merged.loc[:,first_gene_column:].copy()
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return expression_block,viability_value
    
#Retrieving the data: the gene expression columns and the matching cell_viability column
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'4609': 0.0, '5427': 3.5188933726509135, '55818': 5.022672047181096, '5997': 5.962213254698173, '665': 6.481106627349087, '6804': 8.45843458016799, '1831': 10.549122768892376, '3725': 13.962213254698172, '23212': 17.075573490603652, '4144': 20.87908241503415, '10695': 20.939541207517077, '79073': 21.617138910435663, '9761': 22.90175446221525, '23210': 23.87152506597379, '23636': 23.98488530187927, '1647': 26.52645072171128, '230': 28.299730249900318, '8870': 30.136032283086575, '79071': 31.863967716913425, '4864': 33.022672047181096, '1050': 36.6095815613753, '1958': 36.939541207517074, '4616': 39.68515505197895, '54541': 40.68515505197895, '622': 40.730499146341145, '8396': 46.79595157537013, '6709': 47.31484494802105, '9531': 47.488663976409455, '11041': 58.81362998600516, '26036': 59.780836877249406, '991': 61.17125531587411, '7538': 61.8060726369448, '3312': 63.14358963214694, '5092': 66.30985131147499, '8318': 68.55668011795274, '80746': 70.46855564174265, '6810': 70.67759770291859, '10493': 74.17881266493447, '5696': 75.56423746701311, '22823': 82.68259133946465, '6182': 87.9118755237899, '23338': 91.79851528788444, '4043': 99.67759770291859, '1950': 101.92186279688204, '3162': 102.6296898960421, '9903': 102.86653142942772, '23300': 103.934547570971, '6772': 105.3904184386247, '10285': 105.97233431627284, '10915': 106.00256371251429, '79850': 106.1712553158741, '3337': 106.42820518392652, '874': 108.6322536085564, '22809': 108.79338786285585, '11188': 111.85897408036735, '22827': 114.78839422630976, '5467': 115.56923110355918, '4303': 119.07813720311796, '51719': 120.11079652339119, '51070': 126.07300977808936, '9686': 126.1964910755695, '8878': 127.0025637125143, '6856': 127.72550550979508, '949': 132.22928418432525, '25932': 132.63468353258816, '5359': 134.89419711315486, '5048': 136.31740866053536, '3066': 138.02010833466682, '79143': 138.9319838584567, '22889': 141.75560111755402, '25805': 143.51133602359056, '10112': 143.94709855657746, 
'5985': 143.95209219312352, '2274': 146.23684153338561, '5909': 149.81106627349087, '80347': 150.09824553778478, '8508': 150.16126804278196, '51282': 150.40553313674542, '3682': 152.72037808476645, '890': 152.83630203318626, '11344': 153.94709855657746, '3978': 153.97989166533318, '8826': 155.65735557976924, '6603': 156.11835387245154, '10782': 156.41052677329148, '25793': 156.48367033986338, '392': 158.71538444822042, '148022': 158.9798916653332, '1677': 159.5668011795274, '24149': 161.56180754298134, '3108': 166.55411640543844, '4793': 169.03022939624148, '2771': 169.60445413634673, '5708': 170.29473661335425, '10775': 177.82861089564338, '7398': 179.392982151139, '10559': 182.0025637125143, '9143': 182.83886574570056, '211': 182.9319838584567, '993': 183.84142945821486, '9688': 188.61970262294997, '10237': 190.14601955617871, '8624': 190.3425106317482, '8480': 191.94966226909173, '4927': 194.53144435825735, '5110': 194.6952761135536, '11065': 196.55425019392098, '8726': 196.81619369851947, '11044': 197.94709855657746, '54505': 201.8287446841259, '55324': 202.1763827409027, '6390': 207.41309048580578, '1019': 209.77341331667157, '4216': 213.20404842462986, '55556': 213.80350892443047, '23077': 214.8967608256692, '4846': 215.28218562774782, '10953': 215.83373832067196, '10320': 216.05802886845115, '6622': 216.10337296281335, '1021': 217.74548005597933, '4791': 218.62982368452464, '3098': 219.29217290083994, '3383': 221.01754462215249, '6657': 221.4836703398634, '22883': 221.64224088164852, '3028': 221.82618097161162, '10058': 221.91943287285025, '4775': 222.31727487205282, '200081': 222.3904184386247, '8061': 222.42833897240905, '9653': 222.90175446221522, '66008': 222.91686916033598, '10589': 223.48367033986338, '9918': 226.3476380567768, '11073': 227.33751699520218, '11230': 228.35519540583718, '23': 228.8186236225512, '10857': 229.77584324070332, '10681': 229.8941971131549, '1759': 229.92186279688204, '51422': 229.99487257497142, '8720': 230.78083687724944, 
'5641': 230.78839422630978, '1906': 231.00755734906036, '8349': 231.76572217912866, '8985': 232.33751699520215, '10206': 233.40553313674542, '23588': 233.96221325469818, '9217': 234.33751699520212, '701': 234.3450743442625, '9276': 234.39541207517078, '10898': 235.39541207517078, '6117': 236.45087723110763, '836': 236.89919074970095, '58497': 237.39797578768508, '4860': 237.46599192922838, '9375': 237.90931181127564, '3638': 238.32240229708142, '2523': 238.39797578768508, '5580': 238.87651870251983, '1111': 239.01268477408897, '466': 239.21916312275062, '873': 239.44831351859335, '9868': 239.89163340064061, '7077': 239.96720689124425, '9170': 240.92186279688204, '7157': 241.3048576749289, '22908': 241.49365761295553, '29890': 241.52388700919695, '6850': 242.17138910435665, '7168': 242.38286108956433, '11157': 242.9974362874857, '84722': 243.53144435825735, '23368': 244.244398882446, '23244': 244.46585814074584, '8553': 245.92429272091383, '9897': 246.5390017073177, '868': 246.85128294282447, '3156': 247.01255098560645, '23585': 247.86140400439913, '23386': 248.50877231107623, '5743': 248.54898898040986, '3895': 249.4785429148348, '5289': 250.87395499000556, '51097': 252.3425106317482, '6275': 254.00499363654606, '1848': 254.65222815474064, '80204': 258.6574893682518, '51031': 259.8892034766088, '10644': 261.5693648920417, '5588': 262.23184789683955, '7994': 262.3375169952021}
# Keep only the columns named by the keys of selected_gene (in key order) and
# convert the text values to float32.
gene_expression_filter = np.array(gene_expression.loc[:, list(selected_gene)], dtype=np.float32)
# The viability values become the float32 label vector.
cell_viability = np.array(cell_viability, dtype=np.float32)
# Features and labels are wrapped together for the XGBoost booster.
dtest = xgb.DMatrix(gene_expression_filter, label=cell_viability)

#Loading model
# NOTE(review): pickle can run arbitrary code while loading; only load model files from a trusted source.
# The raw string keeps the backslash literal ("\C" is not a valid escape sequence),
# and the context manager closes the file handle once the model has been read.
with open(r"model_adjust\CTRP-L1000-6h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
# Import the function explicitly: `import scipy as sc` alone is not guaranteed
# to make `sc.stats` available on every SciPy release.
from scipy.stats import pearsonr

cl_prediction = loaded_model.predict(dtest)
# Correlation between the predicted and the measured viability.
print('Test_Best_Pearson:',pearsonr(cl_prediction,cell_viability))
# mean_squared_error is documented as (y_true, y_pred); the value is the same in either order.
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.61481667, 1.9243081078359795e-118)
MSE_Test: 0.041


In [2]:
##Testing the CTRP-L1000-24h data by using the CTRP-L1000-6h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Testing the CTRP-L1000-24h data by using the CTRP-L1000-6h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"LINCS_CTRP_withoutdose\LINCS_CTRP_withoutdose_24h.txt",
            expression_path=r"LINCS_CTRP_withoutdose\LINCS_gene_expression_24h.csv",
            first_gene_column='5720'):
    """Load the viability table, join it with the gene-expression table and split the result.

    The defaults reproduce the original hard-coded (Windows-style, relative)
    file locations, so ``getData()`` behaves as before; pass other paths to
    reuse the function for another dataset.

    Args:
        viability_path: Tab-separated file that must contain the columns
            'signature', 'cell_line', 'drug', 'dose', 'time' and
            'cell_viability'.
        expression_path: Comma-separated file with a 'cid' column (matched
            against 'signature') followed by the expression columns.
        first_gene_column: Label of the first expression column to keep; every
            column from this label through the last column of the merged
            table is returned.

    Returns:
        A tuple ``(gene_expression, cell_viability)``: a DataFrame holding the
        selected expression columns and a Series holding the
        'cell_viability' column. Both come from the same merged table, so
        their rows line up, and both hold strings because the files are read
        with ``dtype=str``.

    Raises:
        KeyError: If a required column is missing from either file.
    """
    required_columns = ['signature','cell_line','drug','dose','time','cell_viability']

    #1.Reading the data (all values are kept as text)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)[required_columns]
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets (default inner join: only signatures found in both files remain)
    merged = pd.merge(viability_table,expression_table,left_on='signature',right_on='cid')

    #3.Getting the gene expression block and the cell viability value from the same rows
    expression_block = merged.loc[:,first_gene_column:].copy()
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return expression_block,viability_value
    
#Retrieving the data: the gene expression columns and the matching cell_viability column
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'4609': 0.0, '5427': 3.5188933726509135, '55818': 5.022672047181096, '5997': 5.962213254698173, '665': 6.481106627349087, '6804': 8.45843458016799, '1831': 10.549122768892376, '3725': 13.962213254698172, '23212': 17.075573490603652, '4144': 20.87908241503415, '10695': 20.939541207517077, '79073': 21.617138910435663, '9761': 22.90175446221525, '23210': 23.87152506597379, '23636': 23.98488530187927, '1647': 26.52645072171128, '230': 28.299730249900318, '8870': 30.136032283086575, '79071': 31.863967716913425, '4864': 33.022672047181096, '1050': 36.6095815613753, '1958': 36.939541207517074, '4616': 39.68515505197895, '54541': 40.68515505197895, '622': 40.730499146341145, '8396': 46.79595157537013, '6709': 47.31484494802105, '9531': 47.488663976409455, '11041': 58.81362998600516, '26036': 59.780836877249406, '991': 61.17125531587411, '7538': 61.8060726369448, '3312': 63.14358963214694, '5092': 66.30985131147499, '8318': 68.55668011795274, '80746': 70.46855564174265, '6810': 70.67759770291859, '10493': 74.17881266493447, '5696': 75.56423746701311, '22823': 82.68259133946465, '6182': 87.9118755237899, '23338': 91.79851528788444, '4043': 99.67759770291859, '1950': 101.92186279688204, '3162': 102.6296898960421, '9903': 102.86653142942772, '23300': 103.934547570971, '6772': 105.3904184386247, '10285': 105.97233431627284, '10915': 106.00256371251429, '79850': 106.1712553158741, '3337': 106.42820518392652, '874': 108.6322536085564, '22809': 108.79338786285585, '11188': 111.85897408036735, '22827': 114.78839422630976, '5467': 115.56923110355918, '4303': 119.07813720311796, '51719': 120.11079652339119, '51070': 126.07300977808936, '9686': 126.1964910755695, '8878': 127.0025637125143, '6856': 127.72550550979508, '949': 132.22928418432525, '25932': 132.63468353258816, '5359': 134.89419711315486, '5048': 136.31740866053536, '3066': 138.02010833466682, '79143': 138.9319838584567, '22889': 141.75560111755402, '25805': 143.51133602359056, '10112': 143.94709855657746, 
'5985': 143.95209219312352, '2274': 146.23684153338561, '5909': 149.81106627349087, '80347': 150.09824553778478, '8508': 150.16126804278196, '51282': 150.40553313674542, '3682': 152.72037808476645, '890': 152.83630203318626, '11344': 153.94709855657746, '3978': 153.97989166533318, '8826': 155.65735557976924, '6603': 156.11835387245154, '10782': 156.41052677329148, '25793': 156.48367033986338, '392': 158.71538444822042, '148022': 158.9798916653332, '1677': 159.5668011795274, '24149': 161.56180754298134, '3108': 166.55411640543844, '4793': 169.03022939624148, '2771': 169.60445413634673, '5708': 170.29473661335425, '10775': 177.82861089564338, '7398': 179.392982151139, '10559': 182.0025637125143, '9143': 182.83886574570056, '211': 182.9319838584567, '993': 183.84142945821486, '9688': 188.61970262294997, '10237': 190.14601955617871, '8624': 190.3425106317482, '8480': 191.94966226909173, '4927': 194.53144435825735, '5110': 194.6952761135536, '11065': 196.55425019392098, '8726': 196.81619369851947, '11044': 197.94709855657746, '54505': 201.8287446841259, '55324': 202.1763827409027, '6390': 207.41309048580578, '1019': 209.77341331667157, '4216': 213.20404842462986, '55556': 213.80350892443047, '23077': 214.8967608256692, '4846': 215.28218562774782, '10953': 215.83373832067196, '10320': 216.05802886845115, '6622': 216.10337296281335, '1021': 217.74548005597933, '4791': 218.62982368452464, '3098': 219.29217290083994, '3383': 221.01754462215249, '6657': 221.4836703398634, '22883': 221.64224088164852, '3028': 221.82618097161162, '10058': 221.91943287285025, '4775': 222.31727487205282, '200081': 222.3904184386247, '8061': 222.42833897240905, '9653': 222.90175446221522, '66008': 222.91686916033598, '10589': 223.48367033986338, '9918': 226.3476380567768, '11073': 227.33751699520218, '11230': 228.35519540583718, '23': 228.8186236225512, '10857': 229.77584324070332, '10681': 229.8941971131549, '1759': 229.92186279688204, '51422': 229.99487257497142, '8720': 230.78083687724944, 
'5641': 230.78839422630978, '1906': 231.00755734906036, '8349': 231.76572217912866, '8985': 232.33751699520215, '10206': 233.40553313674542, '23588': 233.96221325469818, '9217': 234.33751699520212, '701': 234.3450743442625, '9276': 234.39541207517078, '10898': 235.39541207517078, '6117': 236.45087723110763, '836': 236.89919074970095, '58497': 237.39797578768508, '4860': 237.46599192922838, '9375': 237.90931181127564, '3638': 238.32240229708142, '2523': 238.39797578768508, '5580': 238.87651870251983, '1111': 239.01268477408897, '466': 239.21916312275062, '873': 239.44831351859335, '9868': 239.89163340064061, '7077': 239.96720689124425, '9170': 240.92186279688204, '7157': 241.3048576749289, '22908': 241.49365761295553, '29890': 241.52388700919695, '6850': 242.17138910435665, '7168': 242.38286108956433, '11157': 242.9974362874857, '84722': 243.53144435825735, '23368': 244.244398882446, '23244': 244.46585814074584, '8553': 245.92429272091383, '9897': 246.5390017073177, '868': 246.85128294282447, '3156': 247.01255098560645, '23585': 247.86140400439913, '23386': 248.50877231107623, '5743': 248.54898898040986, '3895': 249.4785429148348, '5289': 250.87395499000556, '51097': 252.3425106317482, '6275': 254.00499363654606, '1848': 254.65222815474064, '80204': 258.6574893682518, '51031': 259.8892034766088, '10644': 261.5693648920417, '5588': 262.23184789683955, '7994': 262.3375169952021}
# Keep only the columns named by the keys of selected_gene (in key order) and
# convert the text values to float32.
gene_expression_filter = np.array(gene_expression.loc[:, list(selected_gene)], dtype=np.float32)
# The viability values become the float32 label vector.
cell_viability = np.array(cell_viability, dtype=np.float32)
# Features and labels are wrapped together for the XGBoost booster.
dtest = xgb.DMatrix(gene_expression_filter, label=cell_viability)

#Loading model
# NOTE(review): pickle can run arbitrary code while loading; only load model files from a trusted source.
# The raw string keeps the backslash literal ("\C" is not a valid escape sequence),
# and the context manager closes the file handle once the model has been read.
with open(r"model_adjust\CTRP-L1000-6h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
# Import the function explicitly: `import scipy as sc` alone is not guaranteed
# to make `sc.stats` available on every SciPy release.
from scipy.stats import pearsonr

cl_prediction = loaded_model.predict(dtest)
# Correlation between the predicted and the measured viability.
print('Test_Best_Pearson:',pearsonr(cl_prediction,cell_viability))
# mean_squared_error is documented as (y_true, y_pred); the value is the same in either order.
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.7415961, 0.0)
MSE_Test: 0.044


In [3]:
##Testing the Achilles-L1000-96h data by using the CTRP-L1000-6h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_96h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_96h.csv",
            first_gene_column='5720'):
    """Load the viability table, join it with the gene-expression table and split the result.

    The defaults reproduce the original hard-coded (Windows-style, relative)
    file locations, so ``getData()`` behaves as before; pass other paths to
    reuse the function for another dataset.

    Args:
        viability_path: Tab-separated file that must contain the columns
            'signature', 'cell_line', 'drug', 'pert_type', 'time' and
            'cell_viability'.
        expression_path: Comma-separated file with a 'cid' column (matched
            against 'signature') followed by the expression columns.
        first_gene_column: Label of the first expression column to keep; every
            column from this label through the last column of the merged
            table is returned.

    Returns:
        A tuple ``(gene_expression, cell_viability)``: a DataFrame holding the
        selected expression columns and a Series holding the
        'cell_viability' column. Both come from the same merged table, so
        their rows line up, and both hold strings because the files are read
        with ``dtype=str``.

    Raises:
        KeyError: If a required column is missing from either file.
    """
    required_columns = ['signature','cell_line','drug','pert_type','time','cell_viability']

    #1.Reading the data (all values are kept as text)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)[required_columns]
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets (default inner join: only signatures found in both files remain)
    merged = pd.merge(viability_table,expression_table,left_on='signature',right_on='cid')

    #3.Getting the gene expression block and the cell viability value from the same rows
    expression_block = merged.loc[:,first_gene_column:].copy()
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return expression_block,viability_value
    
#Retrieving the data: the gene expression columns and the matching cell_viability column
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'4609': 0.0, '5427': 3.5188933726509135, '55818': 5.022672047181096, '5997': 5.962213254698173, '665': 6.481106627349087, '6804': 8.45843458016799, '1831': 10.549122768892376, '3725': 13.962213254698172, '23212': 17.075573490603652, '4144': 20.87908241503415, '10695': 20.939541207517077, '79073': 21.617138910435663, '9761': 22.90175446221525, '23210': 23.87152506597379, '23636': 23.98488530187927, '1647': 26.52645072171128, '230': 28.299730249900318, '8870': 30.136032283086575, '79071': 31.863967716913425, '4864': 33.022672047181096, '1050': 36.6095815613753, '1958': 36.939541207517074, '4616': 39.68515505197895, '54541': 40.68515505197895, '622': 40.730499146341145, '8396': 46.79595157537013, '6709': 47.31484494802105, '9531': 47.488663976409455, '11041': 58.81362998600516, '26036': 59.780836877249406, '991': 61.17125531587411, '7538': 61.8060726369448, '3312': 63.14358963214694, '5092': 66.30985131147499, '8318': 68.55668011795274, '80746': 70.46855564174265, '6810': 70.67759770291859, '10493': 74.17881266493447, '5696': 75.56423746701311, '22823': 82.68259133946465, '6182': 87.9118755237899, '23338': 91.79851528788444, '4043': 99.67759770291859, '1950': 101.92186279688204, '3162': 102.6296898960421, '9903': 102.86653142942772, '23300': 103.934547570971, '6772': 105.3904184386247, '10285': 105.97233431627284, '10915': 106.00256371251429, '79850': 106.1712553158741, '3337': 106.42820518392652, '874': 108.6322536085564, '22809': 108.79338786285585, '11188': 111.85897408036735, '22827': 114.78839422630976, '5467': 115.56923110355918, '4303': 119.07813720311796, '51719': 120.11079652339119, '51070': 126.07300977808936, '9686': 126.1964910755695, '8878': 127.0025637125143, '6856': 127.72550550979508, '949': 132.22928418432525, '25932': 132.63468353258816, '5359': 134.89419711315486, '5048': 136.31740866053536, '3066': 138.02010833466682, '79143': 138.9319838584567, '22889': 141.75560111755402, '25805': 143.51133602359056, '10112': 143.94709855657746, 
'5985': 143.95209219312352, '2274': 146.23684153338561, '5909': 149.81106627349087, '80347': 150.09824553778478, '8508': 150.16126804278196, '51282': 150.40553313674542, '3682': 152.72037808476645, '890': 152.83630203318626, '11344': 153.94709855657746, '3978': 153.97989166533318, '8826': 155.65735557976924, '6603': 156.11835387245154, '10782': 156.41052677329148, '25793': 156.48367033986338, '392': 158.71538444822042, '148022': 158.9798916653332, '1677': 159.5668011795274, '24149': 161.56180754298134, '3108': 166.55411640543844, '4793': 169.03022939624148, '2771': 169.60445413634673, '5708': 170.29473661335425, '10775': 177.82861089564338, '7398': 179.392982151139, '10559': 182.0025637125143, '9143': 182.83886574570056, '211': 182.9319838584567, '993': 183.84142945821486, '9688': 188.61970262294997, '10237': 190.14601955617871, '8624': 190.3425106317482, '8480': 191.94966226909173, '4927': 194.53144435825735, '5110': 194.6952761135536, '11065': 196.55425019392098, '8726': 196.81619369851947, '11044': 197.94709855657746, '54505': 201.8287446841259, '55324': 202.1763827409027, '6390': 207.41309048580578, '1019': 209.77341331667157, '4216': 213.20404842462986, '55556': 213.80350892443047, '23077': 214.8967608256692, '4846': 215.28218562774782, '10953': 215.83373832067196, '10320': 216.05802886845115, '6622': 216.10337296281335, '1021': 217.74548005597933, '4791': 218.62982368452464, '3098': 219.29217290083994, '3383': 221.01754462215249, '6657': 221.4836703398634, '22883': 221.64224088164852, '3028': 221.82618097161162, '10058': 221.91943287285025, '4775': 222.31727487205282, '200081': 222.3904184386247, '8061': 222.42833897240905, '9653': 222.90175446221522, '66008': 222.91686916033598, '10589': 223.48367033986338, '9918': 226.3476380567768, '11073': 227.33751699520218, '11230': 228.35519540583718, '23': 228.8186236225512, '10857': 229.77584324070332, '10681': 229.8941971131549, '1759': 229.92186279688204, '51422': 229.99487257497142, '8720': 230.78083687724944, 
'5641': 230.78839422630978, '1906': 231.00755734906036, '8349': 231.76572217912866, '8985': 232.33751699520215, '10206': 233.40553313674542, '23588': 233.96221325469818, '9217': 234.33751699520212, '701': 234.3450743442625, '9276': 234.39541207517078, '10898': 235.39541207517078, '6117': 236.45087723110763, '836': 236.89919074970095, '58497': 237.39797578768508, '4860': 237.46599192922838, '9375': 237.90931181127564, '3638': 238.32240229708142, '2523': 238.39797578768508, '5580': 238.87651870251983, '1111': 239.01268477408897, '466': 239.21916312275062, '873': 239.44831351859335, '9868': 239.89163340064061, '7077': 239.96720689124425, '9170': 240.92186279688204, '7157': 241.3048576749289, '22908': 241.49365761295553, '29890': 241.52388700919695, '6850': 242.17138910435665, '7168': 242.38286108956433, '11157': 242.9974362874857, '84722': 243.53144435825735, '23368': 244.244398882446, '23244': 244.46585814074584, '8553': 245.92429272091383, '9897': 246.5390017073177, '868': 246.85128294282447, '3156': 247.01255098560645, '23585': 247.86140400439913, '23386': 248.50877231107623, '5743': 248.54898898040986, '3895': 249.4785429148348, '5289': 250.87395499000556, '51097': 252.3425106317482, '6275': 254.00499363654606, '1848': 254.65222815474064, '80204': 258.6574893682518, '51031': 259.8892034766088, '10644': 261.5693648920417, '5588': 262.23184789683955, '7994': 262.3375169952021}
# Keep only the columns named by the keys of selected_gene (in key order) and
# convert the text values to float32.
gene_expression_filter = np.array(gene_expression.loc[:, list(selected_gene)], dtype=np.float32)
# The viability values become the float32 label vector.
cell_viability = np.array(cell_viability, dtype=np.float32)
# Features and labels are wrapped together for the XGBoost booster.
dtest = xgb.DMatrix(gene_expression_filter, label=cell_viability)

#Loading model
# NOTE(review): pickle can run arbitrary code while loading; only load model files from a trusted source.
# The raw string keeps the backslash literal ("\C" is not a valid escape sequence),
# and the context manager closes the file handle once the model has been read.
with open(r"model_adjust\CTRP-L1000-6h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
# Import the function explicitly: `import scipy as sc` alone is not guaranteed
# to make `sc.stats` available on every SciPy release.
from scipy.stats import pearsonr

cl_prediction = loaded_model.predict(dtest)
# Correlation between the predicted and the measured viability.
print('Test_Best_Pearson:',pearsonr(cl_prediction,cell_viability))
# mean_squared_error is documented as (y_true, y_pred); the value is the same in either order.
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.24614531, 0.0)
MSE_Test: 4.325


In [4]:
##Testing the Achilles-L1000-120h data by using the CTRP-L1000-6h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_120h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_120h.csv",
            first_gene_column='5720'):
    """Load the viability table, join it with the gene-expression table and split the result.

    The defaults reproduce the original hard-coded (Windows-style, relative)
    file locations, so ``getData()`` behaves as before; pass other paths to
    reuse the function for another dataset.

    Args:
        viability_path: Tab-separated file that must contain the columns
            'signature', 'cell_line', 'drug', 'pert_type', 'time' and
            'cell_viability'.
        expression_path: Comma-separated file with a 'cid' column (matched
            against 'signature') followed by the expression columns.
        first_gene_column: Label of the first expression column to keep; every
            column from this label through the last column of the merged
            table is returned.

    Returns:
        A tuple ``(gene_expression, cell_viability)``: a DataFrame holding the
        selected expression columns and a Series holding the
        'cell_viability' column. Both come from the same merged table, so
        their rows line up, and both hold strings because the files are read
        with ``dtype=str``.

    Raises:
        KeyError: If a required column is missing from either file.
    """
    required_columns = ['signature','cell_line','drug','pert_type','time','cell_viability']

    #1.Reading the data (all values are kept as text)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)[required_columns]
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets (default inner join: only signatures found in both files remain)
    merged = pd.merge(viability_table,expression_table,left_on='signature',right_on='cid')

    #3.Getting the gene expression block and the cell viability value from the same rows
    expression_block = merged.loc[:,first_gene_column:].copy()
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return expression_block,viability_value
    
#Retrieving the data: the gene expression columns and the matching cell_viability column
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'4609': 0.0, '5427': 3.5188933726509135, '55818': 5.022672047181096, '5997': 5.962213254698173, '665': 6.481106627349087, '6804': 8.45843458016799, '1831': 10.549122768892376, '3725': 13.962213254698172, '23212': 17.075573490603652, '4144': 20.87908241503415, '10695': 20.939541207517077, '79073': 21.617138910435663, '9761': 22.90175446221525, '23210': 23.87152506597379, '23636': 23.98488530187927, '1647': 26.52645072171128, '230': 28.299730249900318, '8870': 30.136032283086575, '79071': 31.863967716913425, '4864': 33.022672047181096, '1050': 36.6095815613753, '1958': 36.939541207517074, '4616': 39.68515505197895, '54541': 40.68515505197895, '622': 40.730499146341145, '8396': 46.79595157537013, '6709': 47.31484494802105, '9531': 47.488663976409455, '11041': 58.81362998600516, '26036': 59.780836877249406, '991': 61.17125531587411, '7538': 61.8060726369448, '3312': 63.14358963214694, '5092': 66.30985131147499, '8318': 68.55668011795274, '80746': 70.46855564174265, '6810': 70.67759770291859, '10493': 74.17881266493447, '5696': 75.56423746701311, '22823': 82.68259133946465, '6182': 87.9118755237899, '23338': 91.79851528788444, '4043': 99.67759770291859, '1950': 101.92186279688204, '3162': 102.6296898960421, '9903': 102.86653142942772, '23300': 103.934547570971, '6772': 105.3904184386247, '10285': 105.97233431627284, '10915': 106.00256371251429, '79850': 106.1712553158741, '3337': 106.42820518392652, '874': 108.6322536085564, '22809': 108.79338786285585, '11188': 111.85897408036735, '22827': 114.78839422630976, '5467': 115.56923110355918, '4303': 119.07813720311796, '51719': 120.11079652339119, '51070': 126.07300977808936, '9686': 126.1964910755695, '8878': 127.0025637125143, '6856': 127.72550550979508, '949': 132.22928418432525, '25932': 132.63468353258816, '5359': 134.89419711315486, '5048': 136.31740866053536, '3066': 138.02010833466682, '79143': 138.9319838584567, '22889': 141.75560111755402, '25805': 143.51133602359056, '10112': 143.94709855657746, 
'5985': 143.95209219312352, '2274': 146.23684153338561, '5909': 149.81106627349087, '80347': 150.09824553778478, '8508': 150.16126804278196, '51282': 150.40553313674542, '3682': 152.72037808476645, '890': 152.83630203318626, '11344': 153.94709855657746, '3978': 153.97989166533318, '8826': 155.65735557976924, '6603': 156.11835387245154, '10782': 156.41052677329148, '25793': 156.48367033986338, '392': 158.71538444822042, '148022': 158.9798916653332, '1677': 159.5668011795274, '24149': 161.56180754298134, '3108': 166.55411640543844, '4793': 169.03022939624148, '2771': 169.60445413634673, '5708': 170.29473661335425, '10775': 177.82861089564338, '7398': 179.392982151139, '10559': 182.0025637125143, '9143': 182.83886574570056, '211': 182.9319838584567, '993': 183.84142945821486, '9688': 188.61970262294997, '10237': 190.14601955617871, '8624': 190.3425106317482, '8480': 191.94966226909173, '4927': 194.53144435825735, '5110': 194.6952761135536, '11065': 196.55425019392098, '8726': 196.81619369851947, '11044': 197.94709855657746, '54505': 201.8287446841259, '55324': 202.1763827409027, '6390': 207.41309048580578, '1019': 209.77341331667157, '4216': 213.20404842462986, '55556': 213.80350892443047, '23077': 214.8967608256692, '4846': 215.28218562774782, '10953': 215.83373832067196, '10320': 216.05802886845115, '6622': 216.10337296281335, '1021': 217.74548005597933, '4791': 218.62982368452464, '3098': 219.29217290083994, '3383': 221.01754462215249, '6657': 221.4836703398634, '22883': 221.64224088164852, '3028': 221.82618097161162, '10058': 221.91943287285025, '4775': 222.31727487205282, '200081': 222.3904184386247, '8061': 222.42833897240905, '9653': 222.90175446221522, '66008': 222.91686916033598, '10589': 223.48367033986338, '9918': 226.3476380567768, '11073': 227.33751699520218, '11230': 228.35519540583718, '23': 228.8186236225512, '10857': 229.77584324070332, '10681': 229.8941971131549, '1759': 229.92186279688204, '51422': 229.99487257497142, '8720': 230.78083687724944, 
'5641': 230.78839422630978, '1906': 231.00755734906036, '8349': 231.76572217912866, '8985': 232.33751699520215, '10206': 233.40553313674542, '23588': 233.96221325469818, '9217': 234.33751699520212, '701': 234.3450743442625, '9276': 234.39541207517078, '10898': 235.39541207517078, '6117': 236.45087723110763, '836': 236.89919074970095, '58497': 237.39797578768508, '4860': 237.46599192922838, '9375': 237.90931181127564, '3638': 238.32240229708142, '2523': 238.39797578768508, '5580': 238.87651870251983, '1111': 239.01268477408897, '466': 239.21916312275062, '873': 239.44831351859335, '9868': 239.89163340064061, '7077': 239.96720689124425, '9170': 240.92186279688204, '7157': 241.3048576749289, '22908': 241.49365761295553, '29890': 241.52388700919695, '6850': 242.17138910435665, '7168': 242.38286108956433, '11157': 242.9974362874857, '84722': 243.53144435825735, '23368': 244.244398882446, '23244': 244.46585814074584, '8553': 245.92429272091383, '9897': 246.5390017073177, '868': 246.85128294282447, '3156': 247.01255098560645, '23585': 247.86140400439913, '23386': 248.50877231107623, '5743': 248.54898898040986, '3895': 249.4785429148348, '5289': 250.87395499000556, '51097': 252.3425106317482, '6275': 254.00499363654606, '1848': 254.65222815474064, '80204': 258.6574893682518, '51031': 259.8892034766088, '10644': 261.5693648920417, '5588': 262.23184789683955, '7994': 262.3375169952021}
# Keep only the columns named by the keys of selected_gene (in key order) and
# convert the text values to float32.
gene_expression_filter = np.array(gene_expression.loc[:, list(selected_gene)], dtype=np.float32)
# The viability values become the float32 label vector.
cell_viability = np.array(cell_viability, dtype=np.float32)
# Features and labels are wrapped together for the XGBoost booster.
dtest = xgb.DMatrix(gene_expression_filter, label=cell_viability)

#Loading model
# NOTE(review): pickle can run arbitrary code while loading; only load model files from a trusted source.
# The raw string keeps the backslash literal ("\C" is not a valid escape sequence),
# and the context manager closes the file handle once the model has been read.
with open(r"model_adjust\CTRP-L1000-6h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
# Import the function explicitly: `import scipy as sc` alone is not guaranteed
# to make `sc.stats` available on every SciPy release.
from scipy.stats import pearsonr

cl_prediction = loaded_model.predict(dtest)
# Correlation between the predicted and the measured viability.
print('Test_Best_Pearson:',pearsonr(cl_prediction,cell_viability))
# mean_squared_error is documented as (y_true, y_pred); the value is the same in either order.
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.13470206, 6.184232752394345e-60)
MSE_Test: 3.614


In [5]:
##Testing the Achilles-L1000-144h data by using the CTRP-L1000-6h model
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_144h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_144h.csv",
            first_gene_column='5720'):
    """Load the viability table, join it with the gene-expression table and split the result.

    The defaults reproduce the original hard-coded (Windows-style, relative)
    file locations, so ``getData()`` behaves as before; pass other paths to
    reuse the function for another dataset.

    Args:
        viability_path: Tab-separated file that must contain the columns
            'signature', 'cell_line', 'drug', 'pert_type', 'time' and
            'cell_viability'.
        expression_path: Comma-separated file with a 'cid' column (matched
            against 'signature') followed by the expression columns.
        first_gene_column: Label of the first expression column to keep; every
            column from this label through the last column of the merged
            table is returned.

    Returns:
        A tuple ``(gene_expression, cell_viability)``: a DataFrame holding the
        selected expression columns and a Series holding the
        'cell_viability' column. Both come from the same merged table, so
        their rows line up, and both hold strings because the files are read
        with ``dtype=str``.

    Raises:
        KeyError: If a required column is missing from either file.
    """
    required_columns = ['signature','cell_line','drug','pert_type','time','cell_viability']

    #1.Reading the data (all values are kept as text)
    viability_table = pd.read_table(viability_path,sep="\t",dtype=str)[required_columns]
    expression_table = pd.read_csv(expression_path,sep=",",dtype=str)

    #2.Combining the two datasets (default inner join: only signatures found in both files remain)
    merged = pd.merge(viability_table,expression_table,left_on='signature',right_on='cid')

    #3.Getting the gene expression block and the cell viability value from the same rows
    expression_block = merged.loc[:,first_gene_column:].copy()
    viability_value = merged.loc[:,'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return expression_block,viability_value
    
#Retrieving the data: the gene expression columns and the matching cell_viability column
gene_expression,cell_viability = getData()

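#Selecting the genes: only the keys (gene IDs, used as column names) are read below; the values are not used in this cell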
selected_gene = {'4609': 0.0, '5427': 3.5188933726509135, '55818': 5.022672047181096, '5997': 5.962213254698173, '665': 6.481106627349087, '6804': 8.45843458016799, '1831': 10.549122768892376, '3725': 13.962213254698172, '23212': 17.075573490603652, '4144': 20.87908241503415, '10695': 20.939541207517077, '79073': 21.617138910435663, '9761': 22.90175446221525, '23210': 23.87152506597379, '23636': 23.98488530187927, '1647': 26.52645072171128, '230': 28.299730249900318, '8870': 30.136032283086575, '79071': 31.863967716913425, '4864': 33.022672047181096, '1050': 36.6095815613753, '1958': 36.939541207517074, '4616': 39.68515505197895, '54541': 40.68515505197895, '622': 40.730499146341145, '8396': 46.79595157537013, '6709': 47.31484494802105, '9531': 47.488663976409455, '11041': 58.81362998600516, '26036': 59.780836877249406, '991': 61.17125531587411, '7538': 61.8060726369448, '3312': 63.14358963214694, '5092': 66.30985131147499, '8318': 68.55668011795274, '80746': 70.46855564174265, '6810': 70.67759770291859, '10493': 74.17881266493447, '5696': 75.56423746701311, '22823': 82.68259133946465, '6182': 87.9118755237899, '23338': 91.79851528788444, '4043': 99.67759770291859, '1950': 101.92186279688204, '3162': 102.6296898960421, '9903': 102.86653142942772, '23300': 103.934547570971, '6772': 105.3904184386247, '10285': 105.97233431627284, '10915': 106.00256371251429, '79850': 106.1712553158741, '3337': 106.42820518392652, '874': 108.6322536085564, '22809': 108.79338786285585, '11188': 111.85897408036735, '22827': 114.78839422630976, '5467': 115.56923110355918, '4303': 119.07813720311796, '51719': 120.11079652339119, '51070': 126.07300977808936, '9686': 126.1964910755695, '8878': 127.0025637125143, '6856': 127.72550550979508, '949': 132.22928418432525, '25932': 132.63468353258816, '5359': 134.89419711315486, '5048': 136.31740866053536, '3066': 138.02010833466682, '79143': 138.9319838584567, '22889': 141.75560111755402, '25805': 143.51133602359056, '10112': 143.94709855657746, 
'5985': 143.95209219312352, '2274': 146.23684153338561, '5909': 149.81106627349087, '80347': 150.09824553778478, '8508': 150.16126804278196, '51282': 150.40553313674542, '3682': 152.72037808476645, '890': 152.83630203318626, '11344': 153.94709855657746, '3978': 153.97989166533318, '8826': 155.65735557976924, '6603': 156.11835387245154, '10782': 156.41052677329148, '25793': 156.48367033986338, '392': 158.71538444822042, '148022': 158.9798916653332, '1677': 159.5668011795274, '24149': 161.56180754298134, '3108': 166.55411640543844, '4793': 169.03022939624148, '2771': 169.60445413634673, '5708': 170.29473661335425, '10775': 177.82861089564338, '7398': 179.392982151139, '10559': 182.0025637125143, '9143': 182.83886574570056, '211': 182.9319838584567, '993': 183.84142945821486, '9688': 188.61970262294997, '10237': 190.14601955617871, '8624': 190.3425106317482, '8480': 191.94966226909173, '4927': 194.53144435825735, '5110': 194.6952761135536, '11065': 196.55425019392098, '8726': 196.81619369851947, '11044': 197.94709855657746, '54505': 201.8287446841259, '55324': 202.1763827409027, '6390': 207.41309048580578, '1019': 209.77341331667157, '4216': 213.20404842462986, '55556': 213.80350892443047, '23077': 214.8967608256692, '4846': 215.28218562774782, '10953': 215.83373832067196, '10320': 216.05802886845115, '6622': 216.10337296281335, '1021': 217.74548005597933, '4791': 218.62982368452464, '3098': 219.29217290083994, '3383': 221.01754462215249, '6657': 221.4836703398634, '22883': 221.64224088164852, '3028': 221.82618097161162, '10058': 221.91943287285025, '4775': 222.31727487205282, '200081': 222.3904184386247, '8061': 222.42833897240905, '9653': 222.90175446221522, '66008': 222.91686916033598, '10589': 223.48367033986338, '9918': 226.3476380567768, '11073': 227.33751699520218, '11230': 228.35519540583718, '23': 228.8186236225512, '10857': 229.77584324070332, '10681': 229.8941971131549, '1759': 229.92186279688204, '51422': 229.99487257497142, '8720': 230.78083687724944, 
'5641': 230.78839422630978, '1906': 231.00755734906036, '8349': 231.76572217912866, '8985': 232.33751699520215, '10206': 233.40553313674542, '23588': 233.96221325469818, '9217': 234.33751699520212, '701': 234.3450743442625, '9276': 234.39541207517078, '10898': 235.39541207517078, '6117': 236.45087723110763, '836': 236.89919074970095, '58497': 237.39797578768508, '4860': 237.46599192922838, '9375': 237.90931181127564, '3638': 238.32240229708142, '2523': 238.39797578768508, '5580': 238.87651870251983, '1111': 239.01268477408897, '466': 239.21916312275062, '873': 239.44831351859335, '9868': 239.89163340064061, '7077': 239.96720689124425, '9170': 240.92186279688204, '7157': 241.3048576749289, '22908': 241.49365761295553, '29890': 241.52388700919695, '6850': 242.17138910435665, '7168': 242.38286108956433, '11157': 242.9974362874857, '84722': 243.53144435825735, '23368': 244.244398882446, '23244': 244.46585814074584, '8553': 245.92429272091383, '9897': 246.5390017073177, '868': 246.85128294282447, '3156': 247.01255098560645, '23585': 247.86140400439913, '23386': 248.50877231107623, '5743': 248.54898898040986, '3895': 249.4785429148348, '5289': 250.87395499000556, '51097': 252.3425106317482, '6275': 254.00499363654606, '1848': 254.65222815474064, '80204': 258.6574893682518, '51031': 259.8892034766088, '10644': 261.5693648920417, '5588': 262.23184789683955, '7994': 262.3375169952021}
#Building the test matrix: keep only the selected gene columns, in the key order of selected_gene
gene_expression_filter = np.array(gene_expression[list(selected_gene)],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,cell_viability)

#Loading model
#NOTE(review): pickle.load can execute arbitrary code stored in the file; only load model files from a trusted source
#The raw string keeps the backslash literal (a plain "\C" is an invalid escape sequence) and
#the with-block closes the file handle as soon as the model has been read
with open(r"model_adjust\CTRP-L1000-6h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)
#"import scipy as sc" alone does not load the stats submodule on every SciPy version, so import it explicitly
import scipy.stats
print('Test_Best_Pearson:',sc.stats.pearsonr(cl_prediction,cell_viability))
#Arguments follow the documented (y_true, y_pred) order; MSE is symmetric, so the printed value is unchanged
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.038339153, 8.616394847978306e-05)
MSE_Test: 3.832
