In [8]:
#Evaluating the CTRP-L1000-3h model on the CTRP-L1000-6h data
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"LINCS_CTRP_withoutdose\LINCS_CTRP_withoutdose_6h.txt",
            expression_path=r"LINCS_CTRP_withoutdose\LINCS_gene_expression_6h.csv",
            first_gene_column='5720'):
    """Load the viability and gene-expression tables and join them per signature.

    Both files are read with every column as a string; numeric conversion is
    left to the caller. Rows are paired with an inner join of the viability
    table's 'signature' column against the expression table's 'cid' column,
    so signatures present in only one file are dropped.

    Parameters
    ----------
    viability_path : str
        Tab-separated file containing at least the columns 'signature',
        'cell_line', 'drug', 'dose', 'time' and 'cell_viability'.
    expression_path : str
        Comma-separated file containing a 'cid' column and the gene columns.
    first_gene_column : str
        Label of the first gene column. That column and every column to its
        right in the merged table are returned as the expression matrix.

    Returns
    -------
    tuple
        (gene expression DataFrame, 'cell_viability' Series), row-aligned.

    Raises
    ------
    KeyError
        If a required column or first_gene_column is missing.
    """
    #1.Reading the data
    sig_viability_info = pd.read_table(viability_path, sep="\t", dtype=str)
    sig_viability_info_filter = sig_viability_info[['signature','cell_line','drug','dose','time','cell_viability']]
    sig_GE_info = pd.read_csv(expression_path, sep=",", dtype=str)

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter, sig_GE_info, left_on='signature', right_on='cid')
    # Label-based slice: keeps first_gene_column and everything after it
    gene_expression_matrix = sig_viability.loc[:, first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = sig_viability.loc[:, 'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_expression_matrix, viability_value
    
#Retrieving the data
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'993': 0.0, '1831': 1.0, '481': 8.55272889553975, '8878': 11.30666071635425, '1514': 13.693339283645752, '8607': 16.693339283645752, '10898': 20.72849188067225, '1026': 20.84967695500974, '1062': 20.869102268778253, '10904': 23.332100656496493, '3383': 25.0, '23378': 25.884829552036244, '55818': 31.015727283257988, '54915': 34.115170447963756, '23212': 39.712764597414264, '9448': 40.281220776212, '47': 44.44727110446024, '1662': 50.0, '54541': 52.687324657272015, '58478': 56.88482955203625, '5427': 59.00000000000001, '23338': 60.27752274570148, '6908': 66.16235229773773, '85377': 68.8945422089205, '23670': 72.91998214906275, '4783': 73.64847402973498, '57761': 76.41813313380749, '2115': 78.18177761150623, '2958': 78.75023379030398, '595': 80.4884383278605, '2274': 82.67761200038775, '23097': 90.36355522301247, '8480': 94.8242370148675, '8503': 97.29463146360678, '8349': 101.23034089592751, '5583': 101.40240585054948, '51001': 104.38298053678098, '991': 105.281220776212, '6919': 108.54069964279229, '1647': 109.65587009075604, '5054': 111.02913797065277, '2920': 111.20721755164848, '51282': 111.95513474608924, '11142': 117.47640907511303, '6839': 120.25809743193297, '291': 120.95513474608924, '5366': 121.29093343309624, '54681': 121.53330358177125, '11319': 122.43755844757601, '5927': 122.83025164124123, '10190': 127.04116722340024, '5993': 127.64847402973498, '80204': 129.52359092488697, '5019': 129.53562017763446, '55556': 130.67761200038777, '3162': 132.16373373238503, '847': 134.95143671557872, '26136': 136.88714614789944, '5699': 139.2460681791855, '23658': 140.23265749179075, '4775': 140.26781008881724, '8869': 140.32376943425953, '30836': 143.16836692411144, '6894': 146.25809743193295, '3480': 146.32978406063327, '24149': 149.1600357018745, '3909': 155.23867211816446, '1633': 158.2798393415647, '4216': 159.39407462831258, '4850': 161.43016238655494, '10165': 164.20490095578526, '7296': 164.88621098668355, '11072': 164.88852758254674, '10559': 
165.76132788183554, '10491': 166.01710871790527, '2542': 167.19750489476425, '5106': 167.6447759992245, '9801': 168.59759414945052, '1429': 173.31267534272797, '6659': 176.3658718188757, '7158': 178.07030519405302, '2548': 178.83025164124123, '8446': 179.93570943232072, '9943': 179.97086202934722, '79073': 180.91860071441545, '6304': 181.00971265688423, '10765': 181.48380513613407, '29890': 182.90055683529425, '8731': 184.382980536781, '3251': 185.4593003572077, '9133': 186.172064954622, '2961': 186.83025164124126, '10270': 187.01942531376852, '4864': 188.31267534272797, '976': 191.78168835681998, '8870': 192.04254865804754, '9710': 192.57585223981877, '8243': 194.26317689709077, '23530': 194.71508119327748, '3566': 197.19888632941152, '7159': 198.89824023943103, '3978': 199.93570943232075, '9805': 200.6498554643823, '2673': 202.312675342728, '4144': 203.00463319172644, '3638': 205.66558274764031, '10617': 206.2678100888172, '2184': 206.30897731221745, '26020': 206.5952775535873, '3303': 207.11655188261105, '8974': 207.87511689515196, '5770': 208.0666071635425, '2817': 208.14662501447975, '5257': 208.28723540258574, '7485': 208.69935391001948, '836': 209.523590924887, '25793': 210.0328360011633, '5921': 210.70536853639322, '3628': 211.20721755164848, '3611': 211.75161522495125, '7750': 211.8654042382677, '22908': 212.1369123575955, '7867': 212.2775227457015, '5529': 212.45096913497076, '2356': 212.96716399883672, '65123': 213.031454566516, '5566': 213.0666071635425, '2109': 213.9671639988367, '5300': 216.0023165958632, '9917': 217.00231659586322, '10775': 217.09806173005845, '823': 217.9088880575312, '10099': 218.22200967369056, '8720': 219.20859898629575, '5796': 219.4398750434392, '6499': 219.61332143270852, '23224': 219.68964125313525, '7466': 220.48474029734996, '10682': 220.8654042382677, '178': 221.281220776212, '10434': 221.40472244641268, '4927': 221.96716399883672, '10285': 222.7553132554618, '4817': 222.96716399883672, '5836': 223.92229874492597, '7165': 
224.9079528963153, '9053': 225.00231659586322, '1846': 225.20490095578526, '57804': 225.7562484166777, '9517': 225.7950990442147, '11065': 226.29926465533322, '9267': 226.48474029734996, '6793': 228.23265749179072, '9915': 228.5101802374922, '2956': 229.2992646553332, '4331': 229.51757629851323, '4792': 230.23497408765394, '1459': 230.37696591040728, '4016': 230.41211850743375, '4925': 231.02543994014223}
#Building the test matrix
#Only the keys of selected_gene are used: they pick the gene columns, in dict order
#NOTE(review): presumably this order must match the feature order the model was trained on - confirm
gene_expression_filter = np.array(gene_expression[list(selected_gene)],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE: unpickling can execute arbitrary code - only load model files from a trusted source
#Raw string: the backslash is a path separator, "\C" is not a valid escape sequence
with open(r"model_adjust\CTRP-L1000-3h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
#Import the submodule explicitly: "import scipy" alone does not guarantee scipy.stats is loaded
from scipy import stats
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error expects (y_true, y_pred); the value is the same either way
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.46052352, 0.0)
MSE_Test: 0.083


In [9]:
#Evaluating the CTRP-L1000-3h model on the CTRP-L1000-24h data
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"LINCS_CTRP_withoutdose\LINCS_CTRP_withoutdose_24h.txt",
            expression_path=r"LINCS_CTRP_withoutdose\LINCS_gene_expression_24h.csv",
            first_gene_column='5720'):
    """Load the viability and gene-expression tables and join them per signature.

    Both files are read with every column as a string; numeric conversion is
    left to the caller. Rows are paired with an inner join of the viability
    table's 'signature' column against the expression table's 'cid' column,
    so signatures present in only one file are dropped.

    Parameters
    ----------
    viability_path : str
        Tab-separated file containing at least the columns 'signature',
        'cell_line', 'drug', 'dose', 'time' and 'cell_viability'.
    expression_path : str
        Comma-separated file containing a 'cid' column and the gene columns.
    first_gene_column : str
        Label of the first gene column. That column and every column to its
        right in the merged table are returned as the expression matrix.

    Returns
    -------
    tuple
        (gene expression DataFrame, 'cell_viability' Series), row-aligned.

    Raises
    ------
    KeyError
        If a required column or first_gene_column is missing.
    """
    #1.Reading the data
    sig_viability_info = pd.read_table(viability_path, sep="\t", dtype=str)
    sig_viability_info_filter = sig_viability_info[['signature','cell_line','drug','dose','time','cell_viability']]
    sig_GE_info = pd.read_csv(expression_path, sep=",", dtype=str)

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter, sig_GE_info, left_on='signature', right_on='cid')
    # Label-based slice: keeps first_gene_column and everything after it
    gene_expression_matrix = sig_viability.loc[:, first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = sig_viability.loc[:, 'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_expression_matrix, viability_value
    
#Retrieving the data
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'993': 0.0, '1831': 1.0, '481': 8.55272889553975, '8878': 11.30666071635425, '1514': 13.693339283645752, '8607': 16.693339283645752, '10898': 20.72849188067225, '1026': 20.84967695500974, '1062': 20.869102268778253, '10904': 23.332100656496493, '3383': 25.0, '23378': 25.884829552036244, '55818': 31.015727283257988, '54915': 34.115170447963756, '23212': 39.712764597414264, '9448': 40.281220776212, '47': 44.44727110446024, '1662': 50.0, '54541': 52.687324657272015, '58478': 56.88482955203625, '5427': 59.00000000000001, '23338': 60.27752274570148, '6908': 66.16235229773773, '85377': 68.8945422089205, '23670': 72.91998214906275, '4783': 73.64847402973498, '57761': 76.41813313380749, '2115': 78.18177761150623, '2958': 78.75023379030398, '595': 80.4884383278605, '2274': 82.67761200038775, '23097': 90.36355522301247, '8480': 94.8242370148675, '8503': 97.29463146360678, '8349': 101.23034089592751, '5583': 101.40240585054948, '51001': 104.38298053678098, '991': 105.281220776212, '6919': 108.54069964279229, '1647': 109.65587009075604, '5054': 111.02913797065277, '2920': 111.20721755164848, '51282': 111.95513474608924, '11142': 117.47640907511303, '6839': 120.25809743193297, '291': 120.95513474608924, '5366': 121.29093343309624, '54681': 121.53330358177125, '11319': 122.43755844757601, '5927': 122.83025164124123, '10190': 127.04116722340024, '5993': 127.64847402973498, '80204': 129.52359092488697, '5019': 129.53562017763446, '55556': 130.67761200038777, '3162': 132.16373373238503, '847': 134.95143671557872, '26136': 136.88714614789944, '5699': 139.2460681791855, '23658': 140.23265749179075, '4775': 140.26781008881724, '8869': 140.32376943425953, '30836': 143.16836692411144, '6894': 146.25809743193295, '3480': 146.32978406063327, '24149': 149.1600357018745, '3909': 155.23867211816446, '1633': 158.2798393415647, '4216': 159.39407462831258, '4850': 161.43016238655494, '10165': 164.20490095578526, '7296': 164.88621098668355, '11072': 164.88852758254674, '10559': 
165.76132788183554, '10491': 166.01710871790527, '2542': 167.19750489476425, '5106': 167.6447759992245, '9801': 168.59759414945052, '1429': 173.31267534272797, '6659': 176.3658718188757, '7158': 178.07030519405302, '2548': 178.83025164124123, '8446': 179.93570943232072, '9943': 179.97086202934722, '79073': 180.91860071441545, '6304': 181.00971265688423, '10765': 181.48380513613407, '29890': 182.90055683529425, '8731': 184.382980536781, '3251': 185.4593003572077, '9133': 186.172064954622, '2961': 186.83025164124126, '10270': 187.01942531376852, '4864': 188.31267534272797, '976': 191.78168835681998, '8870': 192.04254865804754, '9710': 192.57585223981877, '8243': 194.26317689709077, '23530': 194.71508119327748, '3566': 197.19888632941152, '7159': 198.89824023943103, '3978': 199.93570943232075, '9805': 200.6498554643823, '2673': 202.312675342728, '4144': 203.00463319172644, '3638': 205.66558274764031, '10617': 206.2678100888172, '2184': 206.30897731221745, '26020': 206.5952775535873, '3303': 207.11655188261105, '8974': 207.87511689515196, '5770': 208.0666071635425, '2817': 208.14662501447975, '5257': 208.28723540258574, '7485': 208.69935391001948, '836': 209.523590924887, '25793': 210.0328360011633, '5921': 210.70536853639322, '3628': 211.20721755164848, '3611': 211.75161522495125, '7750': 211.8654042382677, '22908': 212.1369123575955, '7867': 212.2775227457015, '5529': 212.45096913497076, '2356': 212.96716399883672, '65123': 213.031454566516, '5566': 213.0666071635425, '2109': 213.9671639988367, '5300': 216.0023165958632, '9917': 217.00231659586322, '10775': 217.09806173005845, '823': 217.9088880575312, '10099': 218.22200967369056, '8720': 219.20859898629575, '5796': 219.4398750434392, '6499': 219.61332143270852, '23224': 219.68964125313525, '7466': 220.48474029734996, '10682': 220.8654042382677, '178': 221.281220776212, '10434': 221.40472244641268, '4927': 221.96716399883672, '10285': 222.7553132554618, '4817': 222.96716399883672, '5836': 223.92229874492597, '7165': 
224.9079528963153, '9053': 225.00231659586322, '1846': 225.20490095578526, '57804': 225.7562484166777, '9517': 225.7950990442147, '11065': 226.29926465533322, '9267': 226.48474029734996, '6793': 228.23265749179072, '9915': 228.5101802374922, '2956': 229.2992646553332, '4331': 229.51757629851323, '4792': 230.23497408765394, '1459': 230.37696591040728, '4016': 230.41211850743375, '4925': 231.02543994014223}
#Building the test matrix
#Only the keys of selected_gene are used: they pick the gene columns, in dict order
#NOTE(review): presumably this order must match the feature order the model was trained on - confirm
gene_expression_filter = np.array(gene_expression[list(selected_gene)],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE: unpickling can execute arbitrary code - only load model files from a trusted source
#Raw string: the backslash is a path separator, "\C" is not a valid escape sequence
with open(r"model_adjust\CTRP-L1000-3h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
#Import the submodule explicitly: "import scipy" alone does not guarantee scipy.stats is loaded
from scipy import stats
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error expects (y_true, y_pred); the value is the same either way
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.6424761, 0.0)
MSE_Test: 0.054


In [11]:
#Evaluating the CTRP-L1000-3h model on the Achilles-L1000-96h data
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_96h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_96h.csv",
            first_gene_column='5720'):
    """Load the viability and gene-expression tables and join them per signature.

    Both files are read with every column as a string; numeric conversion is
    left to the caller. Rows are paired with an inner join of the viability
    table's 'signature' column against the expression table's 'cid' column,
    so signatures present in only one file are dropped.

    Parameters
    ----------
    viability_path : str
        Tab-separated file containing at least the columns 'signature',
        'cell_line', 'drug', 'pert_type', 'time' and 'cell_viability'.
    expression_path : str
        Comma-separated file containing a 'cid' column and the gene columns.
    first_gene_column : str
        Label of the first gene column. That column and every column to its
        right in the merged table are returned as the expression matrix.

    Returns
    -------
    tuple
        (gene expression DataFrame, 'cell_viability' Series), row-aligned.

    Raises
    ------
    KeyError
        If a required column or first_gene_column is missing.
    """
    #1.Reading the data
    sig_viability_info = pd.read_table(viability_path, sep="\t", dtype=str)
    sig_viability_info_filter = sig_viability_info[['signature','cell_line','drug','pert_type','time','cell_viability']]
    sig_GE_info = pd.read_csv(expression_path, sep=",", dtype=str)

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter, sig_GE_info, left_on='signature', right_on='cid')
    # Label-based slice: keeps first_gene_column and everything after it
    gene_expression_matrix = sig_viability.loc[:, first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = sig_viability.loc[:, 'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_expression_matrix, viability_value
    
#Retrieving the data
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'993': 0.0, '1831': 1.0, '481': 8.55272889553975, '8878': 11.30666071635425, '1514': 13.693339283645752, '8607': 16.693339283645752, '10898': 20.72849188067225, '1026': 20.84967695500974, '1062': 20.869102268778253, '10904': 23.332100656496493, '3383': 25.0, '23378': 25.884829552036244, '55818': 31.015727283257988, '54915': 34.115170447963756, '23212': 39.712764597414264, '9448': 40.281220776212, '47': 44.44727110446024, '1662': 50.0, '54541': 52.687324657272015, '58478': 56.88482955203625, '5427': 59.00000000000001, '23338': 60.27752274570148, '6908': 66.16235229773773, '85377': 68.8945422089205, '23670': 72.91998214906275, '4783': 73.64847402973498, '57761': 76.41813313380749, '2115': 78.18177761150623, '2958': 78.75023379030398, '595': 80.4884383278605, '2274': 82.67761200038775, '23097': 90.36355522301247, '8480': 94.8242370148675, '8503': 97.29463146360678, '8349': 101.23034089592751, '5583': 101.40240585054948, '51001': 104.38298053678098, '991': 105.281220776212, '6919': 108.54069964279229, '1647': 109.65587009075604, '5054': 111.02913797065277, '2920': 111.20721755164848, '51282': 111.95513474608924, '11142': 117.47640907511303, '6839': 120.25809743193297, '291': 120.95513474608924, '5366': 121.29093343309624, '54681': 121.53330358177125, '11319': 122.43755844757601, '5927': 122.83025164124123, '10190': 127.04116722340024, '5993': 127.64847402973498, '80204': 129.52359092488697, '5019': 129.53562017763446, '55556': 130.67761200038777, '3162': 132.16373373238503, '847': 134.95143671557872, '26136': 136.88714614789944, '5699': 139.2460681791855, '23658': 140.23265749179075, '4775': 140.26781008881724, '8869': 140.32376943425953, '30836': 143.16836692411144, '6894': 146.25809743193295, '3480': 146.32978406063327, '24149': 149.1600357018745, '3909': 155.23867211816446, '1633': 158.2798393415647, '4216': 159.39407462831258, '4850': 161.43016238655494, '10165': 164.20490095578526, '7296': 164.88621098668355, '11072': 164.88852758254674, '10559': 
165.76132788183554, '10491': 166.01710871790527, '2542': 167.19750489476425, '5106': 167.6447759992245, '9801': 168.59759414945052, '1429': 173.31267534272797, '6659': 176.3658718188757, '7158': 178.07030519405302, '2548': 178.83025164124123, '8446': 179.93570943232072, '9943': 179.97086202934722, '79073': 180.91860071441545, '6304': 181.00971265688423, '10765': 181.48380513613407, '29890': 182.90055683529425, '8731': 184.382980536781, '3251': 185.4593003572077, '9133': 186.172064954622, '2961': 186.83025164124126, '10270': 187.01942531376852, '4864': 188.31267534272797, '976': 191.78168835681998, '8870': 192.04254865804754, '9710': 192.57585223981877, '8243': 194.26317689709077, '23530': 194.71508119327748, '3566': 197.19888632941152, '7159': 198.89824023943103, '3978': 199.93570943232075, '9805': 200.6498554643823, '2673': 202.312675342728, '4144': 203.00463319172644, '3638': 205.66558274764031, '10617': 206.2678100888172, '2184': 206.30897731221745, '26020': 206.5952775535873, '3303': 207.11655188261105, '8974': 207.87511689515196, '5770': 208.0666071635425, '2817': 208.14662501447975, '5257': 208.28723540258574, '7485': 208.69935391001948, '836': 209.523590924887, '25793': 210.0328360011633, '5921': 210.70536853639322, '3628': 211.20721755164848, '3611': 211.75161522495125, '7750': 211.8654042382677, '22908': 212.1369123575955, '7867': 212.2775227457015, '5529': 212.45096913497076, '2356': 212.96716399883672, '65123': 213.031454566516, '5566': 213.0666071635425, '2109': 213.9671639988367, '5300': 216.0023165958632, '9917': 217.00231659586322, '10775': 217.09806173005845, '823': 217.9088880575312, '10099': 218.22200967369056, '8720': 219.20859898629575, '5796': 219.4398750434392, '6499': 219.61332143270852, '23224': 219.68964125313525, '7466': 220.48474029734996, '10682': 220.8654042382677, '178': 221.281220776212, '10434': 221.40472244641268, '4927': 221.96716399883672, '10285': 222.7553132554618, '4817': 222.96716399883672, '5836': 223.92229874492597, '7165': 
224.9079528963153, '9053': 225.00231659586322, '1846': 225.20490095578526, '57804': 225.7562484166777, '9517': 225.7950990442147, '11065': 226.29926465533322, '9267': 226.48474029734996, '6793': 228.23265749179072, '9915': 228.5101802374922, '2956': 229.2992646553332, '4331': 229.51757629851323, '4792': 230.23497408765394, '1459': 230.37696591040728, '4016': 230.41211850743375, '4925': 231.02543994014223}
#Building the test matrix
#Only the keys of selected_gene are used: they pick the gene columns, in dict order
#NOTE(review): presumably this order must match the feature order the model was trained on - confirm
gene_expression_filter = np.array(gene_expression[list(selected_gene)],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE: unpickling can execute arbitrary code - only load model files from a trusted source
#Raw string: the backslash is a path separator, "\C" is not a valid escape sequence
with open(r"model_adjust\CTRP-L1000-3h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
#Import the submodule explicitly: "import scipy" alone does not guarantee scipy.stats is loaded
from scipy import stats
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error expects (y_true, y_pred); the value is the same either way
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.35472777, 0.0)
MSE_Test: 4.327


In [12]:
#Evaluating the CTRP-L1000-3h model on the Achilles-L1000-120h data
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_120h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_120h.csv",
            first_gene_column='5720'):
    """Load the viability and gene-expression tables and join them per signature.

    Both files are read with every column as a string; numeric conversion is
    left to the caller. Rows are paired with an inner join of the viability
    table's 'signature' column against the expression table's 'cid' column,
    so signatures present in only one file are dropped.

    Parameters
    ----------
    viability_path : str
        Tab-separated file containing at least the columns 'signature',
        'cell_line', 'drug', 'pert_type', 'time' and 'cell_viability'.
    expression_path : str
        Comma-separated file containing a 'cid' column and the gene columns.
    first_gene_column : str
        Label of the first gene column. That column and every column to its
        right in the merged table are returned as the expression matrix.

    Returns
    -------
    tuple
        (gene expression DataFrame, 'cell_viability' Series), row-aligned.

    Raises
    ------
    KeyError
        If a required column or first_gene_column is missing.
    """
    #1.Reading the data
    sig_viability_info = pd.read_table(viability_path, sep="\t", dtype=str)
    sig_viability_info_filter = sig_viability_info[['signature','cell_line','drug','pert_type','time','cell_viability']]
    sig_GE_info = pd.read_csv(expression_path, sep=",", dtype=str)

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter, sig_GE_info, left_on='signature', right_on='cid')
    # Label-based slice: keeps first_gene_column and everything after it
    gene_expression_matrix = sig_viability.loc[:, first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = sig_viability.loc[:, 'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_expression_matrix, viability_value
    
#Retrieving the data
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'993': 0.0, '1831': 1.0, '481': 8.55272889553975, '8878': 11.30666071635425, '1514': 13.693339283645752, '8607': 16.693339283645752, '10898': 20.72849188067225, '1026': 20.84967695500974, '1062': 20.869102268778253, '10904': 23.332100656496493, '3383': 25.0, '23378': 25.884829552036244, '55818': 31.015727283257988, '54915': 34.115170447963756, '23212': 39.712764597414264, '9448': 40.281220776212, '47': 44.44727110446024, '1662': 50.0, '54541': 52.687324657272015, '58478': 56.88482955203625, '5427': 59.00000000000001, '23338': 60.27752274570148, '6908': 66.16235229773773, '85377': 68.8945422089205, '23670': 72.91998214906275, '4783': 73.64847402973498, '57761': 76.41813313380749, '2115': 78.18177761150623, '2958': 78.75023379030398, '595': 80.4884383278605, '2274': 82.67761200038775, '23097': 90.36355522301247, '8480': 94.8242370148675, '8503': 97.29463146360678, '8349': 101.23034089592751, '5583': 101.40240585054948, '51001': 104.38298053678098, '991': 105.281220776212, '6919': 108.54069964279229, '1647': 109.65587009075604, '5054': 111.02913797065277, '2920': 111.20721755164848, '51282': 111.95513474608924, '11142': 117.47640907511303, '6839': 120.25809743193297, '291': 120.95513474608924, '5366': 121.29093343309624, '54681': 121.53330358177125, '11319': 122.43755844757601, '5927': 122.83025164124123, '10190': 127.04116722340024, '5993': 127.64847402973498, '80204': 129.52359092488697, '5019': 129.53562017763446, '55556': 130.67761200038777, '3162': 132.16373373238503, '847': 134.95143671557872, '26136': 136.88714614789944, '5699': 139.2460681791855, '23658': 140.23265749179075, '4775': 140.26781008881724, '8869': 140.32376943425953, '30836': 143.16836692411144, '6894': 146.25809743193295, '3480': 146.32978406063327, '24149': 149.1600357018745, '3909': 155.23867211816446, '1633': 158.2798393415647, '4216': 159.39407462831258, '4850': 161.43016238655494, '10165': 164.20490095578526, '7296': 164.88621098668355, '11072': 164.88852758254674, '10559': 
165.76132788183554, '10491': 166.01710871790527, '2542': 167.19750489476425, '5106': 167.6447759992245, '9801': 168.59759414945052, '1429': 173.31267534272797, '6659': 176.3658718188757, '7158': 178.07030519405302, '2548': 178.83025164124123, '8446': 179.93570943232072, '9943': 179.97086202934722, '79073': 180.91860071441545, '6304': 181.00971265688423, '10765': 181.48380513613407, '29890': 182.90055683529425, '8731': 184.382980536781, '3251': 185.4593003572077, '9133': 186.172064954622, '2961': 186.83025164124126, '10270': 187.01942531376852, '4864': 188.31267534272797, '976': 191.78168835681998, '8870': 192.04254865804754, '9710': 192.57585223981877, '8243': 194.26317689709077, '23530': 194.71508119327748, '3566': 197.19888632941152, '7159': 198.89824023943103, '3978': 199.93570943232075, '9805': 200.6498554643823, '2673': 202.312675342728, '4144': 203.00463319172644, '3638': 205.66558274764031, '10617': 206.2678100888172, '2184': 206.30897731221745, '26020': 206.5952775535873, '3303': 207.11655188261105, '8974': 207.87511689515196, '5770': 208.0666071635425, '2817': 208.14662501447975, '5257': 208.28723540258574, '7485': 208.69935391001948, '836': 209.523590924887, '25793': 210.0328360011633, '5921': 210.70536853639322, '3628': 211.20721755164848, '3611': 211.75161522495125, '7750': 211.8654042382677, '22908': 212.1369123575955, '7867': 212.2775227457015, '5529': 212.45096913497076, '2356': 212.96716399883672, '65123': 213.031454566516, '5566': 213.0666071635425, '2109': 213.9671639988367, '5300': 216.0023165958632, '9917': 217.00231659586322, '10775': 217.09806173005845, '823': 217.9088880575312, '10099': 218.22200967369056, '8720': 219.20859898629575, '5796': 219.4398750434392, '6499': 219.61332143270852, '23224': 219.68964125313525, '7466': 220.48474029734996, '10682': 220.8654042382677, '178': 221.281220776212, '10434': 221.40472244641268, '4927': 221.96716399883672, '10285': 222.7553132554618, '4817': 222.96716399883672, '5836': 223.92229874492597, '7165': 
224.9079528963153, '9053': 225.00231659586322, '1846': 225.20490095578526, '57804': 225.7562484166777, '9517': 225.7950990442147, '11065': 226.29926465533322, '9267': 226.48474029734996, '6793': 228.23265749179072, '9915': 228.5101802374922, '2956': 229.2992646553332, '4331': 229.51757629851323, '4792': 230.23497408765394, '1459': 230.37696591040728, '4016': 230.41211850743375, '4925': 231.02543994014223}
#Building the test matrix
#Only the keys of selected_gene are used: they pick the gene columns, in dict order
#NOTE(review): presumably this order must match the feature order the model was trained on - confirm
gene_expression_filter = np.array(gene_expression[list(selected_gene)],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE: unpickling can execute arbitrary code - only load model files from a trusted source
#Raw string: the backslash is a path separator, "\C" is not a valid escape sequence
with open(r"model_adjust\CTRP-L1000-3h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
#Import the submodule explicitly: "import scipy" alone does not guarantee scipy.stats is loaded
from scipy import stats
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error expects (y_true, y_pred); the value is the same either way
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.2551615, 3.49827287897816e-215)
MSE_Test: 3.712


In [14]:
#Evaluating the CTRP-L1000-3h model on the Achilles-L1000-144h data
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(viability_path=r"individual_validation\Achilles_L1000_PhaseI_144h_unique.txt",
            expression_path=r"individual_validation\Achilles_L1000_PhaseI_gene_expression_144h.csv",
            first_gene_column='5720'):
    """Load the viability and gene-expression tables and join them per signature.

    Both files are read with every column as a string; numeric conversion is
    left to the caller. Rows are paired with an inner join of the viability
    table's 'signature' column against the expression table's 'cid' column,
    so signatures present in only one file are dropped.

    Parameters
    ----------
    viability_path : str
        Tab-separated file containing at least the columns 'signature',
        'cell_line', 'drug', 'pert_type', 'time' and 'cell_viability'.
    expression_path : str
        Comma-separated file containing a 'cid' column and the gene columns.
    first_gene_column : str
        Label of the first gene column. That column and every column to its
        right in the merged table are returned as the expression matrix.

    Returns
    -------
    tuple
        (gene expression DataFrame, 'cell_viability' Series), row-aligned.

    Raises
    ------
    KeyError
        If a required column or first_gene_column is missing.
    """
    #1.Reading the data
    sig_viability_info = pd.read_table(viability_path, sep="\t", dtype=str)
    sig_viability_info_filter = sig_viability_info[['signature','cell_line','drug','pert_type','time','cell_viability']]
    sig_GE_info = pd.read_csv(expression_path, sep=",", dtype=str)

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter, sig_GE_info, left_on='signature', right_on='cid')
    # Label-based slice: keeps first_gene_column and everything after it
    gene_expression_matrix = sig_viability.loc[:, first_gene_column:].copy()

    #3.Getting the cell viability value
    viability_value = sig_viability.loc[:, 'cell_viability']

    #4.Return the gene expression matrix and cell viability value
    return gene_expression_matrix, viability_value
    
#Retrieving the data
gene_expression,cell_viability = getData()

#Selecting the genes
selected_gene = {'993': 0.0, '1831': 1.0, '481': 8.55272889553975, '8878': 11.30666071635425, '1514': 13.693339283645752, '8607': 16.693339283645752, '10898': 20.72849188067225, '1026': 20.84967695500974, '1062': 20.869102268778253, '10904': 23.332100656496493, '3383': 25.0, '23378': 25.884829552036244, '55818': 31.015727283257988, '54915': 34.115170447963756, '23212': 39.712764597414264, '9448': 40.281220776212, '47': 44.44727110446024, '1662': 50.0, '54541': 52.687324657272015, '58478': 56.88482955203625, '5427': 59.00000000000001, '23338': 60.27752274570148, '6908': 66.16235229773773, '85377': 68.8945422089205, '23670': 72.91998214906275, '4783': 73.64847402973498, '57761': 76.41813313380749, '2115': 78.18177761150623, '2958': 78.75023379030398, '595': 80.4884383278605, '2274': 82.67761200038775, '23097': 90.36355522301247, '8480': 94.8242370148675, '8503': 97.29463146360678, '8349': 101.23034089592751, '5583': 101.40240585054948, '51001': 104.38298053678098, '991': 105.281220776212, '6919': 108.54069964279229, '1647': 109.65587009075604, '5054': 111.02913797065277, '2920': 111.20721755164848, '51282': 111.95513474608924, '11142': 117.47640907511303, '6839': 120.25809743193297, '291': 120.95513474608924, '5366': 121.29093343309624, '54681': 121.53330358177125, '11319': 122.43755844757601, '5927': 122.83025164124123, '10190': 127.04116722340024, '5993': 127.64847402973498, '80204': 129.52359092488697, '5019': 129.53562017763446, '55556': 130.67761200038777, '3162': 132.16373373238503, '847': 134.95143671557872, '26136': 136.88714614789944, '5699': 139.2460681791855, '23658': 140.23265749179075, '4775': 140.26781008881724, '8869': 140.32376943425953, '30836': 143.16836692411144, '6894': 146.25809743193295, '3480': 146.32978406063327, '24149': 149.1600357018745, '3909': 155.23867211816446, '1633': 158.2798393415647, '4216': 159.39407462831258, '4850': 161.43016238655494, '10165': 164.20490095578526, '7296': 164.88621098668355, '11072': 164.88852758254674, '10559': 
165.76132788183554, '10491': 166.01710871790527, '2542': 167.19750489476425, '5106': 167.6447759992245, '9801': 168.59759414945052, '1429': 173.31267534272797, '6659': 176.3658718188757, '7158': 178.07030519405302, '2548': 178.83025164124123, '8446': 179.93570943232072, '9943': 179.97086202934722, '79073': 180.91860071441545, '6304': 181.00971265688423, '10765': 181.48380513613407, '29890': 182.90055683529425, '8731': 184.382980536781, '3251': 185.4593003572077, '9133': 186.172064954622, '2961': 186.83025164124126, '10270': 187.01942531376852, '4864': 188.31267534272797, '976': 191.78168835681998, '8870': 192.04254865804754, '9710': 192.57585223981877, '8243': 194.26317689709077, '23530': 194.71508119327748, '3566': 197.19888632941152, '7159': 198.89824023943103, '3978': 199.93570943232075, '9805': 200.6498554643823, '2673': 202.312675342728, '4144': 203.00463319172644, '3638': 205.66558274764031, '10617': 206.2678100888172, '2184': 206.30897731221745, '26020': 206.5952775535873, '3303': 207.11655188261105, '8974': 207.87511689515196, '5770': 208.0666071635425, '2817': 208.14662501447975, '5257': 208.28723540258574, '7485': 208.69935391001948, '836': 209.523590924887, '25793': 210.0328360011633, '5921': 210.70536853639322, '3628': 211.20721755164848, '3611': 211.75161522495125, '7750': 211.8654042382677, '22908': 212.1369123575955, '7867': 212.2775227457015, '5529': 212.45096913497076, '2356': 212.96716399883672, '65123': 213.031454566516, '5566': 213.0666071635425, '2109': 213.9671639988367, '5300': 216.0023165958632, '9917': 217.00231659586322, '10775': 217.09806173005845, '823': 217.9088880575312, '10099': 218.22200967369056, '8720': 219.20859898629575, '5796': 219.4398750434392, '6499': 219.61332143270852, '23224': 219.68964125313525, '7466': 220.48474029734996, '10682': 220.8654042382677, '178': 221.281220776212, '10434': 221.40472244641268, '4927': 221.96716399883672, '10285': 222.7553132554618, '4817': 222.96716399883672, '5836': 223.92229874492597, '7165': 
224.9079528963153, '9053': 225.00231659586322, '1846': 225.20490095578526, '57804': 225.7562484166777, '9517': 225.7950990442147, '11065': 226.29926465533322, '9267': 226.48474029734996, '6793': 228.23265749179072, '9915': 228.5101802374922, '2956': 229.2992646553332, '4331': 229.51757629851323, '4792': 230.23497408765394, '1459': 230.37696591040728, '4016': 230.41211850743375, '4925': 231.02543994014223}
#Building the test matrix
#Only the keys of selected_gene are used: they pick the gene columns, in dict order
#NOTE(review): presumably this order must match the feature order the model was trained on - confirm
gene_expression_filter = np.array(gene_expression[list(selected_gene)],dtype="float32")
cell_viability = np.array(cell_viability,dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter,label=cell_viability)

#Loading model
#NOTE: unpickling can execute arbitrary code - only load model files from a trusted source
#Raw string: the backslash is a path separator, "\C" is not a valid escape sequence
with open(r"model_adjust\CTRP-L1000-3h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
#Import the submodule explicitly: "import scipy" alone does not guarantee scipy.stats is loaded
from scipy import stats
cl_prediction = loaded_model.predict(dtest)
print('Test_Best_Pearson:',stats.pearsonr(cl_prediction,cell_viability))
#mean_squared_error expects (y_true, y_pred); the value is the same either way
print("MSE_Test: %.3f" % (mean_squared_error(cell_viability,cl_prediction)))

Model Loaded Successfully!
Test_Best_Pearson: (0.03511552, 0.00032288975536606953)
MSE_Test: 3.892
