In [2]:
#Get the gene expression data of L1000_PhaseI_CCLE
import pandas as pd
from cmapPy.pandasGEXpress.parse import parse

#1. Select the landmark genes: rows of the gene annotation table whose pr_is_lm flag is "1".
# Raw strings keep the Windows-style backslashes literal instead of relying on
# unrecognised escape sequences such as "\P", "\G" and "\L".
gene_info = pd.read_csv(r"LINCS-L1000\PhaseI\GSE92742_Broad_LINCS_gene_info.txt", sep="\t", dtype=str)
landmark_gene_row_ids = gene_info.loc[gene_info["pr_is_lm"] == "1", "pr_gene_id"].tolist()

#2. Read the matching file: L1000_PhaseI_CCLE_result.txt
signature_temp = pd.read_csv(r"LINCS_CCLE_matched\L1000_PhaseI_CCLE_result.txt", sep="\t", dtype=str)
# De-duplicate the signature ids. unique() keeps first-appearance order, so the
# id list is identical on every run (a set has no stable iteration order).
sig_ids = signature_temp["sig_id"].unique().tolist()

#3. Parse only the landmark-gene rows and the matched signature columns of the GCTX matrix.
landmark_only_gctoo = parse(
    r"LINCS-L1000\PhaseI\GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx",
    rid=landmark_gene_row_ids,
    cid=sig_ids,
)

#4. Transpose the parsed matrix and save it; the frame's index is written as the first CSV column.
df = landmark_only_gctoo.data_df.T
df.to_csv("LINCS_CCLE_matched/LINCS_PhaseI_CCLE_gene_expression.csv")

In [3]:
#Get the gene expression data of L1000_PhaseII_CCLE
import pandas as pd
from cmapPy.pandasGEXpress.parse import parse

#1. Select the landmark genes: rows of the gene annotation table whose pr_is_lm flag is "1".
# Raw strings keep the Windows-style backslashes literal instead of relying on
# unrecognised escape sequences such as "\P", "\G" and "\L".
# NOTE(review): this annotation file carries the GSE92742 accession while the matrix
# parsed below is GSE70138 - confirm this is the intended gene annotation table.
gene_info = pd.read_csv(r"LINCS-L1000\PhaseII\GSE92742_Broad_LINCS_gene_info.txt", sep="\t", dtype=str)
landmark_gene_row_ids = gene_info.loc[gene_info["pr_is_lm"] == "1", "pr_gene_id"].tolist()

#2. Read the matching file: L1000_PhaseII_CCLE_result.txt
signature_temp = pd.read_csv(r"LINCS_CCLE_matched\L1000_PhaseII_CCLE_result.txt", sep="\t", dtype=str)
# De-duplicate the signature ids. unique() keeps first-appearance order, so the
# id list is identical on every run (a set has no stable iteration order).
sig_ids = signature_temp["sig_id"].unique().tolist()

#3. Parse only the landmark-gene rows and the matched signature columns of the GCTX matrix.
landmark_only_gctoo = parse(
    r"LINCS-L1000\PhaseII\GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328.gctx",
    rid=landmark_gene_row_ids,
    cid=sig_ids,
)

#4. Transpose the parsed matrix and save it; the frame's index is written as the first CSV column.
df = landmark_only_gctoo.data_df.T
df.to_csv("LINCS_CCLE_matched/LINCS_PhaseII_CCLE_gene_expression.csv")

In [4]:
#Merge the LINCS_PhaseI_CCLE_gene_expression with the LINCS_PhaseII_CCLE_gene_expression data
import pandas as pd
import numpy as np

#Merge the two per-phase tables into: L1000_CCLE_gene_expression.csv

# Read both inputs before touching the output, so a failed read cannot leave a
# truncated or empty merged file behind.
LINCS_PhaseI_CCLE_gene_expression = pd.read_csv(r"LINCS_CCLE_matched\LINCS_PhaseI_CCLE_gene_expression.csv")
LINCS_PhaseII_CCLE_gene_expression = pd.read_csv(r"LINCS_CCLE_matched\LINCS_PhaseII_CCLE_gene_expression.csv")

# Stack the Phase II rows under the Phase I rows and renumber the rows 0..n-1.
LINCS_CCLE_gene_expression = pd.concat(
    [LINCS_PhaseI_CCLE_gene_expression, LINCS_PhaseII_CCLE_gene_expression],
    axis=0,
    sort=False,
    ignore_index=True,
)

# Hand pandas the path rather than a text-mode handle: a handle opened without
# newline="" makes every line end in "\r\r\n" on Windows. The row index is still
# written as the first (unnamed) column, exactly as before.
LINCS_CCLE_gene_expression.to_csv(r"LINCS_CCLE_matched\L1000_CCLE_gene_expression.csv")

In [5]:
#Use the L1000_Achilles_96h model to predict the cell viability value of CCLE_L1000_24h
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData():
    """Load the matched L1000/CCLE metadata and gene expression tables and join them.

    Both files are read with every value kept as a string. The tables are joined
    with pandas' default (inner) merge, matching the metadata column 'sig_id'
    against the expression column 'cid'.

    Returns:
        tuple: (gene_expression, meta_data) where
            gene_expression is a copy of the merged columns from '5720' through
                the last column (values are still strings), and
            meta_data holds the merged columns from 'sig_id' through 'IsEffective'.
    """
    metadata_columns = [
        'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
        'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
        'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
        "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
        'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
    ]

    # Step 1: signature metadata, restricted to the columns reported downstream.
    viability_table = pd.read_table(r"LINCS_CCLE_matched\L1000_CCLE_result.txt", sep="\t", dtype=str)
    viability_subset = viability_table[metadata_columns]

    # Step 2: merged Phase I + Phase II gene expression table.
    expression_table = pd.read_csv(r"LINCS_CCLE_matched\L1000_CCLE_gene_expression.csv", sep=",", dtype=str)

    # Step 3: join the two tables on the signature identifier.
    merged = viability_subset.merge(expression_table, left_on='sig_id', right_on='cid')

    # Step 4: split the merged frame into expression values and metadata.
    # The expression slice depends on '5720' being the first gene column.
    expression_part = merged.loc[:, '5720':].copy()
    metadata_part = merged.loc[:, 'sig_id':'IsEffective']
    return expression_part, metadata_part
    
#Retrieving the data
gene_expression,meta_data = getData()

#Selecting the genes
selected_gene = {'1647': 1.4123577236878908, '11065': 2.5876422763121094, '890': 2.646070460520182, '9833': 4.646070460520182, '7105': 12.883143631583854, '10285': 15.116856368416146, '5111': 16.0, '23029': 16.646070460520182, '80349': 17.292140921040364, '1282': 17.76628726316771, '5236': 18.525853657872656, '79071': 18.71121951319102, '6659': 19.83143631583854, '1831': 20.058428184208072, '1509': 20.1786449868556, '7153': 24.4640650394332, '8091': 30.831436315838545, '4282': 31.17528455262422, '3162': 33.707859078959636, '10180': 34.82471544737578, '26292': 35.704498644728254, '2958': 37.63934959205742, '991': 38.9281300788664, '79090': 39.580921407849345, '873': 46.717940381653776, '1759': 47.343848236785675, '10962': 51.39555555253099, '211': 59.944932250023314, '3315': 65.47078590789596, '26036': 69.31902439489141, '9133': 69.33712736832291, '5796': 72.8213550131444, '54386': 77.22027099990677, '2048': 78.63934959205741, '481': 82.23035230260092, '8878': 83.9798373946117, '10898': 85.6427100262888, '22889': 85.78981030278739, '8446': 89.62254742090053, '23': 91.57420053938658, '11073': 93.24043360529505, '11098': 94.80325203971289, '9375': 95.0168021711569, '2131': 95.21691056567538, '4860': 95.22699186836952, '3725': 101.34384823678569, '387': 102.4989701840216, '3098': 106.48758807905286, '10273': 107.44390243404492, '3028': 109.12021680264752, '1029': 110.10005419725924, '10007': 110.69105690780273, '6275': 114.08661246033373, '1026': 114.3740921448681, '4927': 115.31360432870326, '5468': 120.33040649986016, '1786': 121.49094851328424, '3638': 124.22699186836952, '27346': 126.85831978968959, '9868': 127.57084010515521, '79921': 127.63132792132006, '9926': 128.54601626326095, '10494': 129.50775068444116, '8869': 132.90460703924674, '5347': 136.6641734339517, '993': 138.46612467139, '10589': 139.22233063186354, '11344': 139.25853657872656, '2274': 142.55609756595507, '5359': 144.20216802647528, '11188': 144.51241192094713, '10972': 147.44260163177034, '9212': 
147.57962060557475, '9217': 149.5876422763121, '868': 150.427100262888, '1958': 152.31696476293462, '51056': 153.20346882874986, '91949': 154.07989159187096, '3486': 154.5346341582922, '10058': 156.29886178950315, '7157': 157.04704607923932, '5627': 157.4627642371586, '6839': 159.43382113135078, '6603': 160.74146342127344, '4851': 162.24845527603242, '8349': 164.293441723315, '25793': 167.3404878025543, '9552': 168.56747967092383, '9943': 169.37409214486811, '10644': 171.1739837503496, '8720': 172.21018969721266, '5580': 175.90460703924674, '9703': 176.5674796709238, '5921': 180.06720868462762, '10904': 180.25723577645195, '10954': 180.9449322500233, '8508': 185.55273713172372, '30001': 189.9818970265685, '4125': 190.18200542108696, '949': 191.49560974979025, '6709': 196.56281843441786, '11157': 200.78102980236784, '6117': 201.213550131444, '1635': 205.58764227631212, '7398': 206.38883468406823, '9915': 209.25853657872656, '55012': 210.76086719697955, '10013': 211.31436315838542, '25932': 211.61582655243774, '66008': 211.97647696038032, '55556': 215.02818427612564, '2309': 215.81669377663843, '5440': 218.35934960566794, '8985': 220.91468834194086, '5715': 222.61452575016315, '1017': 224.31360432870326, '25803': 226.070569118859, '9805': 227.16856368416146, '2113': 229.2290515003263, '1021': 229.5010298159784, '22908': 230.36195121021714, '64746': 231.8368563820267, '26054': 232.3042818556913, '10682': 232.7166395793792, '5770': 237.45734417097046, '3480': 239.888563697772, '200081': 240.24509484180103, '5110': 241.94829268425465, '23300': 243.1215176049221, '9653': 244.42037939442525, '1454': 246.25387534222054, '9533': 246.52455285559807, '5223': 247.20086722420066, '55324': 247.58092140784936, '10681': 248.54395663130416, '3693': 248.56747967092383, '665': 250.28336042062085, '2920': 251.02558267157644, '8061': 251.8448780527641, '7264': 253.73604335508526, '23326': 255.0067208684628, '1277': 258.53929539479816, '10237': 259.93279131537236, '8553': 
259.9503523162115, '5054': 260.09203252652185, '7082': 260.4956097497902, '3383': 261.82807588160716, '780': 264.4956097497902, '10915': 265.5237940259159, '998': 268.1941463557379, '4582': 268.39349592057425, '5048': 268.4620054074764, '2146': 269.8751219608465, '3337': 269.9395121838351, '7077': 271.22493223641277, '8270': 271.6333875532768, '5641': 273.23165310487553, '10782': 273.34850947329164, '80347': 274.40357722326837, '58533': 274.6346883555514, '392': 276.04628724955717, '9170': 276.57886177589256, '3895': 276.63392952586923, '1605': 277.48758807905284, '836': 277.93279131537236, '5525': 279.177344184581, '1846': 279.60108401323765, '9183': 280.039024408502, '4144': 280.6246070528573, '3156': 281.51707315745307, '23585': 281.86428182847015, '4043': 282.16520324993013, '3978': 282.82601624965037, '51097': 283.51035228899036, '965': 284.68845530325353, '9897': 285.16314361797333, '54541': 286.3384281705975, '847': 286.6098645136572, '93487': 287.16986448643604, '2956': 287.75750676274816, '29937': 288.51111111867255, '9276': 289.5721409074298, '4303': 290.47880757863334, '4690': 291.18406505304375, '896': 291.6561517632143, '55893': 291.76758806544234, '2770': 292.1189160003729, '84722': 293.09799456530243, '25825': 294.64010842173957, '9797': 294.80791327621887, '29890': 295.2732791179267, '23386': 296.3384281705975, '54512': 297.20422765843205, '6804': 298.34850947329164, '3300': 299.1075338954042, '23271': 301.98449863111773, '90861': 303.1639024476555, '23588': 305.111436302228, '6390': 305.9395121838351, '58497': 306.29008128908356, '9181': 307.84617885503866, '55604': 309.2592954084087, '581': 309.29756098722856, '6944': 309.3552303417544, '701': 309.99794036804326, '976': 310.02558267157644, '2523': 310.99457993381185, '2896': 313.8738211585719, '2109': 313.9844986311177, '1019': 314.40227642099376, '10206': 315.46200540747645, '7168': 316.6990785785401, '10051': 316.8011924077561, '4780': 317.2652574471893, '55847': 317.98861789503127, '4846': 
318.53929539479816, '10320': 320.2290515003263, '6856': 320.63728996010065, '23139': 320.8402168162581, '22883': 320.8911653023212, '1677': 321.4250406309312, '10953': 322.29008128908356, '332': 322.70710024927746, '8480': 323.560758802461, '6182': 327.52043359168454, '11182': 328.55945800018645, '9847': 329.0423848427333, '29082': 329.30092142145986, '11044': 329.3451490390603, '51335': 329.6333875532768, '3682': 329.98731709275665, '207': 330.1988075922439, '7020': 330.64065039433206, '9801': 331.6003251835554, '148022': 333.2538753422205, '6850': 333.41159889400564, '54442': 333.7300813163047, '4893': 336.8011924077561, '54205': 337.5728997371119, '891': 338.8697018946583, '9928': 339.1907859215066, '3122': 340.87848239507787, '23338': 341.5816802375315, '10775': 342.07989159187093, '7849': 342.32910569758553, '64943': 342.883902461266, '3108': 343.2370731710637, '5601': 344.138319776079, '9261': 344.40433605295055, '466': 344.82807588160716, '7485': 349.324986433672, '23161': 349.6078048817005}
# Keep only the selected genes, in the key order of selected_gene, and convert
# the string values to float32 for XGBoost.
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())], dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter)

#Loading model
# NOTE(security): pickle.load can execute arbitrary code stored in the file, so
# only load model files that come from a trusted source.
# The with-block closes the file handle once the model has been read; the raw
# string keeps the backslash literal instead of relying on the unrecognised "\A" escape.
with open(r"model_adjust\Achilles-L1000-96h_XGBoost_1.dat", "rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)

#Create a new file to record the predicted cell viability values
# Metadata columns copied to the output, in output order; the prediction is
# appended as the last column of every row.
output_columns = [
    'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
    'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
    'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
    "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
    'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
]

# The with-block guarantees the file is flushed and closed even if a row fails.
with open(r"cell_viability_pred\L1000_Achilles_96h_clpred_all.txt", "w", encoding="utf-8") as cell_viability_file:
    cell_viability_file.write("\t".join(output_columns + ["L1000_Achilles_96h_pred"]) + "\n")
    # meta_data is addressed by the labels 0..len-1, the same positions used to
    # index cl_prediction, so row i of the metadata is paired with prediction i.
    for index in range(len(meta_data)):
        fields = [str(meta_data.at[index, column]) for column in output_columns]
        fields.append(str(cl_prediction[index]))
        cell_viability_file.write("\t".join(fields) + "\n")

Model Loaded Successfully!


In [3]:
#For each drug/cell-line pair, keep the signature with the lowest predicted cell viability and write it to the file: L1000_Achilles_96h_clpred_min.txt
import pandas as pd

# NOTE(review): the input is the "_filter" table, not the "_all" table written by the
# prediction cell. No cell visible in this notebook creates it, so it presumably comes
# from a separate filtering step - confirm. To process the unfiltered predictions,
# read r"cell_viability_pred\L1000_Achilles_96h_clpred_all.txt" instead.
predcv_file = pd.read_table(r"cell_viability_pred\L1000_Achilles_96h_clpred_filter.txt")

pred_column = 'L1000_Achilles_96h_pred'
# Metadata columns copied to the output, in output order.
meta_columns = [
    'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
    'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
    'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
    "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
    'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
]

# The with-block guarantees the output is closed even if a group fails.
with open(r"cell_viability_pred\L1000_Achilles_96h_clpred_min.txt", "w", encoding="utf-8") as min_predcv_file:
    min_predcv_file.write("\t".join(meta_columns + ['L1000_Achilles_96h_minpred']) + "\n")
    # sort=False keeps first-appearance order (drugs, then cell lines within each
    # drug), which is the order the output rows are written in. Grouping also
    # avoids rescanning the whole table for every drug/cell-line pair. Rows with
    # a missing pert_id or cell_id belong to no group and are skipped.
    for drug, drug_rows in predcv_file.groupby('pert_id', sort=False):
        for cell_line, selected_info in drug_rows.groupby('cell_id', sort=False):
            min_cvpred = selected_info[pred_column].min()
            # All rows tied at the minimum; empty when every prediction is missing.
            tied_rows = selected_info[selected_info[pred_column] == min_cvpred]
            # Report the pair only when the tied rows agree on IsEffective.
            if len(tied_rows['IsEffective'].unique()) != 1:
                continue
            # First tied row in file order; its label indexes predcv_file directly.
            min_cvpred_index = tied_rows.index[0]
            fields = [str(predcv_file.at[min_cvpred_index, column]) for column in meta_columns]
            fields.append(str(predcv_file.at[min_cvpred_index, pred_column]))
            min_predcv_file.write("\t".join(fields) + "\n")

  # Remove the CWD from sys.path while we load stuff.
will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  del sys.path[0]


In [1]:
#Use the L1000_CTRP_24h model to predict the cell viability value of CCLE_L1000_24h
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData():
    """Load the matched L1000/CCLE metadata and gene expression tables and join them.

    Both files are read with every value kept as a string. The tables are joined
    with pandas' default (inner) merge, matching the metadata column 'sig_id'
    against the expression column 'cid'.

    Returns:
        tuple: (gene_expression, meta_data) where
            gene_expression is a copy of the merged columns from '5720' through
                the last column (values are still strings), and
            meta_data holds the merged columns from 'sig_id' through 'IsEffective'.
    """
    metadata_columns = [
        'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
        'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
        'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
        "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
        'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
    ]

    # Step 1: signature metadata, restricted to the columns reported downstream.
    viability_table = pd.read_table(r"LINCS_CCLE_matched\L1000_CCLE_result.txt", sep="\t", dtype=str)
    viability_subset = viability_table[metadata_columns]

    # Step 2: merged Phase I + Phase II gene expression table.
    expression_table = pd.read_csv(r"LINCS_CCLE_matched\L1000_CCLE_gene_expression.csv", sep=",", dtype=str)

    # Step 3: join the two tables on the signature identifier.
    merged = viability_subset.merge(expression_table, left_on='sig_id', right_on='cid')

    # Step 4: split the merged frame into expression values and metadata.
    # The expression slice depends on '5720' being the first gene column.
    expression_part = merged.loc[:, '5720':].copy()
    metadata_part = merged.loc[:, 'sig_id':'IsEffective']
    return expression_part, metadata_part
    
#Retrieving the data
gene_expression,meta_data = getData()

#Selecting the genes
selected_gene = {'6182': 0.9943611028318613, '5997': 1.994361102831861, '4864': 3.4971805514159304, '5427': 4.022555588672554, '7153': 5.999999999999999, '10775': 7.468986065575238, '5111': 11.45770827123896, '6790': 14.0733056631858, '1001': 14.98872220566372, '6184': 15.949249925486752, '983': 19.078944560353936, '11065': 23.016916691504413, '230': 25.457708271238957, '3315': 27.598680700442422, '10013': 27.904138748141644, '1958': 29.62687518628312, '4927': 29.94361102831861, '7538': 31.59304180327428, '11098': 32.60431959761056, '481': 32.90977764530978, '3303': 35.00563889716814, '7157': 40.09022235469021, '1022': 42.344930327876185, '10190': 45.36748591654874, '9903': 46.418235991061984, '6622': 49.85338867362839, '291': 51.05075007451324, '11041': 52.694541952300774, '890': 54.06202786884952, '10904': 57.344930327876185, '2542': 59.10150014902649, '813': 62.55920842026544, '3066': 65.5535695230973, '84617': 66.42387488823012, '9833': 67.89849985097351, '10362': 70.49718055141592, '29083': 73.95488882265488, '51170': 80.581764008938, '54386': 82.88722205663723, '8974': 84.09586125185835, '5236': 86.23215238451341, '3156': 87.24247257822995, '8985': 89.88062555884936, '4144': 93.52069374070821, '23588': 93.82423658716796, '1459': 94.73401423247775, '23463': 94.78572190761074, '5777': 97.0451111773451, '5054': 98.2772635618585, '1454': 99.91541654247791, '54541': 100.19831900150457, '7168': 100.23779128168154, '26227': 100.2603468703541, '51742': 103.02819448584069, '54733': 104.95488882265487, '8480': 106.55261192247757, '8727': 108.96052771982302, '1759': 109.57048621460173, '23300': 115.5084583457522, '7994': 117.94265342769887, '8508': 120.46238956778735, '6919': 124.92009783902631, '23636': 126.31673584203548, '2222': 126.86466646796467, '10857': 128.5582508196457, '7319': 129.17012451566387, '22889': 129.57048621460171, '58497': 135.64379187778752, '2309': 138.74156833088534, '3628': 138.85434627424812, '9688': 139.51313964230062, '6275': 
144.63815298061937, '873': 145.1400148285837, '26292': 145.88062555884935, '93487': 154.57048621460171, '10681': 156.58740290610612, '22926': 156.63911058123912, '5873': 158.18044470938042, '9897': 158.796999701947, '8349': 159.55920842026543, '10915': 160.29981915053108, '9650': 161.45111177345106, '10797': 161.79795730256672, '23659': 162.85434627424812, '211': 163.45770827123894, '4043': 163.62879038752257, '1786': 168.62687518628312, '8914': 170.47558256336308, '10898': 171.14756892699134, '10058': 171.4586658718587, '6603': 171.85806997017679, '836': 173.14756892699134, '2523': 173.32801363637176, '9143': 173.55920842026543, '22908': 173.7293329359293, '11188': 174.1588467213276, '11157': 174.72933293592934, '8895': 174.73965312964586, '1514': 175.97840201194717, '7077': 176.22651348734524, '9375': 176.3336525335399, '9276': 176.68422175858421, '16': 176.77912540982285, '6709': 177.20768159460138, '10237': 177.22651348734524, '5048': 177.54229172876103, '3611': 177.73965312964586, '949': 178.83647198212395, '23386': 179.22651348734527, '3312': 179.30918174362785, '79090': 179.33833383008832, '10285': 180.22651348734527, '3978': 180.4069581967257, '4282': 181.08085976159342, '60528': 181.2368336810618, '6117': 182.8421108792921, '1647': 184.23779128168155, '2958': 185.3214171385839, '55556': 185.42951378539826, '4860': 185.8421108792921, '25793': 186.9323332339823, '5883': 187.45675067061921, '1111': 187.59495700451376, '701': 187.8421108792921, '6856': 187.92105543964604, '80347': 188.22087459017712, '51031': 188.54133412814127, '10206': 188.93797213115045, '9170': 189.72369403876118, '84722': 190.11841684053087, '26036': 190.9436110283186, '3895': 192.84774977646023, '3638': 194.36748591654873}
# Keep only the selected genes, in the key order of selected_gene, and convert
# the string values to float32 for XGBoost.
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())], dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter)

#Loading model
# NOTE(security): pickle.load can execute arbitrary code stored in the file, so
# only load model files that come from a trusted source.
# The with-block closes the file handle once the model has been read; the raw
# string keeps the backslash literal instead of relying on the unrecognised "\C" escape.
with open(r"model_adjust\CTRP-L1000-24h_XGBoost_1.dat", "rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)

#Create a new file to record the predicted cell viability values
# Metadata columns copied to the output, in output order; the prediction is
# appended as the last column of every row.
output_columns = [
    'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
    'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
    'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
    "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
    'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
]

# The with-block guarantees the file is flushed and closed even if a row fails.
with open(r"cell_viability_pred\L1000_CTRP_24h_clpred_all.txt", "w", encoding="utf-8") as cell_viability_file:
    cell_viability_file.write("\t".join(output_columns + ["L1000_CTRP_24h_pred"]) + "\n")
    # meta_data is addressed by the labels 0..len-1, the same positions used to
    # index cl_prediction, so row i of the metadata is paired with prediction i.
    for index in range(len(meta_data)):
        fields = [str(meta_data.at[index, column]) for column in output_columns]
        fields.append(str(cl_prediction[index]))
        cell_viability_file.write("\t".join(fields) + "\n")

Model Loaded Successfully!


In [2]:
#For each drug/cell-line pair, keep the signature with the lowest predicted cell viability and write it to the file: L1000_CTRP_24h_clpred_min.txt
import pandas as pd

# NOTE(review): the input is the "_filter" table, not the "_all" table written by the
# prediction cell. No cell visible in this notebook creates it, so it presumably comes
# from a separate filtering step - confirm. To process the unfiltered predictions,
# read r"cell_viability_pred\L1000_CTRP_24h_clpred_all.txt" instead.
predcv_file = pd.read_table(r"cell_viability_pred\L1000_CTRP_24h_clpred_filter.txt")

pred_column = 'L1000_CTRP_24h_pred'
# Metadata columns copied to the output, in output order.
meta_columns = [
    'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
    'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
    'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
    "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
    'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
]

# The with-block guarantees the output is closed even if a group fails.
with open(r"cell_viability_pred\L1000_CTRP_24h_clpred_min.txt", "w", encoding="utf-8") as min_predcv_file:
    min_predcv_file.write("\t".join(meta_columns + ['L1000_CTRP_24h_minpred']) + "\n")
    # sort=False keeps first-appearance order (drugs, then cell lines within each
    # drug), which is the order the output rows are written in. Grouping also
    # avoids rescanning the whole table for every drug/cell-line pair. Rows with
    # a missing pert_id or cell_id belong to no group and are skipped.
    for drug, drug_rows in predcv_file.groupby('pert_id', sort=False):
        for cell_line, selected_info in drug_rows.groupby('cell_id', sort=False):
            min_cvpred = selected_info[pred_column].min()
            # All rows tied at the minimum; empty when every prediction is missing.
            tied_rows = selected_info[selected_info[pred_column] == min_cvpred]
            # Report the pair only when the tied rows agree on IsEffective.
            if len(tied_rows['IsEffective'].unique()) != 1:
                continue
            # First tied row in file order; its label indexes predcv_file directly.
            min_cvpred_index = tied_rows.index[0]
            fields = [str(predcv_file.at[min_cvpred_index, column]) for column in meta_columns]
            fields.append(str(predcv_file.at[min_cvpred_index, pred_column]))
            min_predcv_file.write("\t".join(fields) + "\n")

In [3]:
#Use the L1000_CTRP_6h model to predict the cell viability value of CCLE_L1000_24h
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData():
    """Load the matched L1000/CCLE metadata and gene expression tables and join them.

    Both files are read with every value kept as a string. The tables are joined
    with pandas' default (inner) merge, matching the metadata column 'sig_id'
    against the expression column 'cid'.

    Returns:
        tuple: (gene_expression, meta_data) where
            gene_expression is a copy of the merged columns from '5720' through
                the last column (values are still strings), and
            meta_data holds the merged columns from 'sig_id' through 'IsEffective'.
    """
    metadata_columns = [
        'sig_id', 'pert_id', 'pert_iname', 'pert_type', 'cell_id',
        'pert_dose', 'pert_dose_unit', 'pert_idose', 'pert_itime',
        'L1000_is_touchstone', 'inchi_key', 'pubchem_cid', 'CCLE_Target',
        "Doses (uM)", "EC50 (uM)", "IC50 (uM)",
        'Amax', 'ActArea', 'ActArea_std', 'IsEffective',
    ]

    # Step 1: signature metadata, restricted to the columns reported downstream.
    viability_table = pd.read_table(r"LINCS_CCLE_matched\L1000_CCLE_result.txt", sep="\t", dtype=str)
    viability_subset = viability_table[metadata_columns]

    # Step 2: merged Phase I + Phase II gene expression table.
    expression_table = pd.read_csv(r"LINCS_CCLE_matched\L1000_CCLE_gene_expression.csv", sep=",", dtype=str)

    # Step 3: join the two tables on the signature identifier.
    merged = viability_subset.merge(expression_table, left_on='sig_id', right_on='cid')

    # Step 4: split the merged frame into expression values and metadata.
    # The expression slice depends on '5720' being the first gene column.
    expression_part = merged.loc[:, '5720':].copy()
    metadata_part = merged.loc[:, 'sig_id':'IsEffective']
    return expression_part, metadata_part
    
#Retrieving the data
# getData() returns two frames built from the same merged table: the gene
# expression columns and the signature metadata columns.
gene_expression,meta_data = getData()

#Selecting the genes
# Mapping of gene-ID column label -> float. Only the keys are used in this cell
# (they choose and order the expression columns); the float values are not read here.
# NOTE(review): presumably the key order has to match the feature order the pickled
# model was trained with -- confirm before editing or re-sorting this dict.
selected_gene = {'4609': 0.0, '5427': 3.5188933726509135, '55818': 5.022672047181096, '5997': 5.962213254698173, '665': 6.481106627349087, '6804': 8.45843458016799, '1831': 10.549122768892376, '3725': 13.962213254698172, '23212': 17.075573490603652, '4144': 20.87908241503415, '10695': 20.939541207517077, '79073': 21.617138910435663, '9761': 22.90175446221525, '23210': 23.87152506597379, '23636': 23.98488530187927, '1647': 26.52645072171128, '230': 28.299730249900318, '8870': 30.136032283086575, '79071': 31.863967716913425, '4864': 33.022672047181096, '1050': 36.6095815613753, '1958': 36.939541207517074, '4616': 39.68515505197895, '54541': 40.68515505197895, '622': 40.730499146341145, '8396': 46.79595157537013, '6709': 47.31484494802105, '9531': 47.488663976409455, '11041': 58.81362998600516, '26036': 59.780836877249406, '991': 61.17125531587411, '7538': 61.8060726369448, '3312': 63.14358963214694, '5092': 66.30985131147499, '8318': 68.55668011795274, '80746': 70.46855564174265, '6810': 70.67759770291859, '10493': 74.17881266493447, '5696': 75.56423746701311, '22823': 82.68259133946465, '6182': 87.9118755237899, '23338': 91.79851528788444, '4043': 99.67759770291859, '1950': 101.92186279688204, '3162': 102.6296898960421, '9903': 102.86653142942772, '23300': 103.934547570971, '6772': 105.3904184386247, '10285': 105.97233431627284, '10915': 106.00256371251429, '79850': 106.1712553158741, '3337': 106.42820518392652, '874': 108.6322536085564, '22809': 108.79338786285585, '11188': 111.85897408036735, '22827': 114.78839422630976, '5467': 115.56923110355918, '4303': 119.07813720311796, '51719': 120.11079652339119, '51070': 126.07300977808936, '9686': 126.1964910755695, '8878': 127.0025637125143, '6856': 127.72550550979508, '949': 132.22928418432525, '25932': 132.63468353258816, '5359': 134.89419711315486, '5048': 136.31740866053536, '3066': 138.02010833466682, '79143': 138.9319838584567, '22889': 141.75560111755402, '25805': 143.51133602359056, '10112': 143.94709855657746, 
'5985': 143.95209219312352, '2274': 146.23684153338561, '5909': 149.81106627349087, '80347': 150.09824553778478, '8508': 150.16126804278196, '51282': 150.40553313674542, '3682': 152.72037808476645, '890': 152.83630203318626, '11344': 153.94709855657746, '3978': 153.97989166533318, '8826': 155.65735557976924, '6603': 156.11835387245154, '10782': 156.41052677329148, '25793': 156.48367033986338, '392': 158.71538444822042, '148022': 158.9798916653332, '1677': 159.5668011795274, '24149': 161.56180754298134, '3108': 166.55411640543844, '4793': 169.03022939624148, '2771': 169.60445413634673, '5708': 170.29473661335425, '10775': 177.82861089564338, '7398': 179.392982151139, '10559': 182.0025637125143, '9143': 182.83886574570056, '211': 182.9319838584567, '993': 183.84142945821486, '9688': 188.61970262294997, '10237': 190.14601955617871, '8624': 190.3425106317482, '8480': 191.94966226909173, '4927': 194.53144435825735, '5110': 194.6952761135536, '11065': 196.55425019392098, '8726': 196.81619369851947, '11044': 197.94709855657746, '54505': 201.8287446841259, '55324': 202.1763827409027, '6390': 207.41309048580578, '1019': 209.77341331667157, '4216': 213.20404842462986, '55556': 213.80350892443047, '23077': 214.8967608256692, '4846': 215.28218562774782, '10953': 215.83373832067196, '10320': 216.05802886845115, '6622': 216.10337296281335, '1021': 217.74548005597933, '4791': 218.62982368452464, '3098': 219.29217290083994, '3383': 221.01754462215249, '6657': 221.4836703398634, '22883': 221.64224088164852, '3028': 221.82618097161162, '10058': 221.91943287285025, '4775': 222.31727487205282, '200081': 222.3904184386247, '8061': 222.42833897240905, '9653': 222.90175446221522, '66008': 222.91686916033598, '10589': 223.48367033986338, '9918': 226.3476380567768, '11073': 227.33751699520218, '11230': 228.35519540583718, '23': 228.8186236225512, '10857': 229.77584324070332, '10681': 229.8941971131549, '1759': 229.92186279688204, '51422': 229.99487257497142, '8720': 230.78083687724944, 
'5641': 230.78839422630978, '1906': 231.00755734906036, '8349': 231.76572217912866, '8985': 232.33751699520215, '10206': 233.40553313674542, '23588': 233.96221325469818, '9217': 234.33751699520212, '701': 234.3450743442625, '9276': 234.39541207517078, '10898': 235.39541207517078, '6117': 236.45087723110763, '836': 236.89919074970095, '58497': 237.39797578768508, '4860': 237.46599192922838, '9375': 237.90931181127564, '3638': 238.32240229708142, '2523': 238.39797578768508, '5580': 238.87651870251983, '1111': 239.01268477408897, '466': 239.21916312275062, '873': 239.44831351859335, '9868': 239.89163340064061, '7077': 239.96720689124425, '9170': 240.92186279688204, '7157': 241.3048576749289, '22908': 241.49365761295553, '29890': 241.52388700919695, '6850': 242.17138910435665, '7168': 242.38286108956433, '11157': 242.9974362874857, '84722': 243.53144435825735, '23368': 244.244398882446, '23244': 244.46585814074584, '8553': 245.92429272091383, '9897': 246.5390017073177, '868': 246.85128294282447, '3156': 247.01255098560645, '23585': 247.86140400439913, '23386': 248.50877231107623, '5743': 248.54898898040986, '3895': 249.4785429148348, '5289': 250.87395499000556, '51097': 252.3425106317482, '6275': 254.00499363654606, '1848': 254.65222815474064, '80204': 258.6574893682518, '51031': 259.8892034766088, '10644': 261.5693648920417, '5588': 262.23184789683955, '7994': 262.3375169952021}
# Keep only the selected gene columns, in dict-key order, and convert the string
# cells (the tables are read with dtype=str) into a float32 array.
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
# Wrap the feature array in the xgboost container that is passed to predict() below.
dtest = xgb.DMatrix(gene_expression_filter)

#Loading model
# The path is a raw string so the backslash is not parsed as an (invalid) "\C"
# escape; the resulting path is the same as before.
# The with-block closes the model file as soon as it has been unpickled.
# NOTE: pickle.load can execute arbitrary code contained in the file -- only load
# model files that come from a trusted source.
with open(r"model_adjust\CTRP-L1000-6h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
# One prediction per row of dtest, i.e. per row of the merged signature table.
cl_prediction = loaded_model.predict(dtest)

#Create a new file to record the predicted cell viability values
# Metadata columns written for every signature, in output order. The header and
# each data row are generated from this single list so they cannot drift apart.
meta_columns = ['sig_id','pert_id','pert_iname','pert_type','cell_id','pert_dose','pert_dose_unit','pert_idose','pert_itime','L1000_is_touchstone','inchi_key','pubchem_cid','CCLE_Target',"Doses (uM)","EC50 (uM)","IC50 (uM)",'Amax','ActArea','ActArea_std','IsEffective']

# The with-block closes the file even if writing a row raises.
with open(r"cell_viability_pred\L1000_CTRP_6h_clpred_all.txt","w",encoding="utf-8") as cell_viability_file:
    # Header: the metadata columns followed by the prediction column.
    cell_viability_file.write("\t".join(meta_columns + ["L1000_CTRP_6h_pred"]) + "\n")
    # Row i of meta_data and element i of cl_prediction describe the same signature.
    for index in range(len(meta_data)):
        fields = [str(meta_data.at[index,column]) for column in meta_columns]
        fields.append(str(cl_prediction[index]))
        cell_viability_file.write("\t".join(fields) + "\n")

Model Loaded Successfully!


In [4]:
#Obtain the lowest cell viability value and output to the file：L1000_CTRP_6h_clpred_min.txt
import pandas as pd

#predcv_file = pd.read_table(r"cell_viability_pred\L1000_CTRP_24h_clpred_all.txt")
# NOTE(review): this cell reads "L1000_CTRP_6h_clpred_filter.txt", while the
# prediction cell writes "L1000_CTRP_6h_clpred_all.txt"; the step that creates the
# "_filter" file is not visible in this part of the notebook -- confirm it exists.
predcv_file = pd.read_table(r"cell_viability_pred\L1000_CTRP_6h_clpred_filter.txt")

# Columns copied unchanged from the chosen row, in output order.
meta_columns = ['sig_id','pert_id','pert_iname','pert_type','cell_id','pert_dose','pert_dose_unit','pert_idose','pert_itime','L1000_is_touchstone','inchi_key','pubchem_cid','CCLE_Target',"Doses (uM)","EC50 (uM)","IC50 (uM)",'Amax','ActArea','ActArea_std','IsEffective']
pred_column = 'L1000_CTRP_6h_pred'

# The with-block closes the output file even if processing a group raises.
with open(r"cell_viability_pred\L1000_CTRP_6h_clpred_min.txt","w",encoding="utf-8") as min_predcv_file:
    min_predcv_file.write("\t".join(meta_columns + ["L1000_CTRP_6h_minpred"]) + "\n")
    for drug in predcv_file['pert_id'].unique():
        # Filter by drug once, then build the cell-line mask on that filtered frame,
        # so the boolean mask always has the same index as the frame it selects from.
        drug_rows = predcv_file[predcv_file['pert_id'] == drug]
        for cell_line in drug_rows['cell_id'].unique():
            selected_info = drug_rows[drug_rows['cell_id'] == cell_line].reset_index(drop=True)
            min_cvpred = selected_info[pred_column].min()
            # Every row that ties for the lowest predicted value.
            tied_rows = selected_info[selected_info[pred_column] == min_cvpred]
            # Write a row only when all tied rows agree on IsEffective. This also
            # skips groups without a usable minimum (no rows, or only missing
            # predictions), for which tied_rows is empty.
            if len(tied_rows['IsEffective'].unique()) != 1:
                continue
            # First row holding the minimum; after reset_index the label is the position.
            min_cvpred_index = tied_rows.index[0]
            fields = [str(selected_info.at[min_cvpred_index,column]) for column in meta_columns + [pred_column]]
            min_predcv_file.write("\t".join(fields) + "\n")