In [2]:
#Obtain the gene expression data of L1000_PhaseI_NCI60
import pandas as pd
from cmapPy.pandasGEXpress.parse import parse

# Export the Phase I (GSE92742) Level 5 signatures restricted to landmark genes.
# Paths use forward slashes: the original non-raw strings contained "\P", "\G"
# and "\L", which are invalid escape sequences (a SyntaxWarning on recent Python
# versions), and the output below was already written with "/".

#1.look for the landmark genes
gene_info = pd.read_csv("LINCS_L1000/PhaseI/GSE92742_Broad_LINCS_gene_info.txt",sep="\t",dtype=str)
# Everything is read as str, so the landmark flag is compared to the string "1".
landmark_gene_row_ids = gene_info["pr_gene_id"][gene_info["pr_is_lm"] == "1"]

#2.Read the matching file: LINCS_L1000_Chem_PhaseI.csv
signature_temp = pd.read_csv("LINCS_L1000_Chem_pred/LINCS_L1000_Chem_PhaseI.csv",dtype=str)
# De-duplicate while keeping first-seen order (a set gives an arbitrary order),
# and drop missing ids so a NaN is never passed on as a column id.
sig_ids = signature_temp['sig_id'].dropna().drop_duplicates().tolist()

# rid / cid select the rows (genes) and columns (signatures) to load from the GCTX file.
landmark_only_gctoo = parse("LINCS_L1000/PhaseI/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx",rid=landmark_gene_row_ids,cid=sig_ids)

# Transpose so each row is a signature and each column a gene; the row labels
# are written as the first CSV column.
df = landmark_only_gctoo.data_df.T
df.to_csv("LINCS_L1000_Chem_pred/LINCS_PhaseI_gene_expression.csv")

In [6]:
#Obtain the gene expression data of L1000_PhaseII_NCI60
import pandas as pd
from cmapPy.pandasGEXpress.parse import parse

# Export the Phase II (GSE70138) Level 5 signatures restricted to landmark genes.
# Paths use forward slashes: the original non-raw strings contained "\P", "\G"
# and "\L", which are invalid escape sequences (a SyntaxWarning on recent Python
# versions), and the output below was already written with "/".

#1.look for the landmark genes
# NOTE(review): this reads a GSE92742-named gene-info file from the PhaseII
# folder while the matrix below is GSE70138 -- confirm this is the intended file.
gene_info = pd.read_csv("LINCS_L1000/PhaseII/GSE92742_Broad_LINCS_gene_info.txt",sep="\t",dtype=str)
# Everything is read as str, so the landmark flag is compared to the string "1".
landmark_gene_row_ids = gene_info["pr_gene_id"][gene_info["pr_is_lm"] == "1"]

#2.Read the matching file: LINCS_L1000_Chem_PhaseII.csv
signature_temp = pd.read_csv("LINCS_L1000_Chem_pred/LINCS_L1000_Chem_PhaseII.csv",dtype=str)
# De-duplicate while keeping first-seen order (a set gives an arbitrary order),
# and drop missing ids so a NaN is never passed on as a column id.
sig_ids = signature_temp['sig_id'].dropna().drop_duplicates().tolist()

# rid / cid select the rows (genes) and columns (signatures) to load from the GCTX file.
landmark_only_gctoo = parse("LINCS_L1000/PhaseII/GSE70138_Broad_LINCS_Level5_COMPZ_n118050x12328.gctx",rid=landmark_gene_row_ids,cid=sig_ids)

# Transpose so each row is a signature and each column a gene; the row labels
# are written as the first CSV column.
df = landmark_only_gctoo.data_df.T
df.to_csv("LINCS_L1000_Chem_pred/LINCS_PhaseII_gene_expression.csv")

In [7]:
#Combine the dataset of LINCS_PhaseI_gene_expression and LINCS_PhaseII_gene_expression
import pandas as pd
import numpy as np

# Stack the Phase I and Phase II expression tables into L1000_gene_expression.csv.
# Forward-slash paths match how the two input files were written by the export
# cells and work on every platform.
LINCS_PhaseI_NCI60_gene_expression = pd.read_csv("LINCS_L1000_Chem_pred/LINCS_PhaseI_gene_expression.csv")
LINCS_PhaseII_NCI60_gene_expression = pd.read_csv("LINCS_L1000_Chem_pred/LINCS_PhaseII_gene_expression.csv")

# Row-wise concatenation; ignore_index renumbers the rows 0..n-1 (same result
# as a following reset_index(drop=True)).
LINCS_NCI60_gene_expression = pd.concat(
    [LINCS_PhaseI_NCI60_gene_expression,LINCS_PhaseII_NCI60_gene_expression],
    axis=0,sort=False,ignore_index=True)

# The output is opened only after both inputs were read, so a failed read no
# longer leaves a truncated file behind. newline="" is what pandas requires of
# a caller-supplied file object; without it text-mode newline translation is
# applied on top of the line terminator pandas writes.
# The row index is still written as the first (unnamed) column, as before.
with open("LINCS_L1000_Chem_pred/L1000_gene_expression.csv","w",newline="") as LINCS_NCI60_gene_expression_file:
    LINCS_NCI60_gene_expression.to_csv(LINCS_NCI60_gene_expression_file)

In [10]:
#Use the L1000_Achilles_96h model to predict the cell viability value of LINCS_L1000_Chem
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
import pickle
import scipy as sc
from sklearn.metrics import mean_squared_error
pd.set_option('display.max_columns',15)
warnings.filterwarnings("ignore")

#Retrieving the data
def getData(sig_info_path="LINCS_L1000_Chem_pred/LINCS_L1000_Chem.csv",
            gene_expression_path="LINCS_L1000_Chem_pred/L1000_gene_expression.csv",
            first_gene_column='5720'):
    """Join signature metadata with the gene-expression table.

    Parameters
    ----------
    sig_info_path : str
        Tab-separated signature table; must contain the columns sig_id,
        pert_id, pert_iname, pert_type, cell_id, pert_idose and pert_itime.
    gene_expression_path : str
        Comma-separated expression table with a 'cid' column holding the
        signature id, followed by the gene columns.
    first_gene_column : str
        Label of the first gene column; every column from this one to the
        last column of the merged table is returned as expression data.

    Returns
    -------
    (gene_expression, meta_data) : tuple of DataFrame
        Row-aligned frames with a 0..n-1 index. All values are strings
        because both files are read with dtype=str. Only signatures present
        in both files are kept (inner join on sig_id == cid).

    Raises
    ------
    KeyError
        If a required metadata column or first_gene_column is missing.
    """
    meta_columns = ['sig_id','pert_id','pert_iname','pert_type','cell_id','pert_idose','pert_itime']

    #1.Reading the data
    sig_viability_info = pd.read_csv(sig_info_path,sep="\t",dtype=str)
    sig_viability_info_filter = sig_viability_info[meta_columns]
    sig_GE_info = pd.read_csv(gene_expression_path,sep=",",dtype=str)

    #2.Combining the two datasets
    sig_viability = pd.merge(sig_viability_info_filter,sig_GE_info,left_on='sig_id',right_on='cid')
    if first_gene_column not in sig_viability.columns:
        raise KeyError("first gene column %r not found in %s" % (first_gene_column, gene_expression_path))
    # Label slice: keeps first_gene_column and everything to its right.
    gene_expression = sig_viability.loc[:,first_gene_column:].copy()

    #3.Getting the signature metadata (same rows, same order as gene_expression)
    meta_data = sig_viability[meta_columns]

    #4.Return the gene expression matrix and the metadata
    return gene_expression,meta_data
    
#Retrieving the data
# getData() returns two row-aligned frames: the gene-expression columns and the
# per-signature metadata columns (sig_id ... pert_itime).
gene_expression,meta_data = getData()

##Selecting the genes
# Only the keys of this dict are read afterwards (list(selected_gene.keys())), and they
# are used as column labels of gene_expression, so the dict's insertion order fixes the
# feature order of the matrix handed to the model.
# The numeric values are not read anywhere in this notebook; presumably they are a
# per-gene score carried over from feature selection -- TODO confirm.
selected_gene = {'1647': 1.4123577236878908, '11065': 2.5876422763121094, '890': 2.646070460520182, '9833': 4.646070460520182, '7105': 12.883143631583854, '10285': 15.116856368416146, '5111': 16.0, '23029': 16.646070460520182, '80349': 17.292140921040364, '1282': 17.76628726316771, '5236': 18.525853657872656, '79071': 18.71121951319102, '6659': 19.83143631583854, '1831': 20.058428184208072, '1509': 20.1786449868556, '7153': 24.4640650394332, '8091': 30.831436315838545, '4282': 31.17528455262422, '3162': 33.707859078959636, '10180': 34.82471544737578, '26292': 35.704498644728254, '2958': 37.63934959205742, '991': 38.9281300788664, '79090': 39.580921407849345, '873': 46.717940381653776, '1759': 47.343848236785675, '10962': 51.39555555253099, '211': 59.944932250023314, '3315': 65.47078590789596, '26036': 69.31902439489141, '9133': 69.33712736832291, '5796': 72.8213550131444, '54386': 77.22027099990677, '2048': 78.63934959205741, '481': 82.23035230260092, '8878': 83.9798373946117, '10898': 85.6427100262888, '22889': 85.78981030278739, '8446': 89.62254742090053, '23': 91.57420053938658, '11073': 93.24043360529505, '11098': 94.80325203971289, '9375': 95.0168021711569, '2131': 95.21691056567538, '4860': 95.22699186836952, '3725': 101.34384823678569, '387': 102.4989701840216, '3098': 106.48758807905286, '10273': 107.44390243404492, '3028': 109.12021680264752, '1029': 110.10005419725924, '10007': 110.69105690780273, '6275': 114.08661246033373, '1026': 114.3740921448681, '4927': 115.31360432870326, '5468': 120.33040649986016, '1786': 121.49094851328424, '3638': 124.22699186836952, '27346': 126.85831978968959, '9868': 127.57084010515521, '79921': 127.63132792132006, '9926': 128.54601626326095, '10494': 129.50775068444116, '8869': 132.90460703924674, '5347': 136.6641734339517, '993': 138.46612467139, '10589': 139.22233063186354, '11344': 139.25853657872656, '2274': 142.55609756595507, '5359': 144.20216802647528, '11188': 144.51241192094713, '10972': 147.44260163177034, '9212': 
147.57962060557475, '9217': 149.5876422763121, '868': 150.427100262888, '1958': 152.31696476293462, '51056': 153.20346882874986, '91949': 154.07989159187096, '3486': 154.5346341582922, '10058': 156.29886178950315, '7157': 157.04704607923932, '5627': 157.4627642371586, '6839': 159.43382113135078, '6603': 160.74146342127344, '4851': 162.24845527603242, '8349': 164.293441723315, '25793': 167.3404878025543, '9552': 168.56747967092383, '9943': 169.37409214486811, '10644': 171.1739837503496, '8720': 172.21018969721266, '5580': 175.90460703924674, '9703': 176.5674796709238, '5921': 180.06720868462762, '10904': 180.25723577645195, '10954': 180.9449322500233, '8508': 185.55273713172372, '30001': 189.9818970265685, '4125': 190.18200542108696, '949': 191.49560974979025, '6709': 196.56281843441786, '11157': 200.78102980236784, '6117': 201.213550131444, '1635': 205.58764227631212, '7398': 206.38883468406823, '9915': 209.25853657872656, '55012': 210.76086719697955, '10013': 211.31436315838542, '25932': 211.61582655243774, '66008': 211.97647696038032, '55556': 215.02818427612564, '2309': 215.81669377663843, '5440': 218.35934960566794, '8985': 220.91468834194086, '5715': 222.61452575016315, '1017': 224.31360432870326, '25803': 226.070569118859, '9805': 227.16856368416146, '2113': 229.2290515003263, '1021': 229.5010298159784, '22908': 230.36195121021714, '64746': 231.8368563820267, '26054': 232.3042818556913, '10682': 232.7166395793792, '5770': 237.45734417097046, '3480': 239.888563697772, '200081': 240.24509484180103, '5110': 241.94829268425465, '23300': 243.1215176049221, '9653': 244.42037939442525, '1454': 246.25387534222054, '9533': 246.52455285559807, '5223': 247.20086722420066, '55324': 247.58092140784936, '10681': 248.54395663130416, '3693': 248.56747967092383, '665': 250.28336042062085, '2920': 251.02558267157644, '8061': 251.8448780527641, '7264': 253.73604335508526, '23326': 255.0067208684628, '1277': 258.53929539479816, '10237': 259.93279131537236, '8553': 
259.9503523162115, '5054': 260.09203252652185, '7082': 260.4956097497902, '3383': 261.82807588160716, '780': 264.4956097497902, '10915': 265.5237940259159, '998': 268.1941463557379, '4582': 268.39349592057425, '5048': 268.4620054074764, '2146': 269.8751219608465, '3337': 269.9395121838351, '7077': 271.22493223641277, '8270': 271.6333875532768, '5641': 273.23165310487553, '10782': 273.34850947329164, '80347': 274.40357722326837, '58533': 274.6346883555514, '392': 276.04628724955717, '9170': 276.57886177589256, '3895': 276.63392952586923, '1605': 277.48758807905284, '836': 277.93279131537236, '5525': 279.177344184581, '1846': 279.60108401323765, '9183': 280.039024408502, '4144': 280.6246070528573, '3156': 281.51707315745307, '23585': 281.86428182847015, '4043': 282.16520324993013, '3978': 282.82601624965037, '51097': 283.51035228899036, '965': 284.68845530325353, '9897': 285.16314361797333, '54541': 286.3384281705975, '847': 286.6098645136572, '93487': 287.16986448643604, '2956': 287.75750676274816, '29937': 288.51111111867255, '9276': 289.5721409074298, '4303': 290.47880757863334, '4690': 291.18406505304375, '896': 291.6561517632143, '55893': 291.76758806544234, '2770': 292.1189160003729, '84722': 293.09799456530243, '25825': 294.64010842173957, '9797': 294.80791327621887, '29890': 295.2732791179267, '23386': 296.3384281705975, '54512': 297.20422765843205, '6804': 298.34850947329164, '3300': 299.1075338954042, '23271': 301.98449863111773, '90861': 303.1639024476555, '23588': 305.111436302228, '6390': 305.9395121838351, '58497': 306.29008128908356, '9181': 307.84617885503866, '55604': 309.2592954084087, '581': 309.29756098722856, '6944': 309.3552303417544, '701': 309.99794036804326, '976': 310.02558267157644, '2523': 310.99457993381185, '2896': 313.8738211585719, '2109': 313.9844986311177, '1019': 314.40227642099376, '10206': 315.46200540747645, '7168': 316.6990785785401, '10051': 316.8011924077561, '4780': 317.2652574471893, '55847': 317.98861789503127, '4846': 
318.53929539479816, '10320': 320.2290515003263, '6856': 320.63728996010065, '23139': 320.8402168162581, '22883': 320.8911653023212, '1677': 321.4250406309312, '10953': 322.29008128908356, '332': 322.70710024927746, '8480': 323.560758802461, '6182': 327.52043359168454, '11182': 328.55945800018645, '9847': 329.0423848427333, '29082': 329.30092142145986, '11044': 329.3451490390603, '51335': 329.6333875532768, '3682': 329.98731709275665, '207': 330.1988075922439, '7020': 330.64065039433206, '9801': 331.6003251835554, '148022': 333.2538753422205, '6850': 333.41159889400564, '54442': 333.7300813163047, '4893': 336.8011924077561, '54205': 337.5728997371119, '891': 338.8697018946583, '9928': 339.1907859215066, '3122': 340.87848239507787, '23338': 341.5816802375315, '10775': 342.07989159187093, '7849': 342.32910569758553, '64943': 342.883902461266, '3108': 343.2370731710637, '5601': 344.138319776079, '9261': 344.40433605295055, '466': 344.82807588160716, '7485': 349.324986433672, '23161': 349.6078048817005}
# Build the model input: one float32 column per selected gene, in the key order
# of selected_gene (the expression values were read as strings).
gene_expression_filter = np.array(gene_expression[list(selected_gene.keys())],dtype="float32")
dtest = xgb.DMatrix(gene_expression_filter)

#Loading model
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load a model file that comes from a trusted source.
# Forward slash instead of "\A", which is an invalid escape in a non-raw string;
# the with-block closes the model file instead of leaking the handle.
with open("model_adjust/Achilles-L1000-96h_XGBoost_1.dat","rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Model Loaded Successfully!")

#Predicting the cell viability
cl_prediction = loaded_model.predict(dtest)

#Create a new file to record the predicted cell viability values
meta_columns = ['sig_id','pert_id','pert_iname','pert_type','cell_id','pert_idose','pert_itime']
# The with-block guarantees the file is flushed and closed even if a write fails.
with open("LINCS_L1000_Chem_pred/L1000_Achilles_96h_clpred_all.txt","w",encoding="utf-8") as cell_viability_file:
    cell_viability_file.write("\t".join(meta_columns + ["L1000_Achilles_96h_pred"]) + "\n")
    # meta_data and cl_prediction are row-aligned: both derive from the same merged table.
    metadata_rows = meta_data[meta_columns].itertuples(index=False, name=None)
    for metadata_row, prediction in zip(metadata_rows, cl_prediction):
        # str() on every field keeps the same text formatting as before
        # (e.g. a missing value is written as "nan").
        fields = [str(value) for value in metadata_row]
        fields.append(str(prediction))
        cell_viability_file.write("\t".join(fields) + "\n")

Model Loaded Successfully!


In [27]:
#Obtain the lowest cell viability value and output to the file
import pandas as pd

# For every (pert_id, cell_id) pair keep the signature with the lowest predicted
# viability and write it to L1000_Achilles_96h_clpred_process.txt.
orginal_info = pd.read_table("LINCS_L1000_Chem_pred/L1000_Achilles_96h_clpred_all.txt",sep="\t")

pred_column = 'L1000_Achilles_96h_pred'
output_columns = ['sig_id','pert_id','pert_iname','pert_type','cell_id','pert_idose','pert_itime']

# A single groupby replaces the drug x cell-line double loop, which rescanned the
# whole table for every pair and chained two boolean masks (df[m1][m2]).
# Rows without a prediction cannot be a minimum, so they are dropped first.
# idxmin returns the label of the first minimal row, matching argmin on ties.
scored_info = orginal_info.dropna(subset=[pred_column])
min_row_labels = scored_info.groupby(['pert_id','cell_id'])[pred_column].idxmin()
min_rows = scored_info.loc[min_row_labels, output_columns + [pred_column]]

#Create the file to record the information
# Rows come out sorted by (pert_id, cell_id); the previous order depended on set
# iteration and was not reproducible between runs.
with open("LINCS_L1000_Chem_pred/L1000_Achilles_96h_clpred_process.txt","w",encoding="utf-8") as record_file:
    record_file.write('\t'.join(output_columns + ['min_value']) + '\n')
    for min_row in min_rows.itertuples(index=False, name=None):
        # str() on every field keeps the same text formatting as before.
        record_file.write('\t'.join(str(value) for value in min_row) + '\n')