In [1]:
import pandas as pd
import numpy as np
import os
import gc
import matplotlib.pyplot as plt

# Directory prefixes (each ends with "/" because paths are built by plain string concatenation).
_FOLDER = "data/"       # input CSV/TSV files and the per-experiment output folders
_FOLDER_2 = "figures/"  # not referenced by the cells visible in this notebook section
_FOLDER_3 = "results/"  # passed as _FOLDER_to_save when drug features are rebuilt
SAVE_FIGURES = False    # not referenced by the cells visible in this notebook section

from filtering import *
from fitting import *
from pubchem_processing import *
from merging import *

In [2]:
# Candidate fitting-function names. The cells visible in this notebook
# section only ever select "sigmoid_4_param".
functions = [
    "fsigmoid",
    "sigmoid_2_param",
    "sigmoid_3_param",
    "sigmoid_4_param",
    "logistic_4_param",
    "ll4_4_param",
    "ll4R_4_param",
    "logLogist_3_param",
]

# True  -> read the cached drug-feature CSV.
# False -> rebuild the drug features from the raw drug tables.
load_drug_properties = True

In [3]:
# Normalised dose-response curves; the CSV carries a saved index column
# ("Unnamed: 0") that is discarded on load.
drug_curves = pd.read_csv(f"{_FOLDER}normalised_dose_response_data.csv").drop(columns="Unnamed: 0")
# Column names of the 10 concentration points and the 10 normalised responses.
conc_columns = [f"fd_num_{i}" for i in range(10)]
response_norm = [f"norm_cells_{i}" for i in range(10)]
# Cell-line feature table (tab-separated).
cell_features = pd.read_csv(f"{_FOLDER}Cell_Line_Features_PANCAN_simple_MOBEM.tsv", sep="\t")

In [4]:
# Sanity check: True only when every row carries a distinct, non-null DRUGID_COSMICID.
len(drug_curves) == drug_curves["DRUGID_COSMICID"].nunique()

True

In [5]:
# Drug descriptors: rebuild them from the raw drug tables and refresh the
# cached CSV, or simply read the cached CSV when load_drug_properties is set.
if not load_drug_properties:
    # ~ 3 mins
    drugs_1 = pd.read_csv(f"{_FOLDER}drugs_gdsc1.csv")
    drug_features = pd.read_csv(f"{_FOLDER}Drug_Features.csv")
    drug_features = PreprocessDrugs(
        drug_features,
        drugs_1,
        save_features_names=True,
        _FOLDER_to_save=_FOLDER_3,
    )
    # Turn the index into a regular column before caching without an index.
    drug_features = drug_features.reset_index()
    drug_features.to_csv(f"{_FOLDER}drug_features_pubchem.csv", index=False)
else:
    drug_features = pd.read_csv(f"{_FOLDER}drug_features_pubchem.csv")
# Total number of drugs: 250
# Number of not found drugs: 17
# Elements in drugs: 11 ['Pt', 'I', 'H', 'B', 'P', 'Br', 'O', 'S', 'N', 'Cl', 'F']
# Number of targets: 213
# Number of unique pathways: 23

## As in the MSc project

In [6]:
# Experiment "filt_123_04": filtering stages 1-3, 4-parameter sigmoid fit,
# merge with cell-line and drug features, then save train/test/test2 splits.
fitting_function = "sigmoid_4_param"
filtration_name = "filt_123_04"

# drug_curves goes straight into the filter; a defensive copy made here would
# be overwritten by the filter result without ever being used.
# NOTE(review): assumes FilteringSigmoidCurves does not modify its input - confirm.
df = FilteringSigmoidCurves(
    drug_curves,
    filtering_scenario=[1, 2, 3],
    response_columns=response_norm,
    first_points_lower_limit=0.8,
    last_points_upper_limit=0.4,
)

df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function,
                           # save_CCL_properties=True, _FOLDER_to_save=_FOLDER_3
                           )

train, test, test2 = SplitTrainTestFor10Drugs(merged_df, train_ratio=0.8)
# Drop rows containing any NaN. Rebinding (instead of inplace=True) avoids
# mutating frames that may be slices of merged_df.
train = train.dropna(axis=0)
test = test.dropna(axis=0)
test2 = test2.dropna(axis=0)

new_folder = _FOLDER + filtration_name
# exist_ok=True replaces the listdir() membership test: no failure when the
# folder already exists and no separate check-then-create step.
os.makedirs(new_folder, exist_ok=True)
train.to_csv(os.path.join(new_folder, "train.csv"))
test.to_csv(os.path.join(new_folder, "test.csv"))
test2.to_csv(os.path.join(new_folder, "test2.csv"))
train.shape[0], test.shape[0], test2.shape[0]

Original dataset: (225384, 43)


  3%|▎         | 81/2776 [00:00<00:03, 809.73it/s]

1st filtration (Ensure that all the response are less than 1): Filtered dataset: (63325, 43)
2d filtration (Ensure that first and last points form plateus): Filtered dataset: (6321, 45)
3d stage filtration (Specified location of the plateus): Filtered dataset: (2776, 45)


100%|██████████| 2776/2776 [00:03<00:00, 743.31it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 29
Number of samples for ML modelling: 2537
Number of drugs with more than 10 profiles: 67
Number of drug profiles not covered: 251


(1721, 460, 242)

In [7]:
# Experiment "filt_123_04_r2_09": filtering stages 1-3 and a 4-parameter
# sigmoid fit, keeping only curves whose fit has R^2 > 0.9 before splitting.
fitting_function = "sigmoid_4_param"
filtration_name = "filt_123_04_r2_09"

# drug_curves goes straight into the filter; a defensive copy made here would
# be overwritten by the filter result without ever being used.
# NOTE(review): assumes FilteringSigmoidCurves does not modify its input - confirm.
df = FilteringSigmoidCurves(
    drug_curves,
    filtering_scenario=[1, 2, 3],
    response_columns=response_norm,
    first_points_lower_limit=0.8,
    last_points_upper_limit=0.4,
)

df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function,
                           # save_CCL_properties=True, _FOLDER_to_save=_FOLDER_3
                           )
# Keep only well-fitted curves (goodness of fit above 0.9).
merged_df = merged_df[merged_df["sigmoid_4_param_r2"] > 0.9]

train, test, test2 = SplitTrainTestFor10Drugs(merged_df, train_ratio=0.8)
# Drop rows containing any NaN. Rebinding (instead of inplace=True) avoids
# mutating frames that may be slices of merged_df.
train = train.dropna(axis=0)
test = test.dropna(axis=0)
test2 = test2.dropna(axis=0)

new_folder = _FOLDER + filtration_name
# exist_ok=True replaces the listdir() membership test: no failure when the
# folder already exists and no separate check-then-create step.
os.makedirs(new_folder, exist_ok=True)
train.to_csv(os.path.join(new_folder, "train.csv"))
test.to_csv(os.path.join(new_folder, "test.csv"))
test2.to_csv(os.path.join(new_folder, "test2.csv"))
train.shape[0], test.shape[0], test2.shape[0]

Original dataset: (225384, 43)


  3%|▎         | 87/2776 [00:00<00:03, 749.96it/s]

1st filtration (Ensure that all the response are less than 1): Filtered dataset: (63325, 43)
2d filtration (Ensure that first and last points form plateus): Filtered dataset: (6321, 45)
3d stage filtration (Specified location of the plateus): Filtered dataset: (2776, 45)


100%|██████████| 2776/2776 [00:03<00:00, 754.59it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 29
Number of samples for ML modelling: 2468
Number of drugs with more than 10 profiles: 67
Number of drug profiles not covered: 241


(1678, 447, 232)

## Additional 4th stage

In [8]:
# Experiment "filt_1234_04": filtering stages 1-4, 4-parameter sigmoid fit,
# merge with cell-line and drug features, then save train/test/test2 splits.
fitting_function = "sigmoid_4_param"
filtration_name = "filt_1234_04"

# drug_curves goes straight into the filter; a defensive copy made here would
# be overwritten by the filter result without ever being used.
# NOTE(review): assumes FilteringSigmoidCurves does not modify its input - confirm.
df = FilteringSigmoidCurves(
    drug_curves,
    filtering_scenario=[1, 2, 3, 4],
    response_columns=response_norm,
    first_points_lower_limit=0.8,
    last_points_upper_limit=0.4,
)

df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function)

train, test, test2 = SplitTrainTestFor10Drugs(merged_df, train_ratio=0.8)
# NOTE(review): the stage 1-3 experiments drop NaN rows from the splits before
# saving; this cell does not. Confirm whether that difference is intended.

new_folder = _FOLDER + filtration_name
# exist_ok=True replaces the listdir() membership test: no failure when the
# folder already exists and no separate check-then-create step.
os.makedirs(new_folder, exist_ok=True)
train.to_csv(os.path.join(new_folder, "train.csv"))
test.to_csv(os.path.join(new_folder, "test.csv"))
test2.to_csv(os.path.join(new_folder, "test2.csv"))
train.shape[0], test.shape[0], test2.shape[0]

Original dataset: (225384, 43)


  3%|▎         | 91/2719 [00:00<00:02, 906.17it/s]

1st filtration (Ensure that all the response are less than 1): Filtered dataset: (63325, 43)
2d filtration (Ensure that first and last points form plateus): Filtered dataset: (6321, 45)
3d stage filtration (Specified location of the plateus): Filtered dataset: (2776, 45)
4th stage filtration (Cut off high ancedent points): Filtered dataset: (2719, 45)


100%|██████████| 2719/2719 [00:03<00:00, 775.06it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 21
Number of samples for ML modelling: 2492
Number of drugs with more than 10 profiles: 67
Number of drug profiles not covered: 245


(1773, 474, 245)

## No filtering - only fitting

In [9]:
%%time

# Experiment "no_filt": fit every curve without any filtering (slow: the
# recorded run took over 20 minutes), then keep only fits with R^2 > 0.9.
fitting_function = "sigmoid_4_param"
filtration_name = "no_filt"

# Work on a copy so the fitting step receives its own frame rather than drug_curves.
df = drug_curves.copy()
df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function)

# Keep only well-fitted curves (goodness of fit above 0.9).
merged_df = merged_df[merged_df["sigmoid_4_param_r2"] > 0.9]

train, test, test2 = SplitTrainTestFor10Drugs(merged_df, train_ratio=0.8)

new_folder = _FOLDER + filtration_name
# exist_ok=True replaces the listdir() membership test: no failure when the
# folder already exists and no separate check-then-create step.
os.makedirs(new_folder, exist_ok=True)
train.to_csv(os.path.join(new_folder, "train.csv"))
test.to_csv(os.path.join(new_folder, "test.csv"))
test2.to_csv(os.path.join(new_folder, "test2.csv"))
# Explicit print: this cell runs under %%time.
print(train.shape[0], test.shape[0], test2.shape[0])

100%|██████████| 225384/225384 [22:36<00:00, 166.10it/s] 


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 88411
Number of samples for ML modelling: 49280
Number of drugs with more than 10 profiles: 205
Number of drug profiles not covered: 60
39292 9928 60
CPU times: user 23min 25s, sys: 24.5 s, total: 23min 50s
Wall time: 23min 41s


## AUC filtering

In [10]:
# Experiment "auc_filt": start from the pre-filtered curves stored in the
# results folder, fit the 4-parameter sigmoid, merge and save the splits.
fitting_function = "sigmoid_4_param"
filtration_name = "auc_filt"

# Use the configured results folder instead of a hard-coded "results/" prefix.
df = pd.read_csv(_FOLDER_3 + "filt_auc.csv")

df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function)

train, test, test2 = SplitTrainTestFor10Drugs(merged_df, train_ratio=0.8)

new_folder = _FOLDER + filtration_name
# exist_ok=True replaces the listdir() membership test: no failure when the
# folder already exists and no separate check-then-create step.
os.makedirs(new_folder, exist_ok=True)
train.to_csv(os.path.join(new_folder, "train.csv"))
test.to_csv(os.path.join(new_folder, "test.csv"))
test2.to_csv(os.path.join(new_folder, "test2.csv"))
train.shape[0], test.shape[0], test2.shape[0]

100%|██████████| 122642/122642 [14:23<00:00, 142.07it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 35031
Number of samples for ML modelling: 78650
Number of drugs with more than 10 profiles: 216
Number of drug profiles not covered: 0


(62825, 15825, 0)

In [11]:
# Experiment "filt_auc_02": start from the pre-filtered curves stored in the
# results folder, fit the 4-parameter sigmoid, merge and save the splits.
fitting_function = "sigmoid_4_param"
filtration_name = "filt_auc_02"

# Use the configured results folder instead of a hard-coded "results/" prefix.
df = pd.read_csv(_FOLDER_3 + "filt_auc_02.csv")

df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function)

train, test, test2 = SplitTrainTestFor10Drugs(merged_df, train_ratio=0.8)

new_folder = _FOLDER + filtration_name
# exist_ok=True replaces the listdir() membership test: no failure when the
# folder already exists and no separate check-then-create step.
os.makedirs(new_folder, exist_ok=True)
train.to_csv(os.path.join(new_folder, "train.csv"))
test.to_csv(os.path.join(new_folder, "test.csv"))
test2.to_csv(os.path.join(new_folder, "test2.csv"))
train.shape[0], test.shape[0], test2.shape[0]

100%|██████████| 12169/12169 [01:13<00:00, 165.95it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 3196
Number of samples for ML modelling: 8178
Number of drugs with more than 10 profiles: 113
Number of drug profiles not covered: 243


(6303, 1632, 243)

## Test sets from GDSC2

In [12]:
# From this cell on, drug_curves is rebound to the tab-separated GDSC2
# dose-response table; the frame loaded at the top of the notebook is no
# longer reachable under this name.
drug_curves = pd.read_csv(f"{_FOLDER}normalised_dose_response_data_GDCS2_EC_conc.csv", sep="\t")
drug_curves.shape

(212349, 31)

In [13]:
# The curves in this table are described by 8 concentration/response points
# (the earlier cells used 10), so the column-name lists are rebuilt.
conc_columns = [f"fd_num_{i}" for i in range(8)]
response_norm = [f"norm_cells_{i}" for i in range(8)]
# Force the drug features to be rebuilt in the next cell instead of read from cache.
load_drug_properties = False

In [14]:
%%time
# GDSC2 drug descriptors: rebuild them (PubChem id lookup plus preprocessing)
# and refresh the cached CSV, or read the cached CSV when load_drug_properties is set.
if not load_drug_properties:
    # ~ 3 mins
    drug_features = pd.read_csv(f"{_FOLDER}drugs_gdsc2.csv")
    # Rename the six columns of the raw table to the names used below.
    drug_features.columns = [
        "DRUG_ID",
        "Drug_Name",
        "Synonyms",
        "Target_Pathway",
        "Target",
        "pubchem_id",
    ]
    drug_features = GetPubChemId(drug_features)
    drug_features = PreprocessDrugs(
        drug_features,
        drug_features_wih_pubchem_id=True,
        save_features_names=False,
    )
    drug_features = drug_features.reset_index()
    # Rows whose pubchem_id is the "-" placeholder are excluded before caching.
    drug_features = drug_features.loc[drug_features["pubchem_id"] != "-"]
    drug_features.to_csv(f"{_FOLDER}drug_features_pubchem_gdsc2.csv", index=False)
else:
    drug_features = pd.read_csv(f"{_FOLDER}drug_features_pubchem_gdsc2.csv")

# Note: the columns with drug features can be different!
# (presumably relative to the first drug-feature table - TODO confirm)

  0%|          | 0/44 [00:00<?, ?it/s]

Calling PubChem to get pubchem ids


100%|██████████| 44/44 [00:34<00:00,  1.27it/s]
  0%|          | 0/198 [00:00<?, ?it/s]

Total number of drugs: 192
Number of not found drugs: 2
Calling PubChem...


  8%|▊         | 16/198 [00:11<02:10,  1.40it/s]

Error with drug: AZD5363


 13%|█▎        | 25/198 [00:17<02:18,  1.25it/s]

Error with drug: Eg5_9814
Error with drug: JAK1_8709


 22%|██▏       | 44/198 [00:29<01:53,  1.36it/s]

Error with drug: Ulixertinib
Error with drug: Dactinomycin


 24%|██▎       | 47/198 [00:30<01:29,  1.70it/s]

Error with drug: CDK9_5038


 31%|███▏      | 62/198 [00:40<01:30,  1.51it/s]

Error with drug: Teniposide


 33%|███▎      | 65/198 [00:42<01:56,  1.14it/s]

Error with drug: BDP-00009066


 36%|███▌      | 71/198 [00:46<01:34,  1.34it/s]

Error with drug: Staurosporine
Error with drug: ULK1_4989


 44%|████▍     | 88/198 [00:59<01:23,  1.32it/s]

Error with drug: Sinularin


 48%|████▊     | 96/198 [01:04<01:16,  1.34it/s]

Error with drug: VSP34_8731


 57%|█████▋    | 113/198 [01:16<00:59,  1.42it/s]

Error with drug: BPD-00008900


 59%|█████▉    | 117/198 [01:18<00:53,  1.50it/s]

Error with drug: PAK_5339


 67%|██████▋   | 133/198 [01:29<00:48,  1.34it/s]

Error with drug: Dihydrorotenone


 74%|███████▍  | 147/198 [01:40<00:40,  1.26it/s]

Error with drug: CDK9_5576
Error with drug: ERK_6604
Error with drug: JAK_8517
Error with drug: IAP_5620


 81%|████████  | 160/198 [01:47<00:28,  1.32it/s]

Error with drug: Vincristine


 82%|████████▏ | 162/198 [01:47<00:23,  1.55it/s]

Error with drug: Buparlisib


 85%|████████▌ | 169/198 [01:52<00:20,  1.39it/s]

Error with drug: TAF1_5496


 95%|█████████▌| 189/198 [02:06<00:07,  1.28it/s]

Error with drug: Ulixertinib


 97%|█████████▋| 193/198 [02:08<00:03,  1.42it/s]

Error with drug: Mirin


100%|██████████| 198/198 [02:09<00:00,  1.53it/s]


Error with drug: ERK_2440
Error with drug: IRAK4_4710
Error with drug: IGF1R_3801
Processing drug properties...
Exceptions are found : 27
Elements in drugs: 10 ['Pt', 'N', 'Cl', 'F', 'O', 'S', 'P', 'Br', 'I', 'B']
Number of targets: 185
Number of unique pathways: 24
CPU times: user 5.47 s, sys: 286 ms, total: 5.76 s
Wall time: 2min 44s


In [15]:
# Read the cached GDSC2 drug features back from disk to inspect what was saved.
drug_features2 = pd.read_csv(f"{_FOLDER}drug_features_pubchem_gdsc2.csv")
drug_features2.shape

(177, 245)

In [16]:
# Display the GDSC2 drug-feature table that was just read back from the cache.
drug_features2

Unnamed: 0,DRUG_ID,Drug_Name,Synonyms,Target,pubchem_id,molecular_weight,elements,2bonds,3bonds,xlogp,...,Metabolism,Mitosis,Other,"Other, kinases",PI3K/MTOR signaling,Protein stability and degradation,RTK signaling,Unclassified,WNT signaling,p53 pathway
0,1559,Luminespib,"AUY922, VER-52296,NVP-AUY922, AUY",HSP90,10096043,465.5,"'C', 'H', 'N', 'O'",9,0,2.3,...,0,0,0,0,0,1,0,0,0,0
1,1058,Pictilisib,"GDC-0941, GDC0941, RG-7621",PI3K (class 1),17755052,513.6,"'C', 'O', 'S', 'H', 'N'",10,0,1.6,...,0,0,0,0,1,0,0,0,0,0
2,1088,Irinotecan,"Camptosar, (+)-Irinotecan, Irinotecanum, irino...",TOP1,60838,586.7,"'C', 'H', 'N', 'O'",10,0,3.0,...,0,0,0,0,0,0,0,0,0,0
3,1549,Sapitinib,AZD8931,"EGFR, ERBB2, ERBB3",11488320,473.9,"'C', 'F', 'Cl', 'O', 'H', 'N'",9,0,4.0,...,0,0,0,0,0,0,0,0,0,0
4,1558,Lapatinib,"Tykerb, Tyverb","EGFR, ERBB2",208908,581.1,"'C', 'F', 'Cl', 'O', 'S', 'H', 'N'",15,0,5.1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,1908,Ulixertinib,"BVD-523, VRT752271","ERK1, ERK2","11719003, 58641927",,,0,0,,...,0,0,0,0,0,0,0,0,0,0
173,1915,AZD3759,-,EGFR,78209992,459.9,"'C', 'F', 'Cl', 'O', 'H', 'N'",9,0,4.1,...,0,0,0,0,0,0,0,0,0,0
174,1930,Telomerase Inhibitor IX,"MST-312, MST 312, MST312",Telomerase,10385095,380.3,"'C', 'H', 'N', 'O'",11,0,3.3,...,0,0,0,0,0,0,0,0,0,0
175,2040,Foretinib,"GSK1363089, XL-880, EXEL-2880, GSK089","MET, KDR, TIE2, VEGFR3/FLT4, RON, PDGFR, FGFR1...",42642645,632.7,"'C', 'F', 'O', 'H', 'N'",13,0,5.5,...,0,0,0,0,0,0,1,0,0,0


In [17]:
# GDSC2 test set for "filt_123_04": same stage 1-3 filtering and 4-parameter
# sigmoid fit, saved unsplit as test_gdsc2.csv in the experiment folder.
fitting_function = "sigmoid_4_param"
filtration_name = "filt_123_04"

# drug_curves goes straight into the filter; a defensive copy made here would
# be overwritten by the filter result without ever being used.
# NOTE(review): assumes FilteringSigmoidCurves does not modify its input - confirm.
df = FilteringSigmoidCurves(
    drug_curves,
    filtering_scenario=[1, 2, 3],
    response_columns=response_norm,
    first_points_lower_limit=0.8,
    last_points_upper_limit=0.4,
)

df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function,
                           # save_CCL_properties=True, _FOLDER_to_save=_FOLDER_3
                           )

new_folder = _FOLDER + filtration_name
# Make sure the target folder exists, so this cell does not depend on an
# earlier experiment cell having created it.
os.makedirs(new_folder, exist_ok=True)
merged_df.to_csv(os.path.join(new_folder, "test_gdsc2.csv"))
merged_df.shape[0]

Original dataset: (212349, 31)


  7%|▋         | 77/1093 [00:00<00:01, 768.13it/s]

1st filtration (Ensure that all the response are less than 1): Filtered dataset: (49771, 31)
2d filtration (Ensure that first and last points form plateus): Filtered dataset: (3857, 33)
3d stage filtration (Specified location of the plateus): Filtered dataset: (1093, 33)


100%|██████████| 1093/1093 [00:01<00:00, 757.52it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 2


893

In [18]:
# Display the filtered GDSC2 curves together with their fitted sigmoid_4_param columns.
df

Unnamed: 0,CELL_LINE_NAME,SHORT_TAG,COSMIC_ID,DRUG_ID,DRUGID_COSMICID,SCAN_ID,MAX_CONC,norm_cells_0,norm_cells_1,norm_cells_2,...,fd_num_2,fd_num_3,fd_num_4,fd_num_5,fd_num_6,fd_num_7,dif_first,dif_last,sigmoid_4_param_r2,sigmoid_4_param
204,22RV1,L78,924100,1941,1941_924100,14769,10.0,1,0.964125,0.725165,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.035875,0.030083,0.997639,"[0.3267808152761417, 1.0551792679193388, -21.0..."
400,23132-87,L55,910924,1873,1873_910924,12704,10.0,1,0.978341,0.831073,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.021659,0.043333,0.983603,"[0.6622687028346836, 1.001310707183258, -4.705..."
132004,NCI-H209,L41,688013,1017,1017_688013,13955,10.0,1,0.969660,0.746992,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.030340,0.009569,0.944187,"[0.6928193573268133, 1.2233220610421172, -14.0..."
982,639-V,L78,906798,1529,1529_906798,12094,10.0,1,0.991725,0.954360,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.008275,0.020701,0.997228,"[0.5598321395239151, 1.06422507908648, -14.852..."
132054,NCI-H209,L73,688013,1190,1190_688013,21953,10.0,1,0.999315,0.678490,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.000685,0.031936,0.997680,"[0.3253251740225935, 1.0338004770189777, -15.2..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130200,NCI-H1963,L55,688010,1873,1873_688010,13715,10.0,1,0.972294,0.866483,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.027706,0.029830,0.943713,"[0.7544174909380578, 1.6111490976382308, -31.0..."
130406,NCI-H1975,L55,924244,1873,1873_924244,11744,10.0,1,0.955655,0.843890,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.044345,0.033536,0.977895,"[0.6069384722318689, 1.128805472803153, -5.575..."
130836,NCI-H2023,L9,1240187,1022,1022_1240187,15420,10.0,1,0.958403,0.582039,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.041597,0.038376,0.916739,"[0.34384555572095443, 0.9909581398266017, -4.8..."
130857,NCI-H2023,L18,1240187,1058,1058_1240187,15420,10.0,1,0.992961,0.866221,...,0.285714,0.428571,0.571429,0.714286,0.857143,1,0.007039,0.019208,0.974643,"[0.6395086531724254, 0.9795904154115005, -4.45..."


## GDSC2 without filtering - only fitting

In [19]:
%%time

# GDSC2 test set for "no_filt": fit every curve without filtering (slow: the
# recorded run took about 25 minutes), keep fits with R^2 > 0.9, save unsplit.
fitting_function = "sigmoid_4_param"
filtration_name = "no_filt"

# Work on a copy so the fitting step receives its own frame rather than drug_curves.
df = drug_curves.copy()
df = ComputeFittingFunction(df, fitting_function, conc_columns, response_norm)

merged_df = MergeDrugCells(df, cell_features, drug_features,
                           splitting_needed=True,
                           param_col_name=fitting_function)

# Keep only well-fitted curves (goodness of fit above 0.9).
merged_df = merged_df[merged_df["sigmoid_4_param_r2"] > 0.9]

new_folder = _FOLDER + filtration_name
# Make sure the target folder exists, so this cell does not depend on an
# earlier experiment cell having created it.
os.makedirs(new_folder, exist_ok=True)
merged_df.to_csv(os.path.join(new_folder, "test_gdsc2.csv"))
# Explicit print: this cell runs under %%time, and the recorded run shows no
# value for the bare trailing expression.
print(merged_df.shape[0])

100%|██████████| 212349/212349 [25:14<00:00, 140.20it/s]


<function sigmoid_4_param at 0x7f11507f3b90>
Reduced number of samples: 58798
CPU times: user 25min 29s, sys: 7.55 s, total: 25min 36s
Wall time: 25min 29s


In [20]:
# Display the merged GDSC2 table after the sigmoid_4_param_r2 > 0.9 cut.
merged_df

Unnamed: 0,CELL_LINE_NAME,SHORT_TAG,COSMIC_ID,DRUG_ID,DRUGID_COSMICID,SCAN_ID,MAX_CONC,norm_cells_0,norm_cells_1,norm_cells_2,...,chr9:104248247-104249501(C9orf125)_HypMET,"chr9:115875199-115875738(C9orf109, C9orf110)_HypMET",chr9:123555399-123555899(FBXW2)_HypMET,chr9:140310894-140312457(EXD3)_HypMET,chr9:21974578-21975306(CDKN2A)_HypMET,chr9:35756948-35757339(MSMP)_HypMET,chr9:35791584-35791924(NPR2)_HypMET,chr9:4984543-4985630(JAK2)_HypMET,chr9:86571047-86572027(C9orf64)_HypMET,chr9:98783216-98784364(NCRNA00092)_HypMET
0,22RV1,R1,924100,1003,1003_924100,8965,0.1,1,1.123625,1.130823,...,0,0,0,0,0,0,0,0,0,0
1,22RV1,R1,924100,1003,1003_924100,8966,0.1,1,1.147480,1.183175,...,0,0,0,0,0,0,0,0,0,0
2,22RV1,R1,924100,1003,1003_924100,14769,0.1,1,0.977229,0.953603,...,0,0,0,0,0,0,0,0,0,0
3,22RV1,R1,924100,1003,1003_924100,24663,0.1,1,0.990563,1.002675,...,0,0,0,0,0,0,0,0,0,0
4,22RV1,L2,924100,1004,1004_924100,14769,0.1,1,1.032395,1.016626,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138945,HCC1569,L23,907046,1180,1180_907046,9850,1.0,1,0.998301,0.954344,...,0,0,0,0,0,0,0,0,0,0
138982,SNU-1040,L17,1659823,1626,1626_1659823,4138,10.0,1,0.979195,0.979195,...,0,0,0,0,0,0,0,0,0,0
138987,SNU-1040,L119,1659823,1720,1720_1659823,4120,10.0,1,1.150299,1.127768,...,0,0,0,0,0,0,0,0,0,0
138996,SNU-1040,L95,1659823,1819,1819_1659823,4138,3.0,1,0.959447,0.722733,...,0,0,0,0,0,0,0,0,0,0
