# Combine GDSC and CCLE Data

This notebook contains the code that combines the relevant GDSC and CCLE data into a single dataset.

The GDSC and CCLE files sourced from DepMap include:
- Drug Sensitivity AUC (Sanger GDSC2)
- Drug Sensitivity IC50 (Sanger GDSC2)
- Omics Absolute CN Gene Public 24Q4
- Harmonized RPPA CCLE
- Batch Corrected Expression Public 24Q4
- miRNA Expression
- Damaging Mutations
- Hotspot Mutations
- Metabolomics

NOTE: We do not need metadata for each cell line ID (e.g., ACH-000001), because our model is built purely on molecular profiles (e.g., gene expression, mutations) to predict drug response.

In [2]:
import pandas as pd
import numpy as np
import csv

### Step 1: Load the Data from CSV

In [8]:
# Folder containing the input CSVs. Keep the trailing slash: file paths are
# built by concatenating this string directly with a file name.
data_directory = 'data/'

# Input CSV file names. List order is significant: entries are looked up by
# position when the files are read.
file_names = [
    'Batch_corrected_Expression_Public_24Q4_subsetted.csv',  # 0
    'Damaging_Mutations_subsetted.csv',                      # 1
    'Harmonized_RPPA_CCLE_subsetted.csv',                    # 2
    'Hotspot_Mutations_subsetted.csv',                       # 3
    'IC50_AUC_merged.csv',                                   # 4
    'Metabolomics_subsetted.csv',                            # 5
    'miRNA_Expression_subsetted.csv',                        # 6
    'Omics_Absolute_CN_Gene_Public_24Q4_subsetted.csv',      # 7
]

In [9]:
def _load_table(position):
    """Read the CSV stored at ``file_names[position]`` inside ``data_directory``.

    The path is formed by concatenating ``data_directory`` and the file name,
    and the file is parsed with ``pd.read_csv`` using its default options.

    Args:
        position: Index into ``file_names`` selecting which file to read.

    Returns:
        The parsed file as a ``pandas.DataFrame``.
    """
    return pd.read_csv(f'{data_directory}{file_names[position]}')


# One DataFrame per input file, read in `file_names` order.
batch_corrected_expression = _load_table(0)
damaging_mutations = _load_table(1)
harmonized_RPPA = _load_table(2)
hotspot_mutations = _load_table(3)
IC50_AUC = _load_table(4)
metabolomics = _load_table(5)
miRNA_expression = _load_table(6)
absolute_copy_number = _load_table(7)

In [23]:
# Join the per-cell-line tables on the shared 'Unnamed: 0' column, folding
# left to right from the expression table. `merge` defaults to an inner join,
# so only rows whose key appears in every table are kept.
# NOTE(review): columns that share a name across tables receive pandas'
# default `_x` / `_y` suffixes (e.g. `NEMP2_x` in the output below), which do
# not say which table they came from — consider table-specific prefixes.
tables_to_join = [
    damaging_mutations,
    harmonized_RPPA,
    hotspot_mutations,
    metabolomics,
    miRNA_expression,
]

all_data = batch_corrected_expression
for next_table in tables_to_join:
    all_data = all_data.merge(next_table, on='Unnamed: 0')

all_data

Unnamed: 0.1,Unnamed: 0,NEMP2_x,SPDYE11,MED6_x,SPATA1_x,HMG20B_x,PITRM1_x,TCIRG1_x,CDKN2B_x,MKRN2OS_x,...,MIMAT0002192,MIMAT0002191,MIMAT0002190,MIMAT0002189,MIMAT0002188,MIMAT0002187,MIMAT0002186,MIMAT0002185,MIMAT0010151,MIMAT0010150
0,ACH-000873,2.281262,0.016496,4.913394,0.592549,4.933815,5.068384,4.910413,0.996458,0.035011,...,5.995485,6.678776,5.898692,6.143638,6.928133,5.995485,5.995485,6.303598,5.721373,7.386552
1,ACH-000860,2.397921,-0.007359,6.064227,0.790612,5.526024,5.495815,5.223437,3.722068,0.486790,...,5.980025,6.523248,6.113742,6.666757,9.254084,6.057667,5.897724,6.235727,5.832890,7.291861
2,ACH-000439,1.961548,0.139871,4.366399,0.574739,5.879936,5.027458,6.596313,-0.026794,0.283322,...,5.766595,6.533875,5.813012,6.063503,6.448240,6.024807,5.766595,6.226509,5.879951,7.363171
3,ACH-000318,2.962951,-0.007359,6.028266,0.636129,4.722188,5.235858,5.235896,0.043358,2.200371,...,5.984134,6.149544,5.951868,6.566663,9.038480,6.218975,5.778734,6.336105,5.867896,6.669310
4,ACH-000789,1.310378,0.039888,5.794821,0.320498,6.220254,6.758748,2.632036,1.905007,0.506708,...,5.848498,6.381110,6.069315,6.260778,6.445594,6.129489,5.800123,6.330917,5.940871,6.637349
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
856,ACH-000979,2.288769,0.016474,4.355408,0.362453,4.983387,5.820018,3.814418,1.936184,1.003271,...,7.844109,6.382494,6.120186,6.245267,8.101030,6.080444,5.939227,6.196528,5.909053,6.860839
857,ACH-000095,3.060390,0.016474,5.489647,0.910464,4.771486,5.113535,0.874038,0.196770,0.190636,...,5.971314,6.791814,5.853996,6.257011,6.914086,6.238405,5.902315,6.463361,5.971314,9.905372
858,ACH-000532,3.054335,0.016474,5.250358,0.561613,4.462247,5.012875,6.018417,2.034527,1.742081,...,6.309795,6.611910,5.953032,6.290019,8.044613,6.329482,5.901591,6.249445,5.705149,6.778209
859,ACH-000874,3.507669,0.016474,4.854990,0.146312,4.401372,4.644323,3.150008,0.075189,0.190636,...,6.168121,6.985273,5.735793,6.770300,8.415531,6.053763,6.077457,6.410409,5.980711,7.058208


In [24]:
# Preview the copy-number table on its own; it is not one of the tables
# merged into `all_data` in the previous cell.
absolute_copy_number

Unnamed: 0.1,Unnamed: 0,HNRNPA1P41,PRELID3BP1,BTNL12P,SMIM10L1,SNORD115-15,EGOT,MIPEPP2,RPL9P5,SNORD115-23,...,GPRIN2,EIF4E2,TTYH2,CD63,LPGAT1,EDEM1,TBC1D5,RPL23,GIT2,VPS9D1
0,ACH-000628,3.0,4.0,5.0,2.0,4.0,2.0,4.0,4.0,4.0,...,1.0,3.0,4.0,2.0,4.0,2.0,2.0,2.0,2.0,3.0
1,ACH-003071,3.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,...,1.0,3.0,2.0,3.0,2.0,2.0,2.0,3.0,3.0,2.0
2,ACH-001608,4.0,5.0,3.0,2.0,2.0,3.0,2.0,3.0,2.0,...,3.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0
3,ACH-000278,1.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,...,1.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,1.0
4,ACH-001350,5.0,3.0,4.0,5.0,3.0,2.0,3.0,3.0,3.0,...,2.0,2.0,4.0,3.0,3.0,2.0,3.0,3.0,3.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1602,ACH-001389,2.0,3.0,3.0,3.0,2.0,3.0,2.0,3.0,2.0,...,2.0,2.0,5.0,2.0,3.0,3.0,3.0,2.0,2.0,2.0
1603,ACH-001835,2.0,4.0,4.0,3.0,3.0,2.0,5.0,2.0,3.0,...,6.0,4.0,4.0,3.0,4.0,2.0,2.0,4.0,3.0,3.0
1604,ACH-000296,2.0,3.0,4.0,3.0,2.0,2.0,3.0,3.0,2.0,...,3.0,3.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,2.0
1605,ACH-001971,2.0,4.0,3.0,4.0,4.0,3.0,4.0,4.0,4.0,...,3.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0


Unnamed: 0.1,Unnamed: 0,NEMP2,SPDYE11,MED6,SPATA1,HMG20B,PITRM1,TCIRG1,CDKN2B,MKRN2OS,...,XYLB,CDC25A,NR1H4,NUP153,SUPT7L,GFPT2,USP15,IQSEC1,FGFBP1,FGF19
0,ACH-000873,2.281262,0.016496,4.913394,0.592549,4.933815,5.068384,4.910413,0.996458,0.035011,...,1.522375,2.734131,0.036676,4.489500,4.126495,2.933520,4.679719,2.658149,6.949334,0.067254
1,ACH-000860,2.397921,-0.007359,6.064227,0.790612,5.526024,5.495815,5.223437,3.722068,0.486790,...,2.401933,4.026986,0.102299,4.812712,4.754488,1.668809,5.260394,2.678779,5.844726,0.406430
2,ACH-000439,1.961548,0.139871,4.366399,0.574739,5.879936,5.027458,6.596313,-0.026794,0.283322,...,1.871678,4.355534,-0.004244,4.359530,4.027693,0.044818,4.765310,3.460506,-0.096728,0.001493
3,ACH-000318,2.962951,-0.007359,6.028266,0.636129,4.722188,5.235858,5.235896,0.043358,2.200371,...,1.720886,2.895112,-0.004244,3.991084,4.657785,1.441628,5.563495,1.228177,7.678403,0.001493
4,ACH-001142,1.997125,-0.007359,4.781246,1.004993,5.559541,6.614231,6.000796,5.244141,-0.048123,...,1.816077,2.128269,0.050052,4.144621,4.822296,6.326762,5.743718,3.069706,-0.137807,-0.012049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1668,ACH-001843,2.425672,0.016474,5.064429,1.155729,6.132326,6.435993,6.059272,0.075189,0.294127,...,1.809829,2.525644,0.004200,3.596841,5.268157,4.697931,5.741231,2.436233,0.248119,0.045752
1669,ACH-002074,2.435179,0.016474,4.795754,0.494388,5.265443,5.683454,1.697058,1.882359,0.230385,...,1.768574,4.710684,0.004200,6.343630,4.363348,1.019808,5.406873,4.043767,1.515018,0.091958
1670,ACH-001164,1.893673,0.016474,4.460321,0.362453,5.729537,7.230451,4.420567,1.809993,0.190636,...,1.584718,3.526075,0.019553,3.603731,3.929013,1.789794,4.072432,2.397131,0.590804,0.014186
1671,ACH-003180,2.717188,0.016474,4.991506,0.604765,5.400438,5.396326,4.389461,3.618080,0.190636,...,3.326611,3.303162,0.004200,4.711988,4.557213,3.643244,5.510186,3.488515,0.248119,0.014186
