In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load 'CRISPRGeneDependency.csv' and rename its unnamed first column to
# 'Depmap ID' (the key used for the merges further down).
CRISPR_GENE_Dep = (
    pd.read_csv('CRISPRGeneDependency.csv')
    .rename(columns={'Unnamed: 0': 'Depmap ID'})
)
CRISPR_GENE_Dep.head()

Unnamed: 0,Depmap ID,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),...,ZWILCH (55055),ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009)
0,ACH-000001,0.06355,0.017803,0.016486,0.066293,0.029758,0.000815,0.023883,0.050448,0.003767,...,0.068175,0.032055,0.004142,0.001468,0.003247,0.195908,0.039109,0.011347,0.007978,0.437038
1,ACH-000004,0.01667,0.044559,0.035885,0.026364,0.028644,0.020426,0.000832,0.04591,0.002606,...,0.093742,0.493753,0.002883,0.007575,0.005719,0.014641,0.414198,0.006575,0.001914,0.046494
2,ACH-000005,0.040757,0.022681,0.005047,0.008861,0.064044,0.031699,0.004941,0.047061,0.036375,...,0.152467,0.362731,0.029695,0.024869,0.017247,0.078232,0.067087,0.028369,0.019612,0.070876
3,ACH-000007,0.018149,0.051901,0.006559,0.004373,0.012135,0.003699,0.007898,0.107051,0.066871,...,0.197652,0.550288,0.017119,0.001285,0.006273,0.036344,0.464681,0.013396,0.229044,0.51128
4,ACH-000009,0.026656,0.115825,0.011635,0.011006,0.014439,0.021746,0.011194,0.130169,0.018793,...,0.246275,0.295764,0.042915,0.00172,0.055031,0.087271,0.73979,0.010245,0.038641,0.345928


In [3]:
# Read the default (first) sheet of the AUC workbook; the preview below shows
# the 'Cell name', 'Depmap ID' and 'AUC, DEG-35' columns.
DRC_table = pd.read_excel('DRCtableAUC_merge_DepmapID.xlsx')
DRC_table.head()

Unnamed: 0,Cell name,Depmap ID,"AUC, DEG-35"
0,143B,ACH-001001,0.738025
1,22RV1,ACH-000956,0.543824
2,2313287,ACH-000948,0.362258
3,253J,ACH-000011,0.811752
4,253JBV,ACH-000026,0.713113


In [4]:
def calculate_r(df, target_column, pairwise_dropna=False):
    """Correlate ``target_column`` with every numeric column of ``df``.

    The value computed is the Pearson correlation coefficient *r* (via
    ``np.corrcoef``), not R-squared. No merging happens here: pass in a frame
    that already holds the target column next to the feature columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the target column and the columns to correlate with it.
    target_column : label
        Name of the column every other numeric column is correlated against.
    pairwise_dropna : bool, default False
        ``False`` keeps the original behaviour: a column gets ``np.nan`` as
        soon as it, or the target, contains any missing value.
        ``True`` drops, per column, only the rows where either value is
        missing and correlates the remaining rows.

    Returns
    -------
    dict
        Maps each numeric column label to its r value, or to ``np.nan`` when
        r is undefined (missing data in the default mode, or fewer than two
        distinct values on either side). Non-numeric columns are omitted.
        The target column itself is included.
    """
    r_values = {}

    # Select the target column
    target_data = df[target_column]

    # These facts about the target do not change between columns, so compute
    # them once instead of once per column.
    target_has_missing = target_data.isnull().any()
    target_varies = target_data.nunique() > 1

    for column in df.columns:
        values = df[column]

        # Only numeric columns can be correlated; everything else is skipped.
        if not pd.api.types.is_numeric_dtype(values):
            continue

        if pairwise_dropna:
            # Keep only the rows where both the target and this column are present.
            keep = target_data.notnull() & values.notnull()
            x, y = target_data[keep], values[keep]
            x_varies = x.nunique() > 1
        elif target_has_missing or values.isnull().any():
            r_values[column] = np.nan
            continue
        else:
            x, y = target_data, values
            x_varies = target_varies

        # r is undefined when either side is constant (zero variance), so
        # guard both sides instead of letting np.corrcoef divide by ~0.
        if x_varies and y.nunique() > 1:
            r_values[column] = np.corrcoef(x, y)[0, 1]
        else:
            r_values[column] = np.nan

    return r_values

In [5]:
# Inner-join the dependency table with the AUC table on 'Depmap ID', so only
# IDs present in both tables are kept.
CRISPR_DEP = CRISPR_GENE_Dep.merge(DRC_table, how='inner', on='Depmap ID')
CRISPR_DEP.head()

Unnamed: 0,Depmap ID,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),...,ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009),Cell name,"AUC, DEG-35"
0,ACH-000001,0.06355,0.017803,0.016486,0.066293,0.029758,0.000815,0.023883,0.050448,0.003767,...,0.004142,0.001468,0.003247,0.195908,0.039109,0.011347,0.007978,0.437038,NIHOVCAR3,0.98595
1,ACH-000004,0.01667,0.044559,0.035885,0.026364,0.028644,0.020426,0.000832,0.04591,0.002606,...,0.002883,0.007575,0.005719,0.014641,0.414198,0.006575,0.001914,0.046494,HEL,0.614378
2,ACH-000005,0.040757,0.022681,0.005047,0.008861,0.064044,0.031699,0.004941,0.047061,0.036375,...,0.029695,0.024869,0.017247,0.078232,0.067087,0.028369,0.019612,0.070876,HEL9217,0.55677
3,ACH-000007,0.018149,0.051901,0.006559,0.004373,0.012135,0.003699,0.007898,0.107051,0.066871,...,0.017119,0.001285,0.006273,0.036344,0.464681,0.013396,0.229044,0.51128,LS513,0.671215
4,ACH-000011,0.014322,0.029423,0.008681,0.002485,0.05554,0.04573,0.009439,0.018032,0.016507,...,0.01139,0.009365,0.004227,0.015555,0.195118,0.00963,0.125268,0.265885,253J,0.811752


In [9]:
# Correlate every numeric column with the AUC column and keep the result as a
# one-row frame labelled 'Crispr Dep'.
# Note: CRISPR_DEP is overwritten, so this cell cannot be re-run without
# re-running the merge cell first.
col = 'AUC, DEG-35'
CRISPR_DEP = pd.DataFrame(calculate_r(CRISPR_DEP, col), index=['Crispr Dep'])

In [10]:
# Preview the single row of correlation values.
CRISPR_DEP.head()

Unnamed: 0,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),AADAC (13),...,ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009),"AUC, DEG-35"
Crispr Dep,-0.040015,0.039989,0.092853,0.036169,-0.053086,-0.027674,0.038352,0.028993,0.103771,-0.008365,...,-0.054618,0.056275,0.02434,0.034784,0.076411,-0.071943,0.083824,0.029641,0.047014,1.0


In [11]:
# Save the correlation row. NOTE(review): the 'Results' directory must already
# exist; to_csv does not create it.
CRISPR_DEP.to_csv('Results/All cell lines Crispr Dep.csv')

In [14]:
# Load 'CRISPRGeneEffect.csv' and rename its unnamed first column to
# 'Depmap ID' (the key used for the merges further down).
CRISPR_GENE_Effect = (
    pd.read_csv('CRISPRGeneEffect.csv')
    .rename(columns={'Unnamed: 0': 'Depmap ID'})
)
CRISPR_GENE_Effect.head()

Unnamed: 0,Depmap ID,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),...,ZWILCH (55055),ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009)
0,ACH-000001,-0.122637,0.025881,0.034217,-0.128082,-0.031285,0.338046,-0.006439,-0.093642,0.189186,...,-0.131727,-0.039829,0.179405,0.283552,0.204513,-0.289724,-0.062972,0.07418,0.111244,-0.467908
1,ACH-000004,0.019756,-0.08364,-0.060118,-0.027417,-0.036116,-0.001056,0.312876,-0.086897,0.204434,...,-0.170329,-0.454263,0.194583,0.098989,0.126948,0.032983,-0.410392,0.113156,0.234388,-0.088306
2,ACH-000005,-0.107208,-0.023211,0.200204,0.116039,-0.172227,-0.071294,0.20327,-0.127806,-0.090981,...,-0.301695,-0.454969,-0.061959,-0.036427,0.016602,-0.201273,-0.178877,-0.055349,-0.002161,-0.186842
3,ACH-000007,-0.031027,-0.13785,0.067704,0.107988,0.007992,0.124945,0.049548,-0.220824,-0.165669,...,-0.30339,-0.507272,-0.0254,0.236659,0.07201,-0.100344,-0.46216,-0.001555,-0.325964,-0.48666
4,ACH-000009,0.008888,-0.146566,0.084471,0.089419,0.065109,0.027841,0.087943,-0.161369,0.041121,...,-0.255466,-0.288739,-0.037132,0.261444,-0.062391,-0.112703,-0.598698,0.095877,-0.026742,-0.320759


In [15]:
# Inner-join the gene-effect table with the AUC table on 'Depmap ID'.
CRISPR_GENE = CRISPR_GENE_Effect.merge(DRC_table, how='inner', on='Depmap ID')
CRISPR_GENE.head()

Unnamed: 0,Depmap ID,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),...,ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009),Cell name,"AUC, DEG-35"
0,ACH-000001,-0.122637,0.025881,0.034217,-0.128082,-0.031285,0.338046,-0.006439,-0.093642,0.189186,...,0.179405,0.283552,0.204513,-0.289724,-0.062972,0.07418,0.111244,-0.467908,NIHOVCAR3,0.98595
1,ACH-000004,0.019756,-0.08364,-0.060118,-0.027417,-0.036116,-0.001056,0.312876,-0.086897,0.204434,...,0.194583,0.098989,0.126948,0.032983,-0.410392,0.113156,0.234388,-0.088306,HEL,0.614378
2,ACH-000005,-0.107208,-0.023211,0.200204,0.116039,-0.172227,-0.071294,0.20327,-0.127806,-0.090981,...,-0.061959,-0.036427,0.016602,-0.201273,-0.178877,-0.055349,-0.002161,-0.186842,HEL9217,0.55677
3,ACH-000007,-0.031027,-0.13785,0.067704,0.107988,0.007992,0.124945,0.049548,-0.220824,-0.165669,...,-0.0254,0.236659,0.07201,-0.100344,-0.46216,-0.001555,-0.325964,-0.48666,LS513,0.671215
4,ACH-000011,0.02267,-0.057743,0.079679,0.227512,-0.130448,-0.107818,0.070043,-0.003131,0.006741,...,0.048549,0.070936,0.164518,0.013401,-0.296421,0.067705,-0.232453,-0.347234,253J,0.811752


In [16]:
# Correlate every numeric column with the AUC column and keep the result as a
# one-row frame labelled 'Crispr Gene'.
# Note: CRISPR_GENE is overwritten, so re-running needs the merge cell first.
col = 'AUC, DEG-35'
CRISPR_GENE = pd.DataFrame(calculate_r(CRISPR_GENE, col), index=['Crispr Gene'])

In [17]:
# Preview the single row of correlation values.
CRISPR_GENE.head()

Unnamed: 0,A1BG (1),A1CF (29974),A2M (2),A2ML1 (144568),A3GALT2 (127550),A4GALT (53947),A4GNT (51146),AAAS (8086),AACS (65985),AADAC (13),...,ZWINT (11130),ZXDA (7789),ZXDB (158586),ZXDC (79364),ZYG11A (440590),ZYG11B (79699),ZYX (7791),ZZEF1 (23140),ZZZ3 (26009),"AUC, DEG-35"
Crispr Gene,0.053587,-0.036381,-0.059708,0.002672,0.087945,0.054877,0.00677,0.02762,-0.080665,0.067784,...,0.040814,-0.013957,-0.076354,-0.062923,-0.047656,0.112386,-0.074896,-0.029518,-0.036038,1.0


In [18]:
# Save the correlation row. NOTE(review): the 'Results' directory must already
# exist; to_csv does not create it.
CRISPR_GENE.to_csv('Results/All cell lines Crispr Gene.csv')

In [19]:
# Load 'OmicsCNGene.csv' and rename its unnamed first column to 'Depmap ID'.
# The preview shows NaN in some columns for several rows.
Omics_CN_GENE = (
    pd.read_csv('OmicsCNGene.csv')
    .rename(columns={'Unnamed: 0': 'Depmap ID'})
)
Omics_CN_GENE.head()

Unnamed: 0,Depmap ID,FAM87B (400728),LINC01128 (643837),AL669831.7 (107984850),FAM41C (284593),LINC02593 (100130417),SAMD11 (148398),NOC2L (26155),KLHL17 (339451),PLEKHN1 (84069),...,TXLNGY (246126),KDM5D (8284),AC010889.2 (105377224),TTTY10 (246119),EIF1AY (9086),RPS4Y2 (140032),PRORY (100533178),TTTY13 (83868),TTTY5 (83863),DAZ3 (57054)
0,ACH-000431,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,...,,,,,,,,,,
1,ACH-000358,0.642543,0.642543,0.642543,0.642543,0.642543,0.642543,0.642543,0.642543,0.642543,...,0.666204,0.666204,0.666204,0.666204,0.666204,0.666204,0.666204,0.666204,0.666204,0.666204
2,ACH-000207,0.932707,0.932707,0.932707,0.932707,0.932707,0.932707,0.932707,0.932707,0.932707,...,,,,,,,,,,
3,ACH-002486,1.125419,1.125419,1.125419,1.125419,1.125419,1.125419,1.125419,1.125419,1.125419,...,,,,,,,,,,
4,ACH-001705,0.829411,0.829411,0.829411,0.829411,0.829411,0.829411,0.829411,0.829411,0.829411,...,,,,,,,,,,


In [20]:
# Inner-join the copy-number table with the AUC table on 'Depmap ID'.
# Note: this replaces the raw table held in Omics_CN_GENE, which is why the
# later TP53 cells read 'OmicsCNGene.csv' again.
Omics_CN_GENE = Omics_CN_GENE.merge(DRC_table, how='inner', on='Depmap ID')
Omics_CN_GENE.head()

Unnamed: 0,Depmap ID,FAM87B (400728),LINC01128 (643837),AL669831.7 (107984850),FAM41C (284593),LINC02593 (100130417),SAMD11 (148398),NOC2L (26155),KLHL17 (339451),PLEKHN1 (84069),...,AC010889.2 (105377224),TTTY10 (246119),EIF1AY (9086),RPS4Y2 (140032),PRORY (100533178),TTTY13 (83868),TTTY5 (83863),DAZ3 (57054),Cell name,"AUC, DEG-35"
0,ACH-000431,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,0.938003,...,,,,,,,,,NCIH1694,0.875704
1,ACH-000680,0.752561,0.752561,0.752561,0.752561,0.752561,0.752561,0.752561,0.752561,0.752561,...,,,,,,,,,SW948,0.97066
2,ACH-000750,0.710801,0.710801,0.710801,0.710801,0.710801,0.710801,0.710801,0.710801,0.710801,...,,,,,,,,,LOXIMVI,0.76878
3,ACH-000759,0.589654,0.589654,0.589654,0.589654,0.589654,0.589654,0.589654,0.589654,0.589654,...,,,,,,,,,MDAMB175VII,0.427436
4,ACH-000800,1.278302,1.278302,1.278302,1.278302,1.278302,1.278302,1.278302,1.278302,1.278302,...,0.337099,0.315289,0.315289,0.315289,0.315289,0.315289,0.315289,0.382629,NCIH446,0.527345


In [21]:
# Correlate every numeric column with the AUC column and keep the result as a
# one-row frame labelled 'Omics_CN_Gene'.
# Note: Omics_CN_GENE is overwritten, so re-running needs the load and merge
# cells first.
col = 'AUC, DEG-35'
Omics_CN_GENE = pd.DataFrame(calculate_r(Omics_CN_GENE, col), index=['Omics_CN_Gene'])

In [22]:
# Preview the correlation row; columns that contained missing values show NaN.
Omics_CN_GENE.head()

Unnamed: 0,FAM87B (400728),LINC01128 (643837),AL669831.7 (107984850),FAM41C (284593),LINC02593 (100130417),SAMD11 (148398),NOC2L (26155),KLHL17 (339451),PLEKHN1 (84069),PERM1 (84808),...,KDM5D (8284),AC010889.2 (105377224),TTTY10 (246119),EIF1AY (9086),RPS4Y2 (140032),PRORY (100533178),TTTY13 (83868),TTTY5 (83863),DAZ3 (57054),"AUC, DEG-35"
Omics_CN_Gene,0.125191,0.121578,0.11984,0.115097,0.115097,0.120741,0.120985,0.119367,0.119837,0.116752,...,,,,,,,,,,1.0


In [23]:
# Save the correlation row. NOTE(review): the 'Results' directory must already
# exist; to_csv does not create it.
Omics_CN_GENE.to_csv('Results/All cell lines Omics_CN_GENE.csv')

In [24]:
# Load 'OmicsExpressionProteinCodingGenesTPMLogp1.csv' and rename its unnamed
# first column to 'Depmap ID'.
Omics_EPCG_TPMlogp1 = (
    pd.read_csv('OmicsExpressionProteinCodingGenesTPMLogp1.csv')
    .rename(columns={'Unnamed: 0': 'Depmap ID'})
)
Omics_EPCG_TPMlogp1.head()

Unnamed: 0,Depmap ID,TSPAN6 (7105),TNMD (64102),DPM1 (8813),SCYL3 (57147),C1orf112 (55732),FGR (2268),CFH (3075),FUCA2 (2519),GCLC (2729),...,H3C2 (8358),H3C3 (8352),AC098582.1 (8916),DUS4L-BCAP29 (115253422),C8orf44-SGK3 (100533105),ELOA3B (728929),NPBWR1 (2831),ELOA3D (100506888),ELOA3 (162699),CDR1 (1038)
0,ACH-001113,4.331992,0.0,7.36466,2.792855,4.471187,0.028569,1.226509,3.044394,6.500005,...,2.689299,0.189034,0.201634,2.130931,0.555816,0.0,0.275007,0.0,0.0,0.0
1,ACH-001289,4.567424,0.584963,7.106641,2.543496,3.50462,0.0,0.189034,3.813525,4.221877,...,1.286881,1.049631,0.321928,1.464668,0.632268,0.0,0.014355,0.0,0.0,0.0
2,ACH-001339,3.15056,0.0,7.379118,2.333424,4.228049,0.056584,1.31034,6.687201,3.682573,...,0.594549,1.097611,0.831877,2.946731,0.475085,0.0,0.084064,0.0,0.0,0.042644
3,ACH-001538,5.08534,0.0,7.154211,2.545968,3.084064,0.0,5.86839,6.165309,4.489928,...,0.214125,0.632268,0.298658,1.641546,0.443607,0.0,0.028569,0.0,0.0,0.0
4,ACH-000242,6.729417,0.0,6.537917,2.456806,3.867896,0.799087,7.208478,5.570159,7.127117,...,1.117695,2.358959,0.084064,1.910733,0.0,0.0,0.464668,0.0,0.0,0.0


In [25]:
# Inner-join the expression table with the AUC table on 'Depmap ID'.
Omics_EPCG = Omics_EPCG_TPMlogp1.merge(DRC_table, how='inner', on='Depmap ID')
Omics_EPCG.head()

Unnamed: 0,Depmap ID,TSPAN6 (7105),TNMD (64102),DPM1 (8813),SCYL3 (57147),C1orf112 (55732),FGR (2268),CFH (3075),FUCA2 (2519),GCLC (2729),...,AC098582.1 (8916),DUS4L-BCAP29 (115253422),C8orf44-SGK3 (100533105),ELOA3B (728929),NPBWR1 (2831),ELOA3D (100506888),ELOA3 (162699),CDR1 (1038),Cell name,"AUC, DEG-35"
0,ACH-001113,4.331992,0.0,7.36466,2.792855,4.471187,0.028569,1.226509,3.044394,6.500005,...,0.201634,2.130931,0.555816,0.0,0.275007,0.0,0.0,0.0,LC1SQSF,0.992851
1,ACH-001289,4.567424,0.584963,7.106641,2.543496,3.50462,0.0,0.189034,3.813525,4.221877,...,0.321928,1.464668,0.632268,0.0,0.014355,0.0,0.0,0.0,COGAR359,0.942068
2,ACH-000242,6.729417,0.0,6.537917,2.456806,3.867896,0.799087,7.208478,5.570159,7.127117,...,0.084064,1.910733,0.0,0.0,0.464668,0.0,0.0,0.0,RT4,0.684817
3,ACH-000233,0.056584,0.0,6.094236,3.971773,3.731183,0.028569,6.093602,3.033863,3.422233,...,0.641546,3.157044,0.226509,0.0,0.0,0.056584,0.0,0.0,DEL,0.797382
4,ACH-000461,4.017031,0.0,6.534497,2.226509,3.02148,0.028569,0.084064,5.588565,6.380937,...,0.286881,1.304511,0.422233,0.0,0.014355,0.028569,0.0,0.0,SNU1196,0.919561


In [26]:
# Correlate every numeric column with the AUC column and keep the result as a
# one-row frame labelled 'Omics_EPCG'.
# Note: Omics_EPCG is overwritten, so re-running needs the merge cell first.
col = 'AUC, DEG-35'
Omics_EPCG = pd.DataFrame(calculate_r(Omics_EPCG, col), index=['Omics_EPCG'])

In [27]:
# Preview the single row of correlation values.
Omics_EPCG.head()

Unnamed: 0,TSPAN6 (7105),TNMD (64102),DPM1 (8813),SCYL3 (57147),C1orf112 (55732),FGR (2268),CFH (3075),FUCA2 (2519),GCLC (2729),NFYA (4800),...,H3C3 (8352),AC098582.1 (8916),DUS4L-BCAP29 (115253422),C8orf44-SGK3 (100533105),ELOA3B (728929),NPBWR1 (2831),ELOA3D (100506888),ELOA3 (162699),CDR1 (1038),"AUC, DEG-35"
Omics_EPCG,0.075598,-0.119561,0.123418,0.000747,0.122261,-0.012976,0.101214,0.082543,0.056817,-0.033863,...,-0.026925,-0.112308,0.05763,-0.053566,0.003002,0.049504,0.021936,0.026022,0.064571,1.0


In [28]:
# Save the correlation row. NOTE(review): the 'Results' directory must already
# exist; to_csv does not create it.
Omics_EPCG.to_csv('Results/All cell lines Omics_EPCG.csv')

In [30]:
# Stack the four one-row correlation frames into one table (one row per data
# set). Columns are aligned by label, so a column that is absent from a data
# set is filled with NaN for that row.
All_dfs = [CRISPR_DEP, CRISPR_GENE, Omics_CN_GENE, Omics_EPCG]
result = pd.concat(All_dfs)
result.to_csv('Results/All cell lines Combined.csv')

In [31]:
# Flip the combined table so each column label becomes a row and each data set
# a column, then save that layout.
# Note: `result` is reassigned, so running this cell twice flips it back.
result = result.T
result.to_csv('Results/All cell lines Combined2.csv')

In [40]:
# Read the 'TP53 MUT' sheet of the AUC workbook. Its ID column is spelled
# 'Depmap Id', so rename it to the 'Depmap ID' key used by the merges.
DRC_TP53_MUT = (
    pd.read_excel('DRCtableAUC_merge_DepmapID.xlsx', sheet_name='TP53 MUT')
    .rename(columns={'Depmap Id': 'Depmap ID'})
)

In [43]:
# TP53 MUT subset: join the dependency table with the subset's AUC values,
# correlate every numeric column with the AUC column, and save the one-row result.
col = 'AUC, DEG-35'
CRISPR_DEP = CRISPR_GENE_Dep.merge(DRC_TP53_MUT, how='inner', on='Depmap ID')
CRISPR_DEP = pd.DataFrame(calculate_r(CRISPR_DEP, col), index=['Crispr Dep'])
CRISPR_DEP.to_csv('Results/TP53 MUT Crispr Dep.csv')

In [44]:
# TP53 MUT subset: join the gene-effect table with the subset's AUC values,
# correlate every numeric column with the AUC column, and save the one-row result.
CRISPR_GENE = pd.merge(CRISPR_GENE_Effect, DRC_TP53_MUT, on = 'Depmap ID', how = 'inner')
col = 'AUC, DEG-35'
CRISPR_GENE = calculate_r(CRISPR_GENE, col)
CRISPR_GENE = pd.DataFrame(data=CRISPR_GENE, index=[0])
CRISPR_GENE = CRISPR_GENE.rename(index={0:'Crispr Gene'})
# Fix: write CRISPR_GENE here. The cell previously wrote CRISPR_DEP, so the
# "Crispr Gene" file was a copy of the "Crispr Dep" results.
CRISPR_GENE.to_csv('Results/TP53 MUT Crispr Gene.csv')

In [47]:
# TP53 MUT subset, copy-number data. The raw table is read again because
# Omics_CN_GENE was overwritten by earlier cells.
col = 'AUC, DEG-35'
Omics_CN_GENE = (
    pd.read_csv('OmicsCNGene.csv')
    .rename(columns={'Unnamed: 0': 'Depmap ID'})
    .merge(DRC_TP53_MUT, how='inner', on='Depmap ID')
)
Omics_CN_GENE = pd.DataFrame(calculate_r(Omics_CN_GENE, col), index=['Omics CN Gene'])
Omics_CN_GENE.to_csv('Results/TP53 MUT Omics_CN_GENE.csv')

In [48]:
# TP53 MUT subset: join the expression table with the subset's AUC values,
# correlate every numeric column with the AUC column, and save the one-row result.
col = 'AUC, DEG-35'
Omics_EPCG = Omics_EPCG_TPMlogp1.merge(DRC_TP53_MUT, how='inner', on='Depmap ID')
Omics_EPCG = pd.DataFrame(calculate_r(Omics_EPCG, col), index=['Omics EPCG'])
Omics_EPCG.to_csv('Results/TP53 MUT Omics_EPCG.csv')

In [49]:
# Stack the four TP53 MUT correlation rows, flip the table so each data set is
# a column, and save it.
All_dfs = [CRISPR_DEP, CRISPR_GENE, Omics_CN_GENE, Omics_EPCG]
result = pd.concat(All_dfs).T
result.to_csv('Results/TP53 MUT Combined.csv')

In [50]:
# Read the 'TP53 WT' sheet of the AUC workbook.
# NOTE(review): unlike the 'TP53 MUT' sheet, no 'Depmap Id' -> 'Depmap ID'
# rename is applied here; the merges below need a 'Depmap ID' column, so
# confirm this sheet already spells it that way.
DRC_TP53_WT = pd.read_excel('DRCtableAUC_merge_DepmapID.xlsx', 'TP53 WT')

In [51]:
# TP53 WT subset: join the dependency table with the subset's AUC values,
# correlate every numeric column with the AUC column, and save the one-row result.
col = 'AUC, DEG-35'
CRISPR_DEP = CRISPR_GENE_Dep.merge(DRC_TP53_WT, how='inner', on='Depmap ID')
CRISPR_DEP = pd.DataFrame(calculate_r(CRISPR_DEP, col), index=['Crispr Dep'])
CRISPR_DEP.to_csv('Results/TP53 WT Crispr Dep.csv')

In [52]:
# TP53 WT subset: join the gene-effect table with the subset's AUC values,
# correlate every numeric column with the AUC column, and save the one-row result.
CRISPR_GENE = pd.merge(CRISPR_GENE_Effect, DRC_TP53_WT, on = 'Depmap ID', how = 'inner')
col = 'AUC, DEG-35'
CRISPR_GENE = calculate_r(CRISPR_GENE, col)
CRISPR_GENE = pd.DataFrame(data=CRISPR_GENE, index=[0])
CRISPR_GENE = CRISPR_GENE.rename(index={0:'Crispr Gene'})
# Fix: write CRISPR_GENE here. The cell previously wrote CRISPR_DEP, so the
# "Crispr Gene" file was a copy of the "Crispr Dep" results.
CRISPR_GENE.to_csv('Results/TP53 WT Crispr Gene.csv')

In [53]:
# TP53 WT subset, copy-number data. The raw table is read again because
# Omics_CN_GENE was overwritten by earlier cells.
col = 'AUC, DEG-35'
Omics_CN_GENE = (
    pd.read_csv('OmicsCNGene.csv')
    .rename(columns={'Unnamed: 0': 'Depmap ID'})
    .merge(DRC_TP53_WT, how='inner', on='Depmap ID')
)
Omics_CN_GENE = pd.DataFrame(calculate_r(Omics_CN_GENE, col), index=['Omics CN Gene'])
Omics_CN_GENE.to_csv('Results/TP53 WT Omics_CN_GENE.csv')

In [54]:
# TP53 WT subset: join the expression table with the subset's AUC values,
# correlate every numeric column with the AUC column, and save the one-row result.
col = 'AUC, DEG-35'
Omics_EPCG = Omics_EPCG_TPMlogp1.merge(DRC_TP53_WT, how='inner', on='Depmap ID')
Omics_EPCG = pd.DataFrame(calculate_r(Omics_EPCG, col), index=['Omics EPCG'])
Omics_EPCG.to_csv('Results/TP53 WT Omics_EPCG.csv')

In [55]:
# Stack the four TP53 WT correlation rows, flip the table so each data set is
# a column, and save it.
All_dfs = [CRISPR_DEP, CRISPR_GENE, Omics_CN_GENE, Omics_EPCG]
result = pd.concat(All_dfs).T
result.to_csv('Results/TP53 WT Combined.csv')

In [2]:
# Reload the transposed all-cell-lines table written earlier.
# NOTE(review): the file was saved with its index and is read without
# index_col, so the row labels come back as an ordinary 'Unnamed: 0' column.
df1 = pd.read_csv('Results/All cell lines Combined2.csv')

In [3]:
# Preview the reloaded table: one row per column label, one column per data set.
df1.head()

Unnamed: 0.1,Unnamed: 0,Crispr Dep,Crispr Gene,Omics_CN_Gene,Omics_EPCG
0,A1BG (1),-0.040015,0.053587,-0.037946,-0.093893
1,A1CF (29974),0.039989,-0.036381,-0.011779,-0.079961
2,A2M (2),0.092853,-0.059708,-0.052906,-0.011244
3,A2ML1 (144568),0.036169,0.002672,-0.053818,-0.038121
4,A3GALT2 (127550),-0.053086,0.087945,0.031896,0.050417


In [4]:
# Row count before rows with missing values are removed.
print(len(df1))

25149


In [5]:
# Drop every row that has a NaN in any column, then report how many rows
# remain. df1 is overwritten, so the unfiltered table must be reloaded to
# undo this.
df1 = df1.dropna()
print(len(df1))

16919


In [7]:
# Save the NaN-free table. NOTE(review): to_csv writes the integer index as
# well, so the file gains another unnamed leading column.
df1.to_csv('Results/All cell lines Combined_2 no NAN.csv')

In [23]:
# Reload the combined TP53 MUT table; this reuses (and overwrites) df1.
# NOTE(review): read without index_col, so the row labels come back as an
# ordinary 'Unnamed: 0' column.
df1 = pd.read_csv('Results/TP53 MUT Combined.csv')

In [24]:
# Normalise the data-set column names to 'Omics_CN_Gene' / 'Omics_EPCG'.
# Both spellings are mapped: 'Crispr Gene.1' / 'Crispr Gene.2' appear in files
# where the rows shared the label 'Crispr Gene', while the producer cells in
# this notebook now label the rows 'Omics CN Gene' / 'Omics EPCG'. rename()
# ignores keys that are absent, so either file layout ends up the same.
df1 = df1.rename(columns={
    'Crispr Gene.1': 'Omics_CN_Gene',
    'Crispr Gene.2': 'Omics_EPCG',
    'Omics CN Gene': 'Omics_CN_Gene',
    'Omics EPCG': 'Omics_EPCG',
})

In [25]:
# Preview the table after the column rename.
df1.head()

Unnamed: 0.1,Unnamed: 0,Crispr Dep,Crispr Gene,Omics_CN_Gene,Omics_EPCG
0,A1BG (1),0.118788,-0.080818,-0.044104,-0.04488
1,A1CF (29974),-0.01038,0.033377,0.021461,-0.138938
2,A2M (2),0.064295,-0.021111,-0.102846,0.007831
3,A2ML1 (144568),0.011556,0.023815,-0.102391,-0.057117
4,A3GALT2 (127550),-0.035414,0.093598,0.024758,0.011895


In [26]:
# Row count before rows with missing values are removed.
print(len(df1))

25149


In [27]:
# Drop every row that has a NaN in any column, then report how many rows
# remain. df1 is overwritten, so the unfiltered table must be reloaded to
# undo this.
df1 = df1.dropna()
print(len(df1))

16918


In [28]:
# Save the NaN-free TP53 MUT table. NOTE(review): to_csv writes the integer
# index as well, so the file gains another unnamed leading column.
df1.to_csv('Results/TP53 MUT Combined no NAN.csv')

In [14]:
# Reload the combined TP53 WT table; this reuses (and overwrites) df1.
# NOTE(review): read without index_col, so the row labels come back as an
# ordinary 'Unnamed: 0' column.
df1 = pd.read_csv('Results/TP53 WT Combined.csv')

In [15]:
# Preview the table as loaded, before any column rename.
df1.head()

Unnamed: 0.1,Unnamed: 0,Crispr Dep,Crispr Gene,Crispr Gene.1,Crispr Gene.2
0,A1BG (1),-0.160731,0.169842,0.004919,-0.114786
1,A1CF (29974),0.063812,-0.061233,-0.11116,-0.027236
2,A2M (2),0.076446,-0.056562,0.050132,0.041095
3,A2ML1 (144568),0.013304,0.009898,0.046604,-0.125952
4,A3GALT2 (127550),-0.015246,0.037298,-0.029117,0.123618


In [18]:
# Normalise the data-set column names to 'Omics_CN_Gene' / 'Omics_EPCG'.
# Both spellings are mapped: 'Crispr Gene.1' / 'Crispr Gene.2' appear in files
# where the rows shared the label 'Crispr Gene', while the producer cells in
# this notebook now label the rows 'Omics CN Gene' / 'Omics EPCG'. rename()
# ignores keys that are absent, so either file layout ends up the same.
df1 = df1.rename(columns={
    'Crispr Gene.1': 'Omics_CN_Gene',
    'Crispr Gene.2': 'Omics_EPCG',
    'Omics CN Gene': 'Omics_CN_Gene',
    'Omics EPCG': 'Omics_EPCG',
})

In [19]:
# Preview the table after the column rename.
df1.head()

Unnamed: 0.1,Unnamed: 0,Crispr Dep,Crispr Gene,Omics_CN_Gene,Omics_EPCG
0,A1BG (1),-0.160731,0.169842,0.004919,-0.114786
1,A1CF (29974),0.063812,-0.061233,-0.11116,-0.027236
2,A2M (2),0.076446,-0.056562,0.050132,0.041095
3,A2ML1 (144568),0.013304,0.009898,0.046604,-0.125952
4,A3GALT2 (127550),-0.015246,0.037298,-0.029117,0.123618


In [20]:
# Row count before rows with missing values are removed.
print(len(df1))

25149


In [21]:
# Drop every row that has a NaN in any column, then report how many rows
# remain. df1 is overwritten, so the unfiltered table must be reloaded to
# undo this.
df1 = df1.dropna()
print(len(df1))

16977


In [22]:
# Save the NaN-free TP53 WT table. NOTE(review): to_csv writes the integer
# index as well, so the file gains another unnamed leading column.
df1.to_csv('Results/TP53 WT Combined no NAN.csv')