# Pan Cancer EGFR 

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
import statsmodels.stats.multitest
#sys.path.append('C:\\Users\\brittany\\henderson\\GitHub\\GBM_for_CPTAC\\')
#import cis_functions as f
#sys.path.append(R'C:\Users\Lindsey\Documents\GitHub\WhenMutationsDontMatter\\')
#import plot_utils as p


import cptac
import cptac.utils as u

  import pandas.util.testing as tm


In [2]:

'''
@Param df: Dataframe. Contains numeric values (such as proteomics) for linear regression
@Param label_column: String. Name of column that will be your x axis and will be compared to all values in df unless otherwise specified. 
@Param alpha: significant level
@Param comparison_columns: columns that will be looped through and used as y axis for linear regression. 
All other columns beside label column unless specified here. 
@Param correction_method: String. Specifies method of adjustment for multiple testing. See -
https://www.statsmodels.org/stable/generated/statsmodels.stats.multitest.multipletests.html
    - for documentation and available methods.

This function will return a data frame will all significant linear regressions. The data frame includes the comparison, slope, R-squared, and P-value. 
'''
def wrap_lin_regression(df,label_column, alpha=.05,comparison_columns=None,correction_method='bonferroni',return_all = True):
    

    
    '''If no comparison columns specified, use all columns except the specified labed column'''
    if not comparison_columns:
        comparison_columns = list(df.columns)
        comparison_columns.remove(label_column)
    '''Store comparisons,p-values, correlation in their own array'''
    comparisons = []
    pvals = []
    correlation=[]
    
    
    '''Format results in a pandas dataframe'''
    newdf = pd.DataFrame(columns=['Comparison','Correlation','P_value'])
    for inter_gene in comparison_columns:
        #create subset df with interacting gene/ gene (otherwise drop NaN drops everything)
        df_subset = df[[label_column,inter_gene]]
        #do a linear regression to see if it's a meaningful association
        #dropna will remove rows with nan
        df_subset = df_subset.dropna(axis=0, how="any")
        count_row = df_subset.shape[0]
        if count_row > 20:
            x1 = df_subset[[label_column]].values
            y1 = df_subset[[inter_gene]].values
            x1 = x1[:,0]
            y1 = y1[:,0]

            #slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x1,y1)
            corr, pval = scipy.stats.pearsonr(x1,y1)

            comparisons.append(inter_gene)
            pvals.append(pval)
            correlation.append(corr)
            #slope_val.append(slope)
        
    '''Correct for multiple testing to determine if each comparison meets the new cutoff'''
    results = statsmodels.stats.multitest.multipletests(pvals=pvals, alpha=alpha, method=correction_method)
    reject = results[0]
        
    if return_all:
        for i in range(0,len(comparisons)):
            newdf = newdf.append({'Comparison': comparisons[i],"Correlation": correlation[i],'P_value': pvals[i]}, ignore_index=True)
        
    '''Else only add significant comparisons'''
    if (return_all == False):
            for i in range(0, len(reject)):
                if reject[i]:
                    newdf = newdf.append({'Comparison': comparisons[i],"Slope": slope_val[i], 'R_squared': r_squared[i], 'P_value': pvals[i]}, ignore_index=True)
                    
    '''Sort dataframe by ascending p-value'''
    newdf = newdf.sort_values(by='P_value', ascending=True)
    '''If results df is not empty, return it, else return None'''
    return newdf



       
       

In [3]:


interacting_genes = ['CD81', 'CLEC4G', 'NFKBIA', 'MAPK1', 'FOXC1', 'FHIT', 'RAPGEF1', 'SP1', 'MAG', 'ADH7', 'PIP5K2C', 'TMSB4X', 'COL6A3', 'CRK', 'SLC39A6', 'TGFBR2', 'ELK4', 'PRKCD', 'PLD2', 'EPS15', 'GNG11', 'RASSF5', 'GPX2', 'TEAD4', 'ENC1', 'SLC5A3', 'PIK3C2G', 'NOX4', 'CSPG4', 'COMP', 'IL10RB', 'TSC2', 'LBR', 'GSK3B', 'COL4A1', 'CDK2', 'SLC6A9', 'PIK3CA', 'CACNG3', 'CYP2C19', 'BCAN', 'PPP1R12C', 'PIK3CG', 'VCL', 'VTN', 'ACADM', 'COL11A2', 'TCF7L2', 'HSPA8', 'TYRO3', 'AMIGO2', 'IFNA7', 'SFRP1', 'IL1A', 'GNAI2', 'HAVCR1', 'PRKD1', 'SLC2A5', 'CACNB4', 'PRKD2', 'NR3C1', 'ALK', 'ICAM2', 'G6PC3', 'SQSTM1', 'CREB5', 'ITGA5', 'JUNB', 'CHRM2', 'NTF4', 'PLA2G4E', 'SLC39A8', 'TGFB2', 'MYL7', 'CDH3', 'IFNGR1', 'CES2', 'SKP1', 'PLA1A', 'LDHA', 'STAT5', 'MAPT', 'SLC39A9', 'IL6', 'SLC5A8', 'IFNG', 'SLC39A13', 'MAP4K3', 'PPP2R1B', 'ITGAE', 'HDC', 'PLA2G4A', 'FOXM1', 'HLA-DOA', 'SEMA6A', 'IRS4', 'PLCE1', 'APC2', 'CTNNA2', 'NOTCH3', 'CDK6', 'ANGPT1', 'GRB10', 'LRP6', 'NFKB1', 'EDN2', 'UGT1A4', 'LAMTOR3', 'LAMB3', 'PLTP', 'IKKA', 'APOA2', 'TNFSF13B', 'TOP1', 'THBS3', 'FZD5', 'RPS6KA5', 'RPS6KB2', 'TEK', 'HSPA6', 'PLCB3', 'BCL2L11', 'MYL9', 'COL9A2', 'PSAT1', 'NOTCH4', 'TEAD2', 'SGK3', 'RGMA', 'HLA-F', 'IFNLR1', 'SLC6A3', 'NFATC1', 'EFNA1', 'CRABP1', 'PLA2G16', 'HIF3A', 'CACNB2', 'WNT5A', 'IL10RA', 'PRLR', 'IL11RA', 'MYH10', 'ARRB2', 'RAC2', 'FLNB', 'MAPK6', 'SMAD2', 'CBR1', 'MKNK2', 'PIK3R2', 'TSC1', 'CALML6', 'PPP3R2', 'TPM3', 'CEBPB', 'PFKL', 'CDH18', 'C1QB', 'DVL3', 'ITGA6', 'FGD1', 'ROS', 'HEY1', 'ZAP70', 'PFKFB1', 'PPP2R2B', 'BCL2L1', 'PIK3C2A', 'ACTG1', 'MYLPF', 'KLK15', 'LAMB4', 'CTSB', 'AJUBA', 'SLC39A4', 'TXN', 'DBI', 'RALA', 'HLA-DRA', 'TTC9', 'CSF2RA', 'COL1A2', 'CASP3', 'WNT3', 'CSF2RB', 'IFNAR2', 'SLIT2', 'ENPP3', 'JAG1', 'CUL1', 'FAM91A1', 'GNGT1', 'NFKB2', 'UGT2B4', 'CFL2', 'CDH17', 'BCL2', 'SGK2', 'SRMS', 'TIE1', 'VPS16', 'PIP5K1B', 'SLC5A12', 'GATA4', 'RALGDS', 'C3orf10', 'RASGRP2', 'TNXB', 'LAMB2', 'HLA-A', 'CPT2', 'LAMC1', 'COL5A2', 'CXCL10', 'NOG', 'CSF1R', 'CDH4', 'IL11', 'ABCG5', 'ITGAL', 'PRKCG', 'HNF1A', 'RALB', 'JAK1', 'AKT1S1', 'HLA-DQA2', 'CSF3R', 'MDM4', 'MGST3', 'ADAM17', 'GCLC', 'PPP2CB', 'IKBKG', 'USF', 'FGF23', 'ABCC4', 'ITGB7', 'RFC4', 'TCL1B', 'SLC6A17', 'PRKAA2', 'ANG3', 'LPAR6', 'NTRK2', 'FOXC2', 'PPP2R5E', 'SYNGAP1', 'SOS1', 'IFNAR1', 'IL21R', 'TGFB3', 'WAS', 'GSTM5', 'PTPN7', 'APOC3', 'AQP1', 'IL27RA', 'CACNG5', 'CASP8', 'ELK1', 'DLL4', 'JAK3', 'ACOX1', 'PARVB', 'GJA1', 'PRB1', 'MAP2K6', 'THBS1', 'FOXO3', 'RGL2', 'CCKBR', 'S100P', 'IFNA10', 'SLC5A4', 'WNT3A', 'ARHGEF28', 'GNG2', 'CDH15', 'IFNA13', 'SOS2', 'JMJD7-PLA2G4B', 'BRAP', 'GPAM', 'RAP1A', 'HLA-DMA', 'PPM1B', 'NFATC3', 'GNA13', 'PRKAR2B', 'PPP1R14A', 'SLC5A7', 'C1QBP', 'GYS1', 'PLOD2', 'WASF1', 'ABCC5', 'HLA-DRB1', 'RASGRP3', 'G6PC2', 'VEGFC', 'HLA-G', 'EPOR', 'SMARCA1', 'COL9A1', 'CYP7A1', 'CBL', 'GSN', 'UGT2B7', 'PCK1', 'CACNA1B', 'EPHA4', 'CXCL8', 'PELO', 'PGK2', 'CD209', 'FGF17', 'GNG7', 'HSPA1L', 'ITGAV', 'TNFRSF1A', 'FN1', 'CHRM5', 'NOS1', 'CACNG4', 'MAPK7', 'PRKAR2A', 'CTNNA1', 'STK4', 'PLA2G12B', 'ACTR2', 'RICTOR', 'NOTCH1', 'CLDN1', 'EHHADH', 'BHLHE41', 'CYP1A2', 'S100A6', 'FTL', 'SMC1A', 'PLCG1', 'LRRC8A', 'ERK2', 'CREB3L1', 'SMAD3', 'RAC3', 'EPB41L4B', 'BDKRB2', 'EGR1', 'VPS11', 'SLC2A2', 'SLC9A1', 'FZD10', 'PRKAR1B', 'KAT5', 'ITGA3', 'CCNE1', 'SLC6A8', 'MAPK11', 'PPP2R1A', 'REL', 'DUSP1', 'HEYL', 'PRKCH', 'RTN4R', 'COL4A2', 'FAS', 'ERBB2', 'DIAPH1', 'RAB7A', 'DUSP3', 'MAPKAPK3', 'CAV1', 'CUL4A', 'AIP', 'BCR', 'WASF2', 'NRG1', 'ITGA1', 'BMP4', 'CYCS', 'SNAI2', 'BRIX1', 'PDYN', 'ARHGEF4', 'CACNA1I', 'RAPGEF5', 'CACNA2D4', 'VCAN', 'PPP2CA', 'RAD51', 'CACNA2D3', 'RXRB', 'GNG3', 'CALML5', 'CAB39', 'MAPK14', 'GSTA2', 'ITGA9', 'MMP2', 'F2', 'KITLG', 'FIGF', 'RASGRP4', 'VPS33A', 'ARNT', 'MAPK15', 'WNT7B', 'MSN', 'VAV2', 'CDH11', 'PGD', 'COL4A5', 'DNER', 'CD48', 'PPP5C', 'IL3', 'PARP1', 'EFNA2', 'PLA2G10', 'PRDX1', 'PARVG', 'OMG', 'EFNA5', 'UPK3A', 'PPP2R3B', 'APEX1', 'GPR153', 'ECSIT', 'ITGB1', 'PRKCZ', 'BAD', 'DDB1', 'CACNA1A', 'RAB9A', 'AXIN2', 'ABHD2', 'IGF1R', 'PPP2R2D', 'SLC7A5', 'MAP3K12', 'ITGA4', 'SOCS3', 'PPP1R12B', 'NCKAP1', 'PLA2G4F', 'CDH12', 'IRS2', 'SLC39A7', 'PTEN', 'LRP5', 'RXRA', 'PIK3R6', 'CXCL2', 'PPP2R2C', 'ALDH3A1', 'ANGPT4', 'CTNNA3', 'CTSL', 'DKK4', 'CLTCL1', 'UGT1A1', 'COL6A1', 'MMP7', 'PDK1', 'TFAP2A', 'CYP2B6', 'RXRG', 'SHC2', 'NFI', 'LEF1', 'AGER', 'SLC6A16', 'TLR4', 'MPL', 'PIK3C3', 'ATR', 'WNT10B', 'SLC2A12', 'SLC6A5', 'WNT11', 'PDLIM5', 'GNG8', 'PLCB1', 'TAB1', 'SRC', 'FGF10', 'PRKACG', 'ANGPT2', 'MYLK3', 'TNN', 'ATF4', 'SLC39A2', 'TTBK1', 'SLC6A14', 'PPP1CC', 'MAX', 'BUB1B-PAK6', 'GNA11', 'CCND3', 'CDH5', 'MGST1', 'ALDOB', 'G6PD', 'NCOA2', 'PFN1', 'RASGRF1', 'TSPAN8', 'DSCC1', 'TAOK2', 'MAP3K13', 'ITGA11', 'PPM1A', 'SGK1', 'CDKN1A', 'PPARA', 'NOS3', 'CDH8', 'MAPK13', 'HDAC7', 'CHUK', 'SDS', 'MCL1', 'FRAT2', 'BRAF', 'MAP3K11', 'CFL1', 'PLA2G4D', 'MGST2', 'IL9R', 'IL1B', 'FGFR1', 'ABL1', 'ARL5B', 'PTPRZ1', 'PDGFC', 'NTN1', 'IGF1', 'RHOC', 'DVL2', 'GADD45G', 'IL17B', 'YES1', 'MAP3K14', 'FOLR1', 'PLK2', 'DUSP16', 'GRB7', 'ERAS', 'ENAH', 'ITGA2B', 'TGFBR3', 'SLC5A9', 'SLCO2B1', 'SCD', 'SLC5A2', 'FOXO1', 'MAP3K6', 'SLC5A5', 'SSH1', 'DUSP2', 'DNAJC15', 'MAPKAP1', 'ORAI1', 'PLCB4', 'SLC2A6', 'GSR', 'ELAVL1', 'MAPK8', 'SOD3', 'CHRM1', 'TNS4', 'TLN2', 'FGF3', 'AKT1', 'CREB3', 'HEY2', 'PIK3AP1', 'SDSL', 'FKBP1A', 'MYL10', 'PTPRD', 'TESK2', 'STOM', 'FASN', 'IL6R', 'PRKAB2', 'CHRM3', 'CDH2', 'MMP12', 'VPS39', 'IL12B', 'GADD45A', 'BAIAP2', 'ITGAM', 'NBN', 'GNB3', 'TLN1', 'KEAP1', 'VASP', 'FLT4', 'CSNK1A1', 'LAMA2', 'IFNA6', 'CHAD', 'MAPK8IP1', 'VEGFB', 'HK2', 'VIL2', 'MP1', 'ARHGEF6', 'CLTB', 'MYL2', 'HSPA2', 'SLC39A5', 'PIK3C2B', 'PPP3R1', 'VWF', 'GADD45B', 'NEDD4', 'TP53', 'MST1', 'PAK7', 'ACTB', 'SMAD7', 'TGFA', 'ABCG8', 'SOX9', 'MSH2', 'IL2RB', 'SETD2', 'PTGES3', 'MYC', 'SLC7A11', 'FLT3', 'SHC3', 'CSNK2A2', 'RNF144B', 'LAT', 'HLA-DQB1', 'IFNA4', 'HES5', 'EPO', 'ETNK2', 'RBX1', 'FGF12', 'MAPK4', 'CSN2', 'CSNK1A1L', 'IL15RA', 'PLA2G2C', 'RASSF1', 'CDKN2B', 'RAD50', 'HES1', 'MYL12B', 'CCND2', 'LPAR5', 'WNT9B', 'ACTN4', 'MAPK8IP2', 'CYP8B1', 'ALDOC', 'KLF6', 'RALBP1', 'SLC19A2', 'PPP2R4', 'DKK2', 'GLUT1', 'ABL2', 'FZD4', 'PLA2G3', 'COL3A1', 'PLA2G6', 'SOD1', 'PKM2', 'RASGRP1', 'CDH20', 'AHCTF1', 'CYP1B1', 'TCF4', 'SLC2A1', 'ITGB8', 'JUN', 'COL4A6', 'IL20RB', 'GNB5', 'FGF9', 'FGF6', 'MAP3K8', 'MBL2', 'FGF22', 'LAMA5', 'AXIN1', 'TOP2A', 'MOS', 'MYB', 'CES1', 'RAB2A', 'RASAL1', 'GNB1', 'SHMT2', 'TNK2', 'CCL1', 'LDHB', 'ESR1', 'SMAD9', 'CSNK2A3', 'SPRY1', 'SLIT3', 'MFGE8', 'UGT1A6', 'IL6ST', 'PDPK1', 'SMAD4', 'BHLHE40', 'ARPC5', 'RAC1', 'LAMB1', 'ABCB4', 'GPR115', 'ITGB5', 'NRIP1', 'RAF1', 'PTPRT', 'CACNA1G', 'EFCAB3', 'EIF2S1', 'BST2', 'MIF', 'PFKFB4', 'KCNJ2', 'LGALS3', 'VPS18', 'CNTFR', 'GIT1', 'PLA2G12A', 'MDM2', 'ERRFI1', 'PLA2G2F', 'CACNG2', 'IL5RA', 'PSPH', 'SETD7', 'RASA4B', 'PRDX6', 'AKAP13', 'HSP90AA1', 'DNAJB1', 'VIM', 'CDH1', 'CACNA1F', 'SLC26A2', 'FOXO4', 'CPT1A', 'GNG13', 'MAP3K4', 'SLC2A9', 'GGT1', 'PTPN1', 'TACR1', 'PPP3CB', 'COL2A1', 'GYS2', 'DDIT3', 'DIAP1', 'NF1', 'BID', 'RAB5B', 'CDH22', 'DAPK3', 'JAK2', 'ZYX', 'THBD', 'GNGT2', 'NLK', 'NPC2', 'FAK', 'FZD1', 'PTPN11', 'CEBPZ', 'MYL1', 'MYLK4', 'POU5F1', 'PSMC5', 'PAK6', 'CD44', 'RPS6KA3', 'RAB8A', 'GPX3', 'FGD4', 'PRKCB', 'FGFBP1', 'PDGFD', 'ATM', 'PFKM', 'HLA-DMB', 'EXOC2', 'SLC10A1', 'LTB4R', 'PLA2G4C', 'CHTF18', 'AHRR', 'UGT1A9', 'RPS6', 'PRKAA1', 'HRAS', 'SERPINA1', 'CASP9', 'HBEGF', 'GCC1', 'ELF5', 'IRS1', 'RIN1', 'CSH1', 'CSK', 'HLA-DRB4', 'DAB2IP', 'LAMC3', 'SLC9A3', 'UGT1A7', 'RAPGEF2', 'SLC39A11', 'SHMT1', 'RASA3', 'HSPA1B', 'CREB3L4', 'STAT3', 'CACNB3', 'STAT1', 'COL6A5', 'LAMA1', 'E2F2', 'ITGB4', 'PDK4', 'B3GNT5', 'SLCO1B1', 'SLC27A1', 'RASA2', 'GRIN1', 'COL6A6', 'SMAD6', 'BAAT', 'GDNF', 'SHOC2', 'IL2RA', 'NGF', 'ETS1', 'ALOX5AP', 'FGF20', 'COL9A3', 'PLD1', 'MAPK3', 'ILK', 'CYP4A11', 'LIF', 'F2R', 'HIF1A', 'FGF4', 'ITGA8', 'IL2RG', 'CCL20', 'PTPRC', 'IFNB1', 'CAV3', 'HSPA1A', 'PTPN5', 'ABCC2', 'CSF3', 'GNAI3', 'CPEB4', 'SLC2A3', 'VEGFA', 'COX-2', 'RGL1', 'CLEC6A', 'MET', 'TSC22D3', 'MEF2D', 'FGF2', 'GSTM2', 'LIPE', 'IL1R2', 'WNT6', 'F2RL3', 'NPC1', 'PIP5K1A', 'MAPK9', 'HLA-DQB2', 'HSPB1', 'ROCK2', 'TNFRSF11A', 'MAP3K5', 'PITX2', 'ENO1', 'MAPKAPK5', 'JUND', 'E2F1', 'CDH7', 'CSNK2B', 'AQP4', 'DLL3', 'ARAF', 'NF2', 'PHLPP2', 'RDX', 'PPP3CC', 'FGF21', 'VAV3', 'CACNG6', 'HLA-DPB1', 'RB1', 'CRTC2', 'PXN', 'DUSP9', 'WNT16', 'CALM1', 'PCK2', 'IL3RA', 'MAP2K4', 'FGF8', 'ITGB6', 'CXCR7', 'ITGA10', 'PRKAG1', 'LPAR3', 'PEBP1', 'PTK2', 'TSG101', 'TNR', 'SLC6A4', 'CYP2C9', 'PGM2', 'GNAQ', 'ARHGEF1', 'VEGFD', 'ATF2', 'COL1A1', 'SLC6A19', 'SLC6A18', 'TRAF6', 'CREB3L2', 'MAP3K1', 'FGF16', 'SLC6A6', 'MDH1', 'PHLPP1', 'CALM2', 'IL4R', 'IQGAP1', 'TJP1', 'NTF3', 'PTGS2', 'TYMP', 'SDPR', 'LATS2', 'CSF1', 'NQO1', 'PKN2', 'FOXA1', 'NTRK1', 'PGM1', 'ANKRD1', 'E2F3', 'WNT5B', 'LDHD', 'MYLK2', 'PPARD', 'PPARGC1A', 'STMN1', 'STK11', 'DUSP8', 'HCK', 'HLA-DQA1', 'RAB11B', 'CD14', 'FZD3', 'MAP2K2', 'RAP1B', 'PARVA', 'KDM5C', 'MEF2C', 'FGF1', 'GSTM4', 'IFNA8', 'CACNA1C', 'SLC2A10', 'FANCI', 'TCF7', 'GCLM', 'CD19', 'TPCN2', 'DNAJC7', 'PIAS3', 'PPP2R3C', 'TAOK3', 'CALML4', 'ME1', 'MGAM', 'PIK3R1', 'MKNK1', 'DUSP7', 'MYL3', 'GSTA3', 'ARHGEF7', 'GPI', 'DOCK1', 'EFNA3', 'PRKCQ', 'ANXA2', 'PPP2R2A', 'COL6A2', 'IL12A', 'RASA1', 'HLA-DRB3', 'GSTM1', 'CACNA1S', 'PAK4', 'PLCG2', 'SRGN', 'INS', 'FZD6', 'DIAP3', 'VDR', 'SLC6A1', 'OTUD5', 'DEPTOR', 'GNG10', 'IL4', 'PLCB2', 'TXK', 'PAK2', 'TEAD3', 'TIAM1', 'ENO2', 'FGFR2', 'ZEB1', 'CYP4F12', 'MAP2K7', 'CCNE2', 'MAP2K5', 'FZD7', 'IL7', 'CBP', 'XYLT1', 'APC', 'KAT2B', 'CDH16', 'SLC6A2', 'PLA2G1B', 'NR1I3', 'TNFSF13', 'AHR', 'SLC2A7', 'RRAS', 'RTN4', 'IFNA16', 'PLAC8', 'RASAL3', 'IFNA2', 'GNAL', 'APOA1', 'ACACA', 'WNT1', 'STK3', 'PLA2G2A', 'FGF11', 'RASGRF2', 'CLEC4M', 'GSTA4', 'MTDH', 'VAV1', 'BLK', 'SMAD5', 'FKBP5', 'AGTR1', 'PKN3', 'SLC6A20', 'NCOA6', 'ANG1', 'NOTCH2', 'ITGAX', 'SLC5A11', 'CDH10', 'MAP3K2', 'CDC42EP3', 'LRRK2', 'CCR2', 'SERPINE1', 'CSNK2A1', 'SELP', 'FGR', 'PLA2G2D', 'NGFR', 'FKHR', 'NR1H3', 'MSH6', 'ITGB2', 'FLNC', 'ITGA7', 'PRKCA', 'PIK3R4', 'PGF', 'PLA2G2E', 'STAT5A', 'SRXN1', 'PPP1CA', 'CACNG8', 'PIK3CD', 'SLC39A10', 'DAPK1', 'PIK3R5', 'TCL1A', 'MAPK10', 'LAMA4', 'FOS', 'FZD8', 'UGT1A3', 'EP300', 'MERTK', 'CLTA', 'KSR2', 'COL11A1', 'RIPK4', 'ERK1', 'GAPDH', 'NR0B2', 'CACNG7', 'MYL12A', 'DDIT4', 'VPS4A', 'TOX2', 'THEM4', 'CAMK1', 'BAK1', 'CYP2A6', 'ACAA1', 'THBS4', 'SERPINB9', 'GHR', 'PPP2R5A', 'TNFSF11', 'PKLR', 'CDKN2C', 'GFAP', 'TCF7L1', 'LPAR1', 'BRCA1', 'CCNG1', 'LTB', 'PLXNA2', 'MAPK8IP3', 'PAK1', 'DVL1', 'DAXX', 'SRPX2', 'CRABP2', 'FTH1', 'CBR3', 'INHBB', 'PGK1', 'CDC42', 'BIRC5', 'FABP1', 'MYLK', 'KLF4', 'FGF13', 'ASGR1', 'FZD9', 'MLST8', 'BIRC3', 'PPP1R14C', 'RASAL2', 'NAV3', 'CYP3A4', 'BLVRB', 'SCIN', 'FCGR2A', 'CTF1', 'CDKN1B', 'AFDN', 'CDC37', 'CYP3A7', 'VPS41', 'ITGAD', 'ERBB1', 'MAP2K3', 'PIP5KL1', 'PDGFRB', 'ERBB4', 'AMPK', 'BAX', 'AGTR2', 'PIK3R3', 'FLT3LG', 'ANGPTL4', 'GRIN2A', 'CDH13', 'SULT2A1', 'HLA-C', 'CAB39L', 'SLC6A15', 'JAG2', 'PFKFB2', 'PTK6', 'RAB5A', 'WNT2B', 'RRAS2', 'SULT1A1', 'NCOA3', 'ETS2', 'SLIT1', 'TRAF2', 'KIT', 'GAS6', 'BMPR1A', 'SLC39A14', 'YY1', 'NCK1', 'CASP7', 'ARHGAP35', 'COL5A1', 'RASA4', 'GAP43', 'FLT1', 'MBP', 'WNT2', 'CSH2', 'TNK1', 'HMOX1', 'SLC6A7', 'KRAS', 'PRKAB1', 'AKT2', 'PRKAG2', 'RFC3', 'ITGB3', 'CAP2', 'PPP1CB', 'RGS2', 'IRF7', 'PRKRA', 'IFNGR2', 'TYK2', 'NR1I2', 'MEF2B', 'HLA-DOB', 'NCAN', 'UBE2T', 'CRKL', 'TAB2', 'LIMK1', 'LATS1', 'CALM3', 'HGF', 'IL7R', 'ENO3', 'MAFF', 'MYL5', 'NR1H4', 'CACNG1', 'ZFP36', 'SLC2A13', 'IL20RA', 'SPINK13', 'GNB2', 'EIF2AK2', 'HSP90AB1', 'LPAR2', 'DDB2', 'SRF', 'BRCA2', 'FASLG', 'RHEB', 'GNAS', 'FZD2', 'TNFAIP3', 'HLA-DRB5', 'NOS2', 'EIF4EBP1', 'ICAM1', 'CEBP', 'PGBD5', 'ARRB1', 'CDH24', 'PKM', 'RARB', 'ICAM3', 'MAP3K7', 'SSH2', 'BMPR2', 'SELENOP', 'CREBBP', 'EGF', 'ANXA1', 'MAP4K1', 'NCOA1', 'GJB2', 'LEP', 'CDC25B', 'EIF4E1B', 'PRKAR1A', 'P23', 'DCAF1', 'TRIM28', 'GAST', 'STYK1', 'CHGA', 'WNT4', 'BMPR1B', 'ZAK', 'SLC5A10', 'GRB2', 'RHOB', 'IFNA5', 'KDR', 'COL4A3', 'IFNA17', 'C11orf13', 'LILRB3', 'IL22RA1', 'COL5A3', 'ABCB11', 'IGFBP1', 'SCNN1A', 'PRKCI', 'GSTP1', 'PRRG4', 'CHTF8', 'KPNA1', 'SLC2A8', 'ACTN1', 'SLC2A11', 'CHST11', 'ERK', 'RELB', 'CES4A', 'CD300A', 'GSTA1', 'CDH9', 'XIAP', 'SPRY2', 'RPS6KB1', 'PRL', 'EPHA3', 'TXNRD1', 'ATF6B', 'PRKAG3', 'DLL1', 'G6PC', 'ACAN', 'PIK3IP1', 'MRE11A', 'CREB1', 'HLA-E', 'PGR', 'RHOD', 'KLK8', 'YAP1', 'HAVCR2', 'NR4A1', 'CYFIP2', 'IL2', 'CTGF', 'TXNRD3', 'DAPK2', 'HLA-B', 'TIMD4', 'SYK', 'BTG2', 'RAB10', 'PIP5K2B', 'SSPN', 'IL1R1', 'CXCL1', 'IBSP', 'GH2', 'PDGFRA', 'INSR', 'CD47', 'KTN1', 'GSTM3', 'PIK3CB', 'CD63', 'GH1', 'THBS2', 'PBRM1', 'RPS6KA4', 'TNC', 'RELA', 'COL4A4', 'PRKD3', 'SPP1', 'PPP2R5B', 'VHL', 'IL12RB2', 'MAFG', 'LIN28B', 'CAPN2', 'ITGA2', 'VIL1', 'HLA-DPA1', 'CREB3L3', 'CDK4', 'ULK1', 'STRADA', 'PAK3', 'YY1AP1', 'EIF4E', 'SCP2', 'IRF3', 'ARG1', 'PPP1R12A', 'EFNA4', 'PIP5K1C', 'CDKN2A', 'BCAR1', 'CACNA1E', 'SERTAD2', 'E2F5', 'TNF', 'GAB1', 'PTGR1', 'CHRM4', 'BDKRB1', 'EIF4B', 'SREBF1', 'CACNA2D2', 'IGF2', 'CES5A', 'MSK1', 'SLC27A5', 'C5', 'CACNA1H', 'AKT3', 'PIP5K2A', 'GNG5', 'FGFR4', 'LDHC', 'NFE2L2', 'GNA12', 'RHOA', 'FGF14', 'AMOT', 'PMP2', 'FLNA', 'SLC39A12', 'MTOR', 'PHGDH', 'PDGFB', 'ERBB3', 'PPP2R3A', 'PRKACA', 'FGFR3', 'PNCK', 'GNB4', 'CLEC10A', 'IL8', 'EPAS1', 'MAP4K2', 'ALAS1', 'BMP2', 'LPAR4', 'IFNA14', 'CALML3', 'TAZ', 'GAB2', 'KSR1', 'SHH', 'TGFB1', 'BAP1', 'ATP2C2', 'PPP2R5C', 'RAB14', 'TGFBR1', 'ABCB1', 'IP6K3', 'PLA2G4B', 'CACNB1', 'PDE4B', 'CCND1', 'CD36', 'DUSP6', 'EIF4G', 'FRAT1', 'CEBPA', 'RAB5C', 'ANG4', 'TAOK1', 'RELN', 'FGF19', 'RS1', 'GRIN2B', 'FGF5', 'SLC5A1', 'WNT7A', 'FGF18', 'ROCK1', 'CDK1', 'EGFR', 'PFKFB3', 'F2RL2', 'ESR2', 'ARHGAP5', 'SLC6A11', 'GRLF1', 'GRB14', 'PAK5', 'OSMR', 'MRAS', 'BIRC2', 'CACYBP', 'CYP3A5', 'WNT10A', 'SORBS2', 'MMP1', 'PRKCE', 'SEC14L1', 'CDH6', 'PPP3CA', 'USF2', 'DDX58', 'GNG12', 'FYN', 'BDNF', 'DUSP4', 'IKBKB', 'TLR2', 'PRKACB', 'PTPRR', 'GNG4', 'ATAD2', 'PTPRA', 'AIF1', 'LIFR', 'EPHA2', 'VEGF', 'RPTOR', 'PPP2R5D', 'SLC5A6', 'ACLY', 'CCL2', 'UBE2C', 'HTR7', 'PDGFA', 'ACACB', 'IFNA21', 'SLC2A14', 'HK1', 'PPP5D1', 'TEAD1', 'CDKN1C', 'PFKP', 'PLA2G5', 'MMP9', 'SLC39A1', 'SLC2A4', 'CTNNB1', 'VEGFR2', 'SNURF', 'RBL2', 'CLTC', 'MAP4K4', 'AXL', 'CACNA2D1', 'TFF2', 'OSM', 'SAV1', 'SERPINB2', 'APOA5', 'CDH19', 'HK3', 'NRAS', 'ARF6', 'ANG2', 'TBC1D1', 'MAP2K1', 'IKBKE', 'PKN1', 'CAV2', 'DUSP10', 'ZIC2', 'LAMC2', 'TBK1', 'CACNA1D', 'ALDOA', 'POLK', 'CYP1A1', 'FGF7', 'GSTT2', 'VGLL4', 'GSTA5', 'CETN3', 'SLC6A13', 'ABI2', 'SHC1', 'EML4', 'LAMA3', 'SHC4', 'STAT5B', 'HSP90B1', 'RAD17', 'MAPKAPK2', 'PPARG', 'SMAD1', 'GSTT1', 'ABCC3', 'LMNB2', 'ACVR1', 'MAPK12', 'SSH3', 'CEP290', 'TPI1', 'CES3', 'MYOF', 'SLC39A3', 'EIF4E2']

# GBM

In [4]:
#load GBM data 
#cptac.download(dataset='gbm')
brain = cptac.Gbm()

                                        



In [5]:
df1 = brain.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df2.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')

df3.filter(like='GAB1_phosphoproteomics_Y', axis=1)



Unnamed: 0_level_0,GAB1_phosphoproteomics_Y259,GAB1_phosphoproteomics_Y307,GAB1_phosphoproteomics_Y373,GAB1_phosphoproteomics_Y373T390,GAB1_phosphoproteomics_Y406,GAB1_phosphoproteomics_Y657,GAB1_phosphoproteomics_Y689
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C3L-00104,,-0.035054,,,-0.048097,-0.010128,-0.203653
C3L-00365,,0.389611,,,0.754949,1.826921,1.857998
C3L-00674,0.566358,,,,-0.812507,0.619202,0.214643
C3L-00677,,0.050395,,0.296117,,,0.349729
C3L-01040,,-0.254910,,,0.158536,0.532003,0.363687
...,...,...,...,...,...,...,...
C3N-03183,,-0.135757,,,-0.188414,1.275994,0.285433
C3N-03184,,-0.045292,,,0.152106,1.601829,0.939990
C3N-03186,,-0.037752,-0.457546,,,,-1.118827
C3N-03188,,0.086859,,,-0.260785,0.608676,0.695039


In [6]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1197",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='GBM'
df.to_csv("GBM_EGFR_Y1197_phospho.csv")

  reject = pvals <= alphacBonf
  pvals_corrected[pvals_corrected>1] = 1


In [7]:

df

Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
14365,EGFR_phosphoproteomics_Y1197,1.000000,0.000000e+00,GBM
14364,EGFR_phosphoproteomics_Y1172,0.899603,8.871131e-34,GBM
14348,EGFR_phosphoproteomics_S1166Y1172,0.881333,1.002277e-30,GBM
14362,EGFR_phosphoproteomics_Y1092,0.908060,3.304647e-23,GBM
14347,EGFR_phosphoproteomics_S1166,0.739790,5.458615e-17,GBM
...,...,...,...,...
5451,BAZ1B_phosphoproteomics_S708,-0.000009,9.999604e-01,GBM
33328,PALMD_phosphoproteomics_S486,0.000008,9.999697e-01,GBM
815,ADD3_phosphoproteomics_S677S679S681,-0.000002,9.999883e-01,GBM
22015,ITPRID2_phosphoproteomics_T867,-0.000002,9.999904e-01,GBM


In [8]:
#cptac.download(dataset='ccrcc')
kidney = cptac.Ccrcc()


                                          

In [9]:
df1 = kidney.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df2.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')

phospho_sites.remove('Sample_Status_')

df3.filter(like='EGFR_phosphoproteomics_Y', axis=1)



Unnamed: 0_level_0,EGFR_phosphoproteomics_Y1016,EGFR_phosphoproteomics_Y1039,EGFR_phosphoproteomics_Y1119,EGFR_phosphoproteomics_Y1144,EGFR_phosphoproteomics_Y963
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C3L-00004,,0.285696,,0.999000,-0.013055
C3L-00010,,0.601337,0.412977,0.700833,
C3L-00011,,-0.239413,0.271045,-0.430652,
C3L-00026,,0.444422,1.011546,0.465533,0.460270
C3L-00079,,,-0.000500,,0.040108
...,...,...,...,...,...
C3N-01646,,-0.160831,,-0.404793,
C3N-01648,,-0.413072,,-0.650702,
C3N-01649,,0.353399,-0.059095,0.369224,-0.250412
C3N-01651,,,0.682451,,


In [10]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1144",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='Kidney'
df.to_csv("Kidney_EGFR_Y1144_phospho.csv")

# Ovarian 

In [11]:
#cptac.download(dataset='ovarian')
Ovar = cptac.Ovarian()

                                            

In [12]:
df1 = Ovar.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df3.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')


phospho_sites.remove('Sample_Status_')
df3



Unnamed: 0_level_0,AAAS_phosphoproteomics_S495,AAGAB_phosphoproteomics_S311,AAK1_phosphoproteomics_S18,AAK1_phosphoproteomics_S20,AAK1_phosphoproteomics_S21,AAK1_phosphoproteomics_S624,AAK1_phosphoproteomics_S637,AAK1_phosphoproteomics_S642,AAK1_phosphoproteomics_S668S678,AAK1_phosphoproteomics_S668T674,...,ZZZ3_phosphoproteomics_S135,ZZZ3_phosphoproteomics_S314,ZZZ3_phosphoproteomics_S391,ZZZ3_phosphoproteomics_S89,ZZZ3_phosphoproteomics_S90,ZZZ3_phosphoproteomics_S91,EGFR_Mutation_,EGFR_Location_,EGFR_Mutation_Status_,Sample_Status_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01OV007,,,,,,,,-1.829,,,...,,-2.533,,,,-1.320,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
01OV017,,,,,,,,-2.095,,,...,,-3.276,,,,-0.264,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
01OV018,,,,,,,,-1.748,,,...,,,-0.847,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
01OV023,-1.137,,,,,,,,-2.185,,...,,,-0.455,,,-1.129,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
01OV026,-0.987,,,,-2.269,-3.363,,,,,...,,,,,,-1.301,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26OV002,,,,,,,,,,-2.921,...,,,,,,-1.232,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
26OV008,,,,,,,,-1.796,,,...,,,,,-2.134,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
26OV009,,,,,,,,-2.795,,,...,,,,,-2.503,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
26OV011,-0.742,,,,-1.704,-2.075,,,,,...,,,,,,0.713,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [13]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1172",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='Ovar'
df.to_csv("Ovar_EGFR_Y1172_phospho.csv")
df
#df3.filter(like='EGFR_phosphoproteomics_Y', axis=1)


Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
854,EGFR_phosphoproteomics_Y1172,1.000000,0.000000e+00,Ovar
1689,MARK2_phosphoproteomics_S40,0.805187,4.084338e-07,Ovar
853,EGFR_phosphoproteomics_S1166,0.789336,5.966751e-07,Ovar
253,ARHGEF5_phosphoproteomics_S606,0.722735,9.442209e-07,Ovar
1379,KDM3B_phosphoproteomics_S779,0.721595,1.000808e-06,Ovar
...,...,...,...,...
2466,RGS14_phosphoproteomics_S203,-0.000776,9.972646e-01,Ovar
3141,TNS1_phosphoproteomics_S1327,-0.000529,9.981828e-01,Ovar
3458,ZNF687_phosphoproteomics_S253,0.000242,9.989014e-01,Ovar
2977,TANC2_phosphoproteomics_S1740,-0.000299,9.989753e-01,Ovar


# Colon

In [14]:
#cptac.download(dataset='colon')
colon = cptac.Colon()

                                          

In [15]:
df1 = colon.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
#df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df3.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')


phospho_sites.remove('Sample_Status_')




In [16]:

df3.filter(like='EGFR_phosphoproteomics_Y', axis=1)

Unnamed: 0_level_0,EGFR_phosphoproteomics_Y1092,EGFR_phosphoproteomics_Y1172,EGFR_phosphoproteomics_Y1197
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01CO001,,,
01CO005,,,-1.002
01CO006,0.178,-0.421,
01CO008,-0.784,,
01CO013,,,
...,...,...,...
21CO007,-0.396,,
22CO004,,,
22CO006,-0.400,,
24CO005,,,


In [17]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1092",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='Colon'
df.to_csv("colon_EGFR_Y1092_phospho.csv")
df


Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
2396,EGFR_phosphoproteomics_Y1092,1.000000,0.000000,Colon
9747,WNK2_phosphoproteomics_S1889,0.737281,0.000017,Colon
4514,MAP1B_phosphoproteomics_S1400,0.627090,0.000056,Colon
5180,NCAM1_phosphoproteomics_S784,0.623998,0.000062,Colon
4874,MID1_phosphoproteomics_S98,0.697698,0.000106,Colon
...,...,...,...,...
9911,ZC3HC1_phosphoproteomics_S354,0.000099,0.999563,Colon
797,ARVCF_phosphoproteomics_S348,-0.000081,0.999685,Colon
8619,SYNPO_phosphoproteomics_S882,-0.000074,0.999747,Colon
8129,SRRM1_phosphoproteomics_S463,0.000024,0.999890,Colon


# Brca 

In [18]:
#cptac.download(dataset='brca')
brca = cptac.Brca()

                                         

In [19]:
df1 = brca.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df3.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')


phospho_sites.remove('Sample_Status_')

df3



Unnamed: 0_level_0,A2M_phosphoproteomics_S710,AAAS_phosphoproteomics_S495,AAAS_phosphoproteomics_S541,AAED1_phosphoproteomics_S12,AAGAB_phosphoproteomics_S310S311,AAGAB_phosphoproteomics_S311,AAK1_phosphoproteomics_S14,AAK1_phosphoproteomics_S18,AAK1_phosphoproteomics_S21,AAK1_phosphoproteomics_S618T620S623,...,ZZZ3_phosphoproteomics_S391,ZZZ3_phosphoproteomics_S397,ZZZ3_phosphoproteomics_S397T428N429,ZZZ3_phosphoproteomics_S606,ZZZ3_phosphoproteomics_S82,ZZZ3_phosphoproteomics_S89,EGFR_Mutation_,EGFR_Location_,EGFR_Mutation_Status_,Sample_Status_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CPT000814,,1.9431,,,0.0127,-0.4495,,-1.1852,-0.8333,0.0863,...,-8.8556,,-0.8493,-0.1744,-0.0273,-6.8916,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
CPT001846,,0.2274,,0.5472,0.6895,-0.7680,,0.6895,0.3903,,...,-4.0222,,1.1806,-1.2700,-0.8983,-3.6228,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
X01BR001,,-2.2853,-0.8967,4.1225,0.4842,-1.2458,-0.0310,0.8805,0.7448,1.0649,...,-1.6299,-0.2017,-0.3953,-2.1328,-0.8815,-2.1191,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
X01BR008,,1.3714,,-0.0636,-0.6224,-1.2732,-1.1351,-0.8652,0.0031,0.2173,...,1.3333,0.7840,0.1078,0.4824,,-0.9748,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
X01BR009,,0.2682,,,-0.6207,-2.3561,-0.3078,0.3785,0.6990,-0.1082,...,0.5267,-0.6661,0.5539,-0.3728,,-0.8354,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
X21BR001,,0.9424,,0.4838,0.0477,-0.3365,-0.4697,-0.4213,0.1117,-0.5217,...,0.9078,0.7330,1.5429,0.6015,,-2.2315,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
X21BR002,,0.2955,,-0.0053,0.1169,0.5654,-0.7218,-0.2368,0.2974,-0.0491,...,-0.4611,-0.3007,0.4122,-0.6325,,-0.4046,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
X21BR010,-0.7266,-0.1201,-0.1477,1.8818,0.8338,0.2280,-0.0877,-0.4698,-0.2266,-0.0214,...,-2.2380,0.9095,0.3844,0.1160,-3.4806,0.0170,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
X22BR005,-0.7618,-0.3378,3.2472,,1.0000,1.8410,,-0.6455,0.6330,0.2228,...,1.3874,,0.0792,1.0615,,0.3436,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [20]:

df3.filter(like='EGFR_phosphoproteomics', axis=1)
#no EGFR phospho Y data 

Unnamed: 0_level_0,EGFR_phosphoproteomics_S1039,EGFR_phosphoproteomics_S1064,EGFR_phosphoproteomics_S1166,EGFR_phosphoproteomics_S991,EGFR_phosphoproteomics_T1041S1042,EGFR_phosphoproteomics_T693
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CPT000814,-0.2928,0.1774,-1.9944,0.1950,0.1806,-0.3343
CPT001846,1.9046,,5.2070,0.4808,,3.1401
X01BR001,,-0.3739,3.2933,,,1.3256
X01BR008,-0.5764,,-0.8240,,-0.1732,-0.5732
X01BR009,0.4722,,1.6650,,-0.6993,0.0248
...,...,...,...,...,...,...
X21BR001,-0.2240,,-2.5568,,-0.3555,-1.7590
X21BR002,0.1734,,-0.3882,,-1.0938,-0.9023
X21BR010,-0.0334,,0.5478,-1.3929,-1.0773,-0.9196
X22BR005,,,0.0336,,,-1.5959


# LUAD

In [21]:
#cptac.download(dataset='luad')
luad = cptac.Luad()

                                         



In [22]:
df1 = luad.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df3.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')


phospho_sites.remove('Sample_Status_')

df3



Unnamed: 0_level_0,AAAS_phosphoproteomics_S495,AAAS_phosphoproteomics_S541,AAED1_phosphoproteomics_S12,AAGAB_phosphoproteomics_S310S311,AAGAB_phosphoproteomics_S311,AAK1_phosphoproteomics_S20,AAK1_phosphoproteomics_S618T620S624,AAK1_phosphoproteomics_S624,AAK1_phosphoproteomics_S678,AAK1_phosphoproteomics_S682,...,ZZEF1_phosphoproteomics_T66,ZZZ3_phosphoproteomics_S113,ZZZ3_phosphoproteomics_S391,ZZZ3_phosphoproteomics_S397N429,ZZZ3_phosphoproteomics_S606,ZZZ3_phosphoproteomics_S89,EGFR_Mutation_,EGFR_Location_,EGFR_Mutation_Status_,Sample_Status_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00001,-0.7127,0.2683,0.9238,0.1711,,1.0165,,-0.2064,-0.0278,,...,,-1.0721,-0.3262,-0.1611,,0.6164,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00009,0.6344,,1.3945,-2.3271,-0.4007,1.5299,,-0.1799,1.5237,0.1158,...,0.3657,-0.1195,0.9010,0.2720,0.7177,-0.9776,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00080,1.0390,,-0.2910,,0.2226,0.5386,,-1.0926,,-1.4284,...,-0.6087,-0.3008,0.7575,,0.1502,0.1946,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00083,-0.4906,,0.8506,,0.3430,0.2118,,2.5134,1.7586,-1.1010,...,,-0.2718,0.7937,0.3386,0.4415,-1.7661,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00093,1.0305,-0.5968,0.3249,1.0423,-0.2167,1.1706,1.0851,,0.4152,0.6599,...,,0.0422,0.4389,,0.1586,0.0303,"[Missense_Mutation, Missense_Mutation]","[p.L858R, p.A871G]",Multiple_mutation,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-02729,0.0377,,-0.4162,,-1.3180,1.2554,-0.2403,-2.7458,-1.0661,-0.9181,...,,-0.2763,-1.3520,,,-1.1201,[Missense_Mutation],[p.L858R],Single_mutation,Tumor
X11LU013,0.4598,,-1.3742,-0.4182,1.1134,-0.3382,,,1.5914,0.5378,...,,-1.2122,-0.3557,,-0.5020,-2.0238,[In_Frame_Del],[p.E746_S752delinsA],Single_mutation,Tumor
X11LU016,0.8232,0.5637,0.3794,,1.0695,-0.5345,-0.8673,,1.1221,-0.4329,...,0.1143,-0.1998,0.1237,,-0.5815,-1.0779,"[Missense_Mutation, Missense_Mutation]","[p.G719C, p.S768I]",Multiple_mutation,Tumor
X11LU022,0.7228,-0.4196,0.0563,0.8353,1.3252,1.1244,0.7530,,-0.7750,-0.1004,...,,1.1886,-1.8792,,-0.5501,-0.5140,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [23]:

df3.filter(like='EGFR_phosphoproteomics_Y', axis=1)

Unnamed: 0_level_0,EGFR_phosphoproteomics_Y1092,EGFR_phosphoproteomics_Y1172,EGFR_phosphoproteomics_Y1197
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00001,,5.9688,8.5140
C3L-00009,-2.0647,-1.0692,-1.3295
C3L-00080,,,-2.1724
C3L-00083,,-2.7310,-2.2124
C3L-00093,,-0.7893,0.7454
...,...,...,...
C3N-02729,,2.8111,1.8433
X11LU013,,1.0763,1.3651
X11LU016,,-0.3352,-1.1907
X11LU022,,2.8952,3.0779


In [24]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1197",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='Luad'
df.to_csv("Luad_EGFR_Y1197_phospho.csv")
df


Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
10047,EGFR_phosphoproteomics_Y1197,1.000000e+00,0.000000e+00,Luad
10046,EGFR_phosphoproteomics_Y1172,8.817770e-01,1.885106e-31,Luad
10038,EGFR_phosphoproteomics_S1064,7.289959e-01,2.552188e-17,Luad
10040,EGFR_phosphoproteomics_S1166,6.916211e-01,4.397719e-15,Luad
10044,EGFR_phosphoproteomics_T693,6.863959e-01,8.495371e-15,Luad
...,...,...,...,...
4378,BRMS1L_phosphoproteomics_S174,4.159763e-05,9.997572e-01,Luad
12232,FLNA_phosphoproteomics_S1081,4.075534e-05,9.997577e-01,Luad
16132,KANSL1_phosphoproteomics_S991,3.754599e-05,9.997914e-01,Luad
1647,ANK3_phosphoproteomics_S1824,1.305538e-05,9.998987e-01,Luad


# HNSCC

In [33]:
#cptac.download(dataset='hnscc')
Hnscc = cptac.Hnscc()

                                          

In [26]:
df1 = Hnscc.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df3.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')


phospho_sites.remove('Sample_Status_')

df3



Unnamed: 0_level_0,A2M_phosphoproteomics_S710,A2M_phosphoproteomics_S928,A2ML1_phosphoproteomics_S658,A2ML1_phosphoproteomics_S698,AAAS_phosphoproteomics_S462,AAAS_phosphoproteomics_S478,AAAS_phosphoproteomics_S508,AAAS_phosphoproteomics_T488S508,AAED1_phosphoproteomics_S12,AAGAB_phosphoproteomics_S106,...,ZZZ3_phosphoproteomics_S314,ZZZ3_phosphoproteomics_S381T388,ZZZ3_phosphoproteomics_S391,ZZZ3_phosphoproteomics_S396,ZZZ3_phosphoproteomics_S91,ZZZ3_phosphoproteomics_T301,EGFR_Mutation_,EGFR_Location_,EGFR_Mutation_Status_,Sample_Status_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00977,,,,,23.810752,16.462169,,,19.304315,,...,,,20.726809,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00987,,,,,24.303142,,,,19.581772,,...,,10.553611,,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00994,,,,,23.834852,16.514470,,,20.286625,,...,,,20.389204,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00995,14.950532,,,,24.188308,,,,19.822521,18.761755,...,,,20.849815,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00997,,,,,24.178120,,17.683328,,19.792555,,...,,,20.776460,14.551561,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-04277,,12.966113,,,23.969325,16.161552,18.924554,,19.760969,19.797038,...,,,21.123244,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-04278,,,13.672171,,24.486603,16.941698,17.945032,,18.729742,19.684109,...,,,21.169331,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-04279,,,,13.404450,24.128939,16.820011,,,20.094552,19.822749,...,,,,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-04280,,,,16.816393,23.876969,,,,19.311999,19.554730,...,,,,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [27]:

df3.filter(like='EGFR_phosphoproteomics_Y', axis=1)

Unnamed: 0_level_0,EGFR_phosphoproteomics_Y1092,EGFR_phosphoproteomics_Y1110,EGFR_phosphoproteomics_Y1172,EGFR_phosphoproteomics_Y1197,EGFR_phosphoproteomics_Y727
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
C3L-00977,17.473013,,20.292057,23.960771,
C3L-00987,,,18.218061,21.464089,
C3L-00994,16.886517,,19.119405,22.517973,
C3L-00995,,,,22.753777,
C3L-00997,,16.956105,18.056164,21.358591,
...,...,...,...,...,...
C3N-04277,,18.134882,,23.335068,
C3N-04278,,,,21.584917,
C3N-04279,,18.366149,19.220107,22.176533,
C3N-04280,,19.317100,19.149914,21.303716,


In [28]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1197",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='Hnscc'
df.to_csv("Hnscc_EGFR_Y1197_phospho.csv")

df

Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
7654,EGFR_phosphoproteomics_Y1197,1.000000,0.000000e+00,Hnscc
7653,EGFR_phosphoproteomics_Y1172,0.877079,1.910986e-20,Hnscc
7645,EGFR_phosphoproteomics_S1166Y1172,0.902376,5.970836e-17,Hnscc
7652,EGFR_phosphoproteomics_Y1110,0.747328,2.047774e-14,Hnscc
7651,EGFR_phosphoproteomics_Y1092,0.916211,7.916959e-12,Hnscc
...,...,...,...,...
17957,NOP14_phosphoproteomics_S349,0.000033,9.998338e-01,Hnscc
16449,MYH14_phosphoproteomics_S1977S1991,0.000024,9.998712e-01,Hnscc
31225,ZC3H15_phosphoproteomics_S231,-0.000015,9.998833e-01,Hnscc
10504,GRASP_phosphoproteomics_S94,0.000004,9.999706e-01,Hnscc


# Lscc

In [29]:
Lscc = cptac.Lscc()

                                         

In [30]:
df1 = Lscc.join_omics_to_mutations(omics_df_name="phosphoproteomics", mutations_genes="EGFR")
df1.columns = df1.columns.droplevel(2)
df1.columns = df1.columns.droplevel(2)
df1.columns =df1.columns.map('_'.join)


df2 = df1.loc[df1['Sample_Status_'] == "Tumor"]

df3 = df2.loc[:,~df1.columns.duplicated()]
phospho_sites = df3.columns.values.tolist()

phospho_sites.remove('EGFR_Mutation_')

phospho_sites.remove('EGFR_Location_')

phospho_sites.remove('EGFR_Mutation_Status_')


phospho_sites.remove('Sample_Status_')

df3



Unnamed: 0_level_0,A4GALT_phosphoproteomics_T324,AAAS_phosphoproteomics_S495,AAED1_phosphoproteomics_S12,AAGAB_phosphoproteomics_S310S311,AAGAB_phosphoproteomics_S311,AAK1_phosphoproteomics_S18,AAK1_phosphoproteomics_S21,AAK1_phosphoproteomics_S618T620S624,AAK1_phosphoproteomics_S624,AAK1_phosphoproteomics_S637,...,ZZZ3_phosphoproteomics_S397N422,ZZZ3_phosphoproteomics_S397T428N429,ZZZ3_phosphoproteomics_S606,ZZZ3_phosphoproteomics_S82,ZZZ3_phosphoproteomics_S89,ZZZ3_phosphoproteomics_T428N429,EGFR_Mutation_,EGFR_Location_,EGFR_Mutation_Status_,Sample_Status_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00081,,1.4658,-0.3928,,,0.0176,-0.0391,0.5031,0.9853,-0.7181,...,,-0.5079,0.6333,,-1.9627,0.0360,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00415,,-1.3706,-1.4682,0.8167,0.4114,1.3812,1.5283,1.5884,-2.4650,0.5195,...,0.1847,-0.1381,-0.5674,,-0.5089,0.0481,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00445,,0.5106,,0.3627,1.9174,0.1257,-0.2341,-0.5839,,-0.8685,...,,,-0.0649,0.2649,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00568,0.9270,0.9634,,-0.6045,1.5058,0.8369,0.3980,-0.6754,1.9850,-1.3999,...,0.7660,,0.5590,1.1532,-0.9744,1.5806,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00603,,0.9622,-0.3333,0.2873,0.5218,0.4575,0.5404,-0.0373,-0.9639,0.9122,...,,-0.0831,1.0581,2.5538,0.1829,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-03886,,,-0.5543,0.3347,-0.2520,-0.3899,-0.4523,-0.1836,-1.9700,-0.4595,...,0.6898,,-0.1740,,-1.1266,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-04124,,0.0878,-0.2821,0.9885,1.1933,-0.4032,0.6692,1.4047,-0.7181,1.4531,...,,,-0.2623,,-1.3501,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-04127,3.6205,-0.9368,0.1505,0.0656,-0.1708,0.7750,0.3364,1.0741,1.1670,0.7325,...,,,0.3021,-0.7024,-0.6155,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-04155,-1.6357,1.4479,,0.7692,1.0117,0.3880,-0.0797,0.2368,-0.1207,0.8967,...,0.1597,,0.3660,1.3503,-0.6546,-0.1537,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [31]:

df3.filter(like='EGFR_phosphoproteomics_Y', axis=1)



Unnamed: 0_level_0,EGFR_phosphoproteomics_Y1172,EGFR_phosphoproteomics_Y1197
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00081,1.0487,-0.7231
C3L-00415,-2.1963,-1.3811
C3L-00445,-0.2028,-1.0228
C3L-00568,0.2427,-1.4842
C3L-00603,-0.6293,-1.3100
...,...,...
C3N-03886,-2.9095,-1.6461
C3N-04124,-0.6278,-1.5549
C3N-04127,-2.0908,
C3N-04155,-0.3742,-1.9664


In [32]:
df =(wrap_lin_regression(df3,"EGFR_phosphoproteomics_Y1197",comparison_columns = phospho_sites, return_all = True))
df['Cancer Type']='Lscc'
df.to_csv("Lscc_EGFR_Y1197_phospho.csv")

df


Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
10565,EGFR_phosphoproteomics_Y1197,1.000000,0.000000e+00,Lscc
10564,EGFR_phosphoproteomics_Y1172,0.912756,7.644531e-33,Lscc
10555,EGFR_phosphoproteomics_S1064,0.722468,2.977440e-16,Lscc
10554,EGFR_phosphoproteomics_S1042S1045N1053,0.732426,3.582486e-15,Lscc
10553,EGFR_phosphoproteomics_S1039,0.688452,4.739004e-15,Lscc
...,...,...,...,...
16400,ITPR2_phosphoproteomics_T1856,0.000020,9.998640e-01,Lscc
7641,CLIP2_phosphoproteomics_S202S204S207,-0.000008,9.999443e-01,Lscc
25408,PDLIM4_phosphoproteomics_S112,0.000006,9.999568e-01,Lscc
40839,WDR44_phosphoproteomics_S262T271,-0.000003,9.999820e-01,Lscc
