# Pearson Tables EGFR proteomics vs all interacting proteins

This notebook creates dfs for each cancer type that records the pearson correlation stats for EGFR proteomics vs all interacting proteins. (return all)

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
import statsmodels.stats.multitest

sys.path.append('C:/Users/Lindsey/Documents/GitHub/WhenMutationsDontMatter/')
import plot_utils as p


import cptac
import cptac.utils as u

ModuleNotFoundError: No module named 'plot_utils'

In [16]:
print(sys.path)

['/Users/Lindsey/Documents/GitHub/WhenMutationsDontMatter/Pan_Cancer_EGFR/Step3_Trans_effect/Pearson_dfs_by_cancer', '/Users/Lindsey/anaconda3/lib/python37.zip', '/Users/Lindsey/anaconda3/lib/python3.7', '/Users/Lindsey/anaconda3/lib/python3.7/lib-dynload', '', '/Users/Lindsey/anaconda3/lib/python3.7/site-packages', '/Users/Lindsey/anaconda3/lib/python3.7/site-packages/aeosa', '/Users/Lindsey/anaconda3/lib/python3.7/site-packages/IPython/extensions', '/Users/Lindsey/.ipython', 'C://Users//Lindsey//Documents//GitHub//WhenMutationsDontMatter//']


In [2]:

'''
@Param df: Dataframe. Contains numeric values (such as proteomics) for linear regression
@Param label_column: String. Name of column that will be your x axis and will be compared to all values in df unless otherwise specified. 
@Param alpha: significant level
@Param comparison_columns: columns that will be looped through and used as y axis for linear regression. 
All other columns beside label column unless specified here. 
@Param correction_method: String. Specifies method of adjustment for multiple testing. See -
https://www.statsmodels.org/stable/generated/statsmodels.stats.multitest.multipletests.html
    - for documentation and available methods.

This function will return a data frame will all significant linear regressions. The data frame includes the comparison, slope, R-squared, and P-value. 
'''
def wrap_lin_regression(df,label_column, alpha=.05,comparison_columns=None,correction_method='bonferroni',return_all = True):
    pd.set_option("display.precision", 3)

    df = df.dropna(axis=1, how="all")
    
    '''If no comparison columns specified, use all columns except the specified labed column'''
    if not comparison_columns:
        comparison_columns = list(df.columns)
        comparison_columns.remove(label_column)
    '''Store comparisons,p-values, r_squared, and slope in their own array'''
    comparisons = []
    pvals = []
    correlation=[]
    
    
    '''Format results in a pandas dataframe'''
    newdf = pd.DataFrame(columns=['Comparison','Correlation','P_value'])
    for inter_gene in comparison_columns:
        #create subset df with interacting gene/ gene (otherwise drop NaN drops everything)
        df_subset = df[[label_column,inter_gene]]
        #do a linear regression to see if it's a meaningful association
        #dropna will remove rows with nan
        df_subset = df_subset.dropna(axis=0, how="any")
        count_row = df_subset.shape[0]
        if count_row > 20:
            x1 = df_subset[[label_column]].values
            y1 = df_subset[[inter_gene]].values
            x1 = x1[:,0]
            y1 = y1[:,0]

        #slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x1,y1)
        corr, pval = scipy.stats.pearsonr(x1,y1)
        
        comparisons.append(inter_gene)
        pvals.append(pval)
        correlation.append(corr)
        #slope_val.append(slope)
        
    '''Correct for multiple testing to determine if each comparison meets the new cutoff'''
    results = statsmodels.stats.multitest.multipletests(pvals=pvals, alpha=alpha, method=correction_method)
    reject = results[0]
        
    if return_all:
        for i in range(0,len(comparisons)):
            newdf = newdf.append({'Comparison': comparisons[i],"Correlation": correlation[i],'P_value': pvals[i]}, ignore_index=True)
        
    '''Else only add significant comparisons'''
    if (return_all == False):
            for i in range(0, len(reject)):
                if reject[i]:
                    newdf = newdf.append({'Comparison': comparisons[i],"Correlation": correlation[i], 'P_value': pvals[i]}, ignore_index=True)
                    
    '''Sort dataframe by ascending p-value'''
    newdf = newdf.sort_values(by='P_value', ascending=True)
    '''If results df is not empty, return it, else return None'''
    return newdf



       
       

In [3]:
interacting_genes = ['CD81', 'CLEC4G', 'NFKBIA', 'MAPK1', 'FOXC1', 'FHIT', 'RAPGEF1', 'SP1', 'MAG', 'ADH7', 'PIP5K2C', 'TMSB4X', 'COL6A3', 'CRK', 'SLC39A6', 'TGFBR2', 'ELK4', 'PRKCD', 'PLD2', 'EPS15', 'GNG11', 'RASSF5', 'GPX2', 'TEAD4', 'ENC1', 'SLC5A3', 'PIK3C2G', 'NOX4', 'CSPG4', 'COMP', 'IL10RB', 'TSC2', 'LBR', 'GSK3B', 'COL4A1', 'CDK2', 'SLC6A9', 'PIK3CA', 'CACNG3', 'CYP2C19', 'BCAN', 'PPP1R12C', 'PIK3CG', 'VCL', 'VTN', 'ACADM', 'COL11A2', 'TCF7L2', 'HSPA8', 'TYRO3', 'AMIGO2', 'IFNA7', 'SFRP1', 'IL1A', 'GNAI2', 'HAVCR1', 'PRKD1', 'SLC2A5', 'CACNB4', 'PRKD2', 'NR3C1', 'ALK', 'ICAM2', 'G6PC3', 'SQSTM1', 'CREB5', 'ITGA5', 'JUNB', 'CHRM2', 'NTF4', 'PLA2G4E', 'SLC39A8', 'TGFB2', 'MYL7', 'CDH3', 'IFNGR1', 'CES2', 'SKP1', 'PLA1A', 'LDHA', 'STAT5', 'MAPT', 'SLC39A9', 'IL6', 'SLC5A8', 'IFNG', 'SLC39A13', 'MAP4K3', 'PPP2R1B', 'ITGAE', 'HDC', 'PLA2G4A', 'FOXM1', 'HLA-DOA', 'SEMA6A', 'IRS4', 'PLCE1', 'APC2', 'CTNNA2', 'NOTCH3', 'CDK6', 'ANGPT1', 'GRB10', 'LRP6', 'NFKB1', 'EDN2', 'UGT1A4', 'LAMTOR3', 'LAMB3', 'PLTP', 'IKKA', 'APOA2', 'TNFSF13B', 'TOP1', 'THBS3', 'FZD5', 'RPS6KA5', 'RPS6KB2', 'TEK', 'HSPA6', 'PLCB3', 'BCL2L11', 'MYL9', 'COL9A2', 'PSAT1', 'NOTCH4', 'TEAD2', 'SGK3', 'RGMA', 'HLA-F', 'IFNLR1', 'SLC6A3', 'NFATC1', 'EFNA1', 'CRABP1', 'PLA2G16', 'HIF3A', 'CACNB2', 'WNT5A', 'IL10RA', 'PRLR', 'IL11RA', 'MYH10', 'ARRB2', 'RAC2', 'FLNB', 'MAPK6', 'SMAD2', 'CBR1', 'MKNK2', 'PIK3R2', 'TSC1', 'CALML6', 'PPP3R2', 'TPM3', 'CEBPB', 'PFKL', 'CDH18', 'C1QB', 'DVL3', 'ITGA6', 'FGD1', 'ROS', 'HEY1', 'ZAP70', 'PFKFB1', 'PPP2R2B', 'BCL2L1', 'PIK3C2A', 'ACTG1', 'MYLPF', 'KLK15', 'LAMB4', 'CTSB', 'AJUBA', 'SLC39A4', 'TXN', 'DBI', 'RALA', 'HLA-DRA', 'TTC9', 'CSF2RA', 'COL1A2', 'CASP3', 'WNT3', 'CSF2RB', 'IFNAR2', 'SLIT2', 'ENPP3', 'JAG1', 'CUL1', 'FAM91A1', 'GNGT1', 'NFKB2', 'UGT2B4', 'CFL2', 'CDH17', 'BCL2', 'SGK2', 'SRMS', 'TIE1', 'VPS16', 'PIP5K1B', 'SLC5A12', 'GATA4', 'RALGDS', 'C3orf10', 'RASGRP2', 'TNXB', 'LAMB2', 'HLA-A', 'CPT2', 'LAMC1', 'COL5A2', 'CXCL10', 'NOG', 'CSF1R', 'CDH4', 'IL11', 'ABCG5', 'ITGAL', 'PRKCG', 'HNF1A', 'RALB', 'JAK1', 'AKT1S1', 'HLA-DQA2', 'CSF3R', 'MDM4', 'MGST3', 'ADAM17', 'GCLC', 'PPP2CB', 'IKBKG', 'USF', 'FGF23', 'ABCC4', 'ITGB7', 'RFC4', 'TCL1B', 'SLC6A17', 'PRKAA2', 'ANG3', 'LPAR6', 'NTRK2', 'FOXC2', 'PPP2R5E', 'SYNGAP1', 'SOS1', 'IFNAR1', 'IL21R', 'TGFB3', 'WAS', 'GSTM5', 'PTPN7', 'APOC3', 'AQP1', 'IL27RA', 'CACNG5', 'CASP8', 'ELK1', 'DLL4', 'JAK3', 'ACOX1', 'PARVB', 'GJA1', 'PRB1', 'MAP2K6', 'THBS1', 'FOXO3', 'RGL2', 'CCKBR', 'S100P', 'IFNA10', 'SLC5A4', 'WNT3A', 'ARHGEF28', 'GNG2', 'CDH15', 'IFNA13', 'SOS2', 'JMJD7-PLA2G4B', 'BRAP', 'GPAM', 'RAP1A', 'HLA-DMA', 'PPM1B', 'NFATC3', 'GNA13', 'PRKAR2B', 'PPP1R14A', 'SLC5A7', 'C1QBP', 'GYS1', 'PLOD2', 'WASF1', 'ABCC5', 'HLA-DRB1', 'RASGRP3', 'G6PC2', 'VEGFC', 'HLA-G', 'EPOR', 'SMARCA1', 'COL9A1', 'CYP7A1', 'CBL', 'GSN', 'UGT2B7', 'PCK1', 'CACNA1B', 'EPHA4', 'CXCL8', 'PELO', 'PGK2', 'CD209', 'FGF17', 'GNG7', 'HSPA1L', 'ITGAV', 'TNFRSF1A', 'FN1', 'CHRM5', 'NOS1', 'CACNG4', 'MAPK7', 'PRKAR2A', 'CTNNA1', 'STK4', 'PLA2G12B', 'ACTR2', 'RICTOR', 'NOTCH1', 'CLDN1', 'EHHADH', 'BHLHE41', 'CYP1A2', 'S100A6', 'FTL', 'SMC1A', 'PLCG1', 'LRRC8A', 'ERK2', 'CREB3L1', 'SMAD3', 'RAC3', 'EPB41L4B', 'BDKRB2', 'EGR1', 'VPS11', 'SLC2A2', 'SLC9A1', 'FZD10', 'PRKAR1B', 'KAT5', 'ITGA3', 'CCNE1', 'SLC6A8', 'MAPK11', 'PPP2R1A', 'REL', 'DUSP1', 'HEYL', 'PRKCH', 'RTN4R', 'COL4A2', 'FAS', 'ERBB2', 'DIAPH1', 'RAB7A', 'DUSP3', 'MAPKAPK3', 'CAV1', 'CUL4A', 'AIP', 'BCR', 'WASF2', 'NRG1', 'ITGA1', 'BMP4', 'CYCS', 'SNAI2', 'BRIX1', 'PDYN', 'ARHGEF4', 'CACNA1I', 'RAPGEF5', 'CACNA2D4', 'VCAN', 'PPP2CA', 'RAD51', 'CACNA2D3', 'RXRB', 'GNG3', 'CALML5', 'CAB39', 'MAPK14', 'GSTA2', 'ITGA9', 'MMP2', 'F2', 'KITLG', 'FIGF', 'RASGRP4', 'VPS33A', 'ARNT', 'MAPK15', 'WNT7B', 'MSN', 'VAV2', 'CDH11', 'PGD', 'COL4A5', 'DNER', 'CD48', 'PPP5C', 'IL3', 'PARP1', 'EFNA2', 'PLA2G10', 'PRDX1', 'PARVG', 'OMG', 'EFNA5', 'UPK3A', 'PPP2R3B', 'APEX1', 'GPR153', 'ECSIT', 'ITGB1', 'PRKCZ', 'BAD', 'DDB1', 'CACNA1A', 'RAB9A', 'AXIN2', 'ABHD2', 'IGF1R', 'PPP2R2D', 'SLC7A5', 'MAP3K12', 'ITGA4', 'SOCS3', 'PPP1R12B', 'NCKAP1', 'PLA2G4F', 'CDH12', 'IRS2', 'SLC39A7', 'PTEN', 'LRP5', 'RXRA', 'PIK3R6', 'CXCL2', 'PPP2R2C', 'ALDH3A1', 'ANGPT4', 'CTNNA3', 'CTSL', 'DKK4', 'CLTCL1', 'UGT1A1', 'COL6A1', 'MMP7', 'PDK1', 'TFAP2A', 'CYP2B6', 'RXRG', 'SHC2', 'NFI', 'LEF1', 'AGER', 'SLC6A16', 'TLR4', 'MPL', 'PIK3C3', 'ATR', 'WNT10B', 'SLC2A12', 'SLC6A5', 'WNT11', 'PDLIM5', 'GNG8', 'PLCB1', 'TAB1', 'SRC', 'FGF10', 'PRKACG', 'ANGPT2', 'MYLK3', 'TNN', 'ATF4', 'SLC39A2', 'TTBK1', 'SLC6A14', 'PPP1CC', 'MAX', 'BUB1B-PAK6', 'GNA11', 'CCND3', 'CDH5', 'MGST1', 'ALDOB', 'G6PD', 'NCOA2', 'PFN1', 'RASGRF1', 'TSPAN8', 'DSCC1', 'TAOK2', 'MAP3K13', 'ITGA11', 'PPM1A', 'SGK1', 'CDKN1A', 'PPARA', 'NOS3', 'CDH8', 'MAPK13', 'HDAC7', 'CHUK', 'SDS', 'MCL1', 'FRAT2', 'BRAF', 'MAP3K11', 'CFL1', 'PLA2G4D', 'MGST2', 'IL9R', 'IL1B', 'FGFR1', 'ABL1', 'ARL5B', 'PTPRZ1', 'PDGFC', 'NTN1', 'IGF1', 'RHOC', 'DVL2', 'GADD45G', 'IL17B', 'YES1', 'MAP3K14', 'FOLR1', 'PLK2', 'DUSP16', 'GRB7', 'ERAS', 'ENAH', 'ITGA2B', 'TGFBR3', 'SLC5A9', 'SLCO2B1', 'SCD', 'SLC5A2', 'FOXO1', 'MAP3K6', 'SLC5A5', 'SSH1', 'DUSP2', 'DNAJC15', 'MAPKAP1', 'ORAI1', 'PLCB4', 'SLC2A6', 'GSR', 'ELAVL1', 'MAPK8', 'SOD3', 'CHRM1', 'TNS4', 'TLN2', 'FGF3', 'AKT1', 'CREB3', 'HEY2', 'PIK3AP1', 'SDSL', 'FKBP1A', 'MYL10', 'PTPRD', 'TESK2', 'STOM', 'FASN', 'IL6R', 'PRKAB2', 'CHRM3', 'CDH2', 'MMP12', 'VPS39', 'IL12B', 'GADD45A', 'BAIAP2', 'ITGAM', 'NBN', 'GNB3', 'TLN1', 'KEAP1', 'VASP', 'FLT4', 'CSNK1A1', 'LAMA2', 'IFNA6', 'CHAD', 'MAPK8IP1', 'VEGFB', 'HK2', 'VIL2', 'MP1', 'ARHGEF6', 'CLTB', 'MYL2', 'HSPA2', 'SLC39A5', 'PIK3C2B', 'PPP3R1', 'VWF', 'GADD45B', 'NEDD4', 'TP53', 'MST1', 'PAK7', 'ACTB', 'SMAD7', 'TGFA', 'ABCG8', 'SOX9', 'MSH2', 'IL2RB', 'SETD2', 'PTGES3', 'MYC', 'SLC7A11', 'FLT3', 'SHC3', 'CSNK2A2', 'RNF144B', 'LAT', 'HLA-DQB1', 'IFNA4', 'HES5', 'EPO', 'ETNK2', 'RBX1', 'FGF12', 'MAPK4', 'CSN2', 'CSNK1A1L', 'IL15RA', 'PLA2G2C', 'RASSF1', 'CDKN2B', 'RAD50', 'HES1', 'MYL12B', 'CCND2', 'LPAR5', 'WNT9B', 'ACTN4', 'MAPK8IP2', 'CYP8B1', 'ALDOC', 'KLF6', 'RALBP1', 'SLC19A2', 'PPP2R4', 'DKK2', 'GLUT1', 'ABL2', 'FZD4', 'PLA2G3', 'COL3A1', 'PLA2G6', 'SOD1', 'PKM2', 'RASGRP1', 'CDH20', 'AHCTF1', 'CYP1B1', 'TCF4', 'SLC2A1', 'ITGB8', 'JUN', 'COL4A6', 'IL20RB', 'GNB5', 'FGF9', 'FGF6', 'MAP3K8', 'MBL2', 'FGF22', 'LAMA5', 'AXIN1', 'TOP2A', 'MOS', 'MYB', 'CES1', 'RAB2A', 'RASAL1', 'GNB1', 'SHMT2', 'TNK2', 'CCL1', 'LDHB', 'ESR1', 'SMAD9', 'CSNK2A3', 'SPRY1', 'SLIT3', 'MFGE8', 'UGT1A6', 'IL6ST', 'PDPK1', 'SMAD4', 'BHLHE40', 'ARPC5', 'RAC1', 'LAMB1', 'ABCB4', 'GPR115', 'ITGB5', 'NRIP1', 'RAF1', 'PTPRT', 'CACNA1G', 'EFCAB3', 'EIF2S1', 'BST2', 'MIF', 'PFKFB4', 'KCNJ2', 'LGALS3', 'VPS18', 'CNTFR', 'GIT1', 'PLA2G12A', 'MDM2', 'ERRFI1', 'PLA2G2F', 'CACNG2', 'IL5RA', 'PSPH', 'SETD7', 'RASA4B', 'PRDX6', 'AKAP13', 'HSP90AA1', 'DNAJB1', 'VIM', 'CDH1', 'CACNA1F', 'SLC26A2', 'FOXO4', 'CPT1A', 'GNG13', 'MAP3K4', 'SLC2A9', 'GGT1', 'PTPN1', 'TACR1', 'PPP3CB', 'COL2A1', 'GYS2', 'DDIT3', 'DIAP1', 'NF1', 'BID', 'RAB5B', 'CDH22', 'DAPK3', 'JAK2', 'ZYX', 'THBD', 'GNGT2', 'NLK', 'NPC2', 'FAK', 'FZD1', 'PTPN11', 'CEBPZ', 'MYL1', 'MYLK4', 'POU5F1', 'PSMC5', 'PAK6', 'CD44', 'RPS6KA3', 'RAB8A', 'GPX3', 'FGD4', 'PRKCB', 'FGFBP1', 'PDGFD', 'ATM', 'PFKM', 'HLA-DMB', 'EXOC2', 'SLC10A1', 'LTB4R', 'PLA2G4C', 'CHTF18', 'AHRR', 'UGT1A9', 'RPS6', 'PRKAA1', 'HRAS', 'SERPINA1', 'CASP9', 'HBEGF', 'GCC1', 'ELF5', 'IRS1', 'RIN1', 'CSH1', 'CSK', 'HLA-DRB4', 'DAB2IP', 'LAMC3', 'SLC9A3', 'UGT1A7', 'RAPGEF2', 'SLC39A11', 'SHMT1', 'RASA3', 'HSPA1B', 'CREB3L4', 'STAT3', 'CACNB3', 'STAT1', 'COL6A5', 'LAMA1', 'E2F2', 'ITGB4', 'PDK4', 'B3GNT5', 'SLCO1B1', 'SLC27A1', 'RASA2', 'GRIN1', 'COL6A6', 'SMAD6', 'BAAT', 'GDNF', 'SHOC2', 'IL2RA', 'NGF', 'ETS1', 'ALOX5AP', 'FGF20', 'COL9A3', 'PLD1', 'MAPK3', 'ILK', 'CYP4A11', 'LIF', 'F2R', 'HIF1A', 'FGF4', 'ITGA8', 'IL2RG', 'CCL20', 'PTPRC', 'IFNB1', 'CAV3', 'HSPA1A', 'PTPN5', 'ABCC2', 'CSF3', 'GNAI3', 'CPEB4', 'SLC2A3', 'VEGFA', 'COX-2', 'RGL1', 'CLEC6A', 'MET', 'TSC22D3', 'MEF2D', 'FGF2', 'GSTM2', 'LIPE', 'IL1R2', 'WNT6', 'F2RL3', 'NPC1', 'PIP5K1A', 'MAPK9', 'HLA-DQB2', 'HSPB1', 'ROCK2', 'TNFRSF11A', 'MAP3K5', 'PITX2', 'ENO1', 'MAPKAPK5', 'JUND', 'E2F1', 'CDH7', 'CSNK2B', 'AQP4', 'DLL3', 'ARAF', 'NF2', 'PHLPP2', 'RDX', 'PPP3CC', 'FGF21', 'VAV3', 'CACNG6', 'HLA-DPB1', 'RB1', 'CRTC2', 'PXN', 'DUSP9', 'WNT16', 'CALM1', 'PCK2', 'IL3RA', 'MAP2K4', 'FGF8', 'ITGB6', 'CXCR7', 'ITGA10', 'PRKAG1', 'LPAR3', 'PEBP1', 'PTK2', 'TSG101', 'TNR', 'SLC6A4', 'CYP2C9', 'PGM2', 'GNAQ', 'ARHGEF1', 'VEGFD', 'ATF2', 'COL1A1', 'SLC6A19', 'SLC6A18', 'TRAF6', 'CREB3L2', 'MAP3K1', 'FGF16', 'SLC6A6', 'MDH1', 'PHLPP1', 'CALM2', 'IL4R', 'IQGAP1', 'TJP1', 'NTF3', 'PTGS2', 'TYMP', 'SDPR', 'LATS2', 'CSF1', 'NQO1', 'PKN2', 'FOXA1', 'NTRK1', 'PGM1', 'ANKRD1', 'E2F3', 'WNT5B', 'LDHD', 'MYLK2', 'PPARD', 'PPARGC1A', 'STMN1', 'STK11', 'DUSP8', 'HCK', 'HLA-DQA1', 'RAB11B', 'CD14', 'FZD3', 'MAP2K2', 'RAP1B', 'PARVA', 'KDM5C', 'MEF2C', 'FGF1', 'GSTM4', 'IFNA8', 'CACNA1C', 'SLC2A10', 'FANCI', 'TCF7', 'GCLM', 'CD19', 'TPCN2', 'DNAJC7', 'PIAS3', 'PPP2R3C', 'TAOK3', 'CALML4', 'ME1', 'MGAM', 'PIK3R1', 'MKNK1', 'DUSP7', 'MYL3', 'GSTA3', 'ARHGEF7', 'GPI', 'DOCK1', 'EFNA3', 'PRKCQ', 'ANXA2', 'PPP2R2A', 'COL6A2', 'IL12A', 'RASA1', 'HLA-DRB3', 'GSTM1', 'CACNA1S', 'PAK4', 'PLCG2', 'SRGN', 'INS', 'FZD6', 'DIAP3', 'VDR', 'SLC6A1', 'OTUD5', 'DEPTOR', 'GNG10', 'IL4', 'PLCB2', 'TXK', 'PAK2', 'TEAD3', 'TIAM1', 'ENO2', 'FGFR2', 'ZEB1', 'CYP4F12', 'MAP2K7', 'CCNE2', 'MAP2K5', 'FZD7', 'IL7', 'CBP', 'XYLT1', 'APC', 'KAT2B', 'CDH16', 'SLC6A2', 'PLA2G1B', 'NR1I3', 'TNFSF13', 'AHR', 'SLC2A7', 'RRAS', 'RTN4', 'IFNA16', 'PLAC8', 'RASAL3', 'IFNA2', 'GNAL', 'APOA1', 'ACACA', 'WNT1', 'STK3', 'PLA2G2A', 'FGF11', 'RASGRF2', 'CLEC4M', 'GSTA4', 'MTDH', 'VAV1', 'BLK', 'SMAD5', 'FKBP5', 'AGTR1', 'PKN3', 'SLC6A20', 'NCOA6', 'ANG1', 'NOTCH2', 'ITGAX', 'SLC5A11', 'CDH10', 'MAP3K2', 'CDC42EP3', 'LRRK2', 'CCR2', 'SERPINE1', 'CSNK2A1', 'SELP', 'FGR', 'PLA2G2D', 'NGFR', 'FKHR', 'NR1H3', 'MSH6', 'ITGB2', 'FLNC', 'ITGA7', 'PRKCA', 'PIK3R4', 'PGF', 'PLA2G2E', 'STAT5A', 'SRXN1', 'PPP1CA', 'CACNG8', 'PIK3CD', 'SLC39A10', 'DAPK1', 'PIK3R5', 'TCL1A', 'MAPK10', 'LAMA4', 'FOS', 'FZD8', 'UGT1A3', 'EP300', 'MERTK', 'CLTA', 'KSR2', 'COL11A1', 'RIPK4', 'ERK1', 'GAPDH', 'NR0B2', 'CACNG7', 'MYL12A', 'DDIT4', 'VPS4A', 'TOX2', 'THEM4', 'CAMK1', 'BAK1', 'CYP2A6', 'ACAA1', 'THBS4', 'SERPINB9', 'GHR', 'PPP2R5A', 'TNFSF11', 'PKLR', 'CDKN2C', 'GFAP', 'TCF7L1', 'LPAR1', 'BRCA1', 'CCNG1', 'LTB', 'PLXNA2', 'MAPK8IP3', 'PAK1', 'DVL1', 'DAXX', 'SRPX2', 'CRABP2', 'FTH1', 'CBR3', 'INHBB', 'PGK1', 'CDC42', 'BIRC5', 'FABP1', 'MYLK', 'KLF4', 'FGF13', 'ASGR1', 'FZD9', 'MLST8', 'BIRC3', 'PPP1R14C', 'RASAL2', 'NAV3', 'CYP3A4', 'BLVRB', 'SCIN', 'FCGR2A', 'CTF1', 'CDKN1B', 'AFDN', 'CDC37', 'CYP3A7', 'VPS41', 'ITGAD', 'ERBB1', 'MAP2K3', 'PIP5KL1', 'PDGFRB', 'ERBB4', 'AMPK', 'BAX', 'AGTR2', 'PIK3R3', 'FLT3LG', 'ANGPTL4', 'GRIN2A', 'CDH13', 'SULT2A1', 'HLA-C', 'CAB39L', 'SLC6A15', 'JAG2', 'PFKFB2', 'PTK6', 'RAB5A', 'WNT2B', 'RRAS2', 'SULT1A1', 'NCOA3', 'ETS2', 'SLIT1', 'TRAF2', 'KIT', 'GAS6', 'BMPR1A', 'SLC39A14', 'YY1', 'NCK1', 'CASP7', 'ARHGAP35', 'COL5A1', 'RASA4', 'GAP43', 'FLT1', 'MBP', 'WNT2', 'CSH2', 'TNK1', 'HMOX1', 'SLC6A7', 'KRAS', 'PRKAB1', 'AKT2', 'PRKAG2', 'RFC3', 'ITGB3', 'CAP2', 'PPP1CB', 'RGS2', 'IRF7', 'PRKRA', 'IFNGR2', 'TYK2', 'NR1I2', 'MEF2B', 'HLA-DOB', 'NCAN', 'UBE2T', 'CRKL', 'TAB2', 'LIMK1', 'LATS1', 'CALM3', 'HGF', 'IL7R', 'ENO3', 'MAFF', 'MYL5', 'NR1H4', 'CACNG1', 'ZFP36', 'SLC2A13', 'IL20RA', 'SPINK13', 'GNB2', 'EIF2AK2', 'HSP90AB1', 'LPAR2', 'DDB2', 'SRF', 'BRCA2', 'FASLG', 'RHEB', 'GNAS', 'FZD2', 'TNFAIP3', 'HLA-DRB5', 'NOS2', 'EIF4EBP1', 'ICAM1', 'CEBP', 'PGBD5', 'ARRB1', 'CDH24', 'PKM', 'RARB', 'ICAM3', 'MAP3K7', 'SSH2', 'BMPR2', 'SELENOP', 'CREBBP', 'EGF', 'ANXA1', 'MAP4K1', 'NCOA1', 'GJB2', 'LEP', 'CDC25B', 'EIF4E1B', 'PRKAR1A', 'P23', 'DCAF1', 'TRIM28', 'GAST', 'STYK1', 'CHGA', 'WNT4', 'BMPR1B', 'ZAK', 'SLC5A10', 'GRB2', 'RHOB', 'IFNA5', 'KDR', 'COL4A3', 'IFNA17', 'C11orf13', 'LILRB3', 'IL22RA1', 'COL5A3', 'ABCB11', 'IGFBP1', 'SCNN1A', 'PRKCI', 'GSTP1', 'PRRG4', 'CHTF8', 'KPNA1', 'SLC2A8', 'ACTN1', 'SLC2A11', 'CHST11', 'ERK', 'RELB', 'CES4A', 'CD300A', 'GSTA1', 'CDH9', 'XIAP', 'SPRY2', 'RPS6KB1', 'PRL', 'EPHA3', 'TXNRD1', 'ATF6B', 'PRKAG3', 'DLL1', 'G6PC', 'ACAN', 'PIK3IP1', 'MRE11A', 'CREB1', 'HLA-E', 'PGR', 'RHOD', 'KLK8', 'YAP1', 'HAVCR2', 'NR4A1', 'CYFIP2', 'IL2', 'CTGF', 'TXNRD3', 'DAPK2', 'HLA-B', 'TIMD4', 'SYK', 'BTG2', 'RAB10', 'PIP5K2B', 'SSPN', 'IL1R1', 'CXCL1', 'IBSP', 'GH2', 'PDGFRA', 'INSR', 'CD47', 'KTN1', 'GSTM3', 'PIK3CB', 'CD63', 'GH1', 'THBS2', 'PBRM1', 'RPS6KA4', 'TNC', 'RELA', 'COL4A4', 'PRKD3', 'SPP1', 'PPP2R5B', 'VHL', 'IL12RB2', 'MAFG', 'LIN28B', 'CAPN2', 'ITGA2', 'VIL1', 'HLA-DPA1', 'CREB3L3', 'CDK4', 'ULK1', 'STRADA', 'PAK3', 'YY1AP1', 'EIF4E', 'SCP2', 'IRF3', 'ARG1', 'PPP1R12A', 'EFNA4', 'PIP5K1C', 'CDKN2A', 'BCAR1', 'CACNA1E', 'SERTAD2', 'E2F5', 'TNF', 'GAB1', 'PTGR1', 'CHRM4', 'BDKRB1', 'EIF4B', 'SREBF1', 'CACNA2D2', 'IGF2', 'CES5A', 'MSK1', 'SLC27A5', 'C5', 'CACNA1H', 'AKT3', 'PIP5K2A', 'GNG5', 'FGFR4', 'LDHC', 'NFE2L2', 'GNA12', 'RHOA', 'FGF14', 'AMOT', 'PMP2', 'FLNA', 'SLC39A12', 'MTOR', 'PHGDH', 'PDGFB', 'ERBB3', 'PPP2R3A', 'PRKACA', 'FGFR3', 'PNCK', 'GNB4', 'CLEC10A', 'IL8', 'EPAS1', 'MAP4K2', 'ALAS1', 'BMP2', 'LPAR4', 'IFNA14', 'CALML3', 'TAZ', 'GAB2', 'KSR1', 'SHH', 'TGFB1', 'BAP1', 'ATP2C2', 'PPP2R5C', 'RAB14', 'TGFBR1', 'ABCB1', 'IP6K3', 'PLA2G4B', 'CACNB1', 'PDE4B', 'CCND1', 'CD36', 'DUSP6', 'EIF4G', 'FRAT1', 'CEBPA', 'RAB5C', 'ANG4', 'TAOK1', 'RELN', 'FGF19', 'RS1', 'GRIN2B', 'FGF5', 'SLC5A1', 'WNT7A', 'FGF18', 'ROCK1', 'CDK1', 'EGFR', 'PFKFB3', 'F2RL2', 'ESR2', 'ARHGAP5', 'SLC6A11', 'GRLF1', 'GRB14', 'PAK5', 'OSMR', 'MRAS', 'BIRC2', 'CACYBP', 'CYP3A5', 'WNT10A', 'SORBS2', 'MMP1', 'PRKCE', 'SEC14L1', 'CDH6', 'PPP3CA', 'USF2', 'DDX58', 'GNG12', 'FYN', 'BDNF', 'DUSP4', 'IKBKB', 'TLR2', 'PRKACB', 'PTPRR', 'GNG4', 'ATAD2', 'PTPRA', 'AIF1', 'LIFR', 'EPHA2', 'VEGF', 'RPTOR', 'PPP2R5D', 'SLC5A6', 'ACLY', 'CCL2', 'UBE2C', 'HTR7', 'PDGFA', 'ACACB', 'IFNA21', 'SLC2A14', 'HK1', 'PPP5D1', 'TEAD1', 'CDKN1C', 'PFKP', 'PLA2G5', 'MMP9', 'SLC39A1', 'SLC2A4', 'CTNNB1', 'VEGFR2', 'SNURF', 'RBL2', 'CLTC', 'MAP4K4', 'AXL', 'CACNA2D1', 'TFF2', 'OSM', 'SAV1', 'SERPINB2', 'APOA5', 'CDH19', 'HK3', 'NRAS', 'ARF6', 'ANG2', 'TBC1D1', 'MAP2K1', 'IKBKE', 'PKN1', 'CAV2', 'DUSP10', 'ZIC2', 'LAMC2', 'TBK1', 'CACNA1D', 'ALDOA', 'POLK', 'CYP1A1', 'FGF7', 'GSTT2', 'VGLL4', 'GSTA5', 'CETN3', 'SLC6A13', 'ABI2', 'SHC1', 'EML4', 'LAMA3', 'SHC4', 'STAT5B', 'HSP90B1', 'RAD17', 'MAPKAPK2', 'PPARG', 'SMAD1', 'GSTT1', 'ABCC3', 'LMNB2', 'ACVR1', 'MAPK12', 'SSH3', 'CEP290', 'TPI1', 'CES3', 'MYOF', 'SLC39A3', 'EIF4E2']

In [4]:
def get_common_genes(df):
    
    df = df.drop(columns=['EGFR_Mutation', 'EGFR_Location', "EGFR_Mutation_Status"])
    prot_genes_list = df.columns.values.tolist()
    common_genes = [] 
    for gene in interacting_genes:
        if gene + "_proteomics" in prot_genes_list:
            common_genes.append(gene + "_proteomics")
    return common_genes

In [5]:
#load cptac data 
brain = cptac.Gbm()
kidney = cptac.Ccrcc()
Ovar = cptac.Ovarian()
colon = cptac.Colon()
brca = cptac.Brca()
luad = cptac.Luad()
Hnscc = cptac.Hnscc()
Lscc = cptac.Lscc()
en = cptac.Endometrial()

Checking that ccrcc index is up-to-date...



Checking that hnscc index is up-to-date...  



                                                

# Step 1: Make Data frame


For each cancer, get proteomics and mutation data. Filter out non tumor samples. 

# Step2: Get common genes

For each cancer, get the list of EGFR interacting genes that are in the proteomic data set. Use the get_common_genes function which extracts the protein names from the df and loops to see which ones are interacting genes. Returns list of common genes

# Step 3: Run Get Pearson Correlation Function 

For each cancer, run pearson correlation function. Use data frame filter to only have common genes. Save two dfs as csv, one with Cancer Type column and one without. 


# GBM

In [6]:
df1 = brain.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
Gbm_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]



In [7]:
common_genes = get_common_genes(Gbm_tumor)

In [8]:
Gbm_tumor = Gbm_tumor[common_genes]
df =(wrap_lin_regression(Gbm_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
df.to_csv("csv_files/GBM_EGFR_interacting_pearson.csv")
df['Cancer Type']='GBM'
df.to_csv("csv_files/GBM_EGFR_interacting_pearson2.csv")
df

Unnamed: 0,Comparison,Correlation,P_value,Cancer Type
989,EGFR_proteomics,1.000e+00,0.000e+00,GBM
870,GRB2_proteomics,-6.109e-01,1.886e-11,GBM
148,CDH4_proteomics,5.592e-01,1.790e-09,GBM
1055,SHC1_proteomics,-5.405e-01,7.695e-09,GBM
170,WAS_proteomics,-5.019e-01,1.205e-07,GBM
...,...,...,...,...
549,GCC1_proteomics,-7.676e-04,9.940e-01,GBM
154,MDM4_proteomics,6.007e-04,9.966e-01,GBM
416,ARHGEF6_proteomics,-3.424e-04,9.973e-01,GBM
968,CALML3_proteomics,3.265e-04,9.976e-01,GBM


# Kidney 

In [9]:
df_kidney = kidney.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_kidney.columns = df_kidney.columns.droplevel(1)
kidney_tumor = df_kidney.loc[df_kidney['Sample_Status'] == "Tumor"]



In [10]:
common_genes = get_common_genes(kidney_tumor)  

In [11]:
kidney_tumor = kidney_tumor[common_genes]
df_kidney =(wrap_lin_regression(kidney_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
df_kidney.to_csv("csv_files/Kidney_EGFR_interacting_pearson.csv")
df_kidney['Cancer Type']='Kidney'
df_kidney.to_csv("csv_files/Kidney_EGFR_interacting_pearson2.csv")

# Endometrial 

In [12]:
df_en = en.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
en_tumor = df_en.loc[df_en['Sample_Status'] == "Tumor"]



In [13]:
common_genes = get_common_genes(en_tumor)  

In [14]:
en_tumor = en_tumor[common_genes]
en_df =(wrap_lin_regression(en_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
en_df.to_csv("csv_files/Endo_EGFR_interacting_pearson.csv")
en_df['Cancer Type']='Endometrial'
en_df.to_csv("csv_files/Endo_EGFR_interacting_pearson2.csv")


KeyError: "['GAST_proteomics'] not in index"

# Ovarian 

In [None]:
df_ovar = Ovar.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_ovar.columns = df_ovar.columns.droplevel(1)
df_ovar = df_ovar.loc[df_ovar['Sample_Status'] == "Tumor"]



In [None]:
common_genes = get_common_genes(df_ovar)  

In [None]:
df_ovar = df_ovar[common_genes]
ovar_df =(wrap_lin_regression(df_ovar,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
ovar_df.to_csv("Ovar_EGFR_interacting_pearson.csv")
ovar_df['Cancer Type']='Ovarian'
ovar_df.to_csv("Ovar_EGFR_interacting_pearson2.csv")


# Colon

In [None]:
df_colon = colon.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_colon = df_colon.loc[df_colon['Sample_Status'] == "Tumor"]

In [None]:
common_genes = get_common_genes(df_colon)  

In [None]:
colon_tumor = df_colon[common_genes]
colon_df =(wrap_lin_regression(colon_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
colon_df.to_csv("Colon_EGFR_interacting_pearson.csv")
colon_df['Cancer Type']='Colon'
colon_df.to_csv("Colon_EGFR_interacting_pearson2.csv")


# Brca 

In [None]:
df_brca = brca.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_brca.columns = df_brca.columns.droplevel(1)
df_brca = df_brca.loc[df_brca['Sample_Status'] == "Tumor"]

In [None]:
common_genes = get_common_genes(df_brca) 

In [None]:
#en_tumor = en_tumor.dropna(axis='columns', how="all")
brca_tumor = df_brca[common_genes]
brca_df =(wrap_lin_regression(brca_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
brca_df.to_csv("brca_EGFR_interacting_pearson.csv")
brca_df['Cancer Type']='Brca'
brca_df.to_csv("Brca_EGFR_interacting_pearson2.csv")


# LUAD

In [None]:
df_luad = luad.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_luad.columns = df_luad.columns.droplevel(1)
df_luad = df_luad.loc[df_luad['Sample_Status'] == "Tumor"]

In [None]:
common_genes = get_common_genes(df_luad) 

In [None]:
#en_tumor = en_tumor.dropna(axis='columns', how="all")
luad_tumor = df_luad[common_genes]
luad_df =(wrap_lin_regression(luad_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
luad_df.to_csv("luad_EGFR_interacting_pearson.csv")
luad_df['Cancer Type']='Luad'
luad_df.to_csv("Luad_EGFR_interacting_pearson2.csv")
luad_df

# HNSCC

In [None]:
df_hnscc = Hnscc.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_hnscc = df_hnscc.loc[df_hnscc['Sample_Status'] == "Tumor"]

In [None]:
common_genes = get_common_genes(df_hnscc) 

In [None]:
hnscc_tumor = df_hnscc[common_genes]
hnscc_df =(wrap_lin_regression(hnscc_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
hnscc_df.to_csv("hnscc_EGFR_interacting_pearson.csv")
hnscc_df['Cancer Type']='Hnscc'
hnscc_df.to_csv("Hnscc_EGFR_interacting_pearson2.csv")

# LSCC

In [None]:
df_lscc = Lscc.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes="EGFR")
df_lscc.columns = df_lscc.columns.droplevel(1)
df_lscc = df_lscc.loc[df_lscc['Sample_Status'] == "Tumor"]

In [None]:
common_genes = get_common_genes(df_lscc)         

In [None]:
lscc_tumor = df_lscc[common_genes]
lscc_df =(wrap_lin_regression(lscc_tumor,"EGFR_proteomics",comparison_columns = common_genes, return_all = True))
lscc_df.to_csv("lscc_EGFR_interacting_pearson.csv")
lscc_df['Cancer Type']='Lscc'
lscc_df.to_csv("Lscc_EGFR_interacting_pearson2.csv")
