# Exploring Combined Data frames

This notebook looks at the large data frames for both all interacting proteins and Bonferroni-significant proteins. Its purpose is to find interesting genes that display different correlations in different cancer types.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
import statsmodels.stats.multitest

import cptac
import cptac.utils as u
import plot_utils as p 

  import pandas.util.testing as tm


In [17]:
def HasPosNeg(row, threshold=0.3):
    """Report whether ``row`` holds both a strong positive and a strong negative value.

    Parameters
    ----------
    row : iterable
        Values to scan, e.g. one DataFrame row supplied by
        ``df.apply(HasPosNeg, axis=1)``. Entries for which ``pd.isnull`` is
        true are skipped.
    threshold : float, default 0.3
        Magnitude cutoff. A value counts as positive when it is strictly
        greater than ``threshold`` and as negative when it is strictly less
        than ``-threshold``.

    Returns
    -------
    bool
        True only when at least one value is above ``threshold`` and at least
        one value is below ``-threshold``; otherwise False.
    """
    hasPos = False
    hasNeg = False

    for item in row:
        if pd.isnull(item):
            continue
        if item < -threshold:
            hasNeg = True
        if item > threshold:
            hasPos = True
        # Logical `and` (not bitwise `&`) on the two flags; stop scanning as
        # soon as both signs have been seen.
        if hasPos and hasNeg:
            return True

    return False

def Pvalue_sig(row):
    """Count the entries of ``row`` that are non-null and strictly below 0.05.

    ``row`` is any iterable of values (for example a DataFrame row passed by
    ``df.apply(Pvalue_sig, axis=1)``). Entries for which ``pd.isnull`` is true
    are ignored. Returns the count as an int.
    """
    return sum(
        1
        for p_value in row
        if not pd.isnull(p_value) and p_value < 0.05
    )

def CountPosNeg(row, threshold=0.3):
    """Count the values in ``row`` whose magnitude exceeds ``threshold``.

    Parameters
    ----------
    row : iterable
        Values to scan, e.g. one DataFrame row supplied by
        ``df.apply(CountPosNeg, axis=1)``. Entries for which ``pd.isnull`` is
        true are skipped.
    threshold : float, default 0.3
        Non-negative cutoff. A value is counted when it is strictly greater
        than ``threshold`` or strictly less than ``-threshold``.

    Returns
    -------
    int
        Number of counted values (0 for an empty or all-null row).
    """
    counter = 0
    for item in row:
        if pd.isnull(item):
            continue
        if item < -threshold or item > threshold:
            counter += 1
    return counter

# Read in significant correlation data frame

Read in the wide version of the data frame, then set `Comparison` as the index. Then filter the data frame to keep only the correlation columns.

In [18]:
# Read the wide table, drop the 'Unnamed: 0' column, index the rows by
# 'Comparison', and remove the 'EGFR_proteomics' row.
df_sig = (
    pd.read_csv("combining_pearson_dfs/csv_files/pancan_EGFR_all_sig_wide.csv")
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
    .drop(index=['EGFR_proteomics'])
)
df_sig

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
PHLDA1_proteomics,0.816848,6.553435e-25,,,,,,,,,0.664271,3.426615e-15,,,0.709812,3.919195e-18
GRB2_proteomics,-0.610889,1.886384e-11,,,,,,,,,-0.532341,2.559824e-09,,,,
SOCS2_proteomics,0.562720,1.343464e-09,,,,,,,,,,,,,,
CDH4_proteomics,0.559180,1.790048e-09,,,,,,,,,,,,,,
PLA2G15_proteomics,-0.556624,2.197562e-09,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WLS_proteomics,,,,,,,,,,,,,,,0.431664,2.496726e-06
NPTN_proteomics,,,,,,,,,,,,,,,0.427819,3.133199e-06
PLEKHA1_proteomics,,,,,,,,,,,,,,,0.426354,3.413811e-06
FAM160A1_proteomics,,,,,,,,,,,,,,,0.423148,4.113199e-06


In [14]:
# Keep only the per-cancer correlation columns.
col = [
    "Correlation_Gbm",
    "Correlation_kidney",
    "Correlation_Ovar",
    "Correlation_Brca",
    "Correlation_Luad",
    "Correlation_hnscc",
    "Correlation_colon",
    "Correlation_Lscc",
]
# .copy() makes df_sig an independent frame, so columns added to it in later
# cells do not trigger pandas' SettingWithCopyWarning.
df_sig = df_sig[col].copy()


# Find Proteins with both pos and neg Correlations 

Apply the HasPosNeg function to the data frame to find rows that have both positive and negative correlations. Then use the new column to filter out the rows without both positive and negative correlations.

In [19]:
# Flag each row that has both a positive and a negative correlation.
# assign() builds a new frame rather than writing into df_sig in place, so no
# SettingWithCopyWarning is raised when df_sig is a column slice.
df_sig = df_sig.assign(Pos_Neg=df_sig.apply(HasPosNeg, axis=1))


In [20]:
# Keep only the rows flagged True in 'Pos_Neg', then record their index labels.
df_sig = df_sig.loc[df_sig['Pos_Neg'].eq(True)]

pos_neg_list_sig = df_sig.index.tolist()
df_sig

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc,Pos_Neg
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
HSD17B11_proteomics,-0.491843,2.340296e-07,,,,,0.481667,5.044047e-08,,,,,,,,,True
PPP2R3A_proteomics,-0.503777,4.157858e-07,,,,,0.446137,5.820667e-07,,,,,,,,,True
HAAO_proteomics,-0.481974,4.393106e-07,,,,,0.462999,1.887327e-07,,,,,,,,,True
CTNND2_proteomics,0.470002,9.188543e-07,,,,,-0.468108,1.325474e-07,,,,,,,,,True
EHBP1L1_proteomics,-0.460074,1.659417e-06,,,,,0.420966,2.804677e-06,,,,,,,,,True
MICALL1_proteomics,,,-0.435501,2e-06,,,0.433383,1.311735e-06,,,,,,,0.464094,3.279137e-07,True
RARA_proteomics,,,,,,,-0.483562,4.392067e-08,0.476297,1.444608e-07,,,,,,,True
CELSR1_proteomics,,,,,,,-0.452337,3.873966e-07,,,,,,,0.505329,1.798822e-08,True


# Exploring all Interacting Proteins data frame 


In [44]:
# Read the wide table of interacting proteins, drop the 'Unnamed: 0' column,
# index the rows by 'Comparison', and remove the 'EGFR_proteomics' row.
df_interacting = (
    pd.read_csv("combining_pearson_dfs/csv_files/pancan_EGFR_prot_interacting_wide.csv")
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
    .drop(index=['EGFR_proteomics'])
)
df_interacting

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
GRB2_proteomics,-0.610889,1.886384e-11,-0.217427,0.022506,-0.190090,0.085202,-0.177379,0.057899,-0.302439,0.001321,-0.532341,2.559824e-09,0.150960,0.139949,-0.174810,0.067764
CDH4_proteomics,0.559180,1.790048e-09,0.148407,0.257781,,,,,,,,,,,,
SHC1_proteomics,-0.540524,7.695118e-09,,,0.175098,0.113358,0.090908,0.333927,-0.135844,0.262164,,,0.203250,0.045855,0.123922,0.197109
WAS_proteomics,-0.501918,1.205168e-07,-0.279814,0.003071,-0.090413,0.416288,0.153496,0.101463,-0.337909,0.000306,-0.503427,2.411333e-08,0.282401,0.005070,-0.177954,0.062892
NPC2_proteomics,-0.498791,1.484249e-07,-0.319133,0.000678,-0.183193,0.097375,0.279599,0.002476,0.292520,0.001928,,,0.040840,0.691234,-0.013310,0.890238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IRS4_proteomics,,,,,,,,,,,,,,,0.075801,0.665175
RELN_proteomics,,,,,,,,,,,,,,,0.040423,0.792057
BMP2_proteomics,,,,,,,,,,,,,,,-0.031509,0.795679
ALK_proteomics,,,,,,,,,,,,,,,0.014807,0.906807


In [45]:
# Per-cancer correlation columns of the interacting-protein table.
col = [
    "Correlation_Gbm",
    "Correlation_kidney",
    "Correlation_Ovar",
    "Correlation_Brca",
    "Correlation_Luad",
    "Correlation_hnscc",
    "Correlation_colon",
    "Correlation_Lscc",
]
# Copy the column subset so the flag column below is written to an independent
# frame rather than to a slice of df_interacting (avoids SettingWithCopyWarning).
df_interacting_corr = df_interacting[col].copy()
# Flag rows that have both a positive and a negative correlation.
df_interacting_corr["Pos_Neg"] = df_interacting_corr.apply(HasPosNeg, axis=1)
# Keep only the flagged rows and record their index labels.
df_interacting_corr = df_interacting_corr[df_interacting_corr['Pos_Neg'] == True]
pos_neg_list_interacting_corr = df_interacting_corr.index.values.tolist()
df_interacting_corr


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,Correlation_Gbm,Correlation_kidney,Correlation_Ovar,Correlation_Brca,Correlation_Luad,Correlation_hnscc,Correlation_colon,Correlation_Lscc,Pos_Neg
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
CTSB_proteomics,-0.496895,-0.226310,-0.033211,0.341048,-0.249333,,0.096105,0.142704,True
PPP2R3A_proteomics,-0.503777,0.285735,0.103830,0.446137,0.348490,,,0.235523,True
COL6A2_proteomics,-0.482774,,0.170011,0.158021,0.060439,,0.303509,-0.006658,True
PIK3CD_proteomics,-0.466265,-0.188653,-0.211141,0.099009,-0.331760,,0.475177,-0.136597,True
RASAL3_proteomics,-0.464579,-0.203080,-0.171440,0.025792,-0.282623,-0.424931,0.326434,-0.208108,True
...,...,...,...,...,...,...,...,...,...
PRKCI_proteomics,0.011144,0.162683,0.059136,-0.307493,0.303722,,0.088019,0.119383,True
UBE2T_proteomics,-0.010798,,-0.436850,0.023665,-0.137250,,0.544972,-0.157148,True
SMAD3_proteomics,-0.008201,0.152903,0.070396,-0.324801,-0.250872,,0.055987,0.323183,True
TNXB_proteomics,0.007045,-0.181327,0.303272,0.000828,-0.327411,,0.257114,-0.229124,True


# P values significant in some cancers but not others

In [53]:
# Per-cancer p-value columns of the interacting-protein table.
col = [
    "P_value_Gbm",
    "P_value_kidney",
    "P_value_Ovar",
    "P_value_Brca",
    "P_value_Luad",
    "P_value_hnscc",
    "P_value_colon",
    "P_value_Lscc",
]
# Copy the column subset so the count column below is written to an independent
# frame rather than to a slice of df_interacting (avoids SettingWithCopyWarning).
df_interacting_pval = df_interacting[col].copy()
# Per row, the number of p-values that Pvalue_sig counts as significant.
df_interacting_pval["Pvalue_sig"] = df_interacting_pval.apply(Pvalue_sig, axis=1)
df_interacting_pval

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,P_value_Gbm,P_value_kidney,P_value_Ovar,P_value_Brca,P_value_Luad,P_value_hnscc,P_value_colon,P_value_Lscc,Pvalue_sig
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
GRB2_proteomics,1.886384e-11,0.022506,0.085202,0.057899,0.001321,2.559824e-09,0.139949,0.067764,2
CDH4_proteomics,1.790048e-09,0.257781,,,,,,,1
SHC1_proteomics,7.695118e-09,,0.113358,0.333927,0.262164,,0.045855,0.197109,1
WAS_proteomics,1.205168e-07,0.003071,0.416288,0.101463,0.000306,2.411333e-08,0.005070,0.062892,2
NPC2_proteomics,1.484249e-07,0.000678,0.097375,0.002476,0.001928,,0.691234,0.890238,1
...,...,...,...,...,...,...,...,...,...
IRS4_proteomics,,,,,,,,0.665175,0
RELN_proteomics,,,,,,,,0.792057,0
BMP2_proteomics,,,,,,,,0.795679,0
ALK_proteomics,,,,,,,,0.906807,0


In [47]:
# Display the rows ordered by 'Pvalue_sig', largest count first.
df_interacting_pval.sort_values(by='Pvalue_sig', ascending=False)

Unnamed: 0_level_0,P_value_Gbm,P_value_kidney,P_value_Ovar,P_value_Brca,P_value_Luad,P_value_hnscc,P_value_colon,P_value_Lscc,Pvalue_sig
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
EML4_proteomics,0.000191,7.059436e-01,0.014789,6.934519e-05,0.000018,,0.000215,6.888475e-01,4
LAMC2_proteomics,,1.872509e-01,0.000451,1.917328e-05,0.098958,,0.000378,1.433448e-07,4
SLC2A1_proteomics,0.882389,2.739013e-04,0.537230,2.674763e-09,0.593503,,0.733224,8.077329e-06,3
PDK1_proteomics,0.840148,1.068217e-06,0.021848,9.383917e-05,0.000092,,0.397201,4.267203e-02,3
PFKP_proteomics,0.271550,9.503928e-07,0.915161,9.562825e-11,0.011385,2.184808e-07,0.005514,9.839940e-02,3
...,...,...,...,...,...,...,...,...,...
CLTB_proteomics,0.206836,,0.031119,9.721446e-01,0.295254,,0.082520,8.359661e-02,0
CAPN2_proteomics,0.207763,7.187026e-01,0.049550,1.559450e-01,0.096171,,0.016570,2.859702e-02,0
GSR_proteomics,0.210775,8.637475e-01,0.409702,1.096474e-01,0.002421,,0.471578,9.155747e-01,0
CREB1_proteomics,0.213122,1.020545e-01,0.003181,2.144922e-01,0.015819,,0.069824,4.732055e-01,0


In [48]:
# Rows whose 'Pvalue_sig' count is exactly 3 or 4.
pval_3_4 = df_interacting_pval.loc[
    df_interacting_pval['Pvalue_sig'].isin([3, 4])
]
pval_3_4

Unnamed: 0_level_0,P_value_Gbm,P_value_kidney,P_value_Ovar,P_value_Brca,P_value_Luad,P_value_hnscc,P_value_colon,P_value_Lscc,Pvalue_sig
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
WAS_proteomics,1.205168e-07,0.00307075,0.416288,0.1014634,0.0003058061,2.411333e-08,0.00507,0.06289182,3
PPP2R3A_proteomics,4.157858e-07,0.002478699,0.350241,5.820667e-07,0.0001907963,,,0.01325248,3
CTNNB1_proteomics,1.586832e-06,0.02436436,0.017285,0.003846357,0.0001997006,,0.013663,7.213703e-05,3
PIK3AP1_proteomics,8.12587e-06,0.3867047,0.564339,0.000275528,0.07577893,2.000795e-07,0.027557,0.01904173,3
PGM1_proteomics,9.781084e-06,0.001408217,0.000106,0.0001352162,0.1183574,,0.000812,0.8870194,3
NFKB2_proteomics,2.017015e-05,,0.480099,0.8787809,9.732742e-06,,0.000326,0.1300228,3
EML4_proteomics,0.0001905986,0.7059436,0.014789,6.934519e-05,1.781504e-05,,0.000215,0.6888475,4
MET_proteomics,0.0004779015,0.01098491,0.259385,7.889147e-05,7.518188e-08,,0.372204,0.03287605,3
ICAM1_proteomics,0.05373432,0.0001430774,0.283561,6.377631e-07,0.0004207913,,0.018382,0.8680469,3
PFKP_proteomics,0.2715499,9.503928e-07,0.915161,9.562825e-11,0.01138515,2.184808e-07,0.005514,0.0983994,3


In [42]:


# Index labels of the rows selected into pval_3_4, as a plain Python list.
pvals_list = pval_3_4.index.tolist()
pvals_list

['WAS_proteomics',
 'PPP2R3A_proteomics',
 'CTNNB1_proteomics',
 'PIK3AP1_proteomics',
 'PGM1_proteomics',
 'NFKB2_proteomics',
 'EML4_proteomics',
 'MET_proteomics',
 'ICAM1_proteomics',
 'PFKP_proteomics',
 'PRKCB_proteomics',
 'PDK1_proteomics',
 'SLC2A1_proteomics',
 'LAMC2_proteomics']

In [49]:
# Full rows (correlations and p-values) for the labels listed in pvals_list.
df1 = df_interacting.loc[df_interacting.index.isin(pvals_list)]
df1

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
WAS_proteomics,-0.501918,1.205168e-07,-0.279814,0.00307075,-0.090413,0.416288,0.153496,0.1014634,-0.337909,0.0003058061,-0.503427,2.411333e-08,0.282401,0.00507,-0.177954,0.06289182
PPP2R3A_proteomics,-0.503777,4.157858e-07,0.285735,0.002478699,0.10383,0.350241,0.446137,5.820667e-07,0.34849,0.0001907963,,,,,0.235523,0.01325248
CTNNB1_proteomics,0.460836,1.586832e-06,0.2146,0.02436436,0.260722,0.017285,0.267531,0.003846357,0.347483,0.0001997006,,,-0.249644,0.013663,0.369198,7.213703e-05
PIK3AP1_proteomics,-0.431734,8.12587e-06,-0.083341,0.3867047,-0.066263,0.564339,0.333079,0.000275528,-0.170016,0.07577893,-0.473445,2.000795e-07,0.263444,0.027557,-0.22328,0.01904173
PGM1_proteomics,0.428255,9.781084e-06,0.300791,0.001408217,0.412561,0.000106,0.348497,0.0001352162,0.152606,0.1183574,,,0.334486,0.000812,-0.013702,0.8870194
NFKB2_proteomics,-0.414292,2.017015e-05,,,0.078581,0.480099,0.014378,0.8787809,-0.407887,9.732742e-06,,,0.357365,0.000326,0.145248,0.1300228
EML4_proteomics,-0.366514,0.0001905986,0.036379,0.7059436,0.266729,0.014789,0.362273,6.934519e-05,0.396712,1.781504e-05,,,-0.367283,0.000215,0.038605,0.6888475
MET_proteomics,-0.344604,0.0004779015,0.241638,0.01098491,0.13663,0.259385,0.451064,7.889147e-05,0.485692,7.518188e-08,,,0.092582,0.372204,0.20362,0.03287605
ICAM1_proteomics,-0.194484,0.05373432,0.354767,0.0001430774,0.119093,0.283561,0.444728,6.377631e-07,0.33053,0.0004207913,,,0.239018,0.018382,0.016023,0.8680469
PFKP_proteomics,0.111576,0.2715499,0.447511,9.503928e-07,-0.011873,0.915161,0.557645,9.562825e-11,-0.240483,0.01138515,0.472135,2.184808e-07,0.279773,0.005514,0.158388,0.0983994


In [54]:
# Rows whose 'Pvalue_sig' count is 2, 3 or 4.
# NOTE(review): counts above 4 are excluded by this filter — confirm intended.
pvals_any_sig = df_interacting_pval.loc[
    df_interacting_pval['Pvalue_sig'].isin([2, 3, 4])
]
pvals_any_sig

Unnamed: 0_level_0,P_value_Gbm,P_value_kidney,P_value_Ovar,P_value_Brca,P_value_Luad,P_value_hnscc,P_value_colon,P_value_Lscc,Pvalue_sig
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
GRB2_proteomics,1.886384e-11,0.02250643,0.085202,0.05789878,0.001321265,2.559824e-09,0.139949,0.06776395,2
WAS_proteomics,1.205168e-07,0.00307075,0.416288,0.1014634,0.0003058061,2.411333e-08,0.00507,0.06289182,2
PPP2R3A_proteomics,4.157858e-07,0.002478699,0.350241,5.820667e-07,0.0001907963,,,0.01325248,2
PLCG2_proteomics,7.918099e-07,0.0007400138,0.886773,0.003700474,9.591168e-06,,0.040929,0.01244665,2
RASAL3_proteomics,1.271949e-06,0.03335259,0.121212,0.7843862,0.002775645,4.115124e-06,0.002031,0.02913433,2
FCGR2A_proteomics,2.085765e-06,0.1278211,0.125484,2.657478e-05,0.6473181,,0.244725,0.3299939,2
VAV1_proteomics,3.486858e-06,0.002482501,0.412679,0.6504773,0.5118113,1.745944e-06,0.036185,0.05531002,2
PIK3AP1_proteomics,8.12587e-06,0.3867047,0.564339,0.000275528,0.07577893,2.000795e-07,0.027557,0.01904173,2
HCK_proteomics,1.10172e-05,,0.903102,8.302972e-06,0.2256718,,0.008756,0.2918598,2
HGF_proteomics,1.175508e-05,0.01659887,0.166508,0.6837758,9.837413e-06,,,0.0117207,2


# All proteins FDR cutoff

Find proteins that have both positive and negative correlations.

In [3]:
# Read the FDR wide table, drop the 'Unnamed: 0' column, index the rows by
# 'Comparison', and remove the 'EGFR_proteomics' row.
# NOTE: a drop_duplicates(subset="Comparison", keep=False) step was left
# disabled between the column drop and set_index.
prot_FDR = (
    pd.read_csv("combining_pearson_dfs/csv_files/pancan_EGFR_all_FDR_wide.csv")
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
    .drop(index=['EGFR_proteomics'])
)
prot_FDR

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
PHLDA1_proteomics,0.816848,6.553435e-25,,,,,0.364797,0.000190,,,0.664271,3.426615e-15,,,0.709812,3.919195e-18
GRB2_proteomics,-0.610889,1.886384e-11,,,,,,,-0.302439,0.001321,-0.532341,2.559824e-09,,,,
SOCS2_proteomics,0.562720,1.343464e-09,,,,,,,,,,,,,0.478574,2.191662e-04
CDH4_proteomics,0.559180,1.790048e-09,,,,,,,,,,,,,,
PLA2G15_proteomics,-0.556624,2.197562e-09,-0.298029,0.001566,,,0.274185,0.003025,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SLIT3_proteomics,,,,,,,,,,,,,,,-0.291391,2.011415e-03
AAK1_proteomics,,,,,,,,,,,,,,,0.290954,2.044509e-03
COMMD3-BMI1_proteomics,,,,,,,,,,,,,,,-0.290800,2.056267e-03
LIMA1_proteomics,,,,,,,,,,,,,,,0.290115,2.109252e-03


In [4]:
# Per-cancer correlation columns of the FDR table.
col = [
    "Correlation_Gbm",
    "Correlation_kidney",
    "Correlation_Ovar",
    "Correlation_Brca",
    "Correlation_Luad",
    "Correlation_hnscc",
    "Correlation_colon",
    "Correlation_Lscc",
]
# Copy the column subset so the flag column below is written to an independent
# frame rather than to a slice of prot_FDR (avoids SettingWithCopyWarning).
FDR_corr = prot_FDR[col].copy()
# Flag rows that have both a positive and a negative correlation, keep only
# those rows, and record their index labels.
FDR_corr["Pos_Neg"] = FDR_corr.apply(HasPosNeg, axis=1)
FDR_corr = FDR_corr[FDR_corr['Pos_Neg'] == True]
pos_neg_list_FDR_corr = FDR_corr.index.values.tolist()
FDR_corr.to_csv("csv_files/pancan_EGFR_all_pos_neg_FDR")
# Display the frame: it must be the last expression of the cell to render.
FDR_corr


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [5]:
# Per-cancer p-value columns of the FDR table.
col = [
    "P_value_Gbm",
    "P_value_kidney",
    "P_value_Ovar",
    "P_value_Brca",
    "P_value_Luad",
    "P_value_hnscc",
    "P_value_colon",
    "P_value_Lscc",
]
# Copy the column subset so the count column below is written to an independent
# frame rather than to a slice of prot_FDR (avoids SettingWithCopyWarning).
FDR_pval = prot_FDR[col].copy()
# Per row, the number of p-values that Pvalue_sig counts as significant.
FDR_pval["Pvalue_sig"] = FDR_pval.apply(Pvalue_sig, axis=1)
FDR_pval

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,P_value_Gbm,P_value_kidney,P_value_Ovar,P_value_Brca,P_value_Luad,P_value_hnscc,P_value_colon,P_value_Lscc,Pvalue_sig
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
PHLDA1_proteomics,6.553435e-25,,,0.000190,,3.426615e-15,,3.919195e-18,4
GRB2_proteomics,1.886384e-11,,,,0.001321,2.559824e-09,,,3
SOCS2_proteomics,1.343464e-09,,,,,,,2.191662e-04,2
CDH4_proteomics,1.790048e-09,,,,,,,,1
PLA2G15_proteomics,2.197562e-09,0.001566,,0.003025,,,,,3
...,...,...,...,...,...,...,...,...,...
SLIT3_proteomics,,,,,,,,2.011415e-03,1
AAK1_proteomics,,,,,,,,2.044509e-03,1
COMMD3-BMI1_proteomics,,,,,,,,2.056267e-03,1
LIMA1_proteomics,,,,,,,,2.109252e-03,1


In [6]:
# "At least 2" means >= 2: the previous isin([2, 3, 4]) filter dropped any
# protein with five or more significant p-values.
pvals_atleast2 = FDR_pval[FDR_pval['Pvalue_sig'] >= 2]
# sort_values returns a new frame, so keep the result (it was previously
# discarded). mergesort is stable, so ties keep their original relative order.
pvals_atleast2 = pvals_atleast2.sort_values(
    by=['Pvalue_sig'], ascending=False, kind='mergesort'
)

pvals_atleast2.to_csv("csv_files/pancan_EGFR_all_FDR_atleast2_sig")

In [18]:

# Per-cancer correlation columns of the FDR table.
col = [
    "Correlation_Gbm",
    "Correlation_kidney",
    "Correlation_Ovar",
    "Correlation_Brca",
    "Correlation_Luad",
    "Correlation_hnscc",
    "Correlation_colon",
    "Correlation_Lscc",
]
# Copy the column subset so new columns are written to an independent frame
# rather than to a slice of prot_FDR (avoids SettingWithCopyWarning).
corr_FDR = prot_FDR[col].copy()

corr_FDR["Pos_Neg"] = corr_FDR.apply(HasPosNeg, axis=1)
# Rows that do NOT have both signs; copy again because a column is added below.
corr_neg_or_pos = corr_FDR[corr_FDR['Pos_Neg'] == False].copy()
corr_neg_or_pos_list = corr_neg_or_pos.index.values.tolist()
# Count strong correlations over the correlation columns only, so the boolean
# 'Pos_Neg' flag is never compared against the numeric cutoff.
corr_neg_or_pos["Num_corr"] = corr_neg_or_pos[col].apply(CountPosNeg, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [22]:
# "At least 2" means >= 2: the previous isin([2, 3, 4, 5, 6]) filter dropped
# proteins with seven or eight strong correlations.
# The sort result was previously discarded; it is now applied to the kept rows
# (stable mergesort keeps the original relative order of ties).
corr_neg_or_pos_atleast2 = (
    corr_neg_or_pos[corr_neg_or_pos['Num_corr'] >= 2]
    .sort_values(by=['Num_corr'], ascending=False, kind='mergesort')
)

In [24]:
# Write the filtered table to disk.
corr_neg_or_pos_atleast2.to_csv(
    path_or_buf="csv_files/pancan_EGFR_all_FDR_atleast2_same_corr"
)