# EGFR Mutation Trans Effect on Phosphoproteomics

### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import warnings
warnings.filterwarnings("ignore")

import cptac
import cptac.utils as al

# Instantiate cptac's Gbm dataset object. Later cells call
# brain.join_omics_to_mutations(...) and brain.reduce_multiindex(...) on it.
brain = cptac.Gbm()

                                    

### Select Gene

In [2]:
# Gene whose mutation status splits the tumor samples into 'Mutated' vs
# 'Wildtype' in the comparison cells below; also passed to
# al.get_interacting_proteins to build the interacting-protein list.
gene = "EGFR"

### Investigate Proteomics, Phosphoproteomics, Acetylproteomics, or Transcriptomics

In [3]:
# Name of the omics table to analyse; passed as `omics_df_name` to
# brain.join_omics_to_mutations in the comparison cells below.
# Exactly one of the following assignments should be left uncommented.
#omics = "proteomics"
#omics = "transcriptomics"
omics = "phosphoproteomics"
#omics = "acetylproteomics"

### Track all significant comparisons in Dataframe

In [4]:
# Running table of every significant comparison found in this notebook.
# It starts with no rows; add_to_all_significant_comparisons appends to it.
comparison_columns = [
    'Cancer_Type',
    'Gene',
    'Comparison',
    'Interacting_Protein',
    'P_Value',
]
all_significant_comparisons = pd.DataFrame(columns=comparison_columns)

In [5]:
def add_to_all_significant_comparisons(df, cancer, interacting, all_sig_comp, target_gene=None):
    """Append one batch of significant comparisons to the running results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Results of one comparison run (in this notebook, the frame returned by
        ``al.wrap_ttest``). It is NOT modified; the annotation columns are
        added to a new frame.
    cancer : str
        Value written to the ``Cancer_Type`` column of every appended row.
    interacting : bool
        Value written to the ``Interacting_Protein`` column of every appended row.
    all_sig_comp : pandas.DataFrame
        The table accumulated so far. It is not modified either.
    target_gene : str, optional
        Value written to the ``Gene`` column. Defaults to the notebook-level
        ``gene`` variable, which preserves the original call signature.

    Returns
    -------
    pandas.DataFrame
        ``all_sig_comp`` followed by the annotated rows of ``df``. Original row
        indices are kept (no re-indexing), matching the previous behaviour.
    """
    if target_gene is None:
        # Fall back to the notebook-wide selection made in the "Select Gene" cell.
        target_gene = gene

    # assign() returns a new frame, so the caller's `df` does not silently gain
    # these three columns (the previous `expanded = df` aliased the input).
    expanded = df.assign(
        Gene=target_gene,
        Cancer_Type=cancer,
        Interacting_Protein=interacting,
    )

    return pd.concat([all_sig_comp, expanded], sort=False)

# Interacting Proteins: Phosphoproteomics

### Generate interacting protein list

Call `get_interacting_proteins` from `cptac.utils`, which queries the UniProt and STRING databases to generate a list of known interaction partners of the given gene.

In [6]:
# Ask cptac.utils for the interaction partners of `gene`
interacting_proteins = al.get_interacting_proteins(gene)

# Emit the header followed by one protein per line
report_lines = ["Interacting Proteins:"]
report_lines.extend(str(protein) for protein in interacting_proteins)
print("\n".join(report_lines))

Interacting Proteins:
NKTR
PNN
TP53
STAG2
H3F3B
SETDB1
NIPBL
PHIP
BPTF
RSF1
PRKDC
MUS81
ARID4A
RAD52
BDP1
XRCC6
SMC5
H3F3A
TPR
SCAF11
ATRX
DAXX
RAD51
MECP2
XRCC3
HIST2H3PS2
EIF4A2
EZH2
Mecp2
CBX5


### Test for significant comparisons in any of interacting proteins

In [7]:
# Join the selected omics table, restricted to the interacting proteins, with the
# mutation data for `gene`, then flatten the column MultiIndex so that every
# column has a plain string name usable by wrap_ttest.
protdf = brain.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = brain.reduce_multiindex(protdf, levels_to_drop=1, flatten=True)

# Keep tumor samples only. The explicit copy makes the 'Label' assignment below
# write to an independent frame rather than to a slice of the joined frame.
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Create the binary valued column needed to do the comparison: every sample whose
# mutation status is anything other than 'Wildtype_Tumor' counts as 'Mutated'.
# Vectorised, so it also works when no tumor rows are present.
status_col = gene + "_Mutation_Status"
protdf['Label'] = np.where(protdf[status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Format the dataframe correctly: drop the mutation/sample metadata so that only
# the omics columns and 'Label' remain.
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", status_col, "Sample_Status"])

# Make list of columns to be compared using t-tests (everything except the label)
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# Call wrap_ttest, pass in formatted dataframe
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print results, if anything significant was found
# (the None check follows the original cell, which treats None as "nothing significant")
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Gbm", True, all_significant_comparisons)




Doing t-test comparisons

                                           Comparison       P_Value
0   ATRX_phosphoproteomics_LTVS*DGES*GEEK_NP_000480.3  5.515398e-10
1   ATRX_phosphoproteomics_ATSSS*NPSS*PAPDWYK_NP_0...  8.003286e-08
2   ATRX_phosphoproteomics_RPTETNPVTS*NS*DEECNETVK...  1.297310e-07
3     ATRX_phosphoproteomics_KKDELS*DYAEK_NP_000480.3  5.809567e-07
4   ATRX_phosphoproteomics_YVES*DDEKPLDDETVNEDASNE...  8.009581e-07
5   ATRX_phosphoproteomics_ATSSSNPSS*PAPDWYK_NP_00...  2.049500e-06
6    ATRX_phosphoproteomics_LTVS*DGESGEEK_NP_000480.3  2.208274e-06
7      ATRX_phosphoproteomics_DFDS*S*EDEK_NP_000480.3  2.250502e-06
8   ATRX_phosphoproteomics_YVES*DDEKPLDDETVNEDASNE...  2.512040e-06
9     ATRX_phosphoproteomics_VYEHTSRFS*PK_NP_000480.3  1.074143e-05
10  ATRX_phosphoproteomics_LTVS*DGES*GEEKK_NP_0004...  1.247822e-05
11  NIPBL_phosphoproteomics_DEESSEGT*HHAK_NP_597677.2  3.293932e-05





# All Proteins: Phosphoproteomics

In [8]:
# Best-effort run over the whole omics table: any failure is reported below
# instead of stopping the notebook.
try:
    print("\nGene: ", gene)

    # Join the selected omics table with the mutation data for `gene`. No
    # `omics_genes` filter is passed here (the original cell labels this
    # "Use all proteins"), then the column MultiIndex is flattened.
    protdf = brain.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = brain.reduce_multiindex(protdf, flatten=True)

    # Keep tumor samples only. The explicit copy makes the 'Label' assignment
    # below write to an independent frame rather than to a slice.
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Create the binary valued column needed to do the comparison: every sample
    # whose mutation status is anything other than 'Wildtype_Tumor' counts as
    # 'Mutated'. Vectorised, so it also works when no tumor rows are present.
    status_col = gene + "_Mutation_Status"
    protdf['Label'] = np.where(protdf[status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Format the dataframe correctly: drop the mutation/sample metadata so that
    # only the omics columns and 'Label' remain.
    protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", status_col, "Sample_Status"])

    # Make list of columns to be compared using t-tests (everything except the label)
    col_list = list(protdf.columns)
    col_list.remove('Label')

    print("Doing t-test comparisons\n")

    # Call wrap_ttest, pass in formatted dataframe
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Print results, if anything significant was found
    # (the None check follows the original cell, which treats None as "nothing significant")
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Gbm", False, all_significant_comparisons)


except Exception as e:
    # Report the failure and carry on so the summary cells can still run.
    print("Error in Comparison")
    print(e)


Gene:  ATRX




Doing t-test comparisons

                                          Comparison       P_Value
0  ATRX_phosphoproteomics_S1348S1352_LTVS*DGES*GE...  5.515398e-10
1  ATRX_phosphoproteomics_S1992S1996_ATSSS*NPSS*P...  8.003286e-08
2  MAP2_phosphoproteomics_S1564_RGVSGDRDENSFS*LNS...  9.091350e-08
3  ATRX_phosphoproteomics_S675S677_RPTETNPVTS*NS*...  1.297310e-07
4  TERF2_phosphoproteomics_S431_LVLEEDSQSTEPSAGLN...  1.707428e-07
5  TCEAL3_phosphoproteomics_S65_REDEGEPGDEGQLEDEG...  3.212614e-07





### Print all significant comparisons

In [9]:
# Show the accumulated table, or say so explicitly when it has no rows
if len(all_significant_comparisons) == 0:
    print('No Significant Comparisons!')
else:
    display(all_significant_comparisons)

Unnamed: 0,Cancer_Type,Gene,Comparison,Interacting_Protein,P_Value
0,Gbm,ATRX,ATRX_phosphoproteomics_LTVS*DGES*GEEK_NP_000480.3,True,5.515398e-10
1,Gbm,ATRX,ATRX_phosphoproteomics_ATSSS*NPSS*PAPDWYK_NP_0...,True,8.003286e-08
2,Gbm,ATRX,ATRX_phosphoproteomics_RPTETNPVTS*NS*DEECNETVK...,True,1.29731e-07
3,Gbm,ATRX,ATRX_phosphoproteomics_KKDELS*DYAEK_NP_000480.3,True,5.809567e-07
4,Gbm,ATRX,ATRX_phosphoproteomics_YVES*DDEKPLDDETVNEDASNE...,True,8.009581e-07
5,Gbm,ATRX,ATRX_phosphoproteomics_ATSSSNPSS*PAPDWYK_NP_00...,True,2.0495e-06
6,Gbm,ATRX,ATRX_phosphoproteomics_LTVS*DGESGEEK_NP_000480.3,True,2.208274e-06
7,Gbm,ATRX,ATRX_phosphoproteomics_DFDS*S*EDEK_NP_000480.3,True,2.250502e-06
8,Gbm,ATRX,ATRX_phosphoproteomics_YVES*DDEKPLDDETVNEDASNE...,True,2.51204e-06
9,Gbm,ATRX,ATRX_phosphoproteomics_VYEHTSRFS*PK_NP_000480.3,True,1.074143e-05


### Write significant comparisons (if any) to shared CSV file

In [10]:
# NOTE: this cell is currently disabled. The export code is wrapped in a
# triple-quoted string, so nothing is read or written; the cell only evaluates
# to (and echoes) that string. Remove the surrounding quotes to re-enable it.
# As written, the code reads gene + '_Trans_Results.csv' before appending to it,
# so presumably that file must already exist -- verify before re-enabling.
'''existing_results = pd.read_csv(gene+'_Trans_Results.csv')

updated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)

updated_results.to_csv(path_or_buf = gene + '_Trans_Results.csv', index=False)'''

"existing_results = pd.read_csv(gene+'_Trans_Results.csv')\n\nupdated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)\n\nupdated_results.to_csv(path_or_buf = gene + '_Trans_Results.csv', index=False)"