# KRAS Mutation Trans Effect on Phosphoproteomics

This notebook analyzes the trans effect of KRAS mutation on the phosphoproteomics of its interacting proteins, and of all other proteins, in Endometrial, Colon, and Ovarian cancer.

### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import warnings
warnings.filterwarnings("ignore")

import cptac
import cptac.algorithms as al

# Instantiate one cptac dataset object per cancer type; the analysis cells below
# call join_omics_to_mutations / get_proteomics on these objects.
en = cptac.Endometrial()
co = cptac.Colon()
ov = cptac.Ovarian()

                                    

### Select Gene

In [2]:
# Gene whose mutation status splits samples into the Mutated / Wildtype groups;
# also used as the prefix of the results CSV file name in the last cell.
gene = "KRAS"

### Investigate Proteomics, Phosphoproteomics, Acetylproteomics, or Transcriptomics

In [3]:
# Omics table to test; passed as `omics_df_name` to join_omics_to_mutations in
# every analysis cell. Exactly one of the lines below should be uncommented.
#omics = "proteomics"
#omics = "transcriptomics"
omics = "phosphoproteomics"
#omics = "acetylproteomics"

### Track all significant comparisons in a DataFrame

In [4]:
# Empty accumulator table; each analysis cell appends its significant results to it.
all_significant_comparisons = pd.DataFrame(
    columns=[
        'Cancer_Type',
        'Gene',
        'Comparison',
        'Interacting_Protein',
        'P_Value',
    ]
)

In [5]:
def add_to_all_significant_comparisons(df, cancer, interacting, all_sig_comp, gene_name=None):
    """Append one batch of significant comparisons to the running results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Comparisons to add (in this notebook, the frame returned by
        ``al.wrap_ttest``). It is NOT modified: the annotation columns are
        added to a new frame.
    cancer : str
        Value written to the 'Cancer_Type' column of every added row.
    interacting : bool
        Value written to the 'Interacting_Protein' column of every added row.
    all_sig_comp : pandas.DataFrame
        Results accumulated so far.
    gene_name : str, optional
        Value written to the 'Gene' column. Defaults to the notebook-level
        ``gene`` variable, which preserves the original call signature.

    Returns
    -------
    pandas.DataFrame
        ``all_sig_comp`` with the annotated rows of ``df`` concatenated below
        it. Row index labels of both inputs are kept as they are.
    """
    if gene_name is None:
        # Fall back to the gene selected at the top of the notebook.
        gene_name = gene

    # assign() returns a new frame, so the caller's `df` keeps its original columns.
    expanded = df.assign(
        Gene=gene_name,
        Cancer_Type=cancer,
        Interacting_Protein=interacting,
    )

    return pd.concat([all_sig_comp, expanded], sort=False)

# Interacting Proteins: Phosphoproteomics

### Generate interacting protein list

In [6]:
# Ask cptac's algorithms module for the proteins that interact with the selected gene
interacting_proteins = al.get_interacting_proteins(gene)

# Header first, then one protein name per line
print("Interacting Proteins:")
for interacting_protein in interacting_proteins: print(interacting_protein)

Interacting Proteins:
RAF1
MAPK1
KRAS
MAP2K2
MAPK3
PIK3CA
EGF
ERBB3
ERBB2
EGFR
BRAF
PIK3CB
ARAF
MAP2K1
PTPN11
RET
NF1
PIK3CG
NRAS
RALGDS
SRC
GRB2
SOS1
HRAS
CDKN2A
PRRT2
RAP1GDS1
HNRNPC
RASSF2
RASSF5
CALM1


## Endometrial

### Test for significant comparisons in any of the interacting proteins

In [7]:
# Join the gene's mutation columns with the selected omics columns of the
# interacting proteins, then keep tumor samples only. The .copy() makes the
# filtered frame independent, so adding 'Label' below never writes into a slice.
protdf = en.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized, so the column also exists
# when no tumor rows are present (the row-by-row loop never created it then).
protdf['Label'] = np.where(protdf[gene + "_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation / sample metadata so only omics columns and 'Label' remain
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", gene + "_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is compared
col_list = [col for col in protdf.columns if col != 'Label']

print("Doing t-test comparisons\n")

# Run the t-tests on the formatted dataframe
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print and record the results only when wrap_ttest returned something
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Endometrial", True, all_significant_comparisons)


EGF did not match any columns in phosphoproteomics dataframe. EGF_phosphoproteomics column inserted, but filled with NaN.
PIK3CB did not match any columns in phosphoproteomics dataframe. PIK3CB_phosphoproteomics column inserted, but filled with NaN.
RET did not match any columns in phosphoproteomics dataframe. RET_phosphoproteomics column inserted, but filled with NaN.
HRAS did not match any columns in phosphoproteomics dataframe. HRAS_phosphoproteomics column inserted, but filled with NaN.
CDKN2A did not match any columns in phosphoproteomics dataframe. CDKN2A_phosphoproteomics column inserted, but filled with NaN.
PRRT2 did not match any columns in phosphoproteomics dataframe. PRRT2_phosphoproteomics column inserted, but filled with NaN.
CALM1 did not match any columns in phosphoproteomics dataframe. CALM1_phosphoproteomics column inserted, but filled with NaN.
Doing t-test comparisons

                    Comparison   P_Value
0  RAF1-T330_phosphoproteomics  0.000008
1  ARAF-S260_pho

## Colon

### Test for significant comparisons in any of the interacting proteins

In [8]:
# Join the gene's mutation columns with the selected omics columns of the
# interacting proteins, then keep tumor samples only. The .copy() makes the
# filtered frame independent, so adding 'Label' below never writes into a slice.
protdf = co.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized, so the column also exists
# when no tumor rows are present (the row-by-row loop never created it then).
protdf['Label'] = np.where(protdf[gene + "_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation / sample metadata so only omics columns and 'Label' remain
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", gene + "_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is compared
col_list = [col for col in protdf.columns if col != 'Label']

print("Doing t-test comparisons\n")

# Run the t-tests on the formatted dataframe
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print and record the results only when wrap_ttest returned something
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Colon", True, all_significant_comparisons)


RAF1 did not match any columns in phosphoproteomics dataframe. RAF1_phosphoproteomics column inserted, but filled with NaN.
MAPK1 did not match any columns in phosphoproteomics dataframe. MAPK1_phosphoproteomics column inserted, but filled with NaN.
KRAS did not match any columns in phosphoproteomics dataframe. KRAS_phosphoproteomics column inserted, but filled with NaN.
MAP2K2 did not match any columns in phosphoproteomics dataframe. MAP2K2_phosphoproteomics column inserted, but filled with NaN.
MAPK3 did not match any columns in phosphoproteomics dataframe. MAPK3_phosphoproteomics column inserted, but filled with NaN.
PIK3CA did not match any columns in phosphoproteomics dataframe. PIK3CA_phosphoproteomics column inserted, but filled with NaN.
EGF did not match any columns in phosphoproteomics dataframe. EGF_phosphoproteomics column inserted, but filled with NaN.
ERBB3 did not match any columns in phosphoproteomics dataframe. ERBB3_phosphoproteomics column inserted, but filled with N

## Ovarian

In [9]:
# Join the gene's mutation columns with the selected omics columns of the
# interacting proteins, then keep tumor samples only. The .copy() makes the
# filtered frame independent, so adding 'Label' below never writes into a slice.
protdf = ov.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized, so the column also exists
# when no tumor rows are present (the row-by-row loop never created it then).
protdf['Label'] = np.where(protdf[gene + "_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation / sample metadata so only omics columns and 'Label' remain
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", gene + "_Mutation_Status", "Sample_Status"])

# This cell additionally keeps only the first occurrence of any repeated column name
protdf = protdf.loc[:, ~protdf.columns.duplicated()]

# Every remaining column except the grouping column is compared
col_list = [col for col in protdf.columns if col != 'Label']

print("Doing t-test comparisons\n")

# Run the t-tests on the formatted dataframe
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print and record the results only when wrap_ttest returned something
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Ovarian", True, all_significant_comparisons)


KRAS did not match any columns in phosphoproteomics dataframe. KRAS_phosphoproteomics column inserted, but filled with NaN.
EGF did not match any columns in phosphoproteomics dataframe. EGF_phosphoproteomics column inserted, but filled with NaN.
PIK3CB did not match any columns in phosphoproteomics dataframe. PIK3CB_phosphoproteomics column inserted, but filled with NaN.
PIK3CG did not match any columns in phosphoproteomics dataframe. PIK3CG_phosphoproteomics column inserted, but filled with NaN.
NRAS did not match any columns in phosphoproteomics dataframe. NRAS_phosphoproteomics column inserted, but filled with NaN.
HRAS did not match any columns in phosphoproteomics dataframe. HRAS_phosphoproteomics column inserted, but filled with NaN.
CDKN2A did not match any columns in phosphoproteomics dataframe. CDKN2A_phosphoproteomics column inserted, but filled with NaN.
PRRT2 did not match any columns in phosphoproteomics dataframe. PRRT2_phosphoproteomics column inserted, but filled with N

# All Proteins: Phosphoproteomics

## Endometrial

In [10]:
try:
    print("\nGene: ", gene)

    # Column labels of the proteomics table.
    # NOTE(review): neither name is read again in this cell; kept in case other cells rely on them.
    proteomics = en.get_proteomics()
    all_proteins = proteomics.columns

    # Join the gene's mutation columns with every column of the selected omics
    # table, then keep tumor samples only. The .copy() makes the filtered frame
    # independent, so adding 'Label' below never writes into a slice.
    protdf = en.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Binary grouping column for the t-tests: every status other than
    # 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a per-row
    # iterrows loop, which is slow on a frame this wide and never created the
    # column when no tumor rows were present.
    protdf['Label'] = np.where(protdf[gene + "_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Drop the mutation / sample metadata so only omics columns and 'Label' remain
    protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", gene + "_Mutation_Status", "Sample_Status"])

    # Every remaining column except the grouping column is compared
    col_list = [col for col in protdf.columns if col != 'Label']

    print("Doing t-test comparisons\n")

    # Run the t-tests on the formatted dataframe
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Print and record the results only when wrap_ttest returned something
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Endometrial", False, all_significant_comparisons)


except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run
    print("Error in Comparison")
    print(e)


Gene:  KRAS
Doing t-test comparisons

                          Comparison       P_Value
0      DTNBP1-S297_phosphoproteomics  1.239489e-09
1      IGF2R-S2479_phosphoproteomics  3.548371e-09
2      DTNBP1-S300_phosphoproteomics  6.909004e-09
3        TPR-S1662_phosphoproteomics  1.383305e-08
4     PLEKHS1-S185_phosphoproteomics  2.395489e-08
5     LRRFIP1-S300_phosphoproteomics  3.795856e-08
6         KRT8-Y38_phosphoproteomics  1.362150e-07
7         KRT8-S63_phosphoproteomics  3.383255e-07
8       PLEC-S4408_phosphoproteomics  3.449881e-07
9      MACF1-S1377_phosphoproteomics  3.491278e-07
10       HDGF-S130_phosphoproteomics  3.878632e-07
11     PRKAA1-S521_phosphoproteomics  4.422808e-07
12  ARHGEF18-S1160_phosphoproteomics  5.761535e-07
13   IVNS1ABP-S269_phosphoproteomics  6.137179e-07
14     TNRC6B-S803_phosphoproteomics  6.748903e-07





## Colon

In [11]:
try:
    print("\nGene: ", gene)

    # Column labels of the proteomics table.
    # NOTE(review): neither name is read again in this cell; kept in case other cells rely on them.
    proteomics = co.get_proteomics()
    all_proteins = proteomics.columns

    # Join the gene's mutation columns with every column of the selected omics
    # table, then keep tumor samples only. The .copy() makes the filtered frame
    # independent, so adding 'Label' below never writes into a slice.
    protdf = co.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Binary grouping column for the t-tests: every status other than
    # 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a per-row
    # iterrows loop, which is slow on a frame this wide and never created the
    # column when no tumor rows were present.
    protdf['Label'] = np.where(protdf[gene + "_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Drop the mutation / sample metadata so only omics columns and 'Label' remain
    protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", gene + "_Mutation_Status", "Sample_Status"])

    # Every remaining column except the grouping column is compared
    col_list = [col for col in protdf.columns if col != 'Label']

    print("Doing t-test comparisons\n")

    # Run the t-tests on the formatted dataframe
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Print and record the results only when wrap_ttest returned something
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Colon", False, all_significant_comparisons)


except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run
    print("Error in Comparison")
    print(e)


Gene:  KRAS
Doing t-test comparisons

No significant comparisons.


## Ovarian

In [12]:

print("\nGene: ", gene)

# Unique column labels of the proteomics table.
# NOTE(review): neither name is read again in this cell; kept in case other cells rely on them.
proteomics = ov.get_proteomics()
all_proteins = list(set(proteomics.columns))

# Join the gene's mutation columns with every column of the selected omics
# table and keep tumor samples only. The .copy() makes the filtered frame
# independent, so adding 'Label' below never writes into a slice.
protdf = ov.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Keep only the first occurrence of any repeated column name
protdf = protdf.loc[:, ~protdf.columns.duplicated()]

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a per-row
# iterrows loop, which is slow on a frame this wide and never created the
# column when no tumor rows were present.
protdf['Label'] = np.where(protdf[gene + "_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation / sample metadata so only omics columns and 'Label' remain
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", gene + "_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is compared
col_list = [col for col in protdf.columns if col != 'Label']

print("Doing t-test comparisons\n")

# Run the t-tests on the formatted dataframe
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print and record the results only when wrap_ttest returned something
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Ovarian", False, all_significant_comparisons)


Gene:  KRAS
Doing t-test comparisons

No significant comparisons.


### Print all significant comparisons

In [13]:
# Render the accumulated table, or a short notice when it has no rows
if len(all_significant_comparisons) == 0:
    print('No Significant Comparisons!')
else:
    display(all_significant_comparisons)

Unnamed: 0,Cancer_Type,Gene,Comparison,Interacting_Protein,P_Value
0,Endometrial,KRAS,RAF1-T330_phosphoproteomics,True,8.43213e-06
1,Endometrial,KRAS,ARAF-S260_phosphoproteomics,True,2.141738e-05
0,Endometrial,KRAS,DTNBP1-S297_phosphoproteomics,False,1.239489e-09
1,Endometrial,KRAS,IGF2R-S2479_phosphoproteomics,False,3.548371e-09
2,Endometrial,KRAS,DTNBP1-S300_phosphoproteomics,False,6.909004e-09
3,Endometrial,KRAS,TPR-S1662_phosphoproteomics,False,1.383305e-08
4,Endometrial,KRAS,PLEKHS1-S185_phosphoproteomics,False,2.395489e-08
5,Endometrial,KRAS,LRRFIP1-S300_phosphoproteomics,False,3.795856e-08
6,Endometrial,KRAS,KRT8-Y38_phosphoproteomics,False,1.36215e-07
7,Endometrial,KRAS,KRT8-S63_phosphoproteomics,False,3.383255e-07


### Write significant comparisons (if any) to shared CSV file

In [14]:
# Shared results file for this gene; this notebook appends its rows to it.
results_path = gene + '_Trans_Results.csv'

try:
    existing_results = pd.read_csv(results_path)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # First run for this gene (or an empty file): start from an empty table
    # instead of failing, so the file gets created by the write below.
    existing_results = pd.DataFrame(columns=all_significant_comparisons.columns)

# NOTE(review): re-running this cell appends the same rows again; no de-duplication is done.
updated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)

updated_results.to_csv(path_or_buf=results_path, index=False)