# KRAS Mutation Trans Effect on Transcriptomics

This notebook analyzes the trans effect of KRAS mutation on the transcriptomics of its interacting proteins and of all other proteins, in Endometrial, Colon, and Ovarian cancer.

### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import collections

import warnings
warnings.filterwarnings("ignore")

import cptac
import cptac.algorithms as al

# Instantiate one cptac dataset object per cancer type; the analysis cells
# below query each of them through join_omics_to_mutations.
en = cptac.Endometrial()
co = cptac.Colon()
ov = cptac.Ovarian()

                                    

### Select Gene

In [2]:
# Gene whose mutation status defines the Mutated/Wildtype groups in every
# comparison below; it also prefixes the name of the results CSV.
gene = "KRAS"

### Investigate Proteomics, Phosphoproteomics, Acetylproteomics, or Transcriptomics

In [3]:
# Name of the omics table passed as omics_df_name to join_omics_to_mutations.
# Leave exactly one assignment active to choose the data type.
#omics = "proteomics"
omics = "transcriptomics"
#omics = "phosphoproteomics"
#omics = "acetylproteomics"

### Track all significant comparisons in a DataFrame

In [4]:
# Running table of every significant comparison found in the sections below.
# It starts empty; only the column layout is fixed here.
all_significant_comparisons = pd.DataFrame(
    columns=[
        'Cancer_Type',
        'Gene',
        'Comparison',
        'Interacting_Protein',
        'P_Value',
    ]
)

In [5]:
def add_to_all_significant_comparisons(df, cancer, interacting, all_sig_comp):
    """Append one batch of significant comparisons to the running results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Newly found significant comparisons (the cells in this notebook pass
        the frame returned by ``al.wrap_ttest``). It is left unmodified.
    cancer : str
        Value written to the ``Cancer_Type`` column of every appended row.
    interacting : bool
        Value written to the ``Interacting_Protein`` column of every appended row.
    all_sig_comp : pandas.DataFrame
        Results accumulated so far.

    Returns
    -------
    pandas.DataFrame
        ``all_sig_comp`` followed by the annotated rows of ``df``. Row index
        labels are kept as they are, so they may repeat across batches.

    Notes
    -----
    The ``Gene`` column is filled from the notebook-level ``gene`` variable.
    """
    # assign() builds a new frame, so the caller's df does not silently gain
    # the three annotation columns (the earlier version aliased df and wrote
    # the columns into it in place).
    annotated = df.assign(Gene=gene, Cancer_Type=cancer, Interacting_Protein=interacting)

    return pd.concat([all_sig_comp, annotated], sort=False)

# Interacting Proteins: Transcriptomics

### Generate interacting protein list

Call the `get_interacting_proteins` method from `cptac.algorithms`, which queries the UniProt and STRING databases to generate a list of known interacting partners of the given gene.

In [6]:
# Look up the known interaction partners of the selected gene.
interacting_proteins = al.get_interacting_proteins(gene)

# List them one per line under a header.
print("Interacting Proteins:")
for partner in interacting_proteins:
    print(partner)

Interacting Proteins:
RAF1
MAPK1
KRAS
MAP2K2
MAPK3
PIK3CA
EGF
ERBB3
ERBB2
EGFR
BRAF
PIK3CB
ARAF
MAP2K1
PTPN11
RET
NF1
PIK3CG
NRAS
RALGDS
SRC
GRB2
SOS1
HRAS
CDKN2A
PRRT2
RAP1GDS1
HNRNPC
RASSF2
RASSF5
CALM1


## Endometrial

### Test for significant comparisons in any of interacting proteins

In [7]:
# Join the mutation calls for `gene` to the selected omics columns of the
# interacting proteins and keep tumor samples only. The explicit copy makes
# the new column below land on an independent frame rather than on a slice.
protdf = en.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Built in one vectorized step, so the
# column exists even when no rows survive the tumor filter.
protdf['Label'] = np.where(protdf[gene+"_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns and 'Label' remain.
protdf = protdf.drop(columns=[gene+"_Mutation", gene+"_Location", gene+"_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is tested.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# Compare Mutated vs Wildtype for each column in col_list.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Record results only when wrap_ttest returned something; None is treated as
# "nothing significant".
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Endometrial", True, all_significant_comparisons)
        

Doing t-test comparisons

No significant comparisons.


## Colon

### Test for significant comparisons in any of interacting proteins

In [8]:
# Join the mutation calls for `gene` to the selected omics columns of the
# interacting proteins and keep tumor samples only. The explicit copy makes
# the new column below land on an independent frame rather than on a slice.
protdf = co.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Built in one vectorized step, so the
# column exists even when no rows survive the tumor filter.
protdf['Label'] = np.where(protdf[gene+"_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns and 'Label' remain.
protdf = protdf.drop(columns=[gene+"_Mutation", gene+"_Location", gene+"_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is tested.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# Compare Mutated vs Wildtype for each column in col_list.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Record results only when wrap_ttest returned something; None is treated as
# "nothing significant".
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Colon", True, all_significant_comparisons)


EGF did not match any columns in transcriptomics dataframe. EGF_transcriptomics column inserted, but filled with NaN.
RET did not match any columns in transcriptomics dataframe. RET_transcriptomics column inserted, but filled with NaN.
PRRT2 did not match any columns in transcriptomics dataframe. PRRT2_transcriptomics column inserted, but filled with NaN.
Doing t-test comparisons

No significant comparisons.


## Ovarian

### Test for significant comparisons in any of interacting proteins

In [9]:
# Join the mutation calls for `gene` to the selected omics columns of the
# interacting proteins and keep tumor samples only. The explicit copy makes
# the new column below land on an independent frame rather than on a slice.
protdf = ov.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Built in one vectorized step, so the
# column exists even when no rows survive the tumor filter.
protdf['Label'] = np.where(protdf[gene+"_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns and 'Label' remain.
protdf = protdf.drop(columns=[gene+"_Mutation", gene+"_Location", gene+"_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is tested.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# Compare Mutated vs Wildtype for each column in col_list.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Record results only when wrap_ttest returned something; None is treated as
# "nothing significant".
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Ovarian", True, all_significant_comparisons)


MAPK3 did not match any columns in transcriptomics dataframe. MAPK3_transcriptomics column inserted, but filled with NaN.
NRAS did not match any columns in transcriptomics dataframe. NRAS_transcriptomics column inserted, but filled with NaN.
PRRT2 did not match any columns in transcriptomics dataframe. PRRT2_transcriptomics column inserted, but filled with NaN.
HNRNPC did not match any columns in transcriptomics dataframe. HNRNPC_transcriptomics column inserted, but filled with NaN.
Doing t-test comparisons

No significant comparisons.


# All Proteins: Transcriptomics

Expand our search for significant comparisons to every gene in the transcriptomics dataset.

## Endometrial

In [10]:
try:
    print("\nGene: ", gene)

    # No omics_genes filter here: every column of the selected omics table is
    # joined to the mutation calls for `gene`. Keep tumor samples only; the
    # explicit copy makes the new column below land on an independent frame.
    protdf = en.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Binary grouping column for the t-tests: every status other than
    # 'Wildtype_Tumor' counts as 'Mutated'. Vectorized, so it avoids a Python
    # loop over rows of this wide frame and exists even for an empty frame.
    protdf['Label'] = np.where(protdf[gene+"_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Drop the mutation/sample metadata so only omics columns and 'Label' remain.
    protdf = protdf.drop(columns=[gene+"_Mutation", gene+"_Location", gene+"_Mutation_Status", "Sample_Status"])

    # Every remaining column except the grouping column is tested.
    col_list = list(protdf.columns)
    col_list.remove('Label')

    print("Doing t-test comparisons\n")

    # Compare Mutated vs Wildtype for each column in col_list.
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Record results only when wrap_ttest returned something; None is treated
    # as "nothing significant".
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Endometrial", False, all_significant_comparisons)


except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run.
    print("Error in Comparison")
    print(e)


Gene:  KRAS
Doing t-test comparisons

              Comparison       P_Value
0  DUSP4_transcriptomics  3.002087e-08
1  SPRY1_transcriptomics  2.500902e-07





## Colon

In [11]:
try:
    print("\nGene: ", gene)

    # No omics_genes filter here: every column of the selected omics table is
    # joined to the mutation calls for `gene`. Keep tumor samples only; the
    # explicit copy makes the new column below land on an independent frame.
    protdf = co.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Binary grouping column for the t-tests: every status other than
    # 'Wildtype_Tumor' counts as 'Mutated'. Vectorized, so it avoids a Python
    # loop over rows of this wide frame and exists even for an empty frame.
    protdf['Label'] = np.where(protdf[gene+"_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Drop the mutation/sample metadata so only omics columns and 'Label' remain.
    protdf = protdf.drop(columns=[gene+"_Mutation", gene+"_Location", gene+"_Mutation_Status", "Sample_Status"])

    # Every remaining column except the grouping column is tested.
    col_list = list(protdf.columns)
    col_list.remove('Label')

    print("Doing t-test comparisons\n")

    # Compare Mutated vs Wildtype for each column in col_list.
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Record results only when wrap_ttest returned something; None is treated
    # as "nothing significant".
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Colon", False, all_significant_comparisons)


except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run.
    print("Error in Comparison")
    print(e)


Gene:  KRAS
Doing t-test comparisons

                Comparison       P_Value
0  GALNT10_transcriptomics  6.801284e-09
1    TMCC1_transcriptomics  4.559702e-08
2    FAIM2_transcriptomics  2.082800e-07
3  TMEM211_transcriptomics  4.705642e-07
4    TGFBI_transcriptomics  6.646078e-07
5     LY6E_transcriptomics  8.340164e-07
6   PHLDA1_transcriptomics  1.263308e-06
7    DHX58_transcriptomics  1.880427e-06





## Ovarian

In [12]:

print("\nGene: ", gene)

# No omics_genes filter here: every column of the selected omics table is
# joined to the mutation calls for `gene`. Keep tumor samples only.
protdf = ov.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor']

# Keep only the first occurrence of any repeated column name; the explicit
# copy makes the new column below land on an independent frame.
protdf = protdf.loc[:,~protdf.columns.duplicated()].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized, so it avoids a Python
# loop over rows of this wide frame and exists even for an empty frame.
protdf['Label'] = np.where(protdf[gene+"_Mutation_Status"] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns and 'Label' remain.
protdf = protdf.drop(columns=[gene+"_Mutation", gene+"_Location", gene+"_Mutation_Status", "Sample_Status"])

# Every remaining column except the grouping column is tested.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# Compare Mutated vs Wildtype for each column in col_list.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Record results only when wrap_ttest returned something; None is treated as
# "nothing significant".
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Ovarian", False, all_significant_comparisons)


Gene:  KRAS
Doing t-test comparisons

                       Comparison       P_Value
0           OR1L3_transcriptomics  8.293846e-28
1       FAM90A27P_transcriptomics  1.188688e-23
2           TRPM6_transcriptomics  7.448553e-18
3          VPS13A_transcriptomics  4.204536e-17
4       SERPINA11_transcriptomics  8.462675e-17
5           GABRP_transcriptomics  9.499937e-17
6          MOGAT1_transcriptomics  2.312500e-16
7           OR5M1_transcriptomics  2.313998e-16
8        PRAMEF17_transcriptomics  2.313998e-16
9           VPS52_transcriptomics  2.313998e-16
10           AQP2_transcriptomics  2.313998e-16
11           BRD2_transcriptomics  2.313998e-16
12       C18orf64_transcriptomics  2.313998e-16
13        CREB3L4_transcriptomics  2.313998e-16
14           DAXX_transcriptomics  2.313998e-16
15          GLRA1_transcriptomics  2.313998e-16
16          IQCF2_transcriptomics  2.313998e-16
17         KCNK18_transcriptomics  2.313998e-16
18       KRTAP9-2_transcriptomics  2.313998e-16
1

### Print All Significant Comparisons

In [13]:
# Show the accumulated table, or a plain message when it has no rows.
if len(all_significant_comparisons) > 0:
    display(all_significant_comparisons)
else:
    print('No Significant Comparisons!')

Unnamed: 0,Cancer_Type,Gene,Comparison,Interacting_Protein,P_Value
0,Endometrial,KRAS,DUSP4_transcriptomics,False,3.002087e-08
1,Endometrial,KRAS,SPRY1_transcriptomics,False,2.500902e-07
0,Colon,KRAS,GALNT10_transcriptomics,False,6.801284e-09
1,Colon,KRAS,TMCC1_transcriptomics,False,4.559702e-08
2,Colon,KRAS,FAIM2_transcriptomics,False,2.082800e-07
3,Colon,KRAS,TMEM211_transcriptomics,False,4.705642e-07
4,Colon,KRAS,TGFBI_transcriptomics,False,6.646078e-07
5,Colon,KRAS,LY6E_transcriptomics,False,8.340164e-07
6,Colon,KRAS,PHLDA1_transcriptomics,False,1.263308e-06
7,Colon,KRAS,DHX58_transcriptomics,False,1.880427e-06


### Write Significant Comparisons (if any) to Shared CSV file

In [14]:
# Per-gene results file that this notebook reads and then rewrites.
results_path = gene + '_Trans_Results.csv'

# On a fresh run the file may not exist yet (or may be empty). Start from an
# empty table with the same columns instead of failing on the read.
try:
    existing_results = pd.read_csv(results_path)
except (FileNotFoundError, pd.errors.EmptyDataError):
    existing_results = all_significant_comparisons.iloc[0:0]

# Previously stored rows first, then the rows found in this run.
updated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)

# NOTE(review): re-running this cell appends the same rows again, because
# nothing here removes duplicates. Confirm whether that is acceptable for the
# shared file.
updated_results.to_csv(path_or_buf=results_path, index=False)