# Pancancer frequently mutated genes

## Step 1: Library Imports

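Run this cell to import the necessary libraries. The `sys.path.append` line points to a local checkout of the project; change it to the matching path on your machine so that `functions` can be imported.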

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import sys 
sys.path.append('C:\\Users\\brittany henderson\\GitHub\\WhenMutationsMatter\\Brittany\\')
import functions as f

import cptac
import cptac.algorithms as al

## Step 2: Find the frequently mutated genes for Endometrial Cancer

Enter the type of cancer and the cutoff for mutation frequency that you would like to use.

In [2]:
# Endometrial cohort: choose the cutoff handed to al.get_frequently_mutated
# and load the dataset object.
desired_cutoff = .1
en_object = cptac.Endometrial()

# Keep the Gene-indexed result under its own name so the summary is printed
# from it before the index is converted into a regular column.
endometrial_by_gene = al.get_frequently_mutated(en_object, cutoff=desired_cutoff)
print('\n\nNumber of Frequently Mutated Genes:', len(endometrial_by_gene), '\n', endometrial_by_gene.head())

# Later cells select the gene names with endometrial_freq_mut['Gene'], so
# 'Gene' has to be a column rather than the index.
endometrial_freq_mut = endometrial_by_gene.reset_index()

                                    

Number of Frequently Mutated Genes: 232 
         Unique_Samples_Mut  Missence_Mut  Truncation_Mut
Gene                                                    
ABCA12            0.147368      0.094737        0.073684
ABCA13            0.115789      0.105263        0.042105
ACVR2A            0.105263      0.010526        0.094737
ADGRG4            0.136842      0.126316        0.021053
ADGRV1            0.115789      0.094737        0.052632


## Step 3: Repeat with the Colon dataset

Go through the same process, this time using the Colon dataset. We will only print five genes from the frequently mutated data frame for simplicity.

In [3]:
# Colon cohort: choose the cutoff handed to al.get_frequently_mutated and
# load the dataset object.
desired_cutoff = .1
colon_object = cptac.Colon()

# Keep the Gene-indexed result under its own name so the summary is printed
# from it before the index is converted into a regular column.
colon_by_gene = al.get_frequently_mutated(colon_object, cutoff=desired_cutoff)
print('\n\nNumber of Frequently Mutated Genes:', len(colon_by_gene), '\n', colon_by_gene.head())

# Later cells select the gene names with colon_freq_mut['Gene'], so 'Gene'
# has to be a column rather than the index.
colon_freq_mut = colon_by_gene.reset_index()

                                    

Number of Frequently Mutated Genes: 612 
         Unique_Samples_Mut  Missence_Mut  Truncation_Mut
Gene                                                    
ABCA13            0.195876      0.164948        0.103093
ABCA2             0.175258      0.164948        0.030928
ABCA4             0.144330      0.082474        0.061856
ABCB4             0.134021      0.061856        0.072165
ABCB6             0.103093      0.061856        0.041237


## Step 4: Repeat with the Ovarian dataset


In [4]:
# Ovarian cohort: choose the cutoff handed to al.get_frequently_mutated and
# load the dataset object.
desired_cutoff = .1
ovarian_object = cptac.Ovarian()

# Nothing is printed for this cohort, so the frequently mutated table is
# built and its index turned into a regular column in one expression.
ovarian_freq_mut = (
    al.get_frequently_mutated(ovarian_object, cutoff=desired_cutoff)
    .reset_index()
)

                                    

## Step 5: Repeat with the Renal dataset

In [5]:
# Renal (ccRCC) cohort. The cutoff set here is the one passed to
# al.get_frequently_mutated when rc_freq_mut is built in a later cell.
desired_cutoff = 0.1
rc_object = cptac.RenalCcrcc()

                                    

In [6]:
# List the distinct values of the 'Mutation' column in the renal mutation
# table, to see which mutation labels this dataset uses.
m = rc_object.get_mutations()
renal_mutation_types = m['Mutation'].unique()
renal_mutation_types

array(['Nonsense_Mutation', 'Missense_Mutation', 'Frame_Shift_Ins',
       'Frame_Shift_Del', 'Silent', 'Splice_Site', 'In_Frame_Del',
       'Nonstop_Mutation', 'In_Frame_Ins'], dtype=object)

In [15]:
# Renal frequently mutated genes, using rc_object and desired_cutoff from the
# earlier renal cell. reset_index() is chained on so that the table comes back
# with a default integer index, as the other cohorts' tables do.
rc_freq_mut = (
    al.get_frequently_mutated(rc_object, cutoff=desired_cutoff)
    .reset_index()
)
rc_freq_mut

Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,BAP1,0.154545,0.063636,0.090909
1,KDM5C,0.172727,0.036364,0.145455
2,MUC16,0.118182,0.072727,0.0
3,PBRM1,0.4,0.072727,0.336364
4,SETD2,0.145455,0.018182,0.118182
5,TTN,0.209091,0.090909,0.036364
6,VHL,0.745455,0.3,0.445455


## Step 6: Compare cis effects between pancancer frequently mutated genes

Create dataframes of the frequently mutated genes shared between the Endometrial, Colon, Ovarian, and Renal datasets. To see a pairwise comparison that is not printed by default, uncomment the print statement for that comparison.

In [41]:
def shared_genes(left, right):
    """Return the inner merge of two gene lists on their 'Gene' column.

    Parameters
    ----------
    left, right : pandas.Series named 'Gene', or pandas.DataFrame with a
        'Gene' column.

    Returns
    -------
    pandas.DataFrame
        One row for every 'Gene' value found in both inputs.
    """
    return pd.merge(left, right, how='inner', on='Gene')


# Gene-name column of each cancer's frequently mutated table.
endometrial_genes = endometrial_freq_mut['Gene']
colon_genes = colon_freq_mut['Gene']
ovarian_genes = ovarian_freq_mut['Gene']
rc_genes = rc_freq_mut['Gene']

# Pairwise comparisons. The commented-out prints are kept on purpose: the
# accompanying text tells the reader to uncomment the comparison of interest.

# Compare frequently mutated genes between Endometrial and Colon tumors
en_co_genes = shared_genes(endometrial_genes, colon_genes)
#print('endometrial and colon commonly mutated genes:\n\n', en_co_genes['Gene'], '\n')

# Compare frequently mutated genes between Endometrial and Ovarian tumors
en_ov_genes = shared_genes(endometrial_genes, ovarian_genes)
#print('endometrial and ovarian commonly mutated genes:\n\n', en_ov_genes['Gene'], '\n')

# Compare frequently mutated genes between Endometrial and Renal tumors
en_rc_genes = shared_genes(endometrial_genes, rc_genes)
print('endometrial and kidney commonly mutated genes:\n\n', en_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Colon and Renal tumors
co_rc_genes = shared_genes(colon_genes, rc_genes)
print('kidney and colon commonly mutated genes:\n\n', co_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Ovarian and Renal tumors
ov_rc_genes = shared_genes(ovarian_genes, rc_genes)
print('kidney and ovarian commonly mutated genes:\n\n', ov_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Colon and Ovarian tumors
co_ov_genes = shared_genes(ovarian_genes, colon_genes)
# Fixed: this print used to reference en_ov_genes, so uncommenting it showed
# the endometrial/ovarian overlap under the colon/ovarian label.
#print('colon and ovarian commonly mutated genes:\n\n', co_ov_genes['Gene'], '\n')

# Compare endo, colon, ovarian: genes in the endometrial/colon overlap that
# are also frequently mutated in ovarian tumors.
tricancer_genes = shared_genes(en_co_genes, ovarian_genes)
print('endo, colon, and ovarian frequently mutated genes:\n\n', tricancer_genes, '\n')

# Compare frequently mutated genes between all tumors: intersecting the
# endometrial/colon overlap with the ovarian/renal overlap leaves the genes
# present in all four lists.
pancancer_commonly_mutated_genes = shared_genes(en_co_genes, ov_rc_genes)
print('pancancer frequently mutated genes:\n\n', pancancer_commonly_mutated_genes, '\n')

endometrial and kidney commonly mutated genes:

 0    MUC16
Name: Gene, dtype: object 

kidney and colon commonly mutated genes:

 0    MUC16
1      TTN
Name: Gene, dtype: object 

kidney and ovarian commonly mutated genes:

 0    MUC16
1      TTN
Name: Gene, dtype: object 

endo, colon, and ovarian frequently mutated genes:

     Gene
0  FSIP2
1  MUC16
2  MUC17
3  MUC5B
4   TP53
5  WDFY4 

pancancer frequently mutated genes:

     Gene
0  MUC16 



Check for significant differences in omics data between cancers. Pick a gene that is frequently mutated in every cancer you want to compare.