# Pancancer frequently mutated genes

## Step 1: Library Imports

Run this cell to import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import sys 
sys.path.append('C:\\Users\\brittany henderson\\GitHub\\WhenMutationsMatter\\Brittany\\')
import functions as f

import cptac
import cptac.algorithms as al

## Step 2: Find the frequently mutated genes for Endometrial Cancer

Enter the type of cancer and the cutoff for mutation frequency that you would like to use.

In [4]:
en_object = cptac.Endometrial()
desired_cutoff = .2

endometrial_freq_mut = al.get_frequently_mutated(en_object, cutoff=desired_cutoff)
print('\n\nNumber of Frequently Mutated Genes:', len(endometrial_freq_mut), '\n', endometrial_freq_mut.head())
endometrial_freq_mut

                                    

Number of Frequently Mutated Genes: 10 
      Gene  Unique_Samples_Mut  Missence_Mut  Truncation_Mut
0  ARID1A            0.452632      0.136842        0.400000
1    CTCF            0.284211      0.094737        0.242105
2  CTNNB1            0.305263      0.305263        0.000000
3   KMT2B            0.242105      0.115789        0.126316
4    KRAS            0.326316      0.326316        0.000000


Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,ARID1A,0.452632,0.136842,0.4
1,CTCF,0.284211,0.094737,0.242105
2,CTNNB1,0.305263,0.305263,0.0
3,KMT2B,0.242105,0.115789,0.126316
4,KRAS,0.326316,0.326316,0.0
5,PIK3CA,0.494737,0.484211,0.010526
6,PIK3R1,0.389474,0.231579,0.189474
7,PTEN,0.789474,0.463158,0.568421
8,TP53,0.221053,0.157895,0.073684
9,ZFHX3,0.221053,0.084211,0.168421


In [7]:
rc_freq_mut

Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,PBRM1,0.4,0.072727,0.336364
1,VHL,0.745455,0.3,0.445455


# Repeat with the Colon dataset

Go through the same process, this time using the Colon dataset. We will only print five genes from the frequently mutated data frame for simplicity.

In [8]:
colon_object = cptac.Colon()
desired_cutoff = .2

colon_freq_mut = al.get_frequently_mutated(colon_object, cutoff=desired_cutoff)
print('\n\nNumber of Frequently Mutated Genes:', len(colon_freq_mut), '\n', colon_freq_mut)

                                    

Number of Frequently Mutated Genes: 39 
        Gene  Unique_Samples_Mut  Missence_Mut  Truncation_Mut
0    ACVR2A            0.257732      0.020619        0.257732
1    AHNAK2            0.216495      0.195876        0.030928
2       APC            0.845361      0.113402        0.824742
3    ARID1A            0.237113      0.072165        0.185567
4   CCDC168            0.298969      0.195876        0.195876
5    COL5A1            0.216495      0.185567        0.061856
6     CSMD3            0.268041      0.226804        0.082474
7     DNAH5            0.268041      0.216495        0.103093
8     DOCK3            0.216495      0.082474        0.144330
9      FAT3            0.288660      0.268041        0.030928
10     FAT4            0.268041      0.257732        0.030928
11    FSIP2            0.226804      0.175258        0.092784
12    HERC2            0.216495      0.175258        0.051546
13    HMCN1            0.237113      0.226804        

# Repeat with the Ovarian dataset


In [9]:
ovarian_object = cptac.Ovarian()
desired_cutoff = .2

ovarian_freq_mut = al.get_frequently_mutated(ovarian_object, cutoff=desired_cutoff)

                                    

In [6]:
ovarian_freq_mut

Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,BIRC6,0.120482,0.108434,0.024096
1,FSIP2,0.108434,0.096386,0.024096
2,MT-CO1,0.120482,0.120482,0.0
3,MT-CO3,0.120482,0.108434,0.012048
4,MT-ND5,0.108434,0.084337,0.024096
5,MUC12,0.144578,0.144578,0.012048
6,MUC16,0.144578,0.144578,0.012048
7,MUC17,0.144578,0.144578,0.0
8,MUC4,0.325301,0.313253,0.012048
9,MUC5B,0.108434,0.108434,0.0


# Repeat with the Renal dataset

In [5]:
rc_object = cptac.RenalCcrcc()
desired_cutoff = .2

                                    

In [6]:
m = rc_object.get_mutations()
m['Mutation'].unique()

array(['Nonsense_Mutation', 'Missense_Mutation', 'Frame_Shift_Ins',
       'Frame_Shift_Del', 'Silent', 'Splice_Site', 'In_Frame_Del',
       'Nonstop_Mutation', 'In_Frame_Ins'], dtype=object)

In [6]:
rc_freq_mut = al.get_frequently_mutated(rc_object, cutoff=desired_cutoff)
rc_freq_mut

Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,PBRM1,0.4,0.072727,0.336364
1,VHL,0.745455,0.3,0.445455


# Step 6: Compare cis effects between pancancer frequently mutated genes

Create a dataframe of the frequently mutated genes in common between Colon and Endometrial and Ovarian datasets. To compare two cancers uncomment the print statement with that comparison.

In [14]:
endometrial_genes = endometrial_freq_mut['Gene']
colon_genes = colon_freq_mut['Gene']
ovarian_genes = ovarian_freq_mut['Gene']
rc_genes = rc_freq_mut['Gene']

# Compare frequently mutated genes between Endometrial and Colon tumors
en_co_genes = pd.merge(endometrial_genes, colon_genes, how='inner')
#print('endometrial and colon commonly mutated genes:\n\n', en_co_genes['Gene'], '\n')

# Compare frequently mutated genes between Endometrial and Ovarian tumors
en_ov_genes = pd.merge(endometrial_genes, ovarian_genes, how='inner')
#print('endometrial and ovarian commonly mutated genes:\n\n', en_ov_genes['Gene'], '\n')

# Compare frequently mutated genes between Endometrial and Renal tumors
en_rc_genes = pd.merge(endometrial_genes, rc_genes, how='inner')
#print('endometrial and kidney commonly mutated genes:\n\n', en_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Colon and Renal tumors
co_rc_genes = pd.merge(colon_genes, rc_genes)
#print('kidney and colon commonly mutated genes:\n\n', co_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Ovarian and Renal tumors
ov_rc_genes = pd.merge(ovarian_genes, rc_genes, how='inner')
#print('kidney and ovarian commonly mutated genes:\n\n', ov_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Colon and Ovarian tumors
co_ov_genes = pd.merge(ovarian_genes, colon_genes)
print('colon and ovarian commonly mutated genes:\n\n', en_ov_genes['Gene'], '\n')

# Compare endo, colon, ovarian
tricancer_genes = pd.merge(en_co_genes, ovarian_genes, how='inner')
#print('endo, colon, and ovarian frequently mutated genes:\n\n', tricancer_genes, '\n')

#Compare frequently mutated genes between all tumors
pancancer_commonly_mutated_genes = pd.merge(en_co_genes, ov_rc_genes, how='inner')
#print('pancancer frequently mutated genes:\n\n', pancancer_commonly_mutated_genes, '\n')

print(ovarian_genes)
colon_genes

colon and ovarian commonly mutated genes:

 0    TP53
Name: Gene, dtype: object 

0    MUC4
1    TP53
2     TTN
Name: Gene, dtype: object


0      ACVR2A
1      AHNAK2
2         APC
3      ARID1A
4     CCDC168
5      COL5A1
6       CSMD3
7       DNAH5
8       DOCK3
9        FAT3
10       FAT4
11      FSIP2
12      HERC2
13      HMCN1
14      KMT2C
15       KRAS
16      MUC16
17       MUC4
18      MUC5B
19     MYCBP2
20       NBEA
21      NCOR2
22      NRXN1
23      OBSCN
24       PCLO
25     PIK3CA
26       PLEC
27       RYR1
28       RYR2
29       SACS
30     SLC4A3
31       SPEG
32       SSPO
33      SYNE1
34     TCF7L2
35       TP53
36        TTN
37       USF3
38     ZNF469
Name: Gene, dtype: object

Check significant difference in omics data between cancers. Pick a gene in common in the cancers you want to compare.