# Pancancer frequently mutated genes

## Step 1: Library Imports

Run this cell to import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import sys 
sys.path.append('C:\\Users\\brittany henderson\\GitHub\\WhenMutationsMatter\\Brittany\\')
import functions as f

import cptac
import cptac.algorithms as al

## Step 2: Find the frequently mutated genes for Endometrial Cancer

Enter the type of cancer and the cutoff for mutation frequency that you would like to use.

In [15]:
en_object = cptac.Endometrial()
desired_cutoff = .1

endometrial_freq_mut = al.get_frequently_mutated(en_object, cutoff=desired_cutoff)
print('\n\nNumber of Frequently Mutated Genes:', len(endometrial_freq_mut), '\n', endometrial_freq_mut.head())

                                    

Number of Frequently Mutated Genes: 232 
      Gene  Unique_Samples_Mut  Missence_Mut  Truncation_Mut
0  ABCA12            0.147368      0.094737        0.073684
1  ABCA13            0.115789      0.105263        0.042105
2  ACVR2A            0.105263      0.010526        0.094737
3  ADGRG4            0.136842      0.126316        0.021053
4  ADGRV1            0.115789      0.094737        0.052632


# Repeat with the Colon dataset

Go through the same process, this time using the Colon dataset. We will only print five genes from the frequently mutated data frame for simplicity.

In [16]:
colon_object = cptac.Colon()
desired_cutoff = .1

colon_freq_mut = al.get_frequently_mutated(colon_object, cutoff=desired_cutoff)
print('\n\nNumber of Frequently Mutated Genes:', len(colon_freq_mut), '\n', colon_freq_mut)

                                    

Number of Frequently Mutated Genes: 612 
          Gene  Unique_Samples_Mut  Missence_Mut  Truncation_Mut
0      ABCA13            0.195876      0.164948        0.103093
1       ABCA2            0.175258      0.164948        0.030928
2       ABCA4            0.144330      0.082474        0.061856
3       ABCB4            0.134021      0.061856        0.072165
4       ABCB6            0.103093      0.061856        0.041237
5      ABCC11            0.103093      0.082474        0.020619
6       ABCC5            0.113402      0.051546        0.092784
7       ACACB            0.123711      0.113402        0.010309
8      ACVR2A            0.257732      0.020619        0.257732
9    ADAMTS16            0.144330      0.134021        0.010309
10   ADAMTS18            0.113402      0.061856        0.051546
11    ADAMTS2            0.103093      0.092784        0.010309
12   ADAMTS20            0.103093      0.092784        0.010309
13    ADAMTS5           

# Repeat with the Ovarian dataset


In [17]:
ovarian_object = cptac.Ovarian()
desired_cutoff = .1

ovarian_freq_mut = al.get_frequently_mutated(ovarian_object, cutoff=desired_cutoff)

                                    

In [6]:
ovarian_freq_mut

Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,BIRC6,0.120482,0.108434,0.024096
1,FSIP2,0.108434,0.096386,0.024096
2,MT-CO1,0.120482,0.120482,0.0
3,MT-CO3,0.120482,0.108434,0.012048
4,MT-ND5,0.108434,0.084337,0.024096
5,MUC12,0.144578,0.144578,0.012048
6,MUC16,0.144578,0.144578,0.012048
7,MUC17,0.144578,0.144578,0.0
8,MUC4,0.325301,0.313253,0.012048
9,MUC5B,0.108434,0.108434,0.0


# Repeat with the Renal dataset

In [18]:
rc_object = cptac.RenalCcrcc()
desired_cutoff = .1

                                    

In [6]:
m = rc_object.get_mutations()
m['Mutation'].unique()

array(['Nonsense_Mutation', 'Missense_Mutation', 'Frame_Shift_Ins',
       'Frame_Shift_Del', 'Silent', 'Splice_Site', 'In_Frame_Del',
       'Nonstop_Mutation', 'In_Frame_Ins'], dtype=object)

In [19]:
rc_freq_mut = al.get_frequently_mutated(rc_object, cutoff=desired_cutoff)
rc_freq_mut

Unnamed: 0,Gene,Unique_Samples_Mut,Missence_Mut,Truncation_Mut
0,BAP1,0.154545,0.063636,0.090909
1,KDM5C,0.172727,0.036364,0.145455
2,PBRM1,0.4,0.072727,0.336364
3,SETD2,0.136364,0.018182,0.118182
4,TTN,0.118182,0.090909,0.036364
5,VHL,0.745455,0.3,0.445455


# Step 6: Compare cis effects between pancancer frequently mutated genes

Create a dataframe of the frequently mutated genes in common between Colon and Endometrial and Ovarian datasets. To compare two cancers uncomment the print statement with that comparison.

In [21]:
endometrial_genes = endometrial_freq_mut['Gene']
colon_genes = colon_freq_mut['Gene']
ovarian_genes = ovarian_freq_mut['Gene']
rc_genes = rc_freq_mut['Gene']

# Compare frequently mutated genes between Endometrial and Colon tumors
en_co_genes = pd.merge(endometrial_genes, colon_genes, how='inner')
print('endometrial and colon commonly mutated genes:\n\n', en_co_genes['Gene'], '\n')

# Compare frequently mutated genes between Endometrial and Ovarian tumors
en_ov_genes = pd.merge(endometrial_genes, ovarian_genes, how='inner')
print('endometrial and ovarian commonly mutated genes:\n\n', en_ov_genes['Gene'], '\n')

# Compare frequently mutated genes between Endometrial and Renal tumors
en_rc_genes = pd.merge(endometrial_genes, rc_genes, how='inner')
print('endometrial and kidney commonly mutated genes:\n\n', en_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Colon and Renal tumors
co_rc_genes = pd.merge(colon_genes, rc_genes)
print('kidney and colon commonly mutated genes:\n\n', co_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Ovarian and Renal tumors
ov_rc_genes = pd.merge(ovarian_genes, rc_genes, how='inner')
print('kidney and ovarian commonly mutated genes:\n\n', ov_rc_genes['Gene'], '\n')

# Compare frequently mutated genes between Colon and Ovarian tumors
co_ov_genes = pd.merge(ovarian_genes, colon_genes, how='inner')
print('colon and ovarian commonly mutated genes:\n\n', en_ov_genes['Gene'], '\n')

# Compare endo, colon, ovarian
tricancer_genes = pd.merge(en_co_genes, ovarian_genes, how='inner')
print('endo, colon, and ovarian frequently mutated genes:\n\n', tricancer_genes, '\n')

#Compare frequently mutated genes between all tumors
pancancer_commonly_mutated_genes = pd.merge(en_co_genes, ov_rc_genes, how='inner')
print('pancancer frequently mutated genes:\n\n', pancancer_commonly_mutated_genes, '\n')

endometrial and colon commonly mutated genes:

 0       ABCA13
1       ACVR2A
2       ADGRG4
3       ADGRV1
4        AHNAK
5       AHNAK2
6         ANK1
7         ANK2
8         ANK3
9      ANKRD11
10         APC
11        APOB
12      ARID1A
13       ASCC3
14       ASTN1
15     CACNA1A
16     CACNA1B
17     CACNA1D
18       CADPS
19     CCDC168
20      CELSR1
21      CELSR3
22      CEP295
23      CEP350
24      CFAP65
25        CHD3
26        CHD4
27     COL11A1
28     COL12A1
29     COL18A1
        ...   
117       SPEN
118      SPTA1
119      SRCAP
120      STAB1
121       SVIL
122      SYNE1
123      SYNE2
124       SZT2
125      TACC2
126      TENM1
127      TENM3
128     TNRC6B
129       TP53
130     TRANK1
131       UBR4
132       UBR5
133      USH2A
134     VPS13A
135      WDFY3
136      WDFY4
137      WDR87
138      XIRP2
139     ZC3H13
140      ZDBF2
141      ZFHX3
142      ZFHX4
143     ZNF292
144     ZNF469
145     ZNF536
146     ZNF831
Name: Gene, Length: 147, dtype: objec

Check significant difference in omics data between cancers. Pick a gene in common in the cancers you want to compare.