# Make Figure 2: Mitotic Cell Cycle

This notebook takes all the trans genes that are significant in multiple cancers and runs a GSEA using Reactome. It then takes a subset of genes from the Mitotic Cell Cycle pathway and creates a heatmap.

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u
import plot_utils as p

# Step 1: Run GSEA

First, read mult_sig_pval_heatmap.csv into a DataFrame. This CSV file contains only genes with a significant p-value in multiple cancers.

In [4]:
# Load the table of significant genes that every later cell filters and plots from.
mult_sig_csv = '../Make_Tables/csv/mult_sig_pval_heatmap.csv'
sig_df = pd.read_csv(mult_sig_csv)

In [3]:
# Unique gene names from the table of genes with a significant p-value in multiple cancers.
prot_list = list(sig_df.Proteomics.unique())

# Query Enrichr against the Reactome_2016 gene-set library.
# Results are written to a directory relative to the notebook: an absolute
# '/Enrichr' points at the filesystem root, which is usually not writable and
# is not portable between machines.
prot_enr = gp.enrichr(
    gene_list=prot_list,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='Enrichr',
)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Preview the first five rows of the enrichment results table.
enrichment_results = prot_enr.res2d
enrichment_results.head(5)

# Step 2: Get the list of significant genes 

In [5]:
# Row label of the enrichment result whose genes are extracted.
index = 1
enrichment_results = prot_enr.res2d

# The Genes field is a single ';'-delimited string; split it into a list.
trans = enrichment_results['Genes'][index]
genes = trans.split(';')

print(enrichment_results['Term'][index])
print('total genes:', len(genes))

NameError: name 'prot_enr' is not defined

In [10]:
# Categorize genes

# Genes kept for the heatmap (used to filter the significance table).
narrowed_cell_cycle = [
    'DCTN1', 'ACTR1A', 'MAD2L1', 'MCM4', 'PCNA', 'MCM5',
    'MCM2', 'PCM1', 'MCM7', 'TPR', 'MCM3', 'MCM6',
    'NUP153', 'RFC3', 'CDK11B', 'XPO1', 'PRKCB', 'RFC2',
    'NUF2', 'RAB8A', 'RFC4', 'RANBP2', 'CENPF', 'GINS4',
    'NDC80', 'OPTN', 'PPP2R2A', 'PPP2R2D', 'TPX2', 'GINS2',
    'BUB1B', 'TOP2A', 'TOPBP1', 'TP53', 'NUP210', 'MSH2',
    'MSH6',
]

# Functional groupings of the genes above (a gene may appear in several groups).
dna_synth = [
    'MCM4', 'PCNA', 'MCM5', 'MCM2', 'PCM1', 'MCM7', 'MCM3',
    'RFC3', 'RFC2', 'RFC4', 'GINS4', 'GINS2', 'TOP2A',
]
dynactin = ['DCTN1', 'ACTR1A']
spindle = [
    'BUB1B', 'CENPF', 'MAD2L1', 'NDC80', 'NUF2',
    'PCM1', 'TPX2', 'DCTN1', 'ACTR1A',
]
checkpoints = ['BUB1B', 'PPP2R2A', 'PPP2R2D', 'TOPBP1', 'MAD2L1']
nuclear_pore_transport = ['TPR', 'NUP153', 'NUP210', 'XPO1', 'RANBP2']
other_transport = ['RAB8A']
other = ['PRKCB', 'OPTN', 'CDK11B']

# Group name -> gene list. other_transport and other are not included.
path_list = dict(
    DNA_Synth=dna_synth,
    Spindle=spindle,
    Dynactin=dynactin,
    Checkpoints=checkpoints,
    Nuclear_Pore_Transport=nuclear_pore_transport,
)


In [11]:
# Keep rows whose gene is in the narrowed list ...
bool_df = sig_df['Proteomics'].isin(narrowed_cell_cycle)
# ... and whose p-value is significant (only significant genes are plotted).
is_significant = sig_df['P_Value'] <= 0.05

df = sig_df.loc[bool_df & is_significant]
df

Unnamed: 0,Proteomics,P_Value,Medians,Cancer
3,DCTN1,0.000015,-0.382988,Gbm
4,ACTR1A,0.000038,-0.491126,Gbm
20,MCM4,0.001303,1.236484,Gbm
23,MAD2L1,0.001303,0.546914,Gbm
26,PCNA,0.001545,0.631486,Gbm
...,...,...,...,...
2037,BUB1B,0.026111,-0.559000,Endo
2051,NUF2,0.034789,-0.612500,Endo
2056,CENPF,0.037588,-0.439000,Endo
2065,TPR,0.042634,-0.184300,Endo


In [12]:
# TODO: add CDK11B (it is in narrowed_cell_cycle but has no entry in the ordering below).

# Row order for the heatmap, as "<gene>_<cancer>" labels: one block per cancer,
# each block listing its genes in the same group order.
heatmap_order = [
    # --- Gbm ---
    'MSH2_Gbm', 'MSH6_Gbm', #mmr
    'GINS2_Gbm', 'GINS4_Gbm', 'MCM2_Gbm', 'MCM3_Gbm', 'MCM4_Gbm', 'MCM5_Gbm', 'MCM6_Gbm', #synth
    'MCM7_Gbm', 'RFC2_Gbm', 'RFC3_Gbm', 'RFC4_Gbm', 'RFC5_Gbm', 'PCNA_Gbm', 'PCM1_Gbm', 'TOP2A_Gbm',
    'PPP2R2A_Gbm', 'PPP2R2D_Gbm', 'TOPBP1_Gbm', 'TP53_Gbm', 'MAD2L1_Gbm', 'BUB1B_Gbm', #checkpoints
    'CENPF_Gbm', 'NDC80_Gbm', 'NUF2_Gbm', 'TPX2_Gbm', 'DCTN1_Gbm', 'ACTR1A_Gbm', #mitotic
    # --- Hnscc ---
    # The mmr entries here must carry the Hnscc suffix: repeating the Gbm labels
    # would make reindex emit those Gbm rows a second time.
    'MSH2_Hnscc', 'MSH6_Hnscc', #mmr
    'GINS2_Hnscc', 'GINS4_Hnscc', 'MCM2_Hnscc', 'MCM3_Hnscc', 'MCM4_Hnscc', 'MCM5_Hnscc', 'MCM6_Hnscc', #synth
    'MCM7_Hnscc', 'RFC2_Hnscc', 'RFC3_Hnscc', 'RFC4_Hnscc', 'RFC5_Hnscc', 'PCNA_Hnscc', 'PCM1_Hnscc', 'TOP2A_Hnscc',
    'PPP2R2A_Hnscc', 'PPP2R2D_Hnscc', 'TOPBP1_Hnscc', 'MAD2L1_Hnscc', 'BUB1B_Hnscc', #checkpoints
    'CENPF_Hnscc', 'NDC80_Hnscc', 'NUF2_Hnscc', 'TPX2_Hnscc', 'DCTN1_Hnscc', 'ACTR1A_Hnscc', #mitotic
    # --- Endo ---
    'MSH2_Endo', 'MSH6_Endo', #mmr
    'RFC2_Endo', 'RFC5_Endo', 'PCM1_Endo', 'TOP2A_Endo', #synth
    'PPP2R2A_Endo', 'TOPBP1_Endo', 'TP53_Endo', 'MAD2L1_Endo', 'BUB1B_Endo', #checkpoints
    # NOTE(review): the last label was 'ACTR1A_Luad'; changed to Endo to match the
    # DCTN1/ACTR1A pairing that closes the other two blocks. Confirm Luad was not intended.
    'CENPF_Endo', 'NDC80_Endo', 'NUF2_Endo', 'TPX2_Endo', 'DCTN1_Endo', 'ACTR1A_Endo', #mitotic
]

# .assign builds the label column on a new frame, so the filtered slice of sig_df
# is never written to (avoids SettingWithCopyWarning).
# Labels in heatmap_order that are absent from df come back from reindex as
# all-NaN rows, which dropna() then removes.
df = (
    df.assign(Index=df["Proteomics"] + "_" + df["Cancer"])
      .set_index("Index")
      .reindex(heatmap_order)
      .dropna()
)

In [13]:
# Draw the circle heatmap with the plot_utils helper and save it as a PNG.
p.plotCircleHeatMap(
    df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=350,
    plot_width=800,
    x_axis_lab='Proteomics',
    font_size=12,
    legend_max=0.05,
    legend_min=0.0001,
    save_png='Fig_PTEN_2_Cell_Cycle.png',
)