# Make Figure 6: Mitotic Cell Cycle

This notebooks takes all the trans genes that are significant in multiple cancers and runs a GSEA using Reactome. It then takes a subset of genes from the Mitotic Cell Cycle pathway and creates a heatmap. 

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp

import cptac
import cptac.utils as u
import plot_utils as p

In [2]:
print('cptac version:', cptac.version())

cptac version: 0.8.5


# Step 1: Run GSEA

First read in sig_pval_heatmap.csv into a df. This csv file contains only genes with a significant p-value in at least one cancer. 

In [3]:
sig_df = pd.read_csv('../Make_Tables/csv/mult_sig_pval_heatmap.csv') 

In [9]:
sig_df

Unnamed: 0,Proteomics,Gbm_P_Value,Gbm_Median,Cancer,Hnscc_P_Value,Hnscc_Median,Luad_P_Value,Luad_Median,Lscc_P_Value,Lscc_Median,Brca_P_Value,Brca_Median,Ov_P_Value,Ov_Median,Endo_P_Value,Endo_Median,Colon_P_Value,Colon_Median
0,ARMH3,5.766739e-07,-0.405134,GBM,,,,,,,,,,,,,,
1,CUTC,8.514758e-07,-0.553255,GBM,,,,,,,,,,,,,,
2,PIP4K2A,2.783477e-06,-0.838882,GBM,,,,,,,,,,,,,,
3,DCTN1,1.500890e-05,-0.382988,GBM,,,,,,,,,,,,,,
4,ACTR1A,3.847715e-05,-0.491126,GBM,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2567,GYG2,,,CO,,,,,,,,,,,,,,
2568,NDC80,,,CO,,,,,,,,,,,,,,0.3455
2569,PIK3CD,,,CO,,,,,,,,,,,,,,-0.0760
2570,TNFAIP8L2,,,CO,,,,,,,,,,,,,,-0.0724


In [4]:
prot_list = list(sig_df.Proteomics.unique()) # list of genes with a sig pval in mult cancers
prot_enr = gp.enrichr(gene_list = prot_list, description='Tumor_partition', gene_sets='Reactome_2016', outdir='/Enrichr')

In [5]:
prot_enr.res2d.head(5)

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes,Gene_set
0,Gene Expression Homo sapiens R-HSA-74160,76/1631,7.790079000000001e-17,1.191882e-13,0,0,2.807059,104.116894,DDX47;WDR3;WDR4;HNRNPU;NAT10;ZC3H8;ADAR;PPP2R2...,Reactome_2016
1,"Cell Cycle, Mitotic Homo sapiens R-HSA-69278",37/462,2.324896e-15,1.778545e-12,0,0,4.824493,162.561772,TOP2A;AHCTF1;PCNA;MCM7;DCTN1;BUB1B;PPP2R2A;ORC...,Reactome_2016
2,Processing of Capped Intron-Containing Pre-mRN...,24/193,1.716598e-14,8.754651e-12,0,0,7.491104,237.436894,RANBP2;DDX5;NUP210;CPSF1;NUP155;NCBP1;NUP133;D...,Reactome_2016
3,Cell Cycle Homo sapiens R-HSA-1640170,38/566,2.579814e-13,9.867789e-11,0,0,4.044446,117.231872,TOP2A;AHCTF1;PCNA;MCM7;DCTN1;BUB1B;PPP2R2A;ORC...,Reactome_2016
4,rRNA modification in the nucleus Homo sapiens ...,14/58,4.819842e-13,1.474872e-10,0,0,14.540922,412.393134,UTP6;DDX47;IMP3;WDR3;HEATR1;NAT10;WDR75;IMP4;P...,Reactome_2016


# Step 2: Get the list of significant genes 

In [6]:
index = 1
trans = prot_enr.res2d.Genes[index]
genes = trans.split(';')
print(prot_enr.res2d.Term[index])
print('total genes:',len(genes))

Cell Cycle, Mitotic Homo sapiens R-HSA-69278
total genes: 37


In [7]:
# Catagorize genes
narrowed_cell_cycle = ['DCTN1', 'ACTR1A', 'MAD2L1', 'MCM4', 'PCNA', 'MCM5', 'MCM2','PCM1',
              'MCM7', 'TPR', 'MCM3', 'MCM6', 'NUP153', 'RFC3', 'CDK11B', 'XPO1','PRKCB', 'RFC2', 'NUF2', 
              'RAB8A', 'RFC4', 'RANBP2', 'CENPF', 'GINS4', 'NDC80', 'OPTN', 'PPP2R2A', 'PPP2R2D',
              'TPX2', 'GINS2', 'BUB1B', 'TOP2A', 'TOPBP1', 'TP53', 'NUP210', 'MSH2', 'MSH6']

dna_synth = ['MCM4', 'PCNA', 'MCM5', 'MCM2','PCM1', 'MCM7', 'MCM3', 'RFC3', 
             'RFC2', 'RFC4', 'GINS4', 'GINS2', 'TOP2A']

dynactin = ['DCTN1','ACTR1A']
spindle = ['BUB1B', 'CENPF', 'MAD2L1', 'NDC80', 'NUF2', 'PCM1', 'TPX2','DCTN1','ACTR1A']

checkpoints = ['BUB1B','PPP2R2A', 'PPP2R2D', 'TOPBP1','MAD2L1']

nuclear_pore_transport = ['TPR','NUP153','NUP210', 'XPO1','RANBP2']
other_transport = ['RAB8A']
other = ['PRKCB','OPTN','CDK11B']

path_list = {'DNA_Synth':dna_synth, 'Spindle':spindle, 'Dynactin': dynactin, 'Checkpoints':checkpoints,
            'Nuclear_Pore_Transport':nuclear_pore_transport}


In [8]:
bool_df = sig_df.Proteomics.isin(narrowed_cell_cycle)
df = sig_df[bool_df]
plot_df = df.loc[df['P_Value'] <= 0.05] # Only plot sig genes
plot_df

KeyError: 'P_Value'

In [None]:
ordered_prot_list = ['MSH2', 'MSH6', 'GINS2', 'GINS4', 'MCM2', 'MCM3', 'MCM4', 'MCM5', 'MCM6', 'MCM7',
                     'RFC2', 'RFC3', 'RFC4', 'RFC5',  'PCNA', 'PCM1', 'TOP2A','PPP2R2A', 'PPP2R2D', 
                     'TOPBP1', 'TP53', 'MAD2L1', 'BUB1B', 'CENPF', 'NDC80', 'NUF2', 'TPX2','DCTN1','ACTR1A']

In [None]:
# Reorder index to reorder the protein order in heatmap
plot_df["Index"] = plot_df["Proteomics"] + "_" + plot_df["Cancer"]
plot_df = plot_df.set_index("Index")

ordered_list = []
cancer_list = ['BR', 'CO', 'EC', 'GBM', 'HNSCC', 'LSCC', 'LUAD', 'OV']
for gene in ordered_prot_list:
    for cancer in cancer_list:
        ordered_list.append(gene+'_'+cancer)

ordered_df = plot_df.reindex(ordered_list)
ordered_df = ordered_df.loc[ordered_df['P_Value'] < 0.05] # Keep sig 

In [None]:
# Rename cancers with abbreviations                               
'''
ordered_df = ordered_df.replace({'Hnscc': 'HNSCC', 'Luad': 'LUAD', 'Lscc': 'LSCC', 'Ovarian': 'OV', 'Colon': 'CO',
                    'Kidney': 'ccRCC', 'Breast': 'BR', 'Gbm': 'GBM', 'Endo': 'EC'})
ordered_df['Cancer'].unique()'''

In [None]:
print('min p-value:', ordered_df.P_Value.min())

In [None]:
test = p.plotCircleHeatMap(ordered_df, circle_var='P_Value', color_var='Medians', x_axis='Proteomics', y_axis='Cancer',
                    plot_height=350, plot_width=800, x_axis_lab='', font_size=12, legend_max=0.05, 
                    legend_med = 0.001, legend_min=0.00001, save_png='Fig_6_Cell_Cycle.png')

In [None]:
from bokeh.plotting import figure, show
show(test)