# Make Figure 2: Mitotic Cell Cycle

Create a heatmap of proteins involved in the cell cycle that are significant in multiple cancers.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u
import plot_utils as p

# Step 1: Get df for significant genes in > 1 cancer

First, read sig_pval_heatmap.csv into a df. This csv file contains only genes with a significant p-value in at least one cancer.

In [2]:
# Load the p-value table for the heatmap from a path relative to the notebook.
# Author's note: TP53 is only significant in endo.
sig_df = pd.read_csv(filepath_or_buffer='csv/sig_pval_heatmap.csv')

# Step 2: Get the list of significant genes 

In [3]:
# Hand-curated list of cell-cycle proteins to keep for the heatmap.
# Order is preserved exactly as originally written.
narrowed_cell_cycle = [
    'DCTN1', 'ACTR1A', 'MAD2L1', 'MCM4', 'PCNA',
    'MCM5', 'MCM2', 'PCM1', 'MCM7', 'TPR',
    'MCM3', 'MCM6', 'NUP153', 'RFC3', 'CDK11B',
    'XPO1', 'PRKCB', 'RFC2', 'NUF2', 'RAB8A',
    'RFC4', 'RANBP2', 'CENPF', 'GINS4', 'NDC80',
    'OPTN', 'PPP2R2A', 'PPP2R2D', 'TPX2', 'GINS2',
    'BUB1B', 'TOP2A', 'TOPBP1', 'TP53', 'NUP210',
]

In [4]:
# Boolean mask: True for rows whose protein is in the curated cell-cycle list.
bool_df = sig_df['Proteomics'].isin(narrowed_cell_cycle)
# Keep the curated proteins, restricted to rows with a p-value of at most 0.05.
df = sig_df.loc[bool_df & sig_df['P_Value'].le(0.05)]

# Step 3: Create HeatMap

Plot the cell cycle genes selected in Step 2 (the rows of the df kept after filtering to p-value <= 0.05) as a circle heatmap.

In [5]:
# Draw the circle heatmap for the filtered cell-cycle rows using the project's
# plot_utils helper. Argument meanings below are inferred from their names;
# see plot_utils.plotCircleHeatMap for the authoritative description.
p.plotCircleHeatMap(
    df,
    circle_var='P_Value',      # column that drives the circle (presumably its size)
    color_var='Medians',       # column that drives the circle color
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=350,
    plot_width=800,
    x_axis_lab='Proteomics',
    font_size=12,
    legend_max=0.05,
    save_png='PTEN_Figure_3_Cell_Cycle.png',
)

In [26]:
# Curated cell-cycle proteins (this definition also includes MSH2 and MSH6).
narrowed_cell_cycle = [
    'DCTN1', 'ACTR1A', 'MAD2L1', 'MCM4', 'PCNA',
    'MCM5', 'MCM2', 'PCM1', 'MCM7', 'TPR',
    'MCM3', 'MCM6', 'NUP153', 'RFC3', 'CDK11B',
    'XPO1', 'PRKCB', 'RFC2', 'NUF2', 'RAB8A',
    'RFC4', 'RANBP2', 'CENPF', 'GINS4', 'NDC80',
    'OPTN', 'PPP2R2A', 'PPP2R2D', 'TPX2', 'GINS2',
    'BUB1B', 'TOP2A', 'TOPBP1', 'TP53', 'NUP210',
    'MSH2', 'MSH6',
]

# Functional groupings of the proteins above. A protein may appear in more
# than one group (e.g. PCM1, BUB1B, MAD2L1, DCTN1, ACTR1A).
dna_synth = [
    'MCM4', 'PCNA', 'MCM5', 'MCM2', 'PCM1', 'MCM7', 'MCM3',
    'RFC3', 'RFC2', 'RFC4', 'GINS4', 'GINS2', 'TOP2A',
]
dynactin = ['DCTN1', 'ACTR1A']
spindle = [
    'BUB1B', 'CENPF', 'MAD2L1', 'NDC80', 'NUF2',
    'PCM1', 'TPX2', 'DCTN1', 'ACTR1A',
]
checkpoints = ['BUB1B', 'PPP2R2A', 'PPP2R2D', 'TOPBP1', 'MAD2L1']
np_transport = ['TPR', 'NUP153', 'NUP210', 'XPO1', 'RANBP2']

# These two groups are defined but not included in path_list below.
other_transport = ['RAB8A']
other = ['PRKCB', 'OPTN', 'CDK11B']

# Group name -> member proteins (a dict, despite the "list" in its name).
path_list = dict(
    DNA_Synth=dna_synth,
    Spindle=spindle,
    Dynactin=dynactin,
    Checkpoints=checkpoints,
    Nuclear_Pore_Transport=np_transport,
)


In [39]:
# Select every sig_df row for the curated cell-cycle proteins. No p-value
# cutoff is applied in this cell, so non-significant rows are kept as well.
bool_df = sig_df['Proteomics'].isin(narrowed_cell_cycle)
df = sig_df.loc[bool_df]
df

Unnamed: 0,Proteomics,P_Value,Medians,Cancer
6,DCTN1,0.000015,-0.382988,Gbm
9,ACTR1A,0.000038,-0.491126,Gbm
106,MCM6,0.001303,1.085407,Gbm
107,MCM4,0.001303,1.236484,Gbm
114,MAD2L1,0.001303,0.546914,Gbm
...,...,...,...,...
18586,XPO1,0.990629,-0.002500,Colon
18636,RFC4,0.992360,-0.104850,Colon
18725,TP53,0.996888,0.049050,Colon
18769,MCM4,0.999930,-0.017500,Colon


In [53]:
# Show every row for MCM6 (one per cancer present in df).
df[df['Proteomics'].eq('MCM6')]

Unnamed: 0,Proteomics,P_Value,Medians,Cancer
106,MCM6,0.001303,1.085407,Gbm
3201,MCM6,0.070154,0.543094,Hnscc
5876,MCM6,0.574807,1.09085,Luad
9400,MCM6,0.999848,0.196,Lscc
10164,MCM6,0.530903,0.2709,Brca
12400,MCM6,0.985704,-0.109024,Ov
14939,MCM6,0.102181,-0.623,En
18478,MCM6,0.987216,-0.05925,Colon


In [55]:
# Six hand-entered values (presumably one each for MCM2..MCM7 — their
# provenance is not recorded in this notebook; TODO confirm).
m2, m3, m4, m5, m6, m7 = 0.093, 0.136, 0.138, 0.120, 0.102, 0.128

# Arithmetic mean of the six values, added left to right.
am = (m2 + m3 + m4 + m5 + m6 + m7) / 6.0
am

0.1195

In [35]:
#ADD CDK11B
df["Index"] = df["Proteomics"] + "_" + df["Cancer"]
df = df.set_index("Index")
df = df.reindex(['MSH2_Gbm', 'MSH6_Gbm', #mmr
             'GINS2_Gbm', 'GINS4_Gbm', 'MCM2_Gbm', 'MCM3_Gbm', 'MCM4_Gbm', 'MCM5_Gbm', 'MCM6_Gbm', #synth
             'MCM7_Gbm', 'RFC2_Gbm', 'RFC3_Gbm', 'RFC4_Gbm', 'RFC5_Gbm',  'PCNA_Gbm', 'PCM1_Gbm', 'TOP2A_Gbm',
             'PPP2R2A_Gbm', 'PPP2R2D_Gbm', 'TOPBP1_Gbm', 'TP53_Gbm', 'MAD2L1_Gbm', 'BUB1B_Gbm', #checkpoints
             'CENPF_Gbm', 'NDC80_Gbm', 'NUF2_Gbm', 'TPX2_Gbm','DCTN1_Gbm','ACTR1A_Gbm', # mitotic
             'MSH2_Gbm', 'MSH6_Gbm', #mmr
             'GINS2_Hnscc', 'GINS4_Hnscc', 'MCM2_Hnscc', 'MCM3_Hnscc', 'MCM4_Hnscc', 'MCM5_Hnscc', 'MCM6_Hnscc', #synth
             'MCM7_Hnscc', 'RFC2_Hnscc', 'RFC3_Hnscc', 'RFC4_Hnscc', 'RFC5_Hnscc',  'PCNA_Hnscc', 'PCM1_Hnscc', 'TOP2A_Hnscc', 
             'PPP2R2A_Hnscc', 'PPP2R2D_Hnscc', 'TOPBP1_Hnscc', 'MAD2L1_Hnscc', 'BUB1B_Hnscc', #checkpoints
             'CENPF_Hnscc', 'NDC80_Hnscc', 'NUF2_Hnscc', 'TPX2_Hnscc','DCTN1_Hnscc','ACTR1A_Hnscc', #mitotic
             'MSH2_En', 'MSH6_En', #mmr
             'RFC2_En', 'RFC5_En', 'PCM1_En', 'TOP2A_En', #synth
             'PPP2R2A_En', 'TOPBP1_En', 'TP53_En', 'MAD2L1', 'BUB1B_En', #checkpoints
             'CENPF_En', 'NDC80_En', 'NUF2_En', 'TPX2_En','DCTN1_En', 'ACTR1A_Luad']) #mitotic


                 
                 
    

In [36]:
# Drop every row that has a missing value in any column (this removes the
# all-NaN rows that reindex creates for labels not found in df).
df = df.dropna(axis=0, how='any')

In [37]:
# Draw the circle heatmap for the explicitly ordered rows using the project's
# plot_utils helper. Argument meanings are inferred from their names; see
# plot_utils.plotCircleHeatMap for the authoritative description.
# NOTE(review): the save_png name matches the earlier heatmap cell, so this
# call presumably overwrites that file — confirm this is intended.
p.plotCircleHeatMap(
    df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=350,
    plot_width=800,
    x_axis_lab='Proteomics',
    font_size=12,
    legend_max=0.05,
    save_png='PTEN_Figure_3_Cell_Cycle.png',
)