# Create Combined Heatmap for significant Complexes

This notebook looks at genes that are significant in at least one cancer. Pan-cancer heatmaps are created, with circle size showing significance (p-value) and color showing the difference in medians.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p
# Folder that holds the heatmap csv files; file names are appended to this
# string when a table is read. Written with Windows-style backslash separators
# and a leading `~`.
# NOTE(review): presumably `~` is expanded to the user's home directory by the
# csv reader -- confirm before running on another machine or OS.
root = R'~\Github\WhenMutationsDontMatter\PTEN\Step_3_trans_effect\csv'

# Get a list of genes from certain pathways

In [2]:
# Look up the gene list for each pathway of interest by pathway name.
dna_rep, dna_damage_response = [
    u.get_proteins_in_pathway(pathway_name)
    for pathway_name in ('DNA Replication', 'DNA Damage Response')
]

# Create Heatmap 

Uncomment the line for the csv file needed: `all_heatmap` includes all proteins, `sig_pval_heatmap` includes proteins significant in at least one cancer, and `mult_sig_pval_heatmap` includes proteins significant in multiple cancers.

In [9]:
# Choose the input table by leaving exactly one read_csv line uncommented.
#df = pd.read_csv(root+R'\all_heatmap.csv')
df = pd.read_csv(root + R'\sig_pval_heatmap.csv')
#df = pd.read_csv(root+R'\mult_sig_pval_heatmap.csv')

# Distinct gene names found in the Proteomics column.
gene_list = df['Proteomics'].unique().tolist()
len(gene_list)

2611

In [10]:
# Extra genes to add to the end of the DNA replication gene list.
# Note: this mutates dna_rep in place, so running the cell again appends
# the same genes a second time.
found = ['TOPBP1', 'TOP2A', 'GINS2', 'GINS4', 'POLA2', 'CHEK1']
dna_rep.extend(found)
dna_rep[-6:]  # show the last six entries, i.e. the genes just added

['TOPBP1', 'TOP2A', 'GINS2', 'GINS4', 'POLA2', 'CHEK1']

In [15]:
# Boolean mask: True for rows whose protein is in the DNA Damage Response list.
get = df['Proteomics'].isin(dna_damage_response)
# Keep only the rows for those pathway proteins.
genes_k = df.loc[get]
# Show which pathway proteins are present in the table.
genes_k['Proteomics'].unique()

array(['PRKDC', 'FANCD2', 'MRE11', 'SESN1', 'CASP3', 'RAD50', 'CHEK1',
       'TLK1', 'CDKN1B', 'APAF1', 'FAS', 'TP53'], dtype=object)

In [16]:
# Keep only the rows whose p-value is below the 0.05 cutoff.
genes_k = genes_k[genes_k['P_Value'].lt(0.05)]

In [17]:
# Draw the circle heatmap for the filtered rows: proteins on the x axis and
# cancers on the y axis, with circles driven by P_Value and color by Medians.
p.plotCircleHeatMap(
    genes_k,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=300,
    plot_width=700,
)

In [14]:
# Inspect the p-value and median recorded in each cancer for a single gene.
df[df['Proteomics'].eq('TP53')]

Unnamed: 0,Proteomics,P_Value,Medians,Cancer
2441,TP53,0.66825,0.25663,Gbm
3457,TP53,0.031801,0.70556,Hnscc
5968,TP53,0.239213,2.7678,Luad
9441,TP53,0.893006,0.20115,Lscc
10044,TP53,0.068427,0.111,Brca
14200,TP53,0.980145,0.196586,Ov
14577,TP53,0.001246,-0.8026,En
18589,TP53,0.96887,0.04905,Colon
