# Create Heat Map for significant interacting proteins

Pancancer heat maps are created with circle size showing significance and color showing differences in median.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p

# Step 1: Get Interacting Proteins

In [22]:
ip = u.get_interacting_proteins('PTEN')

# RAS-RAF-MEK-ERK MAPK Pathway
ip.append('GRB2')
ip.append('SHC1') # (SHC in paper)
ip.append('SOS1') # (SOS in the paper) *GEF for RAS "recruitment to the plasma membrane requires GRB2, PIP2, PA" 
ip.append('KRAS') # (RAS in paper) possibly add "RAS regulators such as PHLPP, SHP-2, and NF-2"
ip.append('RAF1') # (RAF-1 in paper)
ip.append('MAP2K1') #(MEK1 protein name in paper)
ip.append('MAP2K2 ') # (MEK2 in paper)
ip.append('MAPK3') # (ERK1 in paper) https://www.uniprot.org/uniprot/P27361
ip.append('MAPK1') # (ERK2 in paper)

#mTOR Pathway
ip.append('GAB1')
#ip.append('PI3K')
ip.append('AKT1') # (AKT in paper) *binds PIP3 (see info for isoform AKT1 at: https://www.uniprot.org/uniprot/P31749)
ip.append('PDK1') #activate AKT
ip.append('MTORC2') #activate AKT
ip.append('GSK3B') # (GSK-3B in paper)inhibit cyclin D (akt inhibits GSK3B)
ip.append('FOXO1') #(FOXO in paper)
ip.append('TSC2') # repress mTOR, AKT inhibits
ip.append('MTOR') # promote cyclin D
ip.append('BAD')
ip.append('CAS9')

ip.append('CCND1') # Cyclin D1 -> G1/S 


non_duplicate = set(ip)
ip = list(non_duplicate)
print(len(ip))

46


In [24]:
# readings

#ip.append('BRCA1') BRCA
#ip.append('RAD51') 
#ip.append('TGFBR1') # TGF-B

ip.append('TBX3') #HNSCC
ip.append('RB1') 
#ip.append('EF2')
                 
#ip.append('SOX2') #LSCC

ip.append('RAD51')
print(len(ip))

52


# Step 2: Get df with interacting proteins

In [46]:
root = R'~\Github\WhenMutationsDontMatter\PTEN\Step_3_trans_effect\csv'
#df = pd.read_csv(root+R'\all_heatmap.csv')
df = pd.read_csv(root+R'\sig_pval_heatmap.csv')
#df = pd.read_csv(root+R'\mult_sig_pval_heatmap.csv')

In [47]:
bool_df = df.Proteomics.isin(ip)
plot_df = df[bool_df]
len(plot_df.Proteomics.unique())

12

# Step 3: Create HeatMap

In [48]:
# Only include p-values < a certain cutoff
a = 0.05
plot_df = plot_df.loc[plot_df['P_Value'] <= a]

In [49]:
p.plotCircleHeatMap(plot_df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=400, plot_width= 700)