# Create Heatmap for Immune Genes Comparable to the EGFR Immune Heatmap

This notebook looks at immune genes that are significant in at least one cancer. A pan-cancer heatmap is created in which circle size shows significance (p-value) and color shows the difference in medians.

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p

# Create HeatMap

In [45]:
# Gene symbols to include in the immune heatmap.
# (This curated list replaces an earlier, longer candidate list.)
immune = [
    'CD3E', 'CD3G', 'CD4', 'CD8A', 'LCK', 'ZAP70', 'LCP2',
    'GRAP2', 'VAV1', 'GRB2', 'NFATC2', 'PPP3CC', 'RAC2', 'CD79A',
    'SYK', 'BTK', 'CD5', 'PTPN6', 'LYN', 'PTPRC', 'PIK3CG',
    'INPP5D', 'PRKCQ', 'BLNK', 'PIK3CD', 'BLK', 'HLA-DMA', 'HLA-DMB',
]
# Show how many genes are in the list.
len(immune)

28

In [46]:
# Directory holding the heatmap CSV tables.
# Forward slashes are used instead of backslashes so the same path string works on
# Windows, macOS and Linux; the leading "~" is left for pandas to expand, as before.
root = '~/Github/WhenMutationsDontMatter/PTEN/Make_Tables/csv'

# all_df: table filtered below for the immune genes
# sig_df / mult_df: tables used only for the summary counts printed by this cell
all_df = pd.read_csv(root + '/all_heatmap.csv')
sig_df = pd.read_csv(root + '/sig_pval_heatmap.csv')
mult_df = pd.read_csv(root + '/mult_sig_pval_heatmap.csv')

# Number of distinct entries in the Proteomics column of each significance table.
print('sig in at least one cancer:', len(sig_df.Proteomics.unique()))
print('sig in mult cancer:', len(mult_df.Proteomics.unique()))

sig in at least one cancer: 2630
sig in mult cancer: 332


In [60]:
# Boolean mask: True for rows of all_df whose Proteomics value is in the immune list.
bool_df = all_df['Proteomics'].isin(immune)

# Keep only the masked rows.
plot_df = all_df.loc[bool_df]

# Count how many distinct genes from the list were actually found.
print('genes found in all_df:', len(plot_df['Proteomics'].unique()))
plot_df

genes found in all_df: 28


Unnamed: 0,Proteomics,P_Value,Medians,Cancer
495,SYK,0.006884,-0.697550,Gbm
715,BTK,0.010900,-0.531697,Gbm
832,PIK3CD,0.013180,-0.492099,Gbm
858,GRB2,0.013560,-0.312146,Gbm
866,INPP5D,0.013739,-0.677243,Gbm
...,...,...,...,...
84285,CD3E,,0.122000,Colon
84480,GRAP2,,-0.097650,Colon
84700,PIK3CD,,-0.076000,Colon
84719,PPP3CC,,-0.206700,Colon


In [78]:
# Row order for the heatmap, as "<gene>_<cancer>" labels grouped by cancer.
heatmap_order = [
    # Hnscc
    'CD4_Hnscc', 'LCK_Hnscc', 'ZAP70_Hnscc', 'VAV1_Hnscc', 'GRB2_Hnscc',
    'NFATC2_Hnscc', 'RAC2_Hnscc', 'SYK_Hnscc', 'BTK_Hnscc', 'CD5_Hnscc',
    'PTPRC_Hnscc', 'PIK3CG_Hnscc', 'INPP5D_Hnscc', 'PRKCQ_Hnscc', 'BLNK_Hnscc',
    # Gbm
    'VAV1_Gbm', 'GRB2_Gbm', 'SYK_Gbm', 'BTK_Gbm', 'PTPRC_Gbm',
    'INPP5D_Gbm', 'PRKCQ_Gbm', 'PIK3CD_Gbm', 'BLNK_Gbm',
    # Luad
    'CD4_Luad', 'LCK_Luad', 'ZAP70_Luad', 'RAC2_Luad', 'CD5_Luad',
    'PTPRC_Luad', 'PIK3CD_Luad',
    # "HLA-DMA_Gbm" and "HLA-DMB_Gbm" are currently left out.
]

# Label every row "<gene>_<cancer>" and use that label as the index.
# assign() builds a new frame instead of writing a column into plot_df, which is a
# filtered selection of all_df; this avoids pandas' SettingWithCopyWarning.
plot_df = plot_df.assign(Index=plot_df["Proteomics"] + "_" + plot_df["Cancer"]).set_index("Index")

# reindex() inserts an all-NaN row for any label that is not in the index, so report
# such labels instead of letting them pass silently.
missing_labels = [label for label in heatmap_order if label not in plot_df.index]
if missing_labels:
    print('labels not found in plot_df (rows will be NaN):', missing_labels)

ordered_df = plot_df.reindex(heatmap_order)

In [85]:
# Keep P_Value where it is below 0.05; every other row (including missing values)
# is set to 1 so that it is not seen on the heatmap.
is_significant = ordered_df['P_Value'].lt(0.05)
ordered_df['P_Value'] = ordered_df['P_Value'].where(is_significant, other=1)
ordered_df

Unnamed: 0_level_0,Proteomics,P_Value,Medians,Cancer,size2,size
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CD4_Hnscc,CD4,0.038559,-0.579552,Hnscc,3.255554,9.766663
LCK_Hnscc,LCK,1.0,-0.361337,Hnscc,-0.0,-0.0
ZAP70_Hnscc,ZAP70,0.040154,-0.539451,Hnscc,3.215027,9.645081
VAV1_Hnscc,VAV1,1.0,-0.46655,Hnscc,-0.0,-0.0
GRB2_Hnscc,GRB2,1.0,-0.190047,Hnscc,-0.0,-0.0
NFATC2_Hnscc,NFATC2,0.041732,-0.375735,Hnscc,3.176476,9.529428
RAC2_Hnscc,RAC2,0.029767,-0.517985,Hnscc,3.514361,10.543082
SYK_Hnscc,SYK,0.040732,-0.225323,Hnscc,3.200752,9.602256
BTK_Hnscc,BTK,0.023959,-0.515638,Hnscc,3.731404,11.194211
CD5_Hnscc,CD5,0.034611,-0.59409,Hnscc,3.363571,10.090713


In [84]:
# Draw the circle heatmap: one column per gene (Proteomics), one row per cancer,
# with P_Value passed as the circle variable and Medians as the colour variable.
# NOTE(review): the displayed ordered_df contains `size` and `size2` columns that no
# cell in this notebook creates -- presumably plotCircleHeatMap adds them to the
# frame it is given; confirm against plot_utils.
p.plotCircleHeatMap(
    ordered_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=400,
    plot_width=800,
    legend_min=0.01,
    legend_max=0.05,
)