# Create Heatmap for Immune genes Comparable to EGFR Immune Heatmap 

This notebook looks at the significant genes in at least one cancer. Pancancer heatmaps are created with circle size showing significance and color showing differences in median.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p

# Create HeatMap

In [12]:
all_df = pd.read_csv("../Make_Tables/csv/all_heatmap.csv")
sig_df = pd.read_csv('../Make_Tables/csv/sig_pval_heatmap.csv')
mult_df = pd.read_csv('../Make_Tables/csv/mult_sig_pval_heatmap.csv')

print('sig in at least one cancer:', len(sig_df.Proteomics.unique()))
print('sig in mult cancer:', len(mult_df.Proteomics.unique()))

sig in at least one cancer: 2630
sig in mult cancer: 332


In [18]:
# ordered
t_only = ['CD3E', 'CD3G', 'CD4', 'CD8A', 'GRAP2', 'FYB1', 'PRKCQ']
b_only = ['PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN', 'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB']
both = ['LCK', 'ZAP70', 'VAV1', 'GRB2', 'PLCG1', 'NFATC2', 'RAC2', 'CD5', 'PTPN6', 'PTPRC', 
        'PIK3CG', 'INPP5D', 'NFKB2', 'PRKCB', 'PLCG2', 'ELMO1', 'DOCK2', 'WAS']
immune = t_only + b_only + both

In [73]:
# Sig PTEN # CD2? CHUK
t_only = ['CD3E', 'CD3G', 'CD4', 'CD8A', 'GRAP2', 'FYB1', 'PRKCQ'] 
b_only = ['PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN', 'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB']
both = ['CD2', 'CD5', 'GRB2', 'LCK', 'ZAP70', 'VAV1', 'PLCG1', 'PLCG2', 'NFATC2', 'CHUK', 'NFKB2', 'RAC2', 'PTPN6', 'PTPRC', 
        'PIK3CD', 'PIK3CG', 'INPP5D', 'PRKCB', 'ELMO1', 'DOCK2', 'WAS']
immune = t_only + b_only + both

In [74]:
bool_df = all_df.Proteomics.isin(immune)
plot_df = all_df[bool_df]
print('genes found in all_df:', len(plot_df.Proteomics.unique()))
plot_df.Proteomics.unique()

genes found in all_df: 37


array(['ELMO1', 'DOCK2', 'CHUK', 'SYK', 'PLCG1', 'PRKCB', 'BTK', 'PLCG2',
       'PIK3CD', 'GRB2', 'INPP5D', 'PRKCQ', 'FYB1', 'VAV1', 'WAS', 'BLNK',
       'PTPRC', 'NFKB2', 'PPP3CC', 'PIK3CG', 'CD4', 'PTPN6', 'NFATC2',
       'HLA-DMA', 'CD2', 'RAC2', 'LYN', 'HLA-DMB', 'CD8A', 'GRAP2',
       'ZAP70', 'CD3E', 'CD5', 'CD3G', 'LCK', 'CD79A', 'BLK'],
      dtype=object)

In [75]:
plot_df["Index"] = plot_df["Proteomics"] + "_" + plot_df["Cancer"]
plot_df = plot_df.set_index("Index")

ordered_list = []
cancer_list = ['Brca', 'Colon', 'Endo', 'Gbm', 'Hnscc', 'Lscc', 'Luad', 'Ov']
for gene in immune:
    for cancer in cancer_list:
        ordered_list.append(gene+'_'+cancer)

ordered_df = plot_df.reindex(ordered_list)
ordered_df = ordered_df.loc[ordered_df['P_Value'] < 0.05] # Keep sig 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [76]:
# Find lowest p-val for legend
ordered_df.P_Value.min()

0.0006218184807157023

In [77]:
p.plotCircleHeatMap(ordered_df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=400, plot_width= 800, legend_min = 0.0001, legend_med = .001, legend_max = 0.05, save_png = "Figure_PTEN_immune.png")

In [None]:
# Sig PTEN
t_only = ['CD2', 'CD4', 'GRAP2', 'FYB1', 'PRKCQ']
b_only = ['PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN', 'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB']
both = ['LCK', 'ZAP70', 'VAV1', 'GRB2', 'PLCG1', 'NFATC2', 'RAC2', 'CD5', 'PTPN6', 'PTPRC', 
        'PIK3CG', 'INPP5D', 'NFKB2', 'PRKCB', 'PLCG2', 'ELMO1', 'DOCK2', 'WAS']
immune = t_only + b_only + both

Check all sig EGFR immune genes

In [15]:

#immune = ['CD2','CD5',"CD79A","CD79B","CD274","BLK","BTK","LCK","LYN","SYK","BLNK","CD19","PLCG2","PI3K","CD5","CD22","PCK","HS1","CCL3","CCL4","PTPN6","ZAP70","LCK","ITK","CD3E","CD3G","CD4","VAV1","GRB2","ADAP","GADS","LAT","SLP76","NFAT","PLCG1","PTPN22","SOS1","HLA-DP","HLA-DM","HLA-DOA","HLA-DOB","HLA-DQ","HLA-DRA","HLA-DRB1","GZMA","CD8A","PRF1","CIITA","INFGR1","JAK1","JAK2","STAT1","B2M","HLA-A","HLA-B","HLA-C","TAP1","TAP2","PSMB8","PSMB9","PSMB10","IFDR1","TGFB1"]
#len(immune)
immune = ["CD2",'CD3E','CD3G','CD4','CD8A','LCK','ZAP70','LCP2','GRAP2','VAV1','GRB2','NFATC2','PPP3CC','RAC2','CD79A',
   'SYK','BTK','CD5','PTPN6','LYN','PTPRC','PIK3CG','INPP5D','PRKCQ', 'BLNK','PIK3CD','BLK', 'HLA-DMA', 'HLA-DMB',"NFKB2","CHUK","PLCG2","PPP3CB","MAP3K8","NFATC2","PIK3CG","SYK","BTK","RAC2","PRKCB","ELMO1", "DOCK2", "RAC2", "WAS"]
len(immune)

bool_df = all_df.Proteomics.isin(immune)
plot_df = all_df[bool_df]
print('genes found in all_df:', len(plot_df.Proteomics.unique()))
plot_df.Proteomics.unique()

plot_df['P_Value'] = plot_df['P_Value'].where(plot_df['P_Value'] < 0.05, 1)
p.plotCircleHeatMap(plot_df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=400, plot_width= 800, legend_min = 0.01, legend_max = 0.05)

genes found in all_df: 38


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['size'] = (df["size2"])*3
