# Make Figure 7B: PTEN Immune Heatmap 

This notebook looks at the genes that are significant in at least one cancer. Pan-cancer heatmaps are created, with circle size showing significance (p-value) and color showing the difference in medians.

In [2]:
import pandas as pd
import numpy as np
import gseapy as gp

import cptac
import cptac.utils as u
import plot_utils as p

In [3]:
# Record the installed cptac package version next to the results for provenance.
print('cptac version:', cptac.version())

cptac version: 0.8.5


# Run GSEA

In [5]:
# Load the p-value table and collect the distinct gene names in its Proteomics column.
# NOTE(review): the original comment describes these as genes with a significant
# p-value in >= 1 cancer, while the file name says "mult_sig" -- confirm which applies.
mult_sig_df = pd.read_csv('../Make_Tables/csv/mult_sig_pval_heatmap.csv')
mult_sig_list = mult_sig_df.Proteomics.unique().tolist()

# Submit the gene list to Enrichr against the NCI-Nature 2016 gene-set library.
# NOTE(review): outdir is an absolute path at the filesystem root; confirm that this
# location is intended and writable on the machine running the notebook.
prot_enr = gp.enrichr(
    gene_list=mult_sig_list,
    description='Tumor_partition',
    gene_sets='NCI-Nature_2016',
    outdir='/Enrichr',
)

In [6]:
# Display the first six rows of the Enrichr results table.
prot_enr.res2d.head(6)

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes,Gene_set
0,BCR signaling pathway Homo sapiens acbf44e2-61...,9/64,2e-06,0.000327,0,0,8.081897,108.041859,MAP4K1;PPP3CB;PTPRC;SYK;CHUK;PIK3CA;BLNK;BTK;P...,NCI-Nature_2016
1,Class I PI3K signaling events Homo sapiens 12b...,8/48,2e-06,0.000167,0,0,9.578544,127.862042,ZAP70;SYK;PIK3CA;BLNK;PLCG2;BTK;PIK3CD;CYTH1,NCI-Nature_2016
2,IL8- and CXCR2-mediated signaling events Homo ...,6/34,2.4e-05,0.00165,0,0,10.141988,108.017599,GNG2;PRKCB;RAC2;ELMO1;DOCK2;GNAI2,NCI-Nature_2016
3,ATR signaling pathway Homo sapiens 8991cbac-61...,6/39,5.3e-05,0.002791,0,0,8.841733,86.980937,RFC3;RFC4;MCM7;RFC2;TOPBP1;MCM2,NCI-Nature_2016
4,CXCR3-mediated signaling events Homo sapiens 3...,6/43,9.4e-05,0.003935,0,0,8.019246,74.34534,GNG2;PIK3CA;ITGB2;PIK3CD;ITGAL;GNAI2,NCI-Nature_2016
5,TCR signaling in naive CD4+ T cells Homo sapie...,7/64,0.000121,0.004199,0,0,6.28592,56.721288,MAP4K1;ZAP70;CD4;PTPRC;CHUK;PRKCB;WAS,NCI-Nature_2016


In [5]:
# Row labels of the immune-related terms in prot_enr.res2d. They were picked by hand
# from the table displayed above (BCR, IL8/CXCR2 and TCR signaling), so they must be
# re-checked whenever the enrichment results change.
immune_indexes = [0, 2, 5]

# For each selected term, print its name, the number of overlapping genes, and the genes.
for i in immune_indexes:
    print(prot_enr.res2d.Term[i])
    genes = prot_enr.res2d.Genes[i]
    g_list = genes.split(';')  # Genes is a single ';'-separated string
    print(len(g_list), 'genes')
    print(g_list, '\n')

BCR signaling pathway Homo sapiens acbf44e2-618c-11e5-8ac5-06603eb7f303
9 genes
['MAP4K1', 'PPP3CB', 'PTPRC', 'SYK', 'CHUK', 'PIK3CA', 'BLNK', 'BTK', 'PLCG2'] 

IL8- and CXCR2-mediated signaling events Homo sapiens fe78e284-6193-11e5-8ac5-06603eb7f303
6 genes
['GNG2', 'PRKCB', 'RAC2', 'ELMO1', 'DOCK2', 'GNAI2'] 

TCR signaling in naive CD4+ T cells Homo sapiens 0c2862fa-6196-11e5-8ac5-06603eb7f303
7 genes
['MAP4K1', 'ZAP70', 'CD4', 'PTPRC', 'CHUK', 'PRKCB', 'WAS'] 



# Create HeatMap

In [6]:
# Load the table behind the heatmap. Later cells read its Proteomics, Cancer,
# P_Value and Medians columns.
all_df = pd.read_csv("../Make_Tables/csv/all_heatmap.csv")

In [7]:
# Proteins added were from the EGFR immune heatmap for comparison.
# The three groups below are ordered; their concatenation fixes the protein
# order used along the heatmap axis.
t_only = [
    'CD3E', 'CD3G', 'CD4', 'CD8A',
    'GRAP2', 'FYB1', 'PRKCQ',
]
b_only = [
    'PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN',
    'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB',
]
both = [
    'CD2', 'CD5', 'GRB2', 'LCK', 'ZAP70', 'VAV1', 'PTPN6',
    'PTPRC', 'PIK3CD', 'PIK3CG', 'INPP5D', 'PLCG1', 'PLCG2', 'NFATC2',
    'PRKCB', 'CHUK', 'NFKB2', 'DOCK2', 'RAC2', 'ELMO1', 'WAS',
]

# Full ordered list: t_only first, then b_only, then both.
immune = [*t_only, *b_only, *both]
print('Number of genes:', len(immune))

Number of genes: 37


In [8]:
# Get df with immune proteins
# bool_df is a boolean mask over all_df rows whose Proteomics value is in `immune`.
bool_df = all_df.Proteomics.isin(immune)
# .copy() gives plot_df its own data rather than a slice of all_df, so columns can
# be added to it later without triggering pandas' SettingWithCopyWarning.
plot_df = all_df[bool_df].copy()
print('immune proteins found in all_df:', len(plot_df.Proteomics.unique()))
plot_df.Proteomics.unique()

immune proteins found in all_df: 37


array(['ELMO1', 'DOCK2', 'CHUK', 'SYK', 'PLCG1', 'PRKCB', 'BTK', 'PLCG2',
       'PIK3CD', 'GRB2', 'INPP5D', 'PRKCQ', 'FYB1', 'VAV1', 'WAS', 'BLNK',
       'PTPRC', 'NFKB2', 'PPP3CC', 'PIK3CG', 'CD4', 'PTPN6', 'NFATC2',
       'HLA-DMA', 'CD2', 'RAC2', 'LYN', 'HLA-DMB', 'CD8A', 'GRAP2',
       'ZAP70', 'CD3E', 'CD5', 'CD3G', 'LCK', 'CD79A', 'BLK'],
      dtype=object)

In [9]:
# Reorder index to reorder protein order in heatmap.
# Each row is keyed by "<Proteomics>_<Cancer>". assign() returns a new frame, so this
# cell never writes into a slice of all_df (no SettingWithCopyWarning) and is safe
# to re-run.
plot_df = plot_df.assign(
    Index=plot_df["Proteomics"] + "_" + plot_df["Cancer"]
).set_index("Index")

# Desired row order: proteins in the order of `immune`, and within each protein
# the cancers in the order of cancer_list.
cancer_list = ['BR', 'CO', 'EC', 'GBM', 'HNSCC', 'LSCC', 'LUAD', 'OV']
ordered_list = [gene + '_' + cancer for gene in immune for cancer in cancer_list]

# reindex() inserts all-NaN rows for gene/cancer pairs missing from plot_df; the
# P_Value filter below drops them again because NaN < 0.05 is False.
ordered_df = plot_df.reindex(ordered_list)

# Keep sig. The copy makes sig_ordered_df an independent frame, so code that later
# adds columns to it (the plotting helper does) does not warn about a slice.
sig_ordered_df = ordered_df.loc[ordered_df['P_Value'] < 0.05].copy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [10]:
# Find lowest p-val for legend: the smallest significant p-value guides the choice
# of the legend bounds passed to the heatmap call.
print('min p-value:', sig_ordered_df.P_Value.min())

min p-value: 0.0006218184807157023


In [11]:
# Draw the circle heatmap: proteins on the x axis, cancers on the y axis,
# circle size driven by P_Value and color by Medians. The figure is saved as Fig_7B.png.
p.plotCircleHeatMap(
    sig_ordered_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=300,
    plot_width=800,
    font_size=14,
    # Legend reference p-values (smallest, middle, largest).
    legend_min=0.0001,
    legend_med=.001,
    legend_max=0.05,
    save_png="Fig_7B.png",
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['size'] = (df["size2"])*3


# Calculate percentage of immune proteins

The manuscript mentions the percentage of proteins significant in multiple cancers that are involved with the immune system. That percentage is calculated here.

In [20]:
# Get all immune proteins: query Reactome for the members of pathway R-HSA-168256
# (the Reactome "Immune System" pathway) and keep the `member` column as a plain list.
all_immune_prot = u.get_proteins_in_pathways('R-HSA-168256', 'reactome').member.tolist()
len(all_immune_prot)

2248

In [24]:
# Restrict mult_sig_df to the rows whose protein appears in the Reactome immune
# list, then count and show the distinct proteins that remain.
bool_df = mult_sig_df.Proteomics.isin(all_immune_prot)
mult_sig_immune_df = mult_sig_df.loc[bool_df]
mult_immune_names = mult_sig_immune_df.Proteomics.unique()
mult_immune = len(mult_immune_names)
print('Immune proteins that are sig in mult cancers:', mult_immune)
mult_immune_names

Immune proteins that are sig in mult cancers: 73


array(['DCTN1', 'ACTR1A', 'DCTN4', 'DCTN6', 'PRKDC', 'ELMO1', 'SELL',
       'DCTN5', 'ACTR10', 'AMPD3', 'GSTO1', 'TPR', 'SHOC2', 'ACTR1B',
       'NCKAP1L', 'CAB39', 'DOCK2', 'CHUK', 'ADA2', 'SYK', 'RAB4B',
       'RASAL3', 'PRKCB', 'ADAR', 'IL16', 'CSF1R', 'RPS6KA1', 'BTK',
       'APBB1IP', 'ITGAL', 'PLCG2', 'PIK3CD', 'INPP5D', 'RANBP2',
       'NUP155', 'SEC24C', 'IFIT2', 'LILRB1', 'ITGB2', 'SH3KBP1',
       'PIK3R4', 'PPP3CB', 'PECAM1', 'MYO10', 'FYB1', 'NUP85', 'KLC2',
       'LCP1', 'ARHGAP45', 'PAK1', 'WAS', 'BLNK', 'KPNA2', 'RAE1',
       'TALDO1', 'CCR1', 'KLC4', 'PTPRC', 'CYBB', 'WIPF1', 'DHX9',
       'NFKB2', 'CD53', 'CD4', 'PSTPIP1', 'NUP133', 'DHX36', 'NUP210',
       'TRIM2', 'RAC2', 'PIK3CA', 'ZAP70', 'ZBP1'], dtype=object)

In [29]:
# Calculate percentage: distinct immune proteins over all distinct proteins in
# mult_sig_df, rounded to the nearest whole percent.
mult_sig_total = len(mult_sig_df.Proteomics.unique())
mult_immune_pct = round(mult_immune / mult_sig_total * 100)
print(
    'Percentage of proteins sig in mult cancers that are immune proteins:\n',
    mult_immune, '/', mult_sig_total, '* 100 = ', mult_immune_pct,
)

Percentage of proteins sig in mult cancers that are immune proteins:
 73 / 348 * 100 =  21
