# PDGFR Circle Heatmap

This notebooks takes the genes that are a hit from the Brca NCI-Nature_2016 (PDGFR-beta pathway) and maps them on a large circle heat map. 

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
import statsmodels.stats.multitest

import gseapy as gp
from gseapy.plot import barplot, dotplot

import cptac
import cptac.utils as u
import plot_utils as p 

  import pandas.util.testing as tm


Load df with all of the genes that are FDR significant. Then get list of just the gene names and use them to run a GSEA. 

In [9]:
FDR_sig = pd.read_csv("Pval_corr_table_Fig_3_return_sig.csv")
FDR_sig= FDR_sig.drop(['Unnamed: 0'], axis=1)
FDR_sig = FDR_sig.set_index("Comparison")
FDR_sig = FDR_sig[1:] #Drop EGFR 
FDR_sig

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_Lscc,P_value_Lscc
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
PHLDA1_proteomics,0.816848,3.507071e-21,,,,,0.364797,0.002164,,,0.664271,8.888640e-12,0.713420,2.644826e-14
GRB2_proteomics,-0.610889,6.729990e-08,,,,,,,,,-0.532341,3.320092e-06,,
CDH4_proteomics,0.559180,3.420388e-06,,,,,,,,,,,,
SOCS2_proteomics,0.562720,3.420388e-06,,,,,,,,,,,,
PLA2G15_proteomics,-0.556624,3.420388e-06,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LSR_proteomics,,,,,,,,,,,,,0.356431,8.801761e-03
APEX1_proteomics,,,,,,,,,,,,,-0.353686,9.741073e-03
RNF126_proteomics,,,,,,,,,,,,,0.353190,9.914599e-03
BTBD11_proteomics,,,,,,,,,,,,,0.376929,9.991267e-03


In [10]:
def HasPosNeg(row):
    hasPos = False
    hasNeg= False

    for item in row:
        if pd.isnull(item):
            continue
        if item < 0:
            hasNeg = True
        if item > 0:
            hasPos = True
            
    if hasPos & hasNeg:
        return True
    return False

In [11]:
col = ["Correlation_Gbm","Correlation_kidney","Correlation_Ovar","Correlation_Brca","Correlation_Luad","Correlation_hnscc","Correlation_Lscc"]
FDR_corr = FDR_sig[col]
FDR_corr["Pos_Neg"] = FDR_corr.apply(HasPosNeg, axis = 1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [12]:
FDR_corr_True = FDR_corr[FDR_corr['Pos_Neg']==True]
FDR_corr_True

Unnamed: 0_level_0,Correlation_Gbm,Correlation_kidney,Correlation_Ovar,Correlation_Brca,Correlation_Luad,Correlation_hnscc,Correlation_Lscc,Pos_Neg
Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
DAB2_proteomics,-0.556402,,,0.326055,,,,True
SCPEP1_proteomics,-0.531494,-0.386583,,0.399187,,,,True
FAM129B_proteomics,-0.514984,,,0.344093,,,0.360092,True
CTSB_proteomics,-0.496895,,,0.341048,,,,True
PPP1R18_proteomics,-0.497202,,,0.359142,,,,True
...,...,...,...,...,...,...,...,...
MPP1_proteomics,,,,0.298670,,-0.508400,,True
METTL7A_proteomics,,,,0.298234,,-0.359487,,True
GSPT1_proteomics,,,,-0.296472,0.421196,,,True
RAB11FIP4_proteomics,,,,-0.294174,0.334533,,,True


In [13]:
df1_transposed = FDR_corr_True.T 
df1_transposed
pos_neg_prot = df1_transposed.columns.values.tolist()
pos_neg_genes = []
for gene in pos_neg_prot :
    pos_neg_genes.append((re.sub("_proteomics", "", gene)))
len(pos_neg_genes)

220

Run GSEA using reactome 2016 set

In [16]:
pos_neg_enr = gp.enrichr(gene_list = pos_neg_genes, description='Tumor_partition', gene_sets='KEGG_2016', 
                       outdir='test/enrichr_kegg')
pos_neg_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,Bacterial invasion of epithelial cells Homo sa...,8/78,2e-06,0.000669,0,0,9.324009,121.113694,ITGB1;ARHGAP10;ARPC3;CAV1;ARPC4;ARPC5;MET;VCL
1,KEGG_2016,Endocytosis Homo sapiens hsa04144,13/259,6e-06,0.000918,0,0,4.563005,54.664482,SH3GLB1;ARFGEF1;CAV1;WIPF2;ARPC4;ARPC5;IGF1R;P...
2,KEGG_2016,Fc gamma R-mediated phagocytosis Homo sapiens ...,8/93,9e-06,0.000841,0,0,7.820137,91.1977,VASP;HCK;FCGR2A;ARPC3;LIMK2;ARPC4;ARPC5;FCGR2B
3,KEGG_2016,Proteoglycans in cancer Homo sapiens hsa05205,11/203,1.6e-05,0.001179,0,0,4.926108,54.36791,ITGB1;ITGB5;CTSL;CAV1;PLAUR;SDC1;FLNB;ITPR3;ME...
4,KEGG_2016,Regulation of actin cytoskeleton Homo sapiens ...,11/214,2.6e-05,0.001539,0,0,4.672897,49.285307,ITGB1;DIAPH2;ITGB5;ARPC3;LIMK2;ARPC4;ARPC5;PFN...


In [5]:
#get just the pdgfr genes
pos_neg_df = pos_neg_enr.res2d
comp_coag = pos_neg_df.iloc[1,9]
comp_coag = comp_coag.split(';')
len(comp_coag)


48

In [6]:
#Get append version of the df with all cancer type, fdr sig trans results
df_FDR_append = pd.read_csv("../Step3.2_combining_pearson_dfs/csv_files/pancan_EGFR_pearson_sig_all_prot_append_FDR.csv")
df_FDR_append = df_FDR_append.drop(['Unnamed: 0'], axis=1)


In [7]:
#filter down df with just pdgfr genes 
pdgfr_column_names = []
for gene in pdgfr:
    gene += "_proteomics"
    pdgfr_column_names.append(gene)

df_FDR_pdgfr = df_FDR_append[df_FDR_append.Comparison.isin(pdgfr_column_names)]
df_FDR_pdgfr

Unnamed: 0,Comparison,Correlation,P_value,fdr_bh_p_val,Cancer Type
175,ARPC1B_proteomics,-0.431473,8.240555e-06,0.000500,GBM
194,HCK_proteomics,-0.426001,1.101720e-05,0.000605,GBM
206,ARPC3_proteomics,-0.423330,1.267190e-05,0.000655,GBM
299,ARPC2_proteomics,-0.406327,3.005095e-05,0.001072,GBM
416,YES1_proteomics,0.383762,8.813311e-05,0.002262,GBM
...,...,...,...,...,...
8605,SRC_proteomics,0.480798,1.389641e-07,0.000060,Lscc
8712,BAIAP2_proteomics,0.379219,5.180059e-05,0.004465,Lscc
8815,ITGAV_proteomics,0.342779,2.818073e-04,0.013723,Lscc
8916,JUN_proteomics,0.317601,8.099733e-04,0.027683,Lscc


In [9]:
#Make plot using plot utils
p.plotCircleHeatMap(df_FDR_pdgfr, "P_value","Correlation","Comparison","Cancer Type",plot_width= 1200, plot_height = 650)

NameError: name 'plotCircleHeatMap' is not defined