# Pan Cancer CD274 (PDL1) 

This notebook compares PDL1 proteomics against genes in EGFR pathway

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
import statsmodels.stats.multitest
#sys.path.append('C:\\Users\\brittany henderson\\GitHub\\GBM_for_CPTAC\\')
#import cis_functions as f

import cptac
import cptac.utils as u
import plot_utils as p

  import pandas.util.testing as tm


In [2]:
def get_interacting_columns(df):
    prot_genes_list = df.columns.values.tolist()
    list_trans_genes = []
    for gene in trans_genes:
        if gene + "_proteomics" in prot_genes_list: 
            list_trans_genes.append(gene + "_proteomics")
    return list_trans_genes


In [3]:
def prep_df(cancer):
    
    df1 = cancer.join_omics_to_mutations(omics_df_name="proteomics", mutations_genes= "EGFR")
    df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]
    return df1_tumor
    
 

# GBM

In [4]:
#load GBM data 

brain = cptac.Gbm()


                                        



In [5]:
trans_genes = ["KRAS","NRAS","AKT1","AKT2","AKT3","MTOR","MAP3K1",'MAP3K2',"MAPK1","MAPK3","STAT1","STAT3","VAV1","BRAF","PIK3CA","JAK2","CD274","RAC1","EGFR","CD8A","CD4","CD3E","CD3G","GRB2","PLG1","SOS1","AIF1","CCL2","CD14","CD163","CSF1R","FCG1A","HAVCR2","HLA-DRA","HLA-DRB1","IGF1","ZAP70","BTK","CD5","LCK","BLK","BLNK","PTPN6","PLCG2","LCK","LYN","SYK"]

In [6]:
gbm = prep_df(brain)
columns = get_interacting_columns(gbm)



In [7]:

df2= p.wrap_pearson_corr(gbm,"CD274_proteomics",comparison_columns= columns,correction_method='fdr_bh', return_all = False)
df2

Unnamed: 0,Comparison,Correlation,P_value
6,CD274_proteomics,1.0,0.0
19,LYN_proteomics,0.580608,2.987873e-10
18,PLCG2_proteomics,0.52612,2.23626e-08
10,AIF1_proteomics,0.519982,3.471178e-08
3,VAV1_proteomics,0.519056,3.706348e-08
11,CD14_proteomics,0.50949,7.215321e-08
15,BTK_proteomics,0.504094,1.041317e-07
17,PTPN6_proteomics,0.492678,2.216784e-07
5,JAK2_proteomics,0.486955,3.204668e-07
20,SYK_proteomics,0.484445,3.759062e-07


In [8]:
df2['Cancer Type']='GBM'
df2.to_csv("csv_files/GBM_pdl1_trans_FDR.csv")

# Kidney 

In [9]:
kidney = cptac.Ccrcc()


                                          

In [10]:
#not enough data for CD274

# Lscc

In [11]:
lscc = cptac.Lscc()

                                         



In [12]:
lscc_df = prep_df(lscc)

lscc_df.columns = lscc_df.columns.droplevel(1)
columns = get_interacting_columns(lscc_df)

df2= p.wrap_pearson_corr(lscc_df,"CD274_proteomics",comparison_columns= columns,correction_method='fdr_bh', return_all = False)
df2




Unnamed: 0,Comparison,Correlation,P_value
4,CD274_proteomics,1.0,0.0
1,STAT1_proteomics,0.632654,1.652961e-12
7,CD4_proteomics,0.411705,1.28086e-05
13,CD163_proteomics,0.408553,1.513737e-05
20,LCK_proteomics,0.408383,1.527337e-05
22,LCK_proteomics,0.408383,1.527337e-05
6,CD8A_proteomics,0.406332,1.701109e-05
9,CD3G_proteomics,0.446505,1.841631e-05
3,JAK2_proteomics,0.394897,3.062179e-05
14,HAVCR2_proteomics,0.38991,3.930727e-05


In [13]:
df2['Cancer Type']='LSCC'

df2.to_csv("csv_files/Lscc_pdl1_trans_FDR.csv")

# Ovarian 

In [14]:
#no data for Ovarian 

# Colon

In [15]:
#colon doesn't have cd274 data

# Brca 

In [16]:
#NoCD274 data

# LUAD

In [17]:
luad = cptac.Luad()

                                         



In [18]:
luad_df = prep_df(luad)
luad_df.columns = luad_df.columns.droplevel(1)

columns = get_interacting_columns(luad_df)
df2= p.wrap_pearson_corr(luad_df,"CD274_proteomics",comparison_columns= columns,correction_method='fdr_bh', return_all = False)
df2



Unnamed: 0,Comparison,Correlation,P_value
8,CD274_proteomics,1.0,0.0
4,STAT1_proteomics,0.749028,1.698691e-18
12,GRB2_proteomics,0.446361,5.159203e-06
6,VAV1_proteomics,0.442933,6.211477e-06
23,LYN_proteomics,0.43843,2.162957e-05
3,MAPK3_proteomics,-0.395006,6.811141e-05
11,CD3E_proteomics,0.391655,7.946599e-05
15,CD163_proteomics,0.406302,8.572757e-05
7,JAK2_proteomics,0.38554,0.0001048407
9,CD8A_proteomics,0.37143,0.0001946995


In [19]:
df2['Cancer Type']='Luad'

df2.to_csv("csv_files/Luad_pdl1_trans_FDR.csv")

# HNSCC

In [20]:
Hnscc = cptac.Hnscc()

                                          



In [21]:
hnscc_df = prep_df(Hnscc)
columns = get_interacting_columns(hnscc_df)
df2= p.wrap_pearson_corr(hnscc_df,"CD274_proteomics",comparison_columns= columns,correction_method='fdr_bh', return_all = False)
df2



Unnamed: 0,Comparison,Correlation,P_value
4,CD274_proteomics,1.0,0.0
1,STAT1_proteomics,0.514903,5.412521e-07
14,HLA-DRB1_proteomics,0.472699,5.620201e-06
3,JAK2_proteomics,0.388572,0.0004019528
17,LCK_proteomics,0.361633,0.0007250789
19,LCK_proteomics,0.361633,0.0007250789
10,AIF1_proteomics,0.357437,0.000844236
2,VAV1_proteomics,0.351611,0.001039255
11,CD14_proteomics,0.346914,0.001225414
15,ZAP70_proteomics,0.34214,0.00144503


In [22]:
df2['Cancer Type']='Hnscc'

df2.to_csv("csv_files/Hnscc_pdl1_trans_FDR.csv")