# Step 2 Cis effect 

This notebook plots the cis effect of EGFR copy number variation (CNV) on EGFR protein abundance (proteomics) for each cancer type.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
#sys.path.append('C:\\Users\\brittany henderson\\GitHub\\GBM_for_CPTAC\\')
#import cis_functions as f

import cptac
import cptac.utils as u
import plot_utils as p

  import pandas.util.testing as tm


In [2]:
def myTrunc(theNumber, theDigits):
    """Truncate ``theNumber`` toward zero, keeping ``theDigits`` decimal places.

    The value is scaled up by ``10 ** theDigits``, cut to an integer with
    ``int()`` (which discards the fractional part instead of rounding), and
    then scaled back down.
    """
    scale = 10 ** theDigits
    whole_part = int(theNumber * scale)
    return whole_part / scale

In [3]:
def plot_pearson(df1,x_axis, y_axis, hue = "none", title = "", ra_stats = False, show_plot = True, save_file_name = "file_name", x_label = None, y_label = None):
    """Scatter-plot two columns of ``df1`` with a fitted regression line.

    Rows with a missing value in any plotted column are dropped first, and the
    Pearson correlation of the remaining points is computed with
    ``scipy.stats.pearsonr``.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Table holding the columns to plot.
    x_axis, y_axis : str
        Names of the columns used for the x and y values.
    hue : str or None
        Column used to colour the points. The string ``"none"`` (the default)
        or ``None`` disables colouring.
    title : str
        Plot title.
    ra_stats : bool
        When True, the truncated correlation and p-value are written on the
        plot.
    show_plot : bool
        When True, the figure is displayed and then closed. When False the
        figure is left open.
    save_file_name : str
        Output path without extension; ``".png"`` is appended. The default
        sentinel ``"file_name"`` means "do not save". Missing parent
        directories are created.
    x_label, y_label : str or None
        Axis label overrides. When omitted, the plot without hue keeps the
        historical ``'EGFR CNV'`` / ``'EGFR Proteomics'`` labels and the plot
        with hue keeps whatever labels seaborn assigns.

    Returns
    -------
    None
    """
    import os  # local import: only needed to create the output directory

    use_hue = hue is not None and hue != "none"

    # Keep only the plotted columns and drop incomplete rows.
    plot_columns = [x_axis, y_axis, hue] if use_hue else [x_axis, y_axis]
    df1_subset = df1[plot_columns].dropna(axis=0, how="any")

    # [[col]].values is 2-D; [:, 0] flattens it to the 1-D vector pearsonr expects.
    x1 = df1_subset[[x_axis]].values[:, 0]
    y1 = df1_subset[[y_axis]].values[:, 0]
    corr, pval = scipy.stats.pearsonr(x1, y1)

    sns.set(style="darkgrid")
    if use_hue:
        # NOTE(review): this changes the global matplotlib default; lmplot appears
        # to size its own figure, so this likely only affects figures created
        # later - confirm before removing.
        plt.rcParams["figure.figsize"] = (30,22)
        graph = sns.lmplot(x= x_axis, y= y_axis, data=df1_subset, hue= hue, fit_reg=False)
        # A single regression line over all points, drawn on the lmplot axes.
        sns.regplot(x=x1, y=y1, data=df1_subset, scatter = False)
        graph.set(title = title)
        if x_label is not None:
            plt.xlabel(x_label)
        if y_label is not None:
            plt.ylabel(y_label)
    else:
        sns.lmplot(x= x_axis, y= y_axis, data=df1_subset, fit_reg=False)
        sns.regplot(x=x1, y=y1, data=df1_subset, scatter = False)
        plt.title(label = title, fontsize = 30)
        plt.xlabel('EGFR CNV' if x_label is None else x_label, fontsize=20)
        plt.ylabel('EGFR Proteomics' if y_label is None else y_label, fontsize=20)
        plt.xticks(fontsize = 17)
        plt.yticks(fontsize = 17)

    if ra_stats:
        pval = myTrunc(pval,31)
        corr = myTrunc(corr,3)
        # NOTE(review): the position is given in data coordinates, so the text may
        # fall outside the visible range for other data sets - confirm placement.
        plt.text(-0.2,3.8, "Correlation: %s\nPvalue: %s"%(corr,pval))

    if save_file_name != "file_name":
        # savefig does not create directories, so make sure the target exists.
        out_dir = os.path.dirname(save_file_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.tight_layout()
        plt.subplots_adjust(top=0.88)
        plt.savefig(save_file_name+'.png', dpi = 300)
    if show_plot:
        plt.show()
        plt.clf()
        plt.close()

In [None]:
#Load cptac data sets
# One dataset object per cancer type; the per-cancer cells below call
# join_omics_to_omics() and get_clinical() on these objects.
brain = cptac.Gbm()  # glioblastoma
kidney = cptac.Ccrcc()  # renal
Ovar = cptac.Ovarian()  # ovarian
colon = cptac.Colon()  # colon
brca = cptac.Brca()  # breast
luad = cptac.Luad()  # lung (LUAD)
Hnscc = cptac.Hnscc()  # head and neck
Lscc = cptac.Lscc()  # lung (LSCC)

Checking that ccrcc index is up-to-date...



Loading brca v3.1.1..                       

# Step 1 Create Data Frame

For each cancer type, create a dataframe that combines proteomics and CNV data. Then merge it with the clinical dataframe and keep only the tumor samples.

# Step 2 Call Plotting Function

# GBM

In [None]:
# GBM: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = brain.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
mutation = brain.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]

In [None]:
# EGFR CNV vs. EGFR proteomics for the GBM tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Glioblastoma",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Gbm",
)

# Kidney 

In [None]:
# Kidney: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = kidney.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
# The joined columns are a multi-index; drop level 1 so plain column names remain.
prot_CNV.columns = prot_CNV.columns.droplevel(1)
mutation = kidney.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the kidney tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Renal",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Kidney",
)

# Ovarian 

In [None]:
# Ovarian: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = Ovar.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
# The joined columns are a multi-index; drop level 1 so plain column names remain.
prot_CNV.columns = prot_CNV.columns.droplevel(1)
mutation = Ovar.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the ovarian tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Ovarian",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Ovarian",
)

# Colon

In [None]:
# Colon: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = colon.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
mutation = colon.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the colon tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Colon",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Colon",
)


# Brca 

In [None]:
# Breast: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = brca.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
# The joined columns are a multi-index; drop level 1 so plain column names remain.
prot_CNV.columns = prot_CNV.columns.droplevel(1)
mutation = brca.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the breast tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Breast",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Brca",
)


# LUAD

In [None]:
# LUAD: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = luad.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
# The joined columns are a multi-index; drop level 1 so plain column names remain.
prot_CNV.columns = prot_CNV.columns.droplevel(1)
mutation = luad.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the LUAD tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue='none',
    title="Lung (LUAD)",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Luad",
)


# HNSCC

In [None]:
# HNSCC: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = Hnscc.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
mutation = Hnscc.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the HNSCC tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Head and Neck",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Hnscc",
)


# LSCC

In [None]:
# LSCC: join proteomics with CNV, attach the clinical table, keep tumor samples only.
prot_CNV = Lscc.join_omics_to_omics(df1_name="proteomics", df2_name="CNV")
# The joined columns are a multi-index; drop level 1 so plain column names remain.
prot_CNV.columns = prot_CNV.columns.droplevel(1)
mutation = Lscc.get_clinical()  # clinical table, despite the variable name
mergedDf = pd.merge(prot_CNV, mutation, on="Patient_ID")
tumor_rows = mergedDf['Sample_Tumor_Normal'] == "Tumor"
prot_CNV_Tumor = mergedDf.loc[tumor_rows]


In [None]:
# EGFR CNV vs. EGFR proteomics for the LSCC tumor samples; saved as a PNG.
plot_pearson(
    prot_CNV_Tumor,
    x_axis="EGFR_CNV",
    y_axis="EGFR_proteomics",
    hue="none",
    title="Lung (LSCC)",
    ra_stats=False,
    show_plot=True,
    save_file_name="png_files/Step2Cis_Lscc",
)
