In [1]:
import pandas as pd
import cptac
import statistics as st
import cptac.utils as ut
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import transform_pval 
from scipy import stats
import scipy

In [2]:
# Create the five cptac dataset objects used throughout this notebook.
# The calls are evaluated left to right, i.e. in the same order as one-per-line assignments.
ccrcc, en, luad, hnscc, lscc = (
    cptac.Ccrcc(),
    cptac.Endometrial(),
    cptac.Luad(),
    cptac.Hnscc(),
    cptac.Lscc(),
)

                                                



In [3]:
# Datasets in processing order, and a lookup from each dataset object to its short label.
cancer_list = [ccrcc, en, luad, hnscc, lscc]
cancer_dict = dict(zip(cancer_list, ["ccrcc", "endometrial", "luad", "hnscc", "lscc"]))

In [4]:
# Gene whose somatic mutations decide each sample's mutation_status in get_omics_df.
mutated_gene = 'TP53'

In [5]:
def get_omics_df(cancer, gene=None):
    """Pair tumor transcriptomics with proteomics in one long-format table.

    Parameters
    ----------
    cancer : object
        Dataset object exposing ``get_transcriptomics(tissue_type=...)``,
        ``get_proteomics(tissue_type=...)`` and ``get_somatic_mutation()``.
    gene : str, optional
        Gene whose somatic mutations define ``mutation_status``. When omitted,
        the notebook-level ``mutated_gene`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per (patient, gene) match with columns ``patient_ID``, ``gene``,
        ``transcriptomics``, ``proteomics`` and ``mutation_status``. Rows with
        any missing value are dropped.
    """
    if gene is None:
        gene = mutated_gene

    def _to_long(wide_df, value_name):
        # Keep only the gene name when the columns carry a Database_ID level.
        if isinstance(wide_df.columns, pd.MultiIndex):
            wide_df = wide_df.droplevel('Database_ID', axis=1)
        # assign() works on a copy, so the frame returned by the dataset is left untouched.
        wide_df = wide_df.assign(patient_ID=wide_df.index)
        return wide_df.melt(id_vars='patient_ID', var_name='gene', value_name=value_name)

    transcriptomics_df = cancer.get_transcriptomics(tissue_type='tumor')
    proteomics_df = cancer.get_proteomics(tissue_type='tumor')
    transcriptomics_long = _to_long(transcriptomics_df, 'transcriptomics')
    proteomics_long = _to_long(proteomics_df, 'proteomics')

    mutation_df = cancer.get_somatic_mutation()
    mutation_df = mutation_df[mutation_df['Gene'] == gene]

    # NOTE(review): if dropping Database_ID leaves several columns with the same gene
    # name, this join pairs every transcript value with every protein value for that
    # patient/gene -- confirm that is intended.
    omics_df = pd.merge(transcriptomics_long, proteomics_long,
                        how='inner', on=['patient_ID', 'gene'])
    # A sample counts as mutated when its ID appears in the index of the filtered mutation table.
    omics_df['mutation_status'] = omics_df['patient_ID'].isin(mutation_df.index)
    return omics_df.dropna()

In [6]:
def get_corr_df(omics_df, min_samples=4):
    """Correlate transcriptomics with proteomics per gene, split by mutation status.

    Parameters
    ----------
    omics_df : pandas.DataFrame
        Long-format table with columns ``gene``, ``transcriptomics``,
        ``proteomics`` and ``mutation_status``.
    min_samples : int, optional
        A gene is skipped unless both the mutated and the non-mutated group
        have at least this many rows. Defaults to 4.

    Returns
    -------
    pandas.DataFrame
        One row per retained gene, in order of first appearance, with the
        Pearson r and p-value of each group, the difference
        ``mutated - non_mutated`` and the value returned by
        ``transform_pval.compare_correlations`` for that pair (presumably a
        p-value for the difference -- the helper is defined outside this
        notebook).
    """
    columns = ['gene', 'mutated_correlation', 'non_mutated_correlation',
               'non_mutated_p_vals', 'mutated_p_vals',
               'delta_correlation', 'delta_correlation_pval']
    records = []
    # A single groupby pass replaces re-scanning the whole frame for every gene;
    # sort=False keeps genes in order of first appearance.
    for gene, gene_df in omics_df.groupby('gene', sort=False):
        mut_df = gene_df[gene_df['mutation_status'].eq(True)]
        non_mut_df = gene_df[gene_df['mutation_status'].eq(False)]
        if len(mut_df) < min_samples or len(non_mut_df) < min_samples:
            continue
        mut_r, mut_p = stats.pearsonr(mut_df['transcriptomics'], mut_df['proteomics'])
        non_mut_r, non_mut_p = stats.pearsonr(non_mut_df['transcriptomics'],
                                              non_mut_df['proteomics'])
        diff_p_val = transform_pval.compare_correlations(
            mut_r, non_mut_r, len(mut_df), len(non_mut_df))
        records.append({
            'gene': gene,
            'mutated_correlation': mut_r,
            'non_mutated_correlation': non_mut_r,
            'non_mutated_p_vals': non_mut_p,
            'mutated_p_vals': mut_p,
            'delta_correlation': mut_r - non_mut_r,
            'delta_correlation_pval': diff_p_val,
        })
    # Passing columns= keeps the schema stable even when no gene qualifies.
    return pd.DataFrame(records, columns=columns)

In [7]:
# Build one long-format omics table per dataset and tag every row with its label.
omics_dfs = []
for cancer in cancer_list:
    label = cancer_dict[cancer]
    print(label)
    df = get_omics_df(cancer)
    # A scalar broadcasts down the column, same as repeating the label len(df) times.
    df['cancer'] = label
    omics_dfs.append(df)
# Stack the per-dataset tables into a single frame.
omics_df = pd.concat(omics_dfs)

ccrcc
endometrial
luad
hnscc
lscc


In [8]:
# Compute the per-gene correlation table for each dataset, then save them together.
correlation_dfs = []
for cancer_omics_df in omics_dfs:
    # Each frame in omics_dfs carries a single label, so this is a one-element list.
    cancer_labels = list(pd.unique(cancer_omics_df['cancer']))
    print(cancer_labels)
    corr_df = get_corr_df(cancer_omics_df)
    # Repeating the one-element list yields exactly one label per row.
    corr_df['cancer'] = cancer_labels * len(corr_df)
    correlation_dfs.append(corr_df)
correlation_df = pd.concat(correlation_dfs)
# Derive the file name from mutated_gene so the export never disagrees with the gene analysed.
correlation_df.to_csv(f'{mutated_gene}_trans_effects.csv')

['ccrcc']


  cond2 = cond0 & (x <= _a)


['endometrial']
['luad']
['hnscc']
['lscc']


In [2]:
# NOTE(review): this cell's execution count is out of sequence, and the saved output shows
# 'Unnamed: 0' / 'Unnamed: 0.1' columns that the cell building correlation_df does not
# create -- it looks like the frame was re-read from CSV in another session. Confirm by
# re-running after Restart Kernel -> Run All.
correlation_df

Unnamed: 0.1,Unnamed: 0,gene,mutated_correlation,non_mutated_correlation,non_mutated_p_vals,mutated_p_vals,delta_correlation,delta_correlation_pval,cancer
0,0,A1BG,0.104701,-0.092503,3.503288e-01,8.435229e-01,0.197204,0.735579,ccrcc
1,1,A1CF,0.949266,0.889170,2.059497e-36,3.795552e-03,0.060096,0.487926,ccrcc
2,2,A2M,-0.491574,-0.017783,8.578013e-01,3.220318e-01,-0.473791,0.374445,ccrcc
3,3,AAAS,-0.848880,0.162907,9.847157e-02,3.253011e-02,-1.011787,0.015614,ccrcc
4,4,AACS,0.738195,0.580735,1.031490e-10,9.384053e-02,0.157460,0.629146,ccrcc
...,...,...,...,...,...,...,...,...,...
49121,9673,ZWINT,0.794975,0.859211,1.407891e-01,2.749241e-17,-0.064236,0.838289,lscc
49122,9674,ZYG11B,0.364882,0.136414,8.268526e-01,1.509645e-04,0.228468,0.731297,lscc
49123,9675,ZYX,0.508387,0.923179,2.526303e-02,4.197810e-08,-0.414792,0.141643,lscc
49124,9676,ZZEF1,0.591263,0.874924,5.209261e-02,4.844711e-11,-0.283660,0.345211,lscc
