# Combined Proteomic Comparison 


## Step 1: Library Imports

Run this cell to import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 

import cptac
import cptac.utils as u

In [2]:
def format_pval_annotation(pval_symbol, x1, x2, line_start = .05, line_height=.05):
    # for manual adjustment to pval annotations
    
    y, h = line_start, line_height
    plt.plot([x1, x1, x2, x2], #draw horizontal line
             [y, y+h, y+h, y], #vertical line
             lw=1.5, color= '.3')
    plt.text((x1+x2)*.5, # half between x coord
             y+h, pval_symbol, horizontalalignment='center', verticalalignment='bottom', color = "black")

# used in pval_annotation
def get_pval_symbol(pval_df):
    if pval_df.iloc[0:,2].bool() == True:
        pval_symbol = '*'
    else:
        pval_symbol = 'ns'
    return pval_symbol

def pval_annotation(pval_df, plotted_df):
    # pval symbol
    num_pvals = len(pval_df)
    omics_col = plotted_df.columns[0]
    

    # annotation for Mutated and Wiltype
    if num_pvals == 1:
        mut_pval = pval_df.loc[pval_df.index == "Mutated/Wildtype"]
        mut_pval_symbol = get_pval_symbol(pval_df)
        
        x1, x2 = 0, 1   # columns (first column: 0, see plt.xticks())
        y, h = plotted_df[omics_col].max() + .05, .05     
        plt.plot([x1, x1, x2, x2], #draw horizontal line
                 [y, y+h, y+h, y], #vertical line
                 lw=1.5, color= '.3')
        plt.text((x1+x2)*.5, # half between x coord
                 y+h, mut_pval_symbol, horizontalalignment='center', verticalalignment='bottom', color = "black")
    
    # annotation for Missense, Truncation and Wildtype
    elif num_pvals == 2:
        # Missense
        miss_pval = pval_df.loc[pval_df.index == "Missense/Wildtype"]
        miss_pval_symbol = get_pval_symbol(miss_pval)
    
        x1, x2 = 0, 1   # columns (first column: 0, see plt.xticks())
        y, h = plotted_df[omics_col].max() + .05, .05     
        plt.plot([x1, x1, x2, x2], #draw horizontal line
                 [y, y+h, y+h, y], #vertical line
                 lw=1.5, color= '.3')
        plt.text((x1+x2)*.5, # half between x coord
                 y+h, miss_pval_symbol, horizontalalignment='center', verticalalignment='bottom', color = "black")
        
        # Truncation 
        trunc_pval = pval_df.loc[pval_df.index == "Truncation/Wildtype"]
        trunc_pval_symbol = get_pval_symbol(trunc_pval)
        
        x3, x4 = 0, 2   # columns (first column: 0, see plt.xticks())
        y2, h2 = plotted_df[omics_col].max() + .15, .05     
        plt.plot([x3, x3, x4, x4], #draw horizontal line
                 [y2, y2+h2, y2+h2, y2], #vertical line
                 lw=1.5, color= '.3')
        plt.text((x3+x4)*.5, # half between x coord
                 y2+h2, trunc_pval_symbol, horizontalalignment='center', verticalalignment='bottom', color = "black")


## Step 2: Find the mutation frequency of the gene of interest

In [3]:
brain = cptac.Gbm()
desired_cutoff = 0.05
gene = 'RB1'

                                    

## Step 3: cis comparisons for omics 

Determine if the DNA mutation has an effect on the omics measurement. In order to do this, we have a few steps in code. These steps are found in the format_cis_comparison_data function.
1. get a table with both the omics and mutation data for tumors
2. get a binary column from the mutation data to separate our samples
3. format data frame to be used in the T-test
4. send data to the T-test.

The format_cis_comparison_data does the first 3 steps (for dataframes with multi-indexes).

# Proteomics RB1 cis comparison

Proteomics: Mutated and Wildtype rb1

In [4]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "RB1") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
RB1_mut_status = tumors.drop(columns_to_drop, axis = 1)
RB1_mut_status



Name,RB1_proteomics,RB1_binary_mutations,TP53_binary_mutations
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S001,-1.799180,Mutated,Mutated
S002,-1.071307,Mutated,Mutated
S003,0.154265,Wildtype,Wildtype
S004,-0.884636,Mutated,Mutated
S005,0.041888,Wildtype,Wildtype
...,...,...,...
S100,-0.210103,Wildtype,Wildtype
S101,1.331462,Wildtype,Wildtype
S102,-0.003929,Wildtype,Wildtype
S104,-0.008174,Wildtype,Wildtype


# CDK2 

In [5]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "CDK2") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
CDK2_mut_status = tumors.drop(columns_to_drop, axis = 1)
CDK2_mut_status



Name,CDK2_proteomics,RB1_binary_mutations,TP53_binary_mutations
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S001,0.507714,Mutated,Mutated
S002,0.764774,Mutated,Mutated
S003,0.415454,Wildtype,Wildtype
S004,0.157479,Mutated,Mutated
S005,-0.423898,Wildtype,Wildtype
...,...,...,...
S100,0.386990,Wildtype,Wildtype
S101,0.678933,Wildtype,Wildtype
S102,-0.089902,Wildtype,Wildtype
S104,-0.181600,Wildtype,Wildtype


# CDK6

In [6]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "CDK6") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
CDK6_mut_status = tumors.drop(columns_to_drop, axis = 1)
CDK6_mut_status



Name,CDK6_proteomics,RB1_binary_mutations,TP53_binary_mutations
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S001,-1.011444,Mutated,Mutated
S002,-0.907507,Mutated,Mutated
S003,0.080839,Wildtype,Wildtype
S004,-1.560394,Mutated,Mutated
S005,-0.964315,Wildtype,Wildtype
...,...,...,...
S100,0.744616,Wildtype,Wildtype
S101,1.802314,Wildtype,Wildtype
S102,0.354357,Wildtype,Wildtype
S104,0.531084,Wildtype,Wildtype


# CDKN2A

In [7]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "CDKN2A") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
CDKN2A_mut_status = tumors.drop(columns_to_drop, axis = 1)
CDKN2A_mut_status



Name,CDKN2A_proteomics,RB1_binary_mutations,TP53_binary_mutations
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S001,1.249326,Mutated,Mutated
S002,1.042734,Mutated,Mutated
S003,0.046073,Wildtype,Wildtype
S004,1.275257,Mutated,Mutated
S005,1.482385,Wildtype,Wildtype
...,...,...,...
S100,0.444150,Wildtype,Wildtype
S101,-0.515793,Wildtype,Wildtype
S102,-0.486051,Wildtype,Wildtype
S104,-0.248658,Wildtype,Wildtype


# TFDP1

In [8]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "TFDP1") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
TFDP1_mut_status = tumors.drop(columns_to_drop, axis = 1)
#print("rb1 mutated: ",len(MORF_mut_status.loc[MORF_mut_status['RB1_binary_mutations'] == 'Mutated']))
#print("tp53 mutated: ",len(MORF_mut_status.loc[MORF_mut_status['TP53_binary_mutations'] == 'Mutated']))



# MORF4L2

In [9]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "MORF4L2") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
MORF4L2_mut_status = tumors.drop(columns_to_drop, axis = 1)
print("rb1 mutated: ",len(MORF4L2_mut_status.loc[MORF4L2_mut_status['RB1_binary_mutations'] == 'Mutated']))
print("tp53 mutated: ",len(MORF4L2_mut_status.loc[MORF4L2_mut_status['TP53_binary_mutations'] == 'Mutated']))



rb1 mutated:  10
tp53 mutated:  32





# CCND2

In [10]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "CCND2") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
CCND2_mut_status = tumors.drop(columns_to_drop, axis = 1)
#print("rb1 mutated: ",len(mut_status.loc[mut_status['RB1_binary_mutations'] == 'Mutated']))
#print("tp53 mutated: ",len(mut_status.loc[mut_status['TP53_binary_mutations'] == 'Mutated']))



# TP53

In [11]:
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = ['RB1','TP53'], omics_df_name = 'proteomics', omics_genes = "TP53") # single col level 

# Step 2 - Create binary column 
prot_and_mutations['RB1_binary_mutations'] = np.where(
            prot_and_mutations['RB1_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

# Step 2 - Create binary column 
prot_and_mutations['TP53_binary_mutations'] = np.where(
            prot_and_mutations['TP53_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')
prot_and_mutations

# Step 3 - Format the dataframe correctly for the T-test(just omics and binary columns for tumors)
tumors = prot_and_mutations[prot_and_mutations.Sample_Status == 'Tumor'] #drop Normal samples
columns_to_drop = ["RB1_Mutation", "RB1_Location", "RB1_Mutation_Status",
                   "TP53_Mutation", "TP53_Location", "TP53_Mutation_Status","Sample_Status"]
TP53_mut_status = tumors.drop(columns_to_drop, axis = 1)



In [12]:
for ind, row in TP53_mut_status.iterrows():
    if row['RB1_binary_mutations'] == 'Mutated' and row['TP53_binary_mutations'] == 'Mutated':
        TP53_mut_status.loc[ind, 'Mutation_Exclusivity'] = 'Both'
        
    elif row['RB1_binary_mutations'] == 'Mutated' and row['TP53_binary_mutations'] != 'Mutated':
        TP53_mut_status.loc[ind, 'Mutation_Exclusivity'] = 'RB1_Only'
        
    elif row['RB1_binary_mutations'] != 'Mutated' and row['TP53_binary_mutations'] == 'Mutated':
        TP53_mut_status.loc[ind, 'Mutation_Exclusivity'] = 'TP53_Only'
        
    else:
        TP53_mut_status.loc[ind, 'Mutation_Exclusivity'] = 'Neither'
TP53_mut_status

Name,TP53_proteomics,RB1_binary_mutations,TP53_binary_mutations,Mutation_Exclusivity
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
S001,0.044573,Mutated,Mutated,Both
S002,0.117856,Mutated,Mutated,Both
S003,-0.120887,Wildtype,Wildtype,Neither
S004,1.832557,Mutated,Mutated,Both
S005,-0.288708,Wildtype,Wildtype,Neither
...,...,...,...,...
S100,-0.343235,Wildtype,Wildtype,Neither
S101,,Wildtype,Wildtype,Neither
S102,,Wildtype,Wildtype,Neither
S104,-1.062906,Wildtype,Wildtype,Neither


In [13]:
# order neither, tp53 only is like wt,  rb1 only and both like mutated 

# Visualize 

In [14]:
RB1_all = pd.concat([CCND2_mut_status, TP53_mut_status])
#([RB1_mut_status, CDK2_mut_status, CDK6_mut_status, CDKN2A_mut_status, TFDP1_mut_status, MORF4L2_mut_status,TP53_mut_status])
#RB1_all = pd.merge(RB1_all, TP53_prot_miss_trunc_wildtype, on = "Sample_ID")
RB1_alldf = pd.melt(RB1_all, id_vars = ['Mutation_Exclusivity'], value_name = "omics")
RB1_alldf

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,Mutation_Exclusivity,variable,omics
0,,CCND2_proteomics,0.0207138
1,,CCND2_proteomics,0.0415693
2,,CCND2_proteomics,0.42506
3,,CCND2_proteomics,0.0578722
4,,CCND2_proteomics,1.15014
...,...,...,...
787,Neither,TP53_proteomics,-0.343235
788,Neither,TP53_proteomics,
789,Neither,TP53_proteomics,
790,Neither,TP53_proteomics,-1.06291


merge TP53 mutation type to label graph 

In [15]:
# Visualize
plt.rcParams['figure.figsize']=(14,8) #size of plot
boxplot = sns.boxplot(x='Mutation_Exclusivity', y="omics", 
                        data = RB1_alldf, showfliers = False)    
#boxplot.set_title("GBM RB1 Mutation Type effect on Proteomics Abundance")
#boxplot = sns.stripplot(x='variable', y="omics", hue = 'Mutation_Exclusivity', data = RB1_alldf, jitter = True, 
 #                          color = ".3", hue_order = ['Neither', 'RB1_Only', 'TP53_Only' 'Both'], dodge = True)

boxplot.set(xlabel = "interacting genes", ylabel = 'omics')

#boxplot.set(xlabel = "interacting genes", ylabel = 'value')

plt.show()
plt.clf()
plt.close()


ValueError: Neither the `x` nor `y` variable appears to be numeric.

In [None]:
# Visualize
plt.rcParams['figure.figsize']=(14,8) #size of plot
boxplot = sns.boxplot(x='Gene', y="value", hue = 'binary_mutations', 
                         hue_order = ['Wildtype', 'Truncation'], data = RB1_alldf, showfliers = False)    
boxplot.set_title("GBM RB1 Mutation Type effect on Proteomics Abundance")
boxplot = sns.stripplot(x='Gene', y="value", hue = 'binary_mutations', data = RB1_alldf, jitter = True, 
                           color = ".3", hue_order = ['Wildtype', 'Truncation'], dodge = True)
boxplot.set(xlabel = "interacting genes", ylabel = 'value')

plt.show()

plt.clf()
plt.close()

In [None]:
# Create Dataframe 

RB1_cis_mut = RB1_prot_miss_trunc_wildtype.assign(Gene= "RB1")
CDK2_trans_mut = CDK2_prot_miss_trunc_wildtype.assign(Gene = "CDK2")
CDK6_trans_mut = CDK6_prot_miss_trunc_wildtype.assign(Gene = "CDK6")
CDKN2A_trans_mut = CDKN2A_prot_miss_trunc_wildtype.assign(Gene = "CDKN2A")
TFDP1_trans_mut = TFDP1_prot_miss_trunc_wildtype.assign(Gene = "TFDP1")
CCND2_trans_mut = CCND2_prot_miss_trunc_wildtype.assign(Gene = "CCND2")
TP53_trans_mut = TP53_prot_miss_trunc_wildtype.assign(Gene = "TP53")

RB1_all = pd.concat([RB1_cis_mut, CDK2_trans_mut, CDK6_trans_mut, CDKN2A_trans_mut,CCND2_trans_mut, TFDP1_trans_mut])
RB1_alldf = pd.melt(RB1_all, id_vars = ["Gene", 'binary_mutations'])
RB1_alldf

In [None]:
# Visualize
plt.rcParams['figure.figsize']=(14,8) #size of plot
boxplot = sns.boxplot(x='Gene', y="value", hue = 'binary_mutations', 
                         hue_order = ['Wildtype', 'Truncation'], data = RB1_alldf, showfliers = False)    
boxplot.set_title("GBM RB1 Mutation Type effect on Proteomics Abundance")
boxplot = sns.stripplot(x='Gene', y="value", hue = 'binary_mutations', data = RB1_alldf, jitter = True, 
                           color = ".3", hue_order = ['Wildtype', 'Truncation'], dodge = True)
boxplot.set(xlabel = "interacting genes", ylabel = 'value')

plt.show()

plt.clf()
plt.close()

# Extra checks 

looking for E2F1 in transcriptomics and proteomics 

In [None]:
#num_freq_mut_genes = len(freq_mut)
num_freq_mut_genes = 20
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest - drop 
transrcpt_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'transcriptomics', omics_genes = "E2F1")
reduced_transcript_mutations = brain.reduce_multiindex(transrcpt_mutations, levels_to_drop=1) #single col labels

# Step 2 & 3 - Get binary column and format
transcript_mut_wt = format_mutated_wt_comparison(reduced_transcript_mutations)
transcript_mut_wt

In [None]:
# Step 4 - T-test
trans_col_list = ['E2F1_transcriptomics']
trans_col = 'E2F1_transcriptomics'

print("Doing t-test comparison for mutation status")
transcript_results = wrap_ttest_return_all(transcript_mut_wt, 'binary_mutations', trans_col_list, num_freq_mut_genes)
transcript_results = transcript_results.rename(index = {0: "Mutated/Wildtype"})
transcript_results

In [None]:

# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'proteomics', omics_genes = "E2F1") # single col level


# Step 2 & 3 - Get binary column and format
prot_mut_wt = format_mutated_wt_comparison(prot_and_mutations)
prot_mut_wt.loc[prot_mut_wt ["binary_mutations"] == "Mutated"]

In [None]:
#num_freq_mut_genes = len(freq_mut) 
num_freq_mut_genes = 20
prot_col_list = ["E2F1"+'_proteomics']
prot_col = "E2F1"+"_proteomics"

# Step 4 - T-test comparing means of mutated vs wildtype effect on cis omics
print("Doing t-test comparison for mutated and wildtype")
prot_results = wrap_ttest_return_all(prot_mut_wt, 'binary_mutations', prot_col_list, num_freq_mut_genes)
prot_results = prot_results.rename(index = {0: "Mutated/Wildtype"})
prot_results

In [None]:
df = brain.get_proteomics()
E2_cols = [col for col in df.columns if "E2" in col]
print (E2_cols)

In [None]:
df["E2F3"]

looking for CCNE1

In [None]:
#num_freq_mut_genes = len(freq_mut)
num_freq_mut_genes = 20
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest - drop 
transrcpt_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'transcriptomics', omics_genes = "CCNE1")
reduced_transcript_mutations = brain.reduce_multiindex(transrcpt_mutations, levels_to_drop=1) #single col labels

# Step 2 & 3 - Get binary column and format
transcript_mut_wt = format_mutated_wt_comparison(reduced_transcript_mutations)
transcript_mut_wt

In [None]:
# Step 4 - T-test
trans_col_list = ['CCNE1_transcriptomics']
trans_col = 'CCNE1_transcriptomics'

print("Doing t-test comparison for mutation status")
transcript_results = wrap_ttest_return_all(transcript_mut_wt, 'binary_mutations', trans_col_list, num_freq_mut_genes)
transcript_results = transcript_results.rename(index = {0: "Mutated/Wildtype"})
transcript_results

In [None]:

# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'proteomics', omics_genes = "CCNE1") # single col level


# Step 2 & 3 - Get binary column and format
prot_mut_wt = format_mutated_wt_comparison(prot_and_mutations)
prot_mut_wt.loc[prot_mut_wt ["binary_mutations"] == "Mutated"]

In [None]:
#num_freq_mut_genes = len(freq_mut) 
num_freq_mut_genes = 20
prot_col_list = ["CCNE1"+'_proteomics']
prot_col = "CCNE1"+"_proteomics"

# Step 4 - T-test comparing means of mutated vs wildtype effect on cis omics
print("Doing t-test comparison for mutated and wildtype")
prot_results = wrap_ttest_return_all(prot_mut_wt, 'binary_mutations', prot_col_list, num_freq_mut_genes)
prot_results = prot_results.rename(index = {0: "Mutated/Wildtype"})
prot_results

In [None]:
df = brain.get_proteomics()
CCN_cols = [col for col in df.columns if "CCN" in col]
print (CCN_cols)


In [None]:
df.columns.get_loc("CCND1")
df.iloc[0:4,1465:1480]

could not find E2F1 or CCNE1 in proteomics, but were significant in transcriptomics. 

In [None]:
#num_freq_mut_genes = len(freq_mut)
num_freq_mut_genes = 20
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest - drop 
transrcpt_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'transcriptomics', omics_genes = "CDK2")
reduced_transcript_mutations = brain.reduce_multiindex(transrcpt_mutations, levels_to_drop=1) #single col labels

# Step 2 & 3 - Get binary column and format
transcript_mut_wt = format_mutated_wt_comparison(reduced_transcript_mutations)
transcript_mut_wt

In [None]:
#num_freq_mut_genes = len(freq_mut) 
num_freq_mut_genes = 20
prot_col_list = ["CDK4"+'_proteomics']
prot_col = "CDK4"+"_proteomics"

# Step 4 - T-test comparing means of mutated vs wildtype effect on cis omics
print("Doing t-test comparison for mutated and wildtype")
prot_results = wrap_ttest_return_all(CDK2_prot_mut_wt, 'binary_mutations', prot_col_list, num_freq_mut_genes)
prot_results = prot_results.rename(index = {0: "Mutated/Wildtype"})
prot_results

# BIRC5

In [None]:
#num_freq_mut_genes = len(freq_mut)
num_freq_mut_genes = 20
# Step 1 - Create dataframe in order to do comparisons with wrap_ttest - drop 
transrcpt_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'transcriptomics', omics_genes = "BIRC5")
reduced_transcript_mutations = brain.reduce_multiindex(transrcpt_mutations, levels_to_drop=1) #single col labels

# Step 2 & 3 - Get binary column and format
transcript_mut_wt = format_mutated_wt_comparison(reduced_transcript_mutations)
transcript_mut_wt

In [None]:
# Step 4 - T-test
trans_col_list = ['BIRC5_transcriptomics']
trans_col = 'BIRC5_transcriptomics'

print("Doing t-test comparison for mutation status")
transcript_results = wrap_ttest_return_all(transcript_mut_wt, 'binary_mutations', trans_col_list, num_freq_mut_genes)
transcript_results = transcript_results.rename(index = {0: "Mutated/Wildtype"})
transcript_results

In [None]:

# Step 1 - Create dataframe in order to do comparisons with wrap_ttest  
prot_and_mutations = brain.join_omics_to_mutations(
    mutations_genes = gene, omics_df_name = 'proteomics', omics_genes = "BIRC5") # single col level


# Step 2 & 3 - Get binary column and format
prot_mut_wt = format_mutated_wt_comparison(prot_and_mutations)
prot_mut_wt.loc[prot_mut_wt ["binary_mutations"] == "Mutated"]