In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
#sys.path.append('C:\\Users\\brittany henderson\\GitHub\\GBM_for_CPTAC\\')
#import cis_functions as f

import cptac
import cptac.utils as u


In [2]:
def add_significance_col(results_df, num_comparisons):
    """Flag each comparison as significant under a Bonferroni correction.

    Adds (or overwrites) a boolean 'Significant' column on ``results_df``.
    A row is True when its 'P_Value' is at or below ``0.05 / num_comparisons``.
    The frame is modified in place and also returned.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Must contain a 'P_Value' column whose values are convertible to float.
    num_comparisons : int
        Number of hypotheses tested; divides the family-wise alpha of 0.05.

    Returns
    -------
    pandas.DataFrame
        The same ``results_df`` object, with the 'Significant' column set.

    Raises
    ------
    ZeroDivisionError
        If ``num_comparisons`` is 0.
    """
    alpha = .05
    bonferroni_cutoff = alpha / num_comparisons

    # Judge every row on its own p-value. Previously only the first p-value
    # was inspected and its verdict was copied to all rows, which mislabels
    # any result table holding more than one comparison. A NaN p-value
    # compares as False, i.e. not significant.
    p_values = results_df['P_Value'].astype(float)
    results_df['Significant'] = p_values <= bonferroni_cutoff
    return results_df

def wrap_ttest_return_all(df, label_column, comparison_columns, total_tests, alpha=.05):
    """Run a two-sample t-test per comparison column and report every p-value.

    The rows of ``df`` are split into two groups by the two unique values of
    ``label_column``. For each column in ``comparison_columns`` the groups are
    compared with ``scipy.stats.ttest_ind`` after dropping missing values.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the label column and all comparison columns.
    label_column : str
        Column that must hold exactly two unique values.
    comparison_columns : iterable of str
        Columns to test.
    total_tests : int
        Number of hypotheses used for the Bonferroni correction.
    alpha : float, default .05
        Family-wise significance level; a row is significant when its p-value
        is at or below ``alpha / total_tests``.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'Comparison', 'P_Value' and 'Significant', sorted by ascending
        p-value with NaN p-values last. ``None`` is returned (after printing a
        message) when the label column does not have exactly two unique values
        or when any error occurs while processing the frame.
    """
    try:
        # Precondition: the label column must split the rows into exactly two groups.
        label_values = df[label_column].unique()
        if len(label_values) != 2:
            print("Incorrectly Formatted Dataframe! Label column must have exactly 2 unique values.")
            return None

        # One partition per label value.
        partition1 = df.loc[df[label_column] == label_values[0]]
        partition2 = df.loc[df[label_column] == label_values[1]]

        # Bonferroni correction: shrink the acceptance threshold by the number
        # of hypotheses (e.g. phospho sites or frequently mutated genes tested).
        number_of_comparisons = total_tests
        bonferroni_cutoff = alpha / number_of_comparisons

        # p-value of every comparison, keyed by column name (a dict, so a
        # column listed twice is reported once).
        all_comparisons = {}
        for column in comparison_columns:
            _, pval = scipy.stats.ttest_ind(
                partition1[column].dropna(axis=0),
                partition2[column].dropna(axis=0),
            )
            all_comparisons[column] = pval

        # Naming the columns up front keeps an empty result well-formed.
        all_comparisons_df = pd.DataFrame(
            list(all_comparisons.items()), columns=['Comparison', 'P_Value']
        )
        all_comparisons_df['P_Value'] = all_comparisons_df['P_Value'].astype(float)

        # Stable sort, smallest p-values first. sort_values puts NaN last,
        # whereas sorting raw tuples containing NaN gives an arbitrary order.
        all_comparisons_df = (
            all_comparisons_df
            .sort_values('P_Value', kind='mergesort')
            .reset_index(drop=True)
        )

        # Per-row verdict against the caller's alpha. The earlier helper call
        # ignored alpha and applied the first row's verdict to every row.
        all_comparisons_df['Significant'] = all_comparisons_df['P_Value'] <= bonferroni_cutoff
        return all_comparisons_df

    except Exception as err:
        # Same best-effort contract as before (message, then None), but
        # KeyboardInterrupt/SystemExit now propagate and the cause is shown.
        print("Incorrectly Formatted Dataframe!")
        print("Cause:", repr(err))
        return None


In [3]:
# Analysis-wide settings.
gene = 'RB1'
desired_cutoff = 0.05

# Load the CPTAC GBM dataset object used throughout the notebook.
# NOTE(review): an earlier commented-out cptac.download(...) call here named
# dataset='ccrcc', which does not match the Gbm loader — confirm before reuse.
brain = cptac.Gbm()

                                    

In [4]:
# Show the names and dimensions of the dataframes bundled with this dataset.
brain.list_data()

Below are the dataframes contained in this dataset:
	acetylproteomics
		Dimensions: (109, 18767)
	circular_RNA
		Dimensions: (99, 3670)
	clinical
		Dimensions: (115, 28)
	CNV
		Dimensions: (98, 19907)
	experimental_design
		Dimensions: (115, 8)
	gene_fusion
		Dimensions: (2090, 8)
	lipidomics
		Dimensions: (88, 582)
	metabolomics
		Dimensions: (87, 134)
	miRNA
		Dimensions: (87, 2883)
	phosphoproteomics
		Dimensions: (109, 101266)
	proteomics
		Dimensions: (109, 11141)
	somatic_mutation
		Dimensions: (5774, 3)
	transcriptomics
		Dimensions: (108, 60483)


In [5]:
# Join the clinical metadata onto the proteomics table, expose the patient
# identifier under the name "case", and move the index into a regular column.
clin_and_prot = (
    brain
    .join_metadata_to_omics(metadata_df_name="clinical", omics_df_name="proteomics")
    .rename(columns={"Patient_ID": "case"})
    .reset_index()
)
clin_and_prot



Name,Sample_ID,case,Sample_Tumor_Normal,age,gender,height,weight,bmi,country_of_origin,race,...,ZSCAN31_proteomics,ZSWIM8_proteomics,ZW10_proteomics,ZWILCH_proteomics,ZWINT_proteomics,ZXDC_proteomics,ZYG11B_proteomics,ZYX_proteomics,ZZEF1_proteomics,ZZZ3_proteomics
0,S001,C3L-00104,Tumor,58.0,Male,188.00,115.00,32.54,United States,White,...,,-0.047437,-0.105908,-0.347076,,0.459635,0.079452,-0.784983,-0.488441,0.167990
1,S002,C3L-00365,Tumor,59.0,Female,162.00,54.00,20.61,United States,White,...,,0.161975,-0.213093,0.235571,,0.107421,0.048724,0.138403,-0.290141,0.405037
2,S003,C3L-00674,Tumor,45.0,Male,193.00,102.00,27.44,,White,...,,-0.065534,-0.306717,0.879991,,0.883564,-0.172222,0.011876,-0.131889,-0.503581
3,S004,C3L-00677,Tumor,69.0,Female,164.00,52.00,19.32,,White,...,-0.062127,-0.254535,0.463653,0.580230,0.503044,-0.604986,0.178077,-0.720059,-0.150197,-0.268715
4,S005,C3L-01040,Tumor,77.0,Female,170.00,70.00,24.22,Russia,,...,,-0.092502,0.010639,-0.465079,,-0.500083,0.112651,1.004660,-0.230304,-0.102416
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,S111,PT-RN5K,Normal,56.0,Female,162.56,49.90,18.88,,White,...,,0.298664,-0.403930,-0.482597,0.240966,,0.609314,-1.293464,0.225910,-0.013637
111,S112,PT-RU72,Normal,59.0,Female,162.56,63.05,23.86,,Asian,...,,0.155988,0.002551,-0.834434,,-0.073908,0.637394,-0.795886,0.326046,-0.217014
112,S113,PT-UTHO,Normal,68.0,Male,182.88,62.87,18.80,,White,...,,0.016857,-0.171858,-0.462233,,1.278683,0.616285,-0.734952,0.339283,-0.419138
113,S114,PT-WVLH,Normal,58.0,Male,182.88,90.72,27.12,,White,...,0.450926,0.114686,-0.245149,-1.202774,-0.709850,-0.233022,0.732273,-1.158899,0.354608,-0.468881


In [6]:
# Load the tab-separated table of per-case subtype annotations.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
subtypes = pd.read_csv(
    "/Users/Lindsey/Downloads/gbm_all_subtype_collections.2019-11-13.tsv",
    sep="\t",
)

# Keep only the case identifier and the subtype call, renamed to "TCGA_subtype".
case_subtype = (
    subtypes
    .loc[:, ["case", "rna_wang_cancer_cell_2017"]]
    .rename(columns={"rna_wang_cancer_cell_2017": "TCGA_subtype"})
)

subtypes.head()


Unnamed: 0,case,sample_type,nmf_consensus,nmf_cluster_membership,rna_wang_cancer_cell_2017,mRNA_stemness_index,dna_methyl,is_gcimp,immune,telomere,lipid,mirna,ancestry_prediction,ancestry_prediction_afr_prob,ancestry_prediction_amr_prob,ancestry_prediction_eas_prob,ancestry_prediction_eur_prob,ancestry_prediction_sas_prob,wxs_total_mutation,wgs_total_mutation
0,C3L-00104,tumor,nmf1,0.743,Proneural,0.678244,dm2,True,low,normal,,mi5,EUR,0.0,0.03,0.0,0.97,0.0,60.0,2632.0
1,C3L-00365,tumor,nmf3,0.614,Classical,0.681122,dm4,False,low,normal,TAG_enriched,mi1,EUR,0.0,0.02,0.0,0.98,0.0,57.0,7628.0
2,C3L-00674,tumor,nmf1,0.507,Mesenchymal,0.744635,dm5,False,high,normal,TAG_enriched,mi3,EUR,0.01,0.0,0.01,0.98,0.0,37.0,1233.0
3,C3L-00677,tumor,nmf1,0.536,Proneural,0.900896,dm5,False,low,long,TAG_enriched,mi5,EUR,0.02,0.11,0.0,0.85,0.02,925.0,16955.0
4,C3L-01040,tumor,nmf1,0.589,Classical,0.647288,dm5,False,low,normal,,mi1,EUR,0.0,0.03,0.0,0.97,0.0,85.0,4298.0


In [7]:
# Replace every missing value with the label 'normal', then index by case ID.
# NOTE(review): a tumor case lacking a subtype call would also become
# 'normal' here — confirm that only normal samples have missing subtypes.
case_subtype = (
    case_subtype
    .replace(np.nan, 'normal', regex=True)
    .set_index("case")
)

case_subtype


Unnamed: 0_level_0,TCGA_subtype
case,Unnamed: 1_level_1
C3L-00104,Proneural
C3L-00365,Classical
C3L-00674,Mesenchymal
C3L-00677,Proneural
C3L-01040,Classical
...,...
PT-RN5K,normal
PT-RU72,normal
PT-UTHO,normal
PT-WVLH,normal


In [8]:
# Attach the TCGA subtype to each clinical + proteomics row (matched on
# "case"), then use the case ID as the row index.
prot_subtype = (
    clin_and_prot
    .merge(case_subtype, on='case')
    .set_index("case")
)

prot_subtype

Unnamed: 0_level_0,Sample_ID,Sample_Tumor_Normal,age,gender,height,weight,bmi,country_of_origin,race,ethnicity,...,ZSWIM8_proteomics,ZW10_proteomics,ZWILCH_proteomics,ZWINT_proteomics,ZXDC_proteomics,ZYG11B_proteomics,ZYX_proteomics,ZZEF1_proteomics,ZZZ3_proteomics,TCGA_subtype
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00104,S001,Tumor,58.0,Male,188.00,115.00,32.54,United States,White,Not-Hispanic or Latino,...,-0.047437,-0.105908,-0.347076,,0.459635,0.079452,-0.784983,-0.488441,0.167990,Proneural
C3L-00365,S002,Tumor,59.0,Female,162.00,54.00,20.61,United States,White,Not-Hispanic or Latino,...,0.161975,-0.213093,0.235571,,0.107421,0.048724,0.138403,-0.290141,0.405037,Classical
C3L-00674,S003,Tumor,45.0,Male,193.00,102.00,27.44,,White,Not-Hispanic or Latino,...,-0.065534,-0.306717,0.879991,,0.883564,-0.172222,0.011876,-0.131889,-0.503581,Mesenchymal
C3L-00677,S004,Tumor,69.0,Female,164.00,52.00,19.32,,White,Not-Hispanic or Latino,...,-0.254535,0.463653,0.580230,0.503044,-0.604986,0.178077,-0.720059,-0.150197,-0.268715,Proneural
C3L-01040,S005,Tumor,77.0,Female,170.00,70.00,24.22,Russia,,,...,-0.092502,0.010639,-0.465079,,-0.500083,0.112651,1.004660,-0.230304,-0.102416,Classical
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PT-RN5K,S111,Normal,56.0,Female,162.56,49.90,18.88,,White,Not Hispanic or Latino,...,0.298664,-0.403930,-0.482597,0.240966,,0.609314,-1.293464,0.225910,-0.013637,normal
PT-RU72,S112,Normal,59.0,Female,162.56,63.05,23.86,,Asian,,...,0.155988,0.002551,-0.834434,,-0.073908,0.637394,-0.795886,0.326046,-0.217014,normal
PT-UTHO,S113,Normal,68.0,Male,182.88,62.87,18.80,,White,,...,0.016857,-0.171858,-0.462233,,1.278683,0.616285,-0.734952,0.339283,-0.419138,normal
PT-WVLH,S114,Normal,58.0,Male,182.88,90.72,27.12,,White,,...,0.114686,-0.245149,-1.202774,-0.709850,-0.233022,0.732273,-1.158899,0.354608,-0.468881,normal


# Mesenchymal vs. Normal

In [9]:
# Rows whose subtype is Mesenchymal, and rows taken from normal tissue.
Mesenchymal = prot_subtype[prot_subtype['TCGA_subtype'].eq('Mesenchymal')]
Normal = prot_subtype[prot_subtype['Sample_Tumor_Normal'].eq('Normal')]
Normal.head()

Unnamed: 0_level_0,Sample_ID,Sample_Tumor_Normal,age,gender,height,weight,bmi,country_of_origin,race,ethnicity,...,ZSWIM8_proteomics,ZW10_proteomics,ZWILCH_proteomics,ZWINT_proteomics,ZXDC_proteomics,ZYG11B_proteomics,ZYX_proteomics,ZZEF1_proteomics,ZZZ3_proteomics,TCGA_subtype
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PT-NPJ7,S106,Normal,68.0,Female,160.02,74.84,29.23,,White,,...,0.33731,0.019538,-1.375146,,0.1641,0.767921,-1.1281,0.20948,-0.205739,normal
PT-P44H,S107,Normal,43.0,Male,177.8,112.04,35.44,,White,,...,0.335445,-0.199247,-0.798595,,0.517515,0.379683,-1.087567,0.210812,-0.176034,normal
PT-Q2AG,S108,Normal,42.0,Female,177.8,102.06,32.28,,White,,...,0.252516,-0.102542,,,,0.791386,-0.667928,0.053734,-0.22987,normal
PT-QVJO,S109,Normal,64.0,Female,170.18,55.79,19.26,,White,Not Hispanic or Latino,...,0.320193,-0.098267,-1.321314,,-1.330917,0.667393,-1.172195,0.30374,-0.556137,normal
PT-R55F,S110,Normal,55.0,Male,177.8,77.11,24.39,,White,Not Hispanic or Latino,...,0.326426,-0.109841,-0.676704,,0.080553,0.56337,-0.973314,0.478914,-0.415546,normal


In [10]:
# Stack the Mesenchymal rows on top of the Normal rows. pd.concat is used
# because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# The positional slice then skips the first 28 columns without mutating in
# place, leaving the protein columns plus the trailing TCGA_subtype label.
# NOTE(review): 28 is a positional magic number (the leading non-proteomics
# columns in the frame displayed above) — re-check if the clinical table changes.
Mesench_Normal = pd.concat([Mesenchymal, Normal]).iloc[:, 28:]
Mesench_Normal

Unnamed: 0_level_0,A1BG_proteomics,A2M_proteomics,AAAS_proteomics,AACS_proteomics,AADAT_proteomics,AAED1_proteomics,AAGAB_proteomics,AAK1_proteomics,AAMDC_proteomics,AAMP_proteomics,...,ZSWIM8_proteomics,ZW10_proteomics,ZWILCH_proteomics,ZWINT_proteomics,ZXDC_proteomics,ZYG11B_proteomics,ZYX_proteomics,ZZEF1_proteomics,ZZZ3_proteomics,TCGA_subtype
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00674,0.821991,1.09647,-0.094421,-0.106304,0.084578,0.176402,-0.248151,0.014061,-0.699773,-0.638462,...,-0.065534,-0.306717,0.879991,,0.883564,-0.172222,0.011876,-0.131889,-0.503581,Mesenchymal
C3L-01043,1.094879,0.769231,-0.011039,-0.152467,-0.457411,0.679201,-0.106294,-0.428006,-0.781859,0.039258,...,-0.026241,0.118676,0.444517,,0.835224,-0.271005,0.371339,-0.16864,0.144939,Mesenchymal
C3L-01045,-0.027903,-0.735991,0.125775,-0.285444,-0.620125,,0.166882,-0.110721,-0.74902,-0.041834,...,0.033878,0.051231,0.211483,-0.2871,-0.230124,-0.047399,0.761362,-0.376203,0.476902,Mesenchymal
C3L-01046,-0.375754,-0.037553,0.239725,0.152238,-0.505107,,-0.133864,0.191524,0.030741,-0.036606,...,0.176411,0.003886,0.720046,-0.295464,-0.543543,0.118416,0.963697,0.243561,0.013539,Mesenchymal
C3L-01049,-0.025968,-0.310086,0.087263,-0.163304,,-0.00938,0.091515,-0.393903,-0.217689,-0.195168,...,-0.053609,0.060704,0.635137,,0.199433,-0.3404,0.271082,-0.122117,0.177207,Mesenchymal
C3L-01142,-0.357994,-0.792861,0.193782,-0.605081,-0.073091,0.564998,0.061905,0.344687,0.224555,-0.172949,...,0.166675,0.055937,-0.081244,-0.306202,-0.537602,-0.516477,1.197168,0.059893,0.171441,Mesenchymal
C3L-02465,-0.038084,0.092629,-0.172513,0.180359,,0.739907,-0.067451,0.062823,0.138878,-0.290271,...,0.110463,-0.131966,0.244491,,0.369088,-0.120732,0.347082,-0.185031,0.064047,Mesenchymal
C3L-02704,-0.340327,-0.208718,-0.029955,-0.051373,0.2502,,-0.082754,0.026181,0.620461,0.299186,...,0.132509,0.206757,-0.737844,-0.182427,,0.196962,0.135143,-0.04189,0.133775,Mesenchymal
C3L-02705,0.58083,0.676467,0.093745,0.531997,1.156939,-0.425597,0.006803,0.075982,-0.041381,-0.110881,...,-0.068987,0.074188,0.012807,-0.380149,-0.281222,0.18739,0.133823,-0.10305,0.387455,Mesenchymal
C3L-02955,1.054407,1.215765,-0.047301,0.104051,-0.42223,0.532915,0.092956,0.535198,0.219513,-0.139699,...,0.170221,-0.281524,-0.579073,,-0.519946,0.215868,-0.354518,0.074586,0.178658,Mesenchymal


In [11]:
 prot_col_list = list(Mesench_Normal.columns)
prot_col_list.remove('TCGA_subtype')
#prot_col_list.remove('Patient_ID')
prot_col_list


['A1BG_proteomics',
 'A2M_proteomics',
 'AAAS_proteomics',
 'AACS_proteomics',
 'AADAT_proteomics',
 'AAED1_proteomics',
 'AAGAB_proteomics',
 'AAK1_proteomics',
 'AAMDC_proteomics',
 'AAMP_proteomics',
 'AAR2_proteomics',
 'AARS_proteomics',
 'AARS2_proteomics',
 'AARSD1_proteomics',
 'AASDHPPT_proteomics',
 'AASS_proteomics',
 'AATF_proteomics',
 'AATK_proteomics',
 'ABAT_proteomics',
 'ABCA1_proteomics',
 'ABCA2_proteomics',
 'ABCA3_proteomics',
 'ABCA5_proteomics',
 'ABCA8_proteomics',
 'ABCB1_proteomics',
 'ABCB10_proteomics',
 'ABCB6_proteomics',
 'ABCB7_proteomics',
 'ABCB8_proteomics',
 'ABCB9_proteomics',
 'ABCC1_proteomics',
 'ABCC3_proteomics',
 'ABCC4_proteomics',
 'ABCD1_proteomics',
 'ABCD2_proteomics',
 'ABCD3_proteomics',
 'ABCD4_proteomics',
 'ABCE1_proteomics',
 'ABCF1_proteomics',
 'ABCF2_proteomics',
 'ABCF3_proteomics',
 'ABCG1_proteomics',
 'ABCG2_proteomics',
 'ABHD10_proteomics',
 'ABHD11_proteomics',
 'ABHD12_proteomics',
 'ABHD13_proteomics',
 'ABHD14A-ACY1_pr

In [12]:
#Mesench_Normal = Mesench_Normal[['AATF_proteomics', 'ABCA8_proteomics','TCGA_subtype']]
#prot_col_list = ['AATF_proteomics', 'ABCA8_proteomics']





In [13]:
# Run cptac's wrap_ttest on the formatted dataframe, comparing the two
# TCGA_subtype groups across every protein column.
prot_all_comparisons = u.wrap_ttest(Mesench_Normal, 'TCGA_subtype', prot_col_list)

# Bonferroni threshold: 0.05 divided by the number of columns tested.
prot_num_comparisons = len(prot_col_list)
prot_bonferroni_cutoff = .05 / prot_num_comparisons

print("Number of comparisons:", prot_num_comparisons)
print("Bonferroni cutoff = ", prot_bonferroni_cutoff)
print("Logged Bonferroni cutoff = ", np.log10(prot_bonferroni_cutoff))


  **kwargs)
  ret = ret.dtype.type(ret / rcount)


5292 significant comparisons!
Number of comparisons: 11141
Bonferroni cutoff =  4.487927475092003e-06
Logged Bonferroni cutoff =  -5.347954169894016


In [14]:
# Discard rows with missing values, then keep the comparisons whose p-value
# is at or below the Bonferroni cutoff.
prot_all_comparisons = prot_all_comparisons.dropna(axis=0)
prot_sig_comparisons = prot_all_comparisons[
    prot_all_comparisons['P_Value'].le(prot_bonferroni_cutoff)
]
print("Number of significant Proteomics comparisons: ", len(prot_sig_comparisons), '\n')

if len(prot_sig_comparisons):
    print(prot_sig_comparisons)

# Keep the result under a comparison-specific name for later cells.
prot_sig_comparisons_Mesench_Normal = prot_sig_comparisons

Number of significant Proteomics comparisons:  5292 

               Comparison       P_Value
0        PI4KA_proteomics  1.106746e-35
1         WDR7_proteomics  2.413637e-33
2       ANKS1B_proteomics  4.412780e-33
3        DMXL2_proteomics  1.011996e-32
4         MADD_proteomics  2.561057e-32
...                   ...           ...
5287  C12orf73_proteomics  4.344286e-06
5288     KDM4A_proteomics  4.350479e-06
5289      AGO2_proteomics  4.353342e-06
5290    ZNF618_proteomics  4.381527e-06
5291     CYTH4_proteomics  4.432903e-06

[5292 rows x 2 columns]


In [25]:
# Give the p-value column a comparison-specific name (MN = Mesenchymal vs
# Normal). A bare set_index("Comparison") call used to precede this line; its
# result was never assigned, so it had no effect and has been removed.
# "Comparison" stays a regular column, as the displayed frame already showed.
prot_sig_comparisons_Mesench_Normal = prot_sig_comparisons_Mesench_Normal.rename(
    columns={"P_Value": "P_Value_MN"}
)
prot_sig_comparisons_Mesench_Normal

Unnamed: 0,Comparison,P_Value_MN
0,PI4KA_proteomics,1.106746e-35
1,WDR7_proteomics,2.413637e-33
2,ANKS1B_proteomics,4.412780e-33
3,DMXL2_proteomics,1.011996e-32
4,MADD_proteomics,2.561057e-32
...,...,...
5287,C12orf73_proteomics,4.344286e-06
5288,KDM4A_proteomics,4.350479e-06
5289,AGO2_proteomics,4.353342e-06
5290,ZNF618_proteomics,4.381527e-06


# Mesenchymal vs. Proneural

In [16]:
# Rows whose subtype is Mesenchymal, and rows whose subtype is Proneural.
Mesenchymal = prot_subtype[prot_subtype['TCGA_subtype'].eq('Mesenchymal')]
Proneural = prot_subtype[prot_subtype['TCGA_subtype'].eq('Proneural')]
Proneural.head()

Unnamed: 0_level_0,Sample_ID,Sample_Tumor_Normal,age,gender,height,weight,bmi,country_of_origin,race,ethnicity,...,ZSWIM8_proteomics,ZW10_proteomics,ZWILCH_proteomics,ZWINT_proteomics,ZXDC_proteomics,ZYG11B_proteomics,ZYX_proteomics,ZZEF1_proteomics,ZZZ3_proteomics,TCGA_subtype
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00104,S001,Tumor,58.0,Male,188.0,115.0,32.54,United States,White,Not-Hispanic or Latino,...,-0.047437,-0.105908,-0.347076,,0.459635,0.079452,-0.784983,-0.488441,0.16799,Proneural
C3L-00677,S004,Tumor,69.0,Female,164.0,52.0,19.32,,White,Not-Hispanic or Latino,...,-0.254535,0.463653,0.58023,0.503044,-0.604986,0.178077,-0.720059,-0.150197,-0.268715,Proneural
C3L-01146,S013,Tumor,59.0,Male,175.0,96.0,31.35,Russia,,,...,-0.176744,0.134078,-0.143252,,-0.425889,-0.008761,0.277088,-0.296844,-0.054669,Proneural
C3L-01149,S014,Tumor,48.0,Male,173.0,91.0,30.41,Russia,,,...,-0.191377,0.402318,2.018452,,0.504515,0.732463,-0.613257,-0.115444,0.566719,Proneural
C3L-01157,S018,Tumor,66.0,Female,162.0,75.0,28.58,Russia,,,...,-0.059525,0.087678,-0.080362,0.013261,0.472166,0.33546,0.150904,-0.010514,-0.230476,Proneural


In [17]:
# Stack the Mesenchymal rows on top of the Proneural rows. pd.concat is used
# because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# The positional slice then skips the first 28 columns without mutating in
# place, leaving the protein columns plus the trailing TCGA_subtype label.
# NOTE(review): 28 is a positional magic number (the leading non-proteomics
# columns in the frame displayed above) — re-check if the clinical table changes.
Mesench_Proneural = pd.concat([Mesenchymal, Proneural]).iloc[:, 28:]
Mesench_Proneural

Unnamed: 0_level_0,A1BG_proteomics,A2M_proteomics,AAAS_proteomics,AACS_proteomics,AADAT_proteomics,AAED1_proteomics,AAGAB_proteomics,AAK1_proteomics,AAMDC_proteomics,AAMP_proteomics,...,ZSWIM8_proteomics,ZW10_proteomics,ZWILCH_proteomics,ZWINT_proteomics,ZXDC_proteomics,ZYG11B_proteomics,ZYX_proteomics,ZZEF1_proteomics,ZZZ3_proteomics,TCGA_subtype
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00674,0.821991,1.096470,-0.094421,-0.106304,0.084578,0.176402,-0.248151,0.014061,-0.699773,-0.638462,...,-0.065534,-0.306717,0.879991,,0.883564,-0.172222,0.011876,-0.131889,-0.503581,Mesenchymal
C3L-01043,1.094879,0.769231,-0.011039,-0.152467,-0.457411,0.679201,-0.106294,-0.428006,-0.781859,0.039258,...,-0.026241,0.118676,0.444517,,0.835224,-0.271005,0.371339,-0.168640,0.144939,Mesenchymal
C3L-01045,-0.027903,-0.735991,0.125775,-0.285444,-0.620125,,0.166882,-0.110721,-0.749020,-0.041834,...,0.033878,0.051231,0.211483,-0.287100,-0.230124,-0.047399,0.761362,-0.376203,0.476902,Mesenchymal
C3L-01046,-0.375754,-0.037553,0.239725,0.152238,-0.505107,,-0.133864,0.191524,0.030741,-0.036606,...,0.176411,0.003886,0.720046,-0.295464,-0.543543,0.118416,0.963697,0.243561,0.013539,Mesenchymal
C3L-01049,-0.025968,-0.310086,0.087263,-0.163304,,-0.009380,0.091515,-0.393903,-0.217689,-0.195168,...,-0.053609,0.060704,0.635137,,0.199433,-0.340400,0.271082,-0.122117,0.177207,Mesenchymal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-02785,0.292303,0.691145,0.369580,0.196114,0.028247,0.878197,-0.152089,0.457016,0.017247,-0.838311,...,0.144039,0.046498,0.667878,,-0.068733,-0.198584,-0.307852,-0.092755,0.376837,Proneural
C3N-02788,-0.327487,-0.340301,-0.381604,0.176850,0.056150,-0.381644,-0.249617,0.737844,0.114329,-0.200811,...,0.196207,-0.076559,-0.723503,-0.915281,0.171156,-0.126355,-0.806935,0.017285,0.148011,Proneural
C3N-03088,0.278510,0.836600,-0.519413,0.408439,0.150180,,-0.201109,1.047781,0.235729,-0.264215,...,0.008570,-0.247399,-0.764748,-0.347858,-0.133599,-0.148703,0.000129,0.253013,0.284701,Proneural
C3N-03186,0.616640,0.767029,-0.231897,0.050841,0.304359,-0.384608,-0.205284,0.513575,0.151769,-0.307037,...,-0.110846,-0.069605,-0.183921,,-0.681170,0.095601,-0.096716,0.049811,-0.409252,Proneural


In [18]:
# Columns to test: every column of the frame except the subtype label.
prot_col_list = Mesench_Proneural.columns.tolist()
prot_col_list.remove('TCGA_subtype')
prot_col_list

['A1BG_proteomics',
 'A2M_proteomics',
 'AAAS_proteomics',
 'AACS_proteomics',
 'AADAT_proteomics',
 'AAED1_proteomics',
 'AAGAB_proteomics',
 'AAK1_proteomics',
 'AAMDC_proteomics',
 'AAMP_proteomics',
 'AAR2_proteomics',
 'AARS_proteomics',
 'AARS2_proteomics',
 'AARSD1_proteomics',
 'AASDHPPT_proteomics',
 'AASS_proteomics',
 'AATF_proteomics',
 'AATK_proteomics',
 'ABAT_proteomics',
 'ABCA1_proteomics',
 'ABCA2_proteomics',
 'ABCA3_proteomics',
 'ABCA5_proteomics',
 'ABCA8_proteomics',
 'ABCB1_proteomics',
 'ABCB10_proteomics',
 'ABCB6_proteomics',
 'ABCB7_proteomics',
 'ABCB8_proteomics',
 'ABCB9_proteomics',
 'ABCC1_proteomics',
 'ABCC3_proteomics',
 'ABCC4_proteomics',
 'ABCD1_proteomics',
 'ABCD2_proteomics',
 'ABCD3_proteomics',
 'ABCD4_proteomics',
 'ABCE1_proteomics',
 'ABCF1_proteomics',
 'ABCF2_proteomics',
 'ABCF3_proteomics',
 'ABCG1_proteomics',
 'ABCG2_proteomics',
 'ABHD10_proteomics',
 'ABHD11_proteomics',
 'ABHD12_proteomics',
 'ABHD13_proteomics',
 'ABHD14A-ACY1_pr

In [19]:
# Run cptac's wrap_ttest on the formatted dataframe, comparing the two
# TCGA_subtype groups across every protein column.
prot_all_comparisons = u.wrap_ttest(Mesench_Proneural, 'TCGA_subtype', prot_col_list)

# Bonferroni threshold: 0.05 divided by the number of columns tested.
prot_num_comparisons = len(prot_col_list)
prot_bonferroni_cutoff = .05 / prot_num_comparisons

print("Number of comparisons:", prot_num_comparisons)
print("Bonferroni cutoff = ", prot_bonferroni_cutoff)
print("Logged Bonferroni cutoff = ", np.log10(prot_bonferroni_cutoff))

961 significant comparisons!
Number of comparisons: 11141
Bonferroni cutoff =  4.487927475092003e-06
Logged Bonferroni cutoff =  -5.347954169894016


In [20]:
# Discard rows with missing values, then keep the comparisons whose p-value
# is at or below the Bonferroni cutoff.
prot_all_comparisons = prot_all_comparisons.dropna(axis=0)
prot_sig_comparisons = prot_all_comparisons[
    prot_all_comparisons['P_Value'].le(prot_bonferroni_cutoff)
]
print("Number of significant Proteomics comparisons: ", len(prot_sig_comparisons), '\n')

if len(prot_sig_comparisons):
    print(prot_sig_comparisons)

# Keep the result under a comparison-specific name for later cells.
prot_sig_comparisons_Mesench_Proneural = prot_sig_comparisons

Number of significant Proteomics comparisons:  961 

             Comparison       P_Value
0     PODXL2_proteomics  2.047362e-14
1      BASP1_proteomics  2.654212e-14
2     GPRIN1_proteomics  7.094843e-14
3      PHF24_proteomics  7.994311e-14
4       SCAI_proteomics  1.492919e-13
..                  ...           ...
956  CNTNAP1_proteomics  4.392254e-06
957    HSPB1_proteomics  4.421559e-06
958    NOVA1_proteomics  4.426351e-06
959   THSD7A_proteomics  4.466200e-06
960   PARP14_proteomics  4.483369e-06

[961 rows x 2 columns]


In [21]:
# Give the p-value column a comparison-specific name (MP = Mesenchymal vs
# Proneural). A bare set_index("Comparison") call used to precede this line;
# its result was never assigned, so it had no effect and has been removed.
# "Comparison" stays a regular column, as the displayed frame already showed.
prot_sig_comparisons_Mesench_Proneural = prot_sig_comparisons_Mesench_Proneural.rename(
    columns={"P_Value": "P_Value_MP"}
)
prot_sig_comparisons_Mesench_Proneural

Unnamed: 0,Comparison,P_Value_MP
0,PODXL2_proteomics,2.047362e-14
1,BASP1_proteomics,2.654212e-14
2,GPRIN1_proteomics,7.094843e-14
3,PHF24_proteomics,7.994311e-14
4,SCAI_proteomics,1.492919e-13
...,...,...
956,CNTNAP1_proteomics,4.392254e-06
957,HSPB1_proteomics,4.421559e-06
958,NOVA1_proteomics,4.426351e-06
959,THSD7A_proteomics,4.466200e-06


In [26]:
# Comparisons present in both significant-result tables, joined on the
# comparison name (pandas' default inner join), with both p-value columns.
common_sig = pd.merge(
    prot_sig_comparisons_Mesench_Proneural,
    prot_sig_comparisons_Mesench_Normal,
    on='Comparison',
)
common_sig

Unnamed: 0,Comparison,P_Value_MP,P_Value_MN
0,PODXL2_proteomics,2.047362e-14,2.036752e-21
1,BASP1_proteomics,2.654212e-14,5.600847e-23
2,GPRIN1_proteomics,7.094843e-14,8.851253e-26
3,PHF24_proteomics,7.994311e-14,1.350124e-26
4,SCAI_proteomics,1.492919e-13,3.388492e-32
...,...,...,...
909,CD47_proteomics,4.385279e-06,1.121581e-20
910,CNTNAP1_proteomics,4.392254e-06,1.404129e-18
911,HSPB1_proteomics,4.421559e-06,1.511483e-09
912,THSD7A_proteomics,4.466200e-06,1.580185e-15


In [23]:
# Mean PODXL2 proteomics value across the Mesenchymal samples.
Mesenchymal.loc[:, "PODXL2_proteomics"].mean()


-0.4907263216620136

In [24]:
# Mean PODXL2 proteomics value across the Normal samples, to check whether
# it is higher than in the Mesenchymal group.
Normal.loc[:, "PODXL2_proteomics"].mean()

1.3879396382535378

In [30]:
# Fetch the lipidomics table from the dataset and preview its first five rows.
lipidomics = brain.get_lipidomics()
lipidomics.head(n=5)

Name,anandamide(18:1)_positive,carnitine(12:0)_positive,carnitine(14:0)_positive,carnitine(14:1)_positive,carnitine(16:0)_positive,carnitine(16:1)_positive,CE(18:1)_positive,CE(18:2)_positive,CE(20:1)_positive,CE(20:2)_positive,...,PS(18:1/18:2)_negative,PS(18:1/20:1)_negative,PS(18:1/20:4)_negative,PS(18:1/22:6)_negative,PS(20:1/22:6)_negative,PS(20:4/22:6)_negative,PS(22:6/0:0)_negative,PS(22:6/22:6)_negative,PE(14:1/17:0)_IS_negative,PI(14:1/17:0)_IS_negative
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S002,14.419297,18.319,20.96765,18.938109,23.405924,20.812666,19.85043,21.634197,15.60689,18.044304,...,15.294246,20.092292,16.545489,16.71725,16.667748,12.51573,15.783338,9.722623,21.711144,18.58376
S003,11.661378,15.677124,17.888617,15.775275,21.147627,17.926037,20.384138,21.267571,17.290307,18.796077,...,15.579272,19.788314,17.149018,16.753116,15.169005,13.905648,15.109497,14.620079,20.512115,17.405063
S004,13.365002,15.426545,18.409173,16.279855,20.975075,18.162399,19.43311,21.531124,13.728536,17.4315,...,14.654211,19.145592,16.51275,16.831545,15.891739,13.057211,15.558409,13.812289,21.46546,18.480971
S006,12.916069,16.824948,18.47643,18.200602,21.360148,19.052867,19.783253,21.506716,14.686046,18.038284,...,14.909021,20.163139,16.673917,16.171299,17.031722,12.852903,13.899564,9.712713,20.912731,17.878919
S007,13.433232,14.70846,17.829488,15.410324,21.583465,17.287451,20.507001,22.323218,16.327105,19.084873,...,16.103807,18.437989,15.77658,16.458791,16.163562,12.574311,15.460876,10.992096,21.901111,18.733129
