In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import cptac.endometrial as en

Welcome to the cptac data service package. Available datasets may be
viewed using cptac.list_data(). In order to access a specific data
set, import a cptac subfolder using either 'import cptac.dataset' or
'from cptac import dataset'.
******
Version: 0.4.1
******
You have loaded the cptac endometrial dataset. To view available
dataframes, use cptac.endometrial.list_data(). To view available
functions for accessing and manipulating the dataframes, use
cptac.endometrial.list_api().
endometrial data version: 2.1

Loading Dictionary...
Loading cptac endometrial data:
Loading acetylproteomics data...
Loading clinical data...
Loading CNA data...
Loading miRNA data...
Loading phosphoproteomics_gene data...
Loading phosphoproteomics_site data...
Loading proteomics data...
Loading somatic data...
Loading somatic_binary data...
Loading transcriptomics_circular data...
Loading transcriptomics_linear data...

 ******PLEASE READ******
CPTAC is a community resource project and data are made available

In [3]:
# Version 5: Probably the best version so far, since not everything is done in one function.
# Works for Age, Race and BMI as expected.
# Functions:

def binarizeCutOff(my_list, cut_off, replace_low, replace_high):
    """Label each value as below or at/above a cut-off.

    Parameters
    ----------
    my_list : iterable
        Values to label; each must be comparable with ``cut_off``.
    cut_off : number
        Threshold. Values strictly below it get ``replace_low``; values
        greater than or equal to it get ``replace_high``.
    replace_low, replace_high : object
        Labels for the low and high groups.

    Returns
    -------
    list
        One entry per input value. A value for which both comparisons are
        false (e.g. NaN) is returned unchanged.
    """
    labelled = []
    for value in my_list:
        if value < cut_off:
            labelled.append(replace_low)
        elif value >= cut_off:
            labelled.append(replace_high)
        else:
            # Neither comparison held (NaN): keep the original value.
            labelled.append(value)
    return labelled

def binarizeRange(my_list, low_bar, high_bar, in_range='In_Range', out_of_range='Out_Of_Range',
                  inclusive='both'):
    """Label each value as inside or outside the range ``low_bar``..``high_bar``.

    Previously a value exactly equal to ``low_bar`` or ``high_bar`` matched
    neither the in-range nor the out-of-range test and was returned as a raw
    number. Boundary values are now always labelled; ``inclusive`` decides
    which label they receive.

    Parameters
    ----------
    my_list : iterable
        Values to label; each must be comparable with the two bounds.
    low_bar, high_bar : number
        Lower and upper bound of the range.
    in_range, out_of_range : object
        Labels for values inside / outside the range.
    inclusive : {'both', 'neither', 'left', 'right'}, default 'both'
        Which bounds count as inside the range: both, neither, only
        ``low_bar`` ('left') or only ``high_bar`` ('right').

    Returns
    -------
    list
        One entry per input value. NaN values are returned unchanged.

    Raises
    ------
    ValueError
        If ``inclusive`` is not one of the accepted options.
    """
    if inclusive not in ('both', 'neither', 'left', 'right'):
        raise ValueError("inclusive must be one of 'both', 'neither', 'left' or 'right'")
    low_closed = inclusive in ('both', 'left')
    high_closed = inclusive in ('both', 'right')

    def _label(x):
        above_low = x >= low_bar if low_closed else x > low_bar
        below_high = x <= high_bar if high_closed else x < high_bar
        if above_low and below_high:
            return in_range
        # NaN is the only value that does not compare equal to itself; every
        # other value reaching this point is outside the range.
        if x == x:
            return out_of_range
        return x

    return [_label(x) for x in my_list]

def binarizeCategorical(my_list, option1, option1_list, option2, option2_list):
    """Collapse categorical values into two labels.

    Parameters
    ----------
    my_list : iterable
        Values to relabel.
    option1 : object
        Label given to any value found in ``option1_list``.
    option1_list : container
        Values that map to ``option1``. Checked first, so it wins if a value
        appears in both lists.
    option2 : object
        Label given to any value found in ``option2_list``.
    option2_list : container
        Values that map to ``option2``.

    Returns
    -------
    list
        One entry per input value. Values found in neither list are returned
        unchanged.
    """
    def _relabel(entry):
        if entry in option1_list:
            return option1
        if entry in option2_list:
            return option2
        return entry

    return list(map(_relabel, my_list))

In [None]:
# Example uses for the functions, one at a time.
# NOTE: this cell reads `clinical`, `binary_columns` and `new_df`, which are
# created in the cells further down; run those cells first.

#BMI: 
# Upper bound is 25, matching the whole-dataframe example (it was mistyped as 2).
new_df['BMI'] = binarizeRange(binary_columns['BMI'], 18.5, 25)

#Age: 
new_df['Age'] = binarizeCutOff(binary_columns['Age'], 50 , 'Young', 'Old')

#Race: 
new_df['Race'] = binarizeCategorical(binary_columns['Race'], 'European', ['White'], 'Not_European', 
                                     ['Black or African American', 'Not Reported', 'Asian'])



#Histologic_Grade_FIGO: 
# Read from `clinical`, because `binary_columns` only holds BMI, Age and Race.
# Assumes `clinical` has a 'Histologic_Grade_FIGO' column -- TODO confirm.
new_df['Histologic_Grade_FIGO'] = binarizeCategorical(clinical['Histologic_Grade_FIGO'], 
                                                      'Grade1', ['FIGO Grade 1'], 'Not_Grade1', 
                                                      ['FIGO Grade 2', 'FIGO Grade 3'])

In [4]:
# Save data in variables
# `en` is the cptac endometrial module imported in the first cell.
# `clinical` is the table the binarize examples take their BMI / Age / Race columns from.

clinical = en.get_clinical()
transcriptomics = en.get_transcriptomics()

In [6]:
# How to use the functions to create a whole dataframe
binary_columns = clinical[['BMI', 'Age', 'Race']]

# assign() returns a new frame, so `binary_columns` keeps the raw values and
# this cell can be re-run safely.
# Race uses binarizeCategorical: the previously referenced binarizeCategoricalV2
# is not defined anywhere in this notebook and fails on a fresh kernel.
new_df = binary_columns.assign(
    BMI=binarizeRange(binary_columns['BMI'], 18.5, 25),
    Age=binarizeCutOff(binary_columns['Age'], 50, 'Young', 'Old'),
    Race=binarizeCategorical(binary_columns['Race'], 'European', ['White'], 'Not_European',
                             ['Black or African American', 'Not Reported', 'Asian']),
)

print(new_df)

# Per-column label counts as a quick sanity check of the binarization.
print(new_df['BMI'].value_counts()) 
print('\n')
print(new_df['Age'].value_counts()) 
print('\n')
print(new_df['Race'].value_counts()) 

                    BMI    Age          Race
Sample_ID                                   
S001       Out_Of_Range    Old      European
S002       Out_Of_Range    Old      European
S003       Out_Of_Range    Old      European
S005       Out_Of_Range    Old      European
S006           In_Range    Old      European
S007       Out_Of_Range    Old      European
S008       Out_Of_Range    Old      European
S009           In_Range    Old      European
S010       Out_Of_Range    Old      European
S011       Out_Of_Range    Old      European
S012       Out_Of_Range    Old      European
S014       Out_Of_Range  Young      European
S016       Out_Of_Range    Old      European
S017       Out_Of_Range    Old      European
S018       Out_Of_Range  Young      European
S019       Out_Of_Range    Old      European
S020       Out_Of_Range    Old      European
S021       Out_Of_Range    Old  Not_European
S022           In_Range    Old      European
S023       Out_Of_Range    Old      European
S024      