# Table 1 using GP2 release 6 data
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Last Updated:** 2-MAY-2024

## Notebook Overview
- Generate Table 1 (phenotype counts and age summaries by ancestry and sex) and explore the GP2 release 6 data

In [None]:
# Use the os package to interact with the environment
import os
import sys

# Bring in Pandas for Dataframe functionality
import pandas as pd
from functools import reduce

# Bring some visualization functionality
import seaborn as sns

# numpy for basics
import numpy as np

# Use StringIO for working with file contents
from io import StringIO

# Enable IPython to display matplotlib graphs
import matplotlib.pyplot as plt
%matplotlib inline

# Enable interaction with the FireCloud API
from firecloud import api as fapi

# Import the iPython HTML rendering for displaying links to Google Cloud Console
# display and HTML are imported from the public IPython.display module;
# importing display from IPython.core.display is deprecated
from IPython.display import display, HTML

# Import urllib modules for building URLs to Google Cloud Console
import urllib.parse

# BigQuery for querying data
from google.cloud import bigquery

!pip install tableone
from tableone import TableOne

!pip install openpyxl

from openpyxl.workbook import Workbook


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
# Utility routine for printing a shell command before executing it
def shell_do(command):
    """Echo ``command`` to stderr, then run it through the IPython shell escape.

    The command's output goes to the cell output; the function returns nothing.
    """
    print(f'Executing: {command}', file=sys.stderr)
    # IPython syntax: `!` runs a shell command, `$command` is replaced by the variable's value
    !$command

def shell_return(command):
    """Echo ``command`` to stderr, run it in a shell, and return its output as one string.

    The output lines captured by IPython are joined with newline characters.
    """
    print(f'Executing: {command}', file=sys.stderr)
    # IPython syntax: assigning from `!` captures the command's output lines instead of printing them
    output = !$command
    return '\n'.join(output)

# Utility routine for printing a query before executing it
def bq_query(query):
    """Log a BigQuery statement to stderr, execute it, and return the result.

    The query runs with ``BILLING_PROJECT_ID`` as the project and the
    'standard' SQL dialect; the return value is whatever ``pd.read_gbq`` yields.
    """
    print(f'Executing: {query}', file=sys.stderr)
    gbq_options = {'project_id': BILLING_PROJECT_ID, 'dialect': 'standard'}
    result = pd.read_gbq(query, **gbq_options)
    return result

# Utility routine for display a message and a link
def display_html_link(description, link_text, url):
    """Render a description followed by a clickable link in the cell output.

    Args:
        description: Text shown before the link.
        link_text: Clickable text of the link.
        url: Target address placed in the anchor's ``href``.

    The values are interpolated into the HTML as-is (no escaping).
    """
    # The anchor element must be complete (<a href="...">text</a>) for the
    # link to render; the link opens in a new browser tab.
    html = f'''
    <p>
    {description}
    <a target="_blank" href="{url}">{link_text}</a>.
    </p>
    '''

    display(HTML(html))

# Utility routines for reading files from Google Cloud Storage
def gcs_read_file(path):
    """Return the contents of a file in GCS"""
    contents = !gsutil -u {BILLING_PROJECT_ID} cat {path}
    return '\n'.join(contents)

def gcs_read_csv(path, sep=None):
    """Load a delimited file stored in GCS into a DataFrame.

    With ``sep=None`` the python parsing engine is left to detect the delimiter.
    """
    # Wrap the downloaded text in a file-like object so pandas can parse it
    buffer = StringIO(gcs_read_file(path))
    return pd.read_csv(buffer, sep=sep, engine='python')

# Utility routine for displaying a message and link to Cloud Console
def link_to_cloud_console_gcs(description, link_text, gcs_path):
    """Display a message with a link to ``gcs_path`` in the Cloud Console storage browser.

    Args:
        description: Text shown before the link.
        link_text: Clickable text of the link.
        gcs_path: Bucket path, with or without the ``gs://`` scheme.

    ``BILLING_PROJECT_ID`` is appended as the ``userProject`` query parameter.
    """
    # Join URL parts with explicit '/' separators: os.path.join follows the
    # host OS path rules and discards the base when the second part starts with '/'.
    base_url = 'https://console.cloud.google.com/storage/browser'
    bucket_path = gcs_path.replace("gs://", "").lstrip('/')
    query = urllib.parse.urlencode({'userProject': BILLING_PROJECT_ID})
    url = f'{base_url}/{bucket_path}?{query}'

    display_html_link(description, link_text, url)

In [None]:
# Billing project and workspace identifiers, read from the notebook's environment variables
BILLING_PROJECT_ID = os.environ['GOOGLE_PROJECT']
WORKSPACE_NAMESPACE = os.environ['WORKSPACE_NAMESPACE']
WORKSPACE_NAME = os.environ['WORKSPACE_NAME']
WORKSPACE_BUCKET = os.environ['WORKSPACE_BUCKET']

# Workspace attributes from the FireCloud API; absent keys fall back to an empty dict
WORKSPACE_ATTRIBUTES = (
    fapi.get_workspace(WORKSPACE_NAMESPACE, WORKSPACE_NAME)
    .json()
    .get('workspace', {})
    .get('attributes', {})
)

# Echo the settings so the release and billing project can be checked.
# These values differ per user, depending on the billing project of the workspace.
print('\n'.join([
    'Billing and Workspace',
    f'Workspace Name: {WORKSPACE_NAME}',
    f'Billing Project: {BILLING_PROJECT_ID}',
    f'Workspace Bucket, where you can upload and download data: {WORKSPACE_BUCKET}',
    '',
]))


In [None]:
# Local folder on the notebook VM that the bucket files are copied into
WORK_DIR = '/home/jupyter/multi-ancestry-PRS_demo/PRS_test/table_one_directory/'

print("Making a working directory")
# -p creates missing parent folders and does not fail if the folder already exists
shell_do(f'mkdir -p {WORK_DIR}')

Making a working directory


Executing: mkdir -p /home/jupyter/multi-ancestry-PRS_demo/PRS_test/table_one_directory/


In [None]:
# Copy the AAC sample list (.psam) from the workspace bucket into WORK_DIR.
# The release 6 file is fetched because that is the file name the notebook reads afterwards.
# NOTE(review): if WORKSPACE_BUCKET already carries a 'gs://' prefix, the 'gs://' written
# here is doubled - confirm the value printed by the configuration cell.
shell_do(f'gsutil -u {BILLING_PROJECT_ID} -m cp gs://{WORKSPACE_BUCKET}/to_table_1/chr1_AAC_release6_unrelated.psam {WORK_DIR}')

In [None]:
# Then get the file with the clinical info. The release 6 master key is fetched because
# master_key_release6_final.csv is the file name the notebook reads afterwards.
# NOTE(review): GP2_DATA is not defined in any cell visible here; it has to be set to the
# GP2 release location (ending in '/') before this cell runs, otherwise a NameError is raised.

shell_do(f'gsutil -u {BILLING_PROJECT_ID} -m cp gs://{GP2_DATA}master_key_release6_final.csv {WORK_DIR}')


In [None]:
# Now read the AAC sample list that was copied to the VM.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight
# onto it; repeating the folder name would point at a nested folder that was never created.

AAC_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_AAC_release6_unrelated.psam'), sep='\t')
AAC_sample.head()


In [None]:
# Now read the clinical file too.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight
# onto it; repeating the folder name would point at a nested folder that was never created.

clin = pd.read_csv(os.path.join(WORK_DIR, 'master_key_release6_final.csv'))
clin.head()


In [None]:
# Attach the clinical columns to the AAC samples. The ID column is called '#FID' in the
# sample list and 'GP2sampleID' in the clinical table, hence left_on / right_on.
# Only IDs present in both tables are kept (inner join).

AAC_phenos = AAC_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
AAC_phenos.head()


In [None]:
# With the AAC samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

AAC_phenos.loc[:, "phenotype"].value_counts(dropna=False)

Control    1018
PD          289
Other        11
Name: phenotype, dtype: int64

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names
gp2_data_toKeep_AAC_df = (
    AAC_phenos[['GP2sampleID',
                'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
                'label', 'region_for_qc']]
    # rename returns a new frame, so AAC_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'phenotype': 'PHENO',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the abbreviated dataframe
gp2_data_toKeep_AAC_df.head()

In [None]:
# Work on a separate copy so the trimmed AAC frame stays as it was
gp2_table_one_AAC = gp2_data_toKeep_AAC_df.copy(deep=True)

# AAC: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_AAC
    .groupby(['ANCESTRY', 'SEX'])['PHENO']
    .value_counts()
)

ANCESTRY  SEX  PHENO  
AAC       1    Control    380
               PD         158
               Other        7
          2    Control    638
               PD         131
               Other        4
Name: PHENO, dtype: int64

In [None]:
# Start again from a fresh copy of the trimmed AAC frame
gp2_table_one_AAC = gp2_data_toKeep_AAC_df.copy(deep=True)

# AAC: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_AAC
    .groupby(['PHENO'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
PHENO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,9.0,52.888889,15.806996,23.0,44.0,57.0,65.0,74.0
PD,158.0,55.969937,14.594857,14.0,46.0,57.0,66.0,91.0


In [None]:
# AAC: descriptive statistics of AGE per phenotype
(
    gp2_table_one_AAC
    .groupby(['PHENO'])['AGE']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
PHENO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,934.0,64.261028,12.922719,18.0,59.0,66.0,72.0,91.0
Other,2.0,71.497057,12.865181,62.4,66.948528,71.497057,76.045585,80.594114
PD,219.0,62.702182,12.773803,18.0,55.0,64.0,71.0,95.0


In [None]:
# AAC: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_AAC
    .groupby(['PHENO', 'SEX'])['AAO']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
PHENO,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,5.0,57.0,11.045361,39.0,57.0,57.0,65.0,67.0
Other,2,4.0,47.75,20.982135,23.0,38.75,47.0,56.0,74.0
PD,1,84.0,56.076389,14.606217,20.0,44.75,56.0,66.0,91.0
PD,2,74.0,55.849099,14.680694,14.0,46.25,59.0,66.75,90.0


In [None]:
# AAC: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_AAC
    .groupby(['PHENO', 'SEX'])['AGE']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
PHENO,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,345.0,63.248116,13.69913,18.0,59.0,65.0,72.0,89.0
Control,2,589.0,64.854329,12.419088,18.0,60.0,66.0,72.0,91.0
Other,1,2.0,71.497057,12.865181,62.4,66.948528,71.497057,76.045585,80.594114
Other,2,0.0,,,,,,,
PD,1,121.0,62.431243,13.196813,23.0,54.0,64.0,70.0,91.0
PD,2,98.0,63.036708,12.290519,18.0,56.0,64.0,72.0,95.0


In [None]:
# Copy the release 6 sample list (.psam) of every remaining ancestry group from the
# workspace bucket into WORK_DIR, one gsutil call per group, in the order listed
for ancestry in ('AFR', 'AJ', 'AMR', 'CAS', 'EAS', 'EUR', 'MDE', 'SAS'):
    shell_do(f'gsutil -u {BILLING_PROJECT_ID} -m cp gs://{WORKSPACE_BUCKET}/to_table_1/chr1_{ancestry}_release6_unrelated.psam {WORK_DIR}')

In [None]:
# Read the AFR sample list that was copied to the VM.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight onto it.

AFR_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_AFR_release6_unrelated.psam'), sep='\t')
AFR_sample.head()


In [None]:
# Read the AJ sample list that was copied to the VM.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight onto it.

AJ_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_AJ_release6_unrelated.psam'), sep='\t')
AJ_sample.head()

In [None]:
# Read the AMR sample list that was copied to the VM.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight onto it.

AMR_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_AMR_release6_unrelated.psam'), sep='\t')
AMR_sample.head()

In [None]:
# Read the CAS sample list that was copied to the VM.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight onto it.

CAS_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_CAS_release6_unrelated.psam'), sep='\t')
CAS_sample.head()

In [None]:
# Read the EAS sample list that was copied to the VM.
# The release 6 file is read because that is the file the copy cell fetched from the bucket.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight onto it.

EAS_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_EAS_release6_unrelated.psam'), sep='\t')
EAS_sample.head()

In [None]:
# Read the EUR sample list that was copied to the VM.
# The release 6 file is read because that is the file the copy cell fetched from the bucket.
# WORK_DIR already ends in 'table_one_directory/', so the file name is joined straight onto it.

EUR_sample = pd.read_csv(os.path.join(WORK_DIR, 'chr1_EUR_release6_unrelated.psam'), sep='\t')
EUR_sample.head()

In [None]:
# Attach the clinical columns to the AFR samples. The ID column is '#FID' in the sample
# list and 'GP2sampleID' in the clinical table; only IDs found in both are kept (inner join).

AFR_phenos = AFR_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
AFR_phenos.head()

In [None]:
# Attach the clinical columns to the AJ samples. The ID column is '#FID' in the sample
# list and 'GP2sampleID' in the clinical table; only IDs found in both are kept (inner join).

AJ_phenos = AJ_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
AJ_phenos.head()

In [None]:
# Attach the clinical columns to the AMR samples. The ID column is '#FID' in the sample
# list and 'GP2sampleID' in the clinical table; only IDs found in both are kept (inner join).

AMR_phenos = AMR_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
AMR_phenos.head()

In [None]:
# Attach the clinical columns to the CAS samples. The ID column is '#FID' in the sample
# list and 'GP2sampleID' in the clinical table; only IDs found in both are kept (inner join).

CAS_phenos = CAS_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
CAS_phenos.head()

In [None]:
# Attach the clinical columns to the EAS samples. The ID column is '#FID' in the sample
# list and 'GP2sampleID' in the clinical table; only IDs found in both are kept (inner join).

EAS_phenos = EAS_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
EAS_phenos.head()

In [None]:
# Attach the clinical columns to the EUR samples. The ID column is '#FID' in the sample
# list and 'GP2sampleID' in the clinical table; only IDs found in both are kept (inner join).

EUR_phenos = EUR_sample.merge(clin, how='inner', left_on='#FID', right_on='GP2sampleID')
EUR_phenos.head()

In [None]:
# With the AFR samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

AFR_phenos.loc[:, "phenotype"].value_counts(dropna=False)

Control    1436
PD          853
Other         9
Name: phenotype, dtype: int64

In [None]:
# With the AJ samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

AJ_phenos.loc[:, "phenotype"].value_counts(dropna=False)

PD         733
Control    385
Other      383
Name: phenotype, dtype: int64

In [None]:
# With the AMR samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

AMR_phenos.loc[:, "phenotype"].value_counts(dropna=False)

PD         337
Control    199
Other       22
Name: phenotype, dtype: int64

In [None]:
# With the CAS samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

CAS_phenos.loc[:, "phenotype"].value_counts(dropna=False)

Control    272
PD         189
Other        4
Name: phenotype, dtype: int64

In [None]:
# With the EAS samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

EAS_phenos.loc[:, "phenotype"].value_counts(dropna=False)

Control    1939
PD          864
Other        13
Name: phenotype, dtype: int64

In [None]:
# With the EUR samples merged to the clinical info, count how many fall under each
# phenotype label; dropna=False also counts samples with a missing phenotype

EUR_phenos.loc[:, "phenotype"].value_counts(dropna=False)

PD         9090
Control    4872
Other      1067
Name: phenotype, dtype: int64

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names.
## 'phenotype' keeps its original name here; the AFR summary cells group by that column.
gp2_data_toKeep_AFR_df = (
    AFR_phenos[['GP2sampleID',
                'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
                'label', 'region_for_qc']]
    # rename returns a new frame, so AFR_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the end of the abbreviated dataframe
gp2_data_toKeep_AFR_df.tail()

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names.
## 'phenotype' keeps its original name here; the AJ summary cells group by that column.
gp2_data_toKeep_AJ_df = (
    AJ_phenos[['GP2sampleID',
               'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
               'label', 'region_for_qc']]
    # rename returns a new frame, so AJ_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the end of the abbreviated dataframe
gp2_data_toKeep_AJ_df.tail()

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names.
## 'phenotype' keeps its original name here; the AMR summary cells group by that column.
gp2_data_toKeep_AMR_df = (
    AMR_phenos[['GP2sampleID',
                'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
                'label', 'region_for_qc']]
    # rename returns a new frame, so AMR_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the end of the abbreviated dataframe
gp2_data_toKeep_AMR_df.tail()

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names.
## 'phenotype' keeps its original name here; the CAS summary cells group by that column.
gp2_data_toKeep_CAS_df = (
    CAS_phenos[['GP2sampleID',
                'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
                'label', 'region_for_qc']]
    # rename returns a new frame, so CAS_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the end of the abbreviated dataframe
gp2_data_toKeep_CAS_df.tail()

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names.
## 'phenotype' keeps its original name here; the EAS summary cells group by that column.
gp2_data_toKeep_EAS_df = (
    EAS_phenos[['GP2sampleID',
                'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
                'label', 'region_for_qc']]
    # rename returns a new frame, so EAS_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the end of the abbreviated dataframe
gp2_data_toKeep_EAS_df.tail()

In [None]:
## Keep only the handful of columns needed for Table 1 and give them short names.
## 'phenotype' keeps its original name here; the EUR summary cells group by that column.
gp2_data_toKeep_EUR_df = (
    EUR_phenos[['GP2sampleID',
                'phenotype', 'sex_for_qc', 'age', 'age_of_onset',
                'label', 'region_for_qc']]
    # rename returns a new frame, so EUR_phenos itself is left untouched
    .rename(columns={
        'GP2sampleID': 'IID',
        'sex_for_qc': 'SEX',
        'age': 'AGE',
        'age_of_onset': 'AAO',
        'label': 'ANCESTRY',
        'region_for_qc': 'REGION',
    })
)

# Have a look at the end of the abbreviated dataframe
gp2_data_toKeep_EUR_df.tail()

In [None]:
# Work on a separate copy so the trimmed AFR frame stays as it was
gp2_table_one_AFR = gp2_data_toKeep_AFR_df.copy(deep=True)

# AFR: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_AFR
    .groupby(['ANCESTRY', 'SEX'])['phenotype']
    .value_counts()
)

ANCESTRY  SEX  phenotype
AFR       1    Control      748
               PD           585
               Other          4
          2    Control      688
               PD           268
               Other          5
Name: phenotype, dtype: int64

In [None]:
# Work on a separate copy so the trimmed AJ frame stays as it was
gp2_table_one_AJ = gp2_data_toKeep_AJ_df.copy(deep=True)

# AJ: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_AJ
    .groupby(['ANCESTRY', 'SEX'])['phenotype']
    .value_counts()
)

ANCESTRY  SEX  phenotype
AJ        1    PD           532
               Control      241
               Other        196
          2    PD           201
               Other        187
               Control      144
Name: phenotype, dtype: int64

In [None]:
# Work on a separate copy so the trimmed AMR frame stays as it was
gp2_table_one_AMR = gp2_data_toKeep_AMR_df.copy(deep=True)

# AMR: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_AMR
    .groupby(['ANCESTRY', 'SEX'])['phenotype']
    .value_counts()
)

ANCESTRY  SEX  phenotype
AMR       1    PD           210
               Control       97
               Other         11
          2    PD           127
               Control      102
               Other         11
Name: phenotype, dtype: int64

In [None]:
# Work on a separate copy so the trimmed CAS frame stays as it was
gp2_table_one_CAS = gp2_data_toKeep_CAS_df.copy(deep=True)

# CAS: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_CAS
    .groupby(['ANCESTRY', 'SEX'])['phenotype']
    .value_counts()
)

ANCESTRY  SEX  phenotype
CAS       1    PD           102
               Control       97
               Other          3
          2    Control      175
               PD            87
               Other          1
Name: phenotype, dtype: int64

In [None]:
# Work on a separate copy so the trimmed EAS frame stays as it was
gp2_table_one_EAS = gp2_data_toKeep_EAS_df.copy(deep=True)

# EAS: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_EAS
    .groupby(['ANCESTRY', 'SEX'])['phenotype']
    .value_counts()
)

ANCESTRY  SEX  phenotype
EAS       1    Control      1524
               PD            508
               Other           9
          2    Control       415
               PD            356
               Other           4
Name: phenotype, dtype: int64

In [None]:
# Work on a separate copy so the trimmed EUR frame stays as it was
gp2_table_one_EUR = gp2_data_toKeep_EUR_df.copy(deep=True)

# EUR: number of samples per phenotype within each ancestry label and sex
(
    gp2_table_one_EUR
    .groupby(['ANCESTRY', 'SEX'])['phenotype']
    .value_counts()
)

ANCESTRY  SEX  phenotype
EUR       1    PD           5896
               Control      2784
               Other         624
          2    PD           3194
               Control      2088
               Other         443
Name: phenotype, dtype: int64

In [None]:
# AFR: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_AFR
    .groupby(['phenotype', 'SEX'])['AAO']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,0.0,,,,,,,
Other,2,4.0,50.25,13.865425,35.0,40.25,52.0,62.0,62.0
PD,1,61.0,56.590164,12.079427,27.0,49.0,55.0,64.0,84.0
PD,2,58.0,56.810345,11.451587,24.0,50.0,57.0,64.75,77.0


In [None]:
# AFR: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_AFR
    .groupby(['phenotype', 'SEX'])['AGE']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,302.0,63.390728,16.048504,19.0,58.0,67.0,74.0,88.0
Control,2,434.0,62.209677,16.809483,18.0,59.0,66.0,73.0,92.0
Other,1,0.0,,,,,,,
Other,2,1.0,80.673511,,80.673511,80.673511,80.673511,80.673511,80.673511
PD,1,68.0,61.3743,11.446153,38.0,53.75,59.5,67.75,89.0
PD,2,59.0,61.152542,11.836259,26.0,55.0,61.0,68.5,84.0


In [None]:
# AFR: descriptive statistics of AGE per phenotype
(
    gp2_table_one_AFR
    .groupby(['phenotype'])['AGE']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,736.0,62.694293,16.500679,18.0,58.0,66.0,73.0,92.0
Other,1.0,80.673511,,80.673511,80.673511,80.673511,80.673511,80.673511
PD,127.0,61.271279,11.583084,26.0,55.0,60.0,68.5,89.0


In [None]:
# AFR: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_AFR
    .groupby(['phenotype'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,4.0,50.25,13.865425,35.0,40.25,52.0,62.0,62.0
PD,119.0,56.697479,11.728247,24.0,50.0,56.0,64.0,84.0


In [None]:
# AMR: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_AMR
    .groupby(['phenotype', 'SEX'])['AAO']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,6.0,61.833333,10.303721,41.0,64.25,65.5,66.75,68.0
Other,2,7.0,52.714286,15.691976,29.0,43.0,54.0,62.5,75.0
PD,1,102.0,48.303105,13.745915,16.0,39.0,47.0,57.75,78.0
PD,2,71.0,45.093897,12.976321,13.0,36.5,45.0,51.5,76.416667


In [None]:
# AMR: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_AMR
    .groupby(['phenotype', 'SEX'])['AGE']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,68.0,59.894118,13.109344,21.0,56.0,60.5,67.25,82.0
Control,2,73.0,61.008219,12.448536,19.0,57.0,60.0,67.0,101.0
Other,1,4.0,64.431674,10.719754,51.3,59.4,64.7,69.731674,77.026694
Other,2,1.0,49.4,,49.4,49.4,49.4,49.4,49.4
PD,1,164.0,57.958792,12.029975,22.0,49.975,59.0,67.0,83.0
PD,2,110.0,57.680861,12.23536,26.0,48.25,57.5,66.0,85.0


In [None]:
# AMR: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_AMR
    .groupby(['phenotype'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,13.0,56.923077,13.774745,29.0,47.0,64.0,66.0,75.0
PD,173.0,46.986031,13.490316,13.0,38.0,46.0,54.0,78.0


In [None]:
# AMR: descriptive statistics of AGE per phenotype
(
    gp2_table_one_AMR
    .groupby(['phenotype'])['AGE']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,141.0,60.470922,12.737888,19.0,56.0,60.0,67.0,101.0
Other,5.0,61.425339,11.461897,49.4,51.3,62.1,67.3,77.026694
PD,274.0,57.847214,12.091264,22.0,49.0,59.0,66.0,85.0


In [None]:
# CAS: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_CAS
    .groupby(['phenotype', 'SEX'])['AAO']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,1.0,65.0,,65.0,65.0,65.0,65.0,65.0
Other,2,1.0,61.0,,61.0,61.0,61.0,61.0,61.0
PD,1,49.0,55.810204,13.046491,23.0,46.0,56.0,67.0,78.0
PD,2,34.0,59.0,10.976559,40.0,51.25,59.0,68.25,78.0


In [None]:
# CAS: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_CAS
    .groupby(['phenotype', 'SEX'])['AGE']
    .describe()
)


Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,96.0,49.65625,13.870702,19.0,48.0,53.0,59.0,75.0
Control,2,172.0,50.901163,11.980813,19.0,49.0,54.0,58.25,76.0
Other,1,1.0,52.3,,52.3,52.3,52.3,52.3,52.3
Other,2,0.0,,,,,,,
PD,1,88.0,60.956818,11.691374,26.0,55.75,61.5,70.0,84.0
PD,2,82.0,64.302439,9.834394,29.0,59.0,64.0,72.0,81.0


In [None]:
# CAS: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_CAS
    .groupby(['phenotype'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,2.0,63.0,2.828427,61.0,62.0,63.0,64.0,65.0
PD,83.0,57.116867,12.272494,23.0,49.5,57.0,67.35,78.0


In [None]:
# CAS: descriptive statistics of AGE per phenotype
(
    gp2_table_one_CAS
    .groupby(['phenotype'])['AGE']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,268.0,50.455224,12.678458,19.0,48.0,53.0,59.0,76.0
Other,1.0,52.3,,52.3,52.3,52.3,52.3,52.3
PD,170.0,62.570588,10.933076,26.0,57.0,63.0,70.75,84.0


In [None]:
# EAS: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_EAS
    .groupby(['phenotype', 'SEX'])['AAO']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,6.0,66.5,8.191459,55.0,61.0,68.0,71.25,77.0
Other,2,3.0,62.0,5.291503,56.0,60.0,64.0,65.0,66.0
PD,1,38.0,55.640351,13.0731,27.0,43.0,60.9,65.525,76.0
PD,2,17.0,55.901961,15.061801,18.0,50.0,53.0,69.0,76.0


In [None]:
# EAS: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_EAS
    .groupby(['phenotype', 'SEX'])['AGE']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,464.0,64.060345,8.116366,50.0,57.0,64.0,70.0,86.0
Control,2,411.0,62.321168,7.934427,50.0,55.5,62.0,68.0,86.0
Other,1,3.0,70.919462,5.957866,64.996578,67.92334,70.850103,73.880903,76.911704
Other,2,1.0,68.744695,,68.744695,68.744695,68.744695,68.744695,68.744695
PD,1,477.0,67.500173,8.135321,33.0,62.0,68.0,73.0,88.0
PD,2,333.0,68.618107,8.555656,31.0,63.0,69.0,75.0,92.0


In [None]:
# EUR: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_EUR
    .groupby(['phenotype', 'SEX'])['AGE']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,1339.0,65.417625,11.916463,0.0,59.0,66.0,73.0,101.0
Control,2,1409.0,65.076579,12.55196,19.0,58.0,65.0,73.0,103.0
Other,1,188.0,69.40441,10.319617,36.0,63.9,72.0,77.519507,89.0
Other,2,118.0,62.316976,12.735658,31.0,53.625,63.965777,71.663758,85.0
PD,1,4000.0,66.285216,11.162602,21.0,59.0,68.0,75.0,101.0
PD,2,2231.0,65.789243,11.215029,22.0,58.0,67.0,74.0,96.0


In [None]:
# EAS: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_EAS
    .groupby(['phenotype'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,9.0,65.0,7.348469,55.0,59.0,66.0,69.0,77.0
PD,55.0,55.721212,13.576975,18.0,44.0,59.0,66.266667,76.0


In [None]:
# EAS: descriptive statistics of AGE per phenotype
(
    gp2_table_one_EAS
    .groupby(['phenotype'])['AGE']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,875.0,63.243429,8.073685,50.0,57.0,63.0,69.0,86.0
Other,4.0,70.37577,4.984628,64.996578,67.807666,69.797399,72.365503,76.911704
PD,810.0,67.959768,8.323686,31.0,63.0,68.0,74.0,92.0


In [None]:
# EUR: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_EUR
    .groupby(['phenotype'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,478.0,63.189331,10.166653,3.0,58.0,64.0,70.0,88.0
PD,5450.0,57.436042,12.174531,7.0,49.0,59.0,66.395833,98.0


In [None]:
# EUR: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_EUR
    .groupby(['phenotype', 'SEX'])['AAO']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,257.0,63.663424,10.134859,18.0,58.0,65.0,71.0,86.0
Other,2,221.0,62.638009,10.198717,3.0,58.0,63.0,69.0,88.0
PD,1,3507.0,57.741926,12.121956,7.0,49.0,59.0,67.0,98.0
PD,2,1943.0,56.883939,12.252663,7.0,48.0,58.0,66.0,86.0


In [None]:
# EUR: descriptive statistics of AGE per phenotype
(
    gp2_table_one_EUR
    .groupby(['phenotype'])['AGE']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,2748.0,65.242758,12.245389,0.0,59.0,66.0,73.0,103.0
Other,306.0,66.671347,11.80902,31.0,59.225,68.35,76.91102,89.0
PD,6231.0,66.107634,11.18303,21.0,59.0,68.0,74.593634,101.0


In [None]:
# AJ: descriptive statistics of age at onset (AAO) per phenotype and sex
(
    gp2_table_one_AJ
    .groupby(['phenotype', 'SEX'])['AAO']
    .describe()
)


Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,0.0,,,,,,,
Control,2,0.0,,,,,,,
Other,1,8.0,64.0,9.211794,45.0,62.5,66.0,67.75,76.0
Other,2,7.0,69.714286,5.023753,64.0,65.5,70.0,73.0,77.0
PD,1,402.0,58.97471,11.879605,12.0,50.0,60.0,68.25,87.0
PD,2,146.0,57.034247,11.942894,25.0,48.0,58.0,67.0,83.0


In [None]:
# AJ: descriptive statistics of AGE per phenotype and sex
(
    gp2_table_one_AJ
    .groupby(['phenotype', 'SEX'])['AGE']
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,SEX,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Control,1,95.0,68.437895,11.045286,35.0,61.0,69.0,77.0,96.0
Control,2,107.0,66.368224,11.310845,21.0,60.25,66.0,73.5,95.0
Other,1,145.0,64.755512,8.536599,32.3,58.7,65.3,71.3,85.7
Other,2,143.0,64.302797,7.508005,37.8,59.4,64.5,69.85,81.3
PD,1,476.0,68.199068,10.177945,37.0,61.0,69.0,76.0,91.0
PD,2,173.0,66.76271,10.274429,40.0,60.0,68.0,75.0,86.0


In [None]:
# AJ: descriptive statistics of age at onset (AAO) per phenotype
(
    gp2_table_one_AJ
    .groupby(['phenotype'])['AAO']
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control,0.0,,,,,,,
Other,15.0,66.666667,7.870983,45.0,64.5,66.0,70.5,77.0
PD,548.0,58.457725,11.916541,12.0,49.0,59.0,68.0,87.0
