# UKBiobank

* **Project:** ADRD Genetic Diversity in Biobanks
* **Version:** Python/3.9 and 3.10
* **Last Updated:** 22-August-2024

## Notebook Overview
Fetch cohorts, ancestry info, remove related individuals, filter non-WGS, combine pVCFs, normalize VCFs, annotate, allele freqs, APOE genotyping, phenotype data, resilience/protective variants

# Initialize Notebook

## Import packages

In [1]:
import pyspark
import dxdata
import dxpy
import pandas as pd
from datetime import date, datetime
import os 
import numpy as np
import random
import shutil
import glob
import requests
from functools import reduce

sc = pyspark.SparkContext()
spark = pyspark.sql.SparkSession(sc)


## Initialize variables

In [2]:
gene_names = [
    "APOE",
    "APP",
    "GBA1",
    "GBA1LP",
    "GRN",
    "MAPT",
    "PSEN1",
    "PSEN2",
    "SNCA",
    "TREM2",
]


## Initialize helper functions

In [3]:
def fetch_gene_info_ensembl(gene_names, species='human', genome_version='GRCh38'):
    gene_info_dict = {}
    server = "https://rest.ensembl.org"
    
    for gene_name in gene_names:
        endpoint = f"/lookup/symbol/{species}/{gene_name}"
        headers = {"Content-Type": "application/json"}

        response = requests.get(server + endpoint, headers=headers, params={"expand": "1"})
        if not response.ok:
            print(f"Fetching failed for {gene_name}")
            continue

        data = response.json()
        gene_info = {
            "gene_name": data.get("display_name", gene_name),
            "chromosome": f"chr{data['seq_region_name']}",
            "start": int(data["start"]),
            "end": int(data["end"]),
            "genome_version": genome_version
        }

        gene_info_dict[gene_name] = gene_info

    return gene_info_dict


# Fetch cohorts

## Grab the dataset containing participant information

In [4]:
dispensed_dataset_id = dxpy.find_one_data_object(typename="Dataset", name="app*.dataset", folder="/", name_mode="glob")["id"]
dataset = dxdata.load_dataset(id=dispensed_dataset_id)
participant = dataset["participant"]


## Retrieve Cases

### AD + Non-AD Dementia Cohorts

#### Pull down the fields we need 
https://docs.google.com/document/d/1AebkQ-Nxrk63jhsDzZpn5QD-7EK4unsykHVj-saEm3U/edit?usp=sharing

In [5]:
field_names = [
    "eid", 
    "p31", 
    "p34", 
    "p22189", 
    "p22006", 
    "p21022", 
    "p42020", 
    "p42018", 
    "p22009_a1", 
    "p22009_a2", 
    "p22009_a3", 
    "p22009_a4", 
    "p22009_a5", 
    "p40000_i0",
    "p120042",
    "p26302_i2",
    "p26302_i3",
    "p21625_i2",
    "p21625_i3",
    "p62_i0",
    "p62_i1",
    "p62_i2",
    #"p32104",
    #"p32105",
    #"p32106",
    #"p32107",
    #"p32108",
    #"p32109",
]
cases_df = participant.retrieve_fields(names=field_names, coding_values="replace", engine=dxdata.connect())
cases_df = cases_df.toPandas()


#### Rename columns to be human-readable

In [6]:
cases_df = cases_df.rename(columns={
    'eid':'ID',
    'p31':'GENETIC_SEX', 
    'p34':'BIRTH_YEAR', 
    'p22189':'TOWNSEND', 
    'p22006':'ETHNICITY', 
    'p21022':'AGE_OF_RECRUIT',
    'p42020':'AD_DATE',
    'p42018':'DEM_DATE',
    'p22009_a1':'PC1',
    'p22009_a2':'PC2',
    'p22009_a3':'PC3',
    'p22009_a4':'PC4',
    'p22009_a5':'PC5',
    'p40000_i0':'DATE_OF_DEATH',
    "p120042":"COGNITIVE_SYMPTOMS_SEVERITY_PAST_WEEK",
    "p26302_i2":"SPECIFIC_COGNITIVE_ABILITY_2014",
    "p26302_i3":"SPECIFIC_COGNITIVE_ABILITY_2019",
    "p21625_i2":"TOUCHSCREEN_COGNITIVE_DURATION_2014",
    "p21625_i3":"TOUCHSCREEN_COGNITIVE_DURATION_2019",
    "p62_i0":"COGNITIVE_TEST_WILLINGNESS",
    "p62_i1":"COGNITIVE_TEST_WILLINGNESS",
    "p62_i2":"COGNITIVE_TEST_WILLINGNESS",
    #"p32104":"FORGETFULNESS_PAST_WEEK",
    #"p32105":"POOR_CONCENTRATION_PAST_WEEK",
    #"p32106":"TROUBLE_EXPRESSING_THOUGHTS_PAST_WEEK",
    #"p32107":"TROUBLE_FINDING_RIGHT_WORD_PAST_WEEK",
    #"p32108":"SLOW_THINKING_SPEED_PAST_WEEK",
    #"p32109":"TROUBLE_SOLVING_PROBLEMS_PAST_WEEK",
})


#### Find participants with AD and RD

In [7]:
# AD:
ad_df = cases_df[~cases_df[f'AD_DATE'].isna()]
ad_df = ad_df[[
    'ID', 'GENETIC_SEX', 'BIRTH_YEAR', 'TOWNSEND', 'ETHNICITY', 'AGE_OF_RECRUIT', 
    f'AD_DATE', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'DATE_OF_DEATH', 
    "COGNITIVE_SYMPTOMS_SEVERITY_PAST_WEEK", "SPECIFIC_COGNITIVE_ABILITY_2014",
    "SPECIFIC_COGNITIVE_ABILITY_2019", "TOUCHSCREEN_COGNITIVE_DURATION_2014",
    "TOUCHSCREEN_COGNITIVE_DURATION_2019", "COGNITIVE_TEST_WILLINGNESS",
    "COGNITIVE_TEST_WILLINGNESS", "COGNITIVE_TEST_WILLINGNESS",
]]
ad_df["ID"] = pd.to_numeric(ad_df["ID"])
    
# RD:
rd_df = cases_df[cases_df['AD_DATE'].isna() & ~cases_df['DEM_DATE'].isna()]
rd_df = rd_df[[
    'ID', 'GENETIC_SEX', 'BIRTH_YEAR', 'TOWNSEND', 'ETHNICITY', 'AGE_OF_RECRUIT', 
    'DEM_DATE', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'DATE_OF_DEATH', 
    "COGNITIVE_SYMPTOMS_SEVERITY_PAST_WEEK", "SPECIFIC_COGNITIVE_ABILITY_2014",
    "SPECIFIC_COGNITIVE_ABILITY_2019", "TOUCHSCREEN_COGNITIVE_DURATION_2014",
    "TOUCHSCREEN_COGNITIVE_DURATION_2019", "COGNITIVE_TEST_WILLINGNESS",
    "COGNITIVE_TEST_WILLINGNESS", "COGNITIVE_TEST_WILLINGNESS",
]]
rd_df["ID"] = pd.to_numeric(rd_df["ID"])


## Retrieve Controls

### Age 65+, no NDD, no parents with AD or PD

#### Retrieve field names of interest for each participant

In [8]:
# Date G10 first reported (huntington's disease),
# Date D11 first reported (hereditary ataxia), 
# Date G12 first reported (spinal muscular atrophy and related syndromes), 
# Date G13 first reported (systemic atrophies primarily affecting central nervous system in diseases classified elswhere), 
# Date G14 first reported (postpolio syndrome), 
# Date G20 first reported (parkinson's disease), 
# Date G21 first reported (secondary parkinsonism), 
# Date G22 first reported (parkinsonism in diseases classified elsewhere), 
# Date G23 first reported (other degenerative diseases of basal ganglia), 
# Date G24 first reported (dystonia), 
# Date G25 first reported (other extrapyramidal and movement disorders), 
# Date G30 first reported (alzheimer's disease), 
# Date G31 first reported (other degenerative diseases of nervous system, not elsewhere classified), 
# Date G32 first reported (other degenerative disorders of nervous system in diseases classified elsewhere), 
# Date G35 first reported (multiple sclerosis), 
# Date G36 first reported (other acute disseminated demyelination), 
# Date G37 first reported (other demyelinating diseases of central nervous system), 
# Date G45 first reported (transient cerebral ischaemic attacks and related syndromes), 
# Date G46 first reported (vascular syndromes of brain in cerebrovascular diseases), 
# Date G50 first reported (disorders of trigeminal nerve), 
# Date G52 first reported (disorders of other cranial nerves), 
# Date G53 first reported (cranial nerve disorders in diseases classified elsewhere), 
# Date G54 first reported (nerve root and plexus disorders), 
# Date G55 first reported (nerve root and plexus compressions in diseases classified elsewhere), 
# Date G56 first reported (mononeuropathies of upper limb), 
# Date G57 first reported (mononeuropathies of lower limb), 
# Date G58 first reported (other mononeuropathies), 
# Date G59 first reported (mononeuropathy in diseases classified elsewhere), 
# Date G60 first reported (hereditary and idiopathic neuropathy), 
# Date G61 first reported (inflammatory polyneuropathy), 
# Date G62 first reported (other polyneuropathies), 
# Date G63 first reported (polyneuropathy in diseases classified elsewhere), 
# Date G64 first reported (other disorders of peripheral nervous system), 
# Date G70 first reported (myasthenia gravis and other myoneural disorders), 
# Date G71 first reported (primary disorders of muscles), 
# Date G72 first reported (other myopathies), 
# Date G73 first reported (disorders of myoneural junction and muscle in diseases classified elsewhere), 
# Date G80 first reported (infantile cerebral palsy), 
# Date G81 first reported (hemiplegia), 
# Date G82 first reported (paraplegia and tetraplegia), 
# Date G83 first reported (other paralytic syndromes), 
# Date G90 first reported (disorders of autonomic nervous system),
# Date G91 first reported (hydrocephalus), 
# Date G92 first reported (toxic encephalopathy), 
# Date G93 first reported (other disorders of brain), 
# Date G94 first reported (other disorders of brain in diseases classified elsewhere), 
# Date G96 first reported (other disorders of central nervous system), 
# Date G97 first reported (postprocedural disorders of nervous system, not elsewhere classified),  
# Date G98 first reported (other disorders of nervous system, not elsewhere classified), 
# Date G99 first reported (other disorders of nervous system in diseases classified elsewhere), 
# Date of all cause dementia report, 
# Date of alzheimer's disease report, 
# Date of vascular dementia report, 
# Date of frontotemporal dementia report, 
# Date of motor neurone disease report, 
# Date of all cause parkinsonism report, 
# Date of parkinson's disease report, 
# Date of progressive supranuclear palsy report, 
# Date of multiple system atrophy report, 
# Genetic ethnic grouping, 
# Age at recruitment, 
# Townsend deprivation index at recruitment, 
# Sex, 
# Genetic Principal components | Array 1, 
# Genetic Principal components | Array 2, 
# Genetic Principal components | Array 3, 
# Genetic Principal components | Array 4, 
# Genetic Principal components | Array 5

field_names = ['eid', 'p131012', 'p131016', 'p131018', 'p131020', 'p131022', 'p131024', 'p131026', 'p131028', 'p131030', 'p131036', 'p131038', 'p131040', 'p131042',
               'p131046', 'p131056', 'p131058', 'p131062', 'p131066', 'p131068', 'p131070', 'p131074', 'p131076', 'p131078', 'p131080', 'p131082', 'p131084', 'p131086', 
               'p131088', 'p131090', 'p131092', 'p131094', 'p131096', 'p131098', 'p131100', 'p131102', 'p131104', 'p131106', 'p131108', 'p131110', 'p131112', 'p131114', 
               'p131116', 'p131120', 'p131122', 'p131124', 'p131126',  'p42018', 'p42020', 'p42022', 'p42024', 'p42028', 'p42030', 'p42032', 'p42034', 'p42036', 'p22006', 
               'p21022', 'p22189', 'p31', 'p22009_a1', 'p22009_a2', 'p22009_a3', 'p22009_a4', 'p22009_a5', 'p34', 'p40000_i0', 'p20110_i0', 'p20110_i1', 'p20110_i2', 
               'p20110_i3', 'p20107_i0', 'p20107_i1', 'p20107_i2', 'p20107_i3']
control_df = participant.retrieve_fields(names=field_names, coding_values="replace", engine=dxdata.connect())
control_df = control_df.toPandas()


#### Remove participants with any of the listed conditions

In [9]:
control_df = control_df[control_df['p131012'].isnull() & control_df['p131016'].isnull() & control_df['p131018'].isnull() & control_df['p131020'].isnull() 
                        & control_df['p131022'].isnull() & control_df['p131024'].isnull() & control_df['p131026'].isnull() & control_df['p131028'].isnull() 
                        & control_df['p131030'].isnull() & control_df['p131036'].isnull() & control_df['p131038'].isnull() & control_df['p131040'].isnull() 
                        & control_df['p131042'].isnull() & control_df['p131046'].isnull() & control_df['p131056'].isnull() & control_df['p131058'].isnull() 
                        & control_df['p131062'].isnull() & control_df['p131066'].isnull() & control_df['p131068'].isnull() & control_df['p131070'].isnull() 
                        & control_df['p131074'].isnull() & control_df['p131076'].isnull() & control_df['p131078'].isnull() & control_df['p131080'].isnull() 
                        & control_df['p131082'].isnull() & control_df['p131084'].isnull() & control_df['p131086'].isnull() & control_df['p131088'].isnull() 
                        & control_df['p131090'].isnull() & control_df['p131092'].isnull() & control_df['p131094'].isnull() & control_df['p131096'].isnull() 
                        & control_df['p131098'].isnull() & control_df['p131100'].isnull() & control_df['p131102'].isnull() & control_df['p131104'].isnull() 
                        & control_df['p131106'].isnull() & control_df['p131108'].isnull() & control_df['p131110'].isnull() & control_df['p131112'].isnull() 
                        & control_df['p131114'].isnull() & control_df['p131116'].isnull() & control_df['p131120'].isnull() & control_df['p131122'].isnull() 
                        & control_df['p131124'].isnull() & control_df['p131126'].isnull() & control_df['p42018'].isnull() & control_df['p42020'].isnull() 
                        & control_df['p42022'].isnull() & control_df['p42024'].isnull() & control_df['p42028'].isnull() & control_df['p42030'].isnull() 
                        & control_df['p42032'].isnull() & control_df['p42034'].isnull() & control_df['p42036'].isnull()]


#### Remove participants whose parents have AD or PD

In [10]:
# Columns defining all instances of parent illness
parent_illness_cols = ['p20110_i0', 'p20110_i1', 'p20110_i2', 'p20110_i3', 'p20107_i0', 'p20107_i1', 'p20107_i2', 'p20107_i3']

# Convert None values to empty lists
for illness_col in parent_illness_cols:
    control_df[illness_col] = control_df[illness_col].apply(lambda l: l if isinstance(l, list) else [])

# Define a condition as anybody who has never reported a parent as having AD or PD
condition = lambda participant: all(("Alzheimer's disease/dementia" not in illnesses and "Parkinson's disease" not in illnesses) for illnesses in participant[parent_illness_cols])

# Apply the condition to give all participants who have a parent who has/had AD or PD
control_df = control_df[control_df.apply(condition, axis=1)]


#### Remove participants below the defined age threshold

In [11]:
control_df = control_df[control_df['p21022'] >= 65]


#### Rename columns

In [12]:
control_df = control_df[['eid', 'p21022', 'p22189', 'p31', 'p22009_a1', 'p22009_a2', 'p22009_a3', 'p22009_a4', 'p22009_a5', 'p34', 'p22006', 'p40000_i0']]
control_df.rename(columns={
    'eid':'ID',
    'p21022':'AGE_OF_RECRUIT', 
    'p22189':'TOWNSEND', 
    'p31':'GENETIC_SEX', 
    'p22009_a1':'PC1', 
    'p22009_a2':'PC2', 
    'p22009_a3':'PC3', 
    'p22009_a4':'PC4', 
    'p22009_a5':'PC5', 
    'p34':'BIRTH_YEAR', 
    'p22006':'ETHNICITY', 
    'p40000_i0':'DATE_OF_DEATH',
}, inplace=True)
control_df["ID"] = pd.to_numeric(control_df["ID"])
control_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 59611 entries, 38 to 502267
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ID              59611 non-null  int64  
 1   AGE_OF_RECRUIT  59611 non-null  int64  
 2   TOWNSEND        59570 non-null  float64
 3   GENETIC_SEX     59611 non-null  object 
 4   PC1             57980 non-null  float64
 5   PC2             57980 non-null  float64
 6   PC3             57980 non-null  float64
 7   PC4             57980 non-null  float64
 8   PC5             57980 non-null  float64
 9   BIRTH_YEAR      59611 non-null  int64  
 10  ETHNICITY       50743 non-null  object 
 11  DATE_OF_DEATH   8986 non-null   object 
dtypes: float64(6), int64(3), object(3)
memory usage: 5.9+ MB


# Find ancestry information about each cohort

#### Read ancestry label mappings

In [13]:
ancestries = pd.read_csv("../../mnt/project/wgs_analysis/data/ukbb_imputed_genotypes_umap_linearsvc_predicted_labels.txt", sep="\t")


#### Add labels to cohort dataframes

In [14]:
control_df = control_df.merge(ancestries[["IID","label"]], left_on="ID", right_on="IID").drop("IID", axis=1)
ad_df = ad_df.merge(ancestries[["IID","label"]], left_on="ID", right_on="IID").drop("IID", axis=1)
rd_df = rd_df.merge(ancestries[["IID","label"]], left_on="ID", right_on="IID").drop("IID", axis=1)


#### Get list of IDs for each cohort

In [15]:
ad_ids = ad_df["ID"].tolist()
rd_ids = rd_df["ID"].tolist()
control_ids = control_df["ID"].tolist()


# Remove related individuals

#### Fetch relatedness data

In [16]:
full_related_df = pd.read_csv('../../mnt/project/Bulk/Genotype Results/Genotype calls/ukb_rel.dat', sep = ' ')
full_related_df = full_related_df[full_related_df['Kinship'] > 0.0884]


#### Define cohorts to maximize cases included

In [17]:
full_cohort_ids = ad_ids + rd_ids + control_ids
case_ids = ad_ids + rd_ids


#### Keep only rows with both participants in cohorts of interest

In [18]:
related_cohort_df = full_related_df.loc[full_related_df['ID1'].isin(full_cohort_ids) & full_related_df['ID2'].isin(full_cohort_ids)]
related_cohort_df = related_cohort_df.reset_index(drop=True)


#### Maximize the number of cases included

In [19]:
flipped_df = related_cohort_df[related_cohort_df["ID1"].isin(control_ids) & related_cohort_df["ID2"].isin(case_ids)].copy()
related_cohort_df = related_cohort_df[~(related_cohort_df["ID1"].isin(control_ids) & related_cohort_df["ID2"].isin(case_ids))]
flipped_df.rename(columns={"ID1":"ID2", "ID2":"ID1"}, inplace=True)
related_cohort_df = pd.concat([related_cohort_df, flipped_df])


#### Get set of participants to remove

In [20]:
ids_to_remove = set(related_cohort_df["ID2"])
print(f"Removing {len(ids_to_remove)} participants")


Removing 848 participants


#### Filter ID lists accordingly

In [21]:
ad_ids = [iid for iid in ad_ids if iid not in ids_to_remove]
rd_ids = [iid for iid in rd_ids if iid not in ids_to_remove]
control_ids = [iid for iid in control_ids if iid not in ids_to_remove]
total_ids = ad_ids + rd_ids + control_ids


#### Save the IDs of each participant to a txt file

In [22]:
with open('ad_ids_pre_VCF.txt', 'w') as file:
    for iid in ad_ids:
        file.write(f"{iid}\n")
        

In [23]:
with open('rd_ids_pre_VCF.txt', 'w') as file:
    for iid in rd_ids:
        file.write(f"{iid}\n")


In [24]:
with open('control_ids_pre_VCF.txt', 'w') as file:
    for iid in control_ids:
        file.write(f"{iid}\n")


In [25]:
with open('ids_pre_VCF.txt', 'w') as file:
    for iid in total_ids:
        file.write(f"{iid}\n")


# Filter out participants without WGS data

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr1/ukb24310_c1_b7761_v1.vcf.gz" \
-iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr1/ukb24310_c1_b7761_v1.vcf.gz.tbi" \
-icmd="bcftools query -l ukb24310_c1_b7761_v1.vcf.gz > pvcf_full_ids.txt" \
--instance-type mem1_hdd1_v2_x16 \
--destination "${projectid}:/wgs_analysis/results"


In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [26]:
!dx download /wgs_analysis/results/pvcf_full_ids.txt
!grep -Fwf pvcf_full_ids.txt ids_pre_VCF.txt > filtered_sample_ids.txt
!grep -Fwf pvcf_full_ids.txt ad_ids_pre_VCF.txt > filtered_ad_ids.txt
!grep -Fwf pvcf_full_ids.txt rd_ids_pre_VCF.txt > filtered_rd_ids.txt
!grep -Fwf pvcf_full_ids.txt control_ids_pre_VCF.txt > filtered_control_ids.txt


Error: path "/opt/notebooks/pvcf_full_ids.txt" already exists but
-f/--overwrite was not set


In [27]:
with open('filtered_ad_ids.txt', 'r') as file:
    ad_ids = [int(line.strip()) for line in file]
with open('filtered_rd_ids.txt', 'r') as file:
    rd_ids = [int(line.strip()) for line in file]
with open('filtered_control_ids.txt', 'r') as file:
    control_ids = [int(line.strip()) for line in file]


#### Get list of IDs for each cohort

In [28]:
ad_df = ad_df[ad_df["ID"].isin(ad_ids)]
rd_df = rd_df[rd_df["ID"].isin(rd_ids)]
control_df = control_df[control_df["ID"].isin(control_ids)]


In [29]:
print(f"Number of AD participants:       {len(ad_ids)}")
print(f"Number of RD participants:       {len(rd_ids)}")
print(f"Number of Control participants:  {len(control_ids)}")


Number of AD participants:       4225
Number of RD participants:       5306
Number of Control participants:  56741


In [30]:
!dx upload filtered_sample_ids.txt --path /wgs_analysis/results/sample_ids.txt
!dx upload filtered_ad_ids.txt --path /wgs_analysis/results/ad_ids.txt
!dx upload filtered_rd_ids.txt --path /wgs_analysis/results/rd_ids.txt
!dx upload filtered_control_ids.txt --path /wgs_analysis/results/control_ids.txt


ID                                file-GpkG0ZjJYBPb0z3gzjX4jBX7
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results
Name                              sample_ids.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Wed Aug  7 00:31:15 2024
Created by                        spencermg3
 via the job                      job-GpkFP58JYBPf9gjx0J49ZvPG
Last modified                     Wed Aug  7 00:31:16 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-GpkG0b8JYBPb0z3gzjX4jBX9
Class                             file
Project                  

# Save and print cohort statistics

In [31]:
control_df.to_csv("Controls.csv", header=True, index=False)
ad_df.to_csv(f'AD_cases.csv', header=True, index=False)
rd_df.to_csv(f'RD_cases.csv', header=True, index=False)


In [32]:
! dx upload Controls.csv --path /wgs_analysis/results/Controls.csv
! dx upload AD_cases.csv --path /wgs_analysis/results/AD_cases.csv
! dx upload RD_cases.csv --path /wgs_analysis/results/RD_cases.csv


ID                                file-GpkG0fjJYBPY6gzq1v1K0jxZ
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results
Name                              Controls.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Wed Aug  7 00:31:23 2024
Created by                        spencermg3
 via the job                      job-GpkFP58JYBPf9gjx0J49ZvPG
Last modified                     Wed Aug  7 00:31:24 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-GpkG0g8JYBPy975KV0z8F9V0
Class                             file
Project                    

In [33]:
print(control_df["label"].value_counts())
print(ad_df["label"].value_counts())
print(rd_df["label"].value_counts())
print("\n")

print(control_df["GENETIC_SEX"].value_counts())
print(ad_df["GENETIC_SEX"].value_counts())
print(rd_df["GENETIC_SEX"].value_counts())
print("\n")

print(f'{control_df[control_df["GENETIC_SEX"] == "Male"]["AGE_OF_RECRUIT"].mean()} +/- {control_df[control_df["GENETIC_SEX"] == "Male"]["AGE_OF_RECRUIT"].std()}')
print(f'{ad_df[ad_df["GENETIC_SEX"] == "Male"]["AGE_OF_RECRUIT"].mean()} +/- {ad_df[ad_df["GENETIC_SEX"] == "Male"]["AGE_OF_RECRUIT"].std()}')
print(f'{rd_df[rd_df["GENETIC_SEX"] == "Male"]["AGE_OF_RECRUIT"].mean()} +/- {rd_df[rd_df["GENETIC_SEX"] == "Male"]["AGE_OF_RECRUIT"].std()}')
print(f'{control_df[control_df["GENETIC_SEX"] == "Female"]["AGE_OF_RECRUIT"].mean()} +/- {control_df[control_df["GENETIC_SEX"] == "Female"]["AGE_OF_RECRUIT"].std()}')
print(f'{ad_df[ad_df["GENETIC_SEX"] == "Female"]["AGE_OF_RECRUIT"].mean()} +/- {ad_df[ad_df["GENETIC_SEX"] == "Female"]["AGE_OF_RECRUIT"].std()}')
print(f'{rd_df[rd_df["GENETIC_SEX"] == "Female"]["AGE_OF_RECRUIT"].mean()} +/- {rd_df[rd_df["GENETIC_SEX"] == "Female"]["AGE_OF_RECRUIT"].std()}')
print("\n")


EUR    54423
SAS      672
AFR      445
AJ       416
CAS      204
CAH      169
EAS      154
AAC      133
MDE       62
AMR       48
FIN       15
Name: label, dtype: int64
EUR    4051
AFR      52
SAS      49
AJ       27
AAC      12
CAS      10
MDE       9
CAH       8
EAS       7
Name: label, dtype: int64
EUR    5028
SAS      70
AFR      64
AJ       43
CAH      27
AAC      24
CAS      23
MDE      13
EAS      10
AMR       3
FIN       1
Name: label, dtype: int64


Female    28400
Male      28341
Name: GENETIC_SEX, dtype: int64
Female    2226
Male      1999
Name: GENETIC_SEX, dtype: int64
Male      2960
Female    2346
Name: GENETIC_SEX, dtype: int64


66.88828905119792 +/- 1.4783824909537675
64.68934467233616 +/- 4.394240647897201
63.82364864864865 +/- 5.075567668349701
66.88031690140845 +/- 1.4778455623561713
64.59883198562444 +/- 4.097286411577796
63.731031543052005 +/- 5.245986623303864




# Fetch pVCF chunks for each gene of interest

## GBA (chr1: 155,225,002 - 155,254,507) (b: 7761 - 7763)

In [None]:
%%bash
for b_val in {7761..7763};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr1/ukb24310_c1_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr1/ukb24310_c1_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c1_b${b_val}_v1.vcf.gz -o GBA_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done

## PSEN2 (chr1: 226,860,648 - 226,905,565) (b: 11343 - 11346)

In [None]:
%%bash
for b_val in {11343..11346};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr1/ukb24310_c1_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr1/ukb24310_c1_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c1_b${b_val}_v1.vcf.gz -o PSEN2_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## SNCA (chr4: 89,716,632 - 89,848,254) (b: 4485 - 4493)

In [None]:
%%bash
for b_val in {4485..4493};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr4/ukb24310_c4_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr4/ukb24310_c4_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c4_b${b_val}_v1.vcf.gz -o SNCA_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## TREM2 (chr6: 41,148,607 - 41,173,076) (b: 2057 - 2059)

In [None]:
%%bash
for b_val in {2057..2059};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr6/ukb24310_c6_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr6/ukb24310_c6_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c6_b${b_val}_v1.vcf.gz -o TREM2_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## PSEN1 (chr14: 73,126,590 - 73,229,275) (b: 3656 - 3662)

In [None]:
%%bash
for b_val in {3656..3662};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr14/ukb24310_c14_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr14/ukb24310_c14_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c14_b${b_val}_v1.vcf.gz -o PSEN1_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## GRN (chr17: 44,335,332 - 44,362,797) (b: 2216 - 2219)

In [None]:
%%bash
for b_val in {2216..2219};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr17/ukb24310_c17_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr17/ukb24310_c17_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c17_b${b_val}_v1.vcf.gz -o GRN_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## MAPT (chr17: 45,884,685 - 46,035,185) (b: 2294 - 2302)

In [None]:
%%bash
for b_val in {2294..2302};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr17/ukb24310_c17_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr17/ukb24310_c17_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c17_b${b_val}_v1.vcf.gz -o MAPT_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## APOE (chr19: 44,895,840 - 44,919,238) (b: 2244 - 2246)

In [None]:
%%bash
for b_val in {2244..2246};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr19/ukb24310_c19_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr19/ukb24310_c19_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c19_b${b_val}_v1.vcf.gz -o APOE_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


## APP (chr21: 25,871,670 - 26,180,987) (b: 1293 - 1310)

In [None]:
%%bash
for b_val in {1293..1310};
do
    dx run swiss-army-knife \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr21/ukb24310_c21_b${b_val}_v1.vcf.gz" \
    -iin="/Bulk/DRAGEN\ WGS/DRAGEN\ population\ level\ WGS\ variants,\ pVCF\ format\ [500k\ release]/chr21/ukb24310_c21_b${b_val}_v1.vcf.gz.tbi" \
    -iin="/wgs_analysis/results/sample_ids.txt" \
    -icmd="bcftools view -O z -S sample_ids.txt ukb24310_c21_b${b_val}_v1.vcf.gz -o APP_b${b_val}.vcf.gz" \
    --instance-type mem2_ssd1_v2_x32 \
    --destination "${projectid}:/wgs_analysis/results/1_pvcf_chunks"
done


# Combine pVCF chunks into one file for each gene

In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

## GBA

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/GBA_b7761.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/GBA_b7762.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/GBA_b7763.vcf.gz" \
-icmd="bcftools concat -O z GBA_b7761.vcf.gz GBA_b7762.vcf.gz GBA_b7763.vcf.gz -o GBA.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## PSEN2

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN2_b11343.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN2_b11344.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN2_b11345.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN2_b11346.vcf.gz" \
-icmd="bcftools concat -O z PSEN2_b11343.vcf.gz PSEN2_b11344.vcf.gz PSEN2_b11345.vcf.gz PSEN2_b11346.vcf.gz -o PSEN2.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## SNCA

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4485.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4486.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4487.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4488.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4489.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4490.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4491.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4492.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/SNCA_b4493.vcf.gz" \
-icmd="bcftools concat -O z SNCA_b4485.vcf.gz SNCA_b4486.vcf.gz SNCA_b4487.vcf.gz SNCA_b4488.vcf.gz SNCA_b4489.vcf.gz SNCA_b4490.vcf.gz SNCA_b4491.vcf.gz SNCA_b4492.vcf.gz SNCA_b4493.vcf.gz -o SNCA.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## TREM2

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/TREM2_b2057.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/TREM2_b2058.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/TREM2_b2059.vcf.gz" \
-icmd="bcftools concat -O z TREM2_b2057.vcf.gz TREM2_b2058.vcf.gz TREM2_b2059.vcf.gz -o TREM2.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## PSEN1

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3656.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3657.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3658.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3659.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3660.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3661.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/PSEN1_b3662.vcf.gz" \
-icmd="bcftools concat -O z PSEN1_b3656.vcf.gz PSEN1_b3657.vcf.gz PSEN1_b3658.vcf.gz PSEN1_b3659.vcf.gz PSEN1_b3660.vcf.gz PSEN1_b3661.vcf.gz PSEN1_b3662.vcf.gz -o PSEN1.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## GRN

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/GRN_b2216.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/GRN_b2217.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/GRN_b2218.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/GRN_b2219.vcf.gz" \
-icmd="bcftools concat -O z GRN_b2216.vcf.gz GRN_b2217.vcf.gz GRN_b2218.vcf.gz GRN_b2219.vcf.gz -o GRN.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## MAPT

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2294.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2295.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2296.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2297.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2298.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2299.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2300.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2301.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/MAPT_b2302.vcf.gz" \
-icmd="bcftools concat -O z MAPT_b2294.vcf.gz MAPT_b2295.vcf.gz MAPT_b2296.vcf.gz MAPT_b2297.vcf.gz MAPT_b2298.vcf.gz MAPT_b2299.vcf.gz MAPT_b2300.vcf.gz MAPT_b2301.vcf.gz MAPT_b2302.vcf.gz -o MAPT.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## APOE

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/APOE_b2244.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APOE_b2245.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APOE_b2246.vcf.gz" \
-icmd="bcftools concat -O z APOE_b2244.vcf.gz APOE_b2245.vcf.gz APOE_b2246.vcf.gz -o APOE.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


## APP

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1293.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1294.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1295.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1296.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1297.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1298.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1299.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1300.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1301.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1302.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1303.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1304.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1305.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1306.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1307.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1308.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1309.vcf.gz" \
-iin="/wgs_analysis/results/1_pvcf_chunks/APP_b1310.vcf.gz" \
-icmd="bcftools concat -O z APP_b1293.vcf.gz APP_b1294.vcf.gz APP_b1295.vcf.gz APP_b1296.vcf.gz APP_b1297.vcf.gz APP_b1298.vcf.gz APP_b1299.vcf.gz APP_b1300.vcf.gz APP_b1301.vcf.gz APP_b1302.vcf.gz APP_b1303.vcf.gz APP_b1304.vcf.gz APP_b1305.vcf.gz APP_b1306.vcf.gz APP_b1307.vcf.gz APP_b1308.vcf.gz APP_b1309.vcf.gz APP_b1310.vcf.gz -o APP.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/2_pvcf_genes"


# Concatenate genes together

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/2_pvcf_genes/GBA.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/PSEN2.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/SNCA.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/TREM2.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/PSEN1.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/GRN.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/MAPT.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/APOE.vcf.gz" \
-iin="/wgs_analysis/results/2_pvcf_genes/APP.vcf.gz" \
-icmd="bcftools concat -O z GBA.vcf.gz PSEN2.vcf.gz SNCA.vcf.gz TREM2.vcf.gz PSEN1.vcf.gz GRN.vcf.gz MAPT.vcf.gz APOE.vcf.gz APP.vcf.gz -o combined.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/3_pvcf_combined"


# Normalize VCFs before annotation

#### Split multiallelic sites into biallelic records

In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/3_pvcf_combined/combined.vcf.gz" \
-icmd="bcftools norm -m-both -o biallelic.vcf combined.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/4_normalized"


#### Left-align and normalize

In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/4_normalized/biallelic.vcf" \
-iin="/wgs_analysis/data/Homo_sapiens_assembly38.fasta" \
-icmd="bcftools norm -f Homo_sapiens_assembly38.fasta -o normalized.vcf biallelic.vcf" \
--instance-type mem2_ssd1_v2_x64 \
--destination "${projectid}:/wgs_analysis/results/4_normalized"


# Annotation

## Filter VCFs to only include a few participants

#### Get subset of participant IDs

In [None]:
! dx download wgs_analysis/results/ad_ids.txt --overwrite
! dx download wgs_analysis/results/rd_ids.txt --overwrite
! dx download wgs_analysis/results/control_ids.txt --overwrite


In [None]:
ad_ids = "ad_ids.txt"
rd_ids = "rd_ids.txt"
control_ids = "control_ids.txt"
output_file = "annot_ids.txt"

with open(ad_ids, "r") as f1, open(rd_ids, "r") as f2, open(control_ids, "r") as f3, open(output_file, "w") as out:
    out.write(f1.readline().strip() + "\n")
    out.write(f2.readline().strip() + "\n")
    out.write(f3.readline().strip() + "\n")


In [None]:
! dx upload annot_ids.txt --path wgs_analysis/results/annot_ids.txt


#### Get filtered VCFs

In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
-iin="/wgs_analysis/results/annot_ids.txt" \
-icmd="bcftools view -O z -S annot_ids.txt normalized.vcf -o filtered.vcf.gz" \
--instance-type mem2_ssd1_v2_x32 \
--destination "${projectid}:/wgs_analysis/results/5_annotated"


## Fetch Annovar libraries and reference genome data

In [None]:
%%capture

! wget http://www.openbioinformatics.org/annovar/download/0wgxR2rIVP/annovar.latest.tar.gz
! tar -xzf annovar.latest.tar.gz
! chmod a+x ./annovar/*.pl
! annovar/annotate_variation.pl -downdb -buildver hg38 -webfrom annovar refGene annovar/humandb/
! annovar/annotate_variation.pl -downdb -buildver hg38 -webfrom annovar avsnp150 annovar/humandb/
! annovar/annotate_variation.pl -downdb -buildver hg38 -webfrom annovar clinvar_20221231 annovar/humandb/
! annovar/annotate_variation.pl -downdb -buildver hg38 -webfrom annovar dbnsfp30a annovar/humandb/
! annovar/annotate_variation.pl -downdb -buildver hg38 -webfrom annovar gnomad40_genome annovar/humandb/
! dx download wgs_analysis/data/Homo_sapiens_assembly38.fasta --overwrite
! dx download wgs_analysis/data/Homo_sapiens_assembly38.fasta.fai --overwrite
! dx download wgs_analysis/data/Homo_sapiens_assembly38.dict --overwrite


## Perform annotation

In [None]:
%%bash

annovar/table_annovar.pl ../../mnt/project/wgs_analysis/results/5_annotated/filtered.vcf.gz annovar/humandb/ \
--buildver hg38 \
--thread 96 \
--remove \
--protocol refGene,avsnp150,clinvar_20221231,dbnsfp30a,gnomad40_genome \
--operation g,f,f,f,f \
--nopolish \
--nastring . \
--out var_calling.annovar \
--vcfinput


In [None]:
annot_df = pd.read_csv(f"var_calling.annovar.hg38_multianno.txt", sep = '\t')
annot_df.to_csv(f"annotated.csv", index=False)
! dx upload annotated.csv --path wgs_analysis/results/5_annotated/annotated.csv


# Calculate allele frequencies

## Subset IDs for all cohort-ancestry combinations

In [None]:
ad_df = pd.read_csv("../../mnt/project/wgs_analysis/results/AD_cases.csv")
rd_df = pd.read_csv("../../mnt/project/wgs_analysis/results/RD_cases.csv")
control_df = pd.read_csv("../../mnt/project/wgs_analysis/results/Controls.csv")


In [None]:
ad_df = ad_df[["ID","label"]]
rd_df = rd_df[["ID","label"]]
control_df = control_df[["ID","label"]]


In [None]:
for ancestry in ad_df["label"].unique():
    ids = ad_df[ad_df["label"] == ancestry]["ID"]
    with open(f"AD_{ancestry}.txt", 'w') as file:
        for iid in ids:
            file.write(f"{iid}\n")
    !dx upload AD_{ancestry}.txt --path wgs_analysis/data/ID_Files/AD_{ancestry}.txt

for ancestry in rd_df["label"].unique():
    ids = rd_df[rd_df["label"] == ancestry]["ID"]
    with open(f"RD_{ancestry}.txt", 'w') as file:
        for iid in ids:
            file.write(f"{iid}\n")
    !dx upload RD_{ancestry}.txt --path wgs_analysis/data/ID_Files/RD_{ancestry}.txt

for ancestry in control_df["label"].unique():
    ids = control_df[control_df["label"] == ancestry]["ID"]
    with open(f"Control_{ancestry}.txt", 'w') as file:
        for iid in ids:
            file.write(f"{iid}\n")
    !dx upload Control_{ancestry}.txt --path wgs_analysis/data/ID_Files/Control_{ancestry}.txt


## Get frequencies for each gene-cohort-ancestry combination

In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
        -iin="/wgs_analysis/data/ID_Files/${cohort}_${ancestry}.txt" \
        -icmd="plink2 --vcf normalized.vcf --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --keep ${cohort}_${ancestry}.txt --freq --out ${cohort}_${ancestry}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/6_frequencies"
    done
done


## Get zygosity info

In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
        -iin="/wgs_analysis/data/ID_Files/${cohort}_${ancestry}.txt" \
        -iin="/wgs_analysis/results/6_frequencies/${cohort}_${ancestry}.afreq" \
        -icmd="plink2 --vcf normalized.vcf --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --keep ${cohort}_${ancestry}.txt --read-freq ${cohort}_${ancestry}.afreq --export A --het --out ${cohort}_${ancestry}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/6_frequencies"
    done
done


## Find homozygous/heterozygous counts

In [None]:
for cohort in ["AD","RD","Control"]:
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        ! dx download /wgs_analysis/results/6_frequencies/{cohort}_{ancestry}.raw


In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        output_file="counts_${cohort}_${ancestry}.tsv"
        temp_output="temp_${cohort}_${ancestry}.tsv"

        > $output_file

        input_file="cut.raw"

        grep "FID" ${cohort}_${ancestry}.raw | cut -d$'\t' -f7- > header.tmp

        cut -d$'\t' -f7- "${cohort}_${ancestry}.raw" > $input_file

        declare -A count_2
        declare -A count_1
        declare -A count_0

        awk -F'\t' '
        {
            for (i=1; i<=NF; i++) {
                if ($i == 2) count_2[i]++;
                else if ($i == 1) count_1[i]++;
                else if ($i == 0) count_0[i]++;
            }
        }
        END {
            for (i=1; i<=NF; i++) {
                printf("%d", count_2[i]);
                if (i<NF) printf("\t");
            }
            print "";

            for (i=1; i<=NF; i++) {
                printf("%d", count_1[i]);
                if (i<NF) printf("\t");
            }
            print "";

            for (i=1; i<=NF; i++) {
                printf("%d", count_0[i]);
                if (i<NF) printf("\t");
            }
            print "";
        }' $input_file > count.tmp

        cat header.tmp count.tmp > $output_file

        rm $input_file count.tmp header.tmp

        echo "Counts have been appended to $output_file"
    done
done


In [None]:
for cohort in ["AD","RD","Control"]:
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        ! dx upload counts_{cohort}_{ancestry}.tsv --path wgs_analysis/results/7_zygosity/counts_{cohort}_{ancestry}.tsv


In [None]:
for cohort in ["AD","RD","Control"]:
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        ! dx download wgs_analysis/results/7_zygosity/counts_{cohort}_{ancestry}.tsv


In [None]:
for cohort in ["AD","RD","Control"]:
    zyg_cohort = []
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        try:
            df = pd.read_csv(f"counts_{cohort}_{ancestry}.tsv", sep="\t")
            variant_ids = df.columns.values
            rename_dict = {}
            for vid in variant_ids:
                rename_dict[vid] = vid.split("_")[0]
            df.rename(rename_dict, axis=1, inplace=True)
            zyg_cohort.append(df)
        except:
            print(f"No data found at counts_{cohort}_{ancestry}.tsv")
    result_zyg = reduce(lambda x, y: x + y, zyg_cohort)
    result_zyg.to_csv(f"{cohort}_Final.csv", index=False)
    ! dx upload {cohort}_Final.csv --path wgs_analysis/results/7_zygosity/{cohort}_Final.csv


In [None]:
ad_zyg = pd.read_csv(f"AD_Final.csv")
rd_zyg = pd.read_csv(f"RD_Final.csv")
control_zyg = pd.read_csv(f"Control_Final.csv")

ad_zyg = ad_zyg.T
rd_zyg = rd_zyg.T
control_zyg = control_zyg.T

ad_zyg = ad_zyg.reset_index()
rd_zyg = rd_zyg.reset_index()
control_zyg = control_zyg.reset_index()

ad_zyg.rename(columns={'index': 'ID', 0: 'AD_Homozygous_Ref', 1: 'AD_Heterozygous', 2: 'AD_Homozygous_Alt'}, inplace=True)
rd_zyg.rename(columns={'index': 'ID', 0: 'RD_Homozygous_Ref', 1: 'RD_Heterozygous', 2: 'RD_Homozygous_Alt'}, inplace=True)
control_zyg.rename(columns={'index': 'ID', 0: 'Control_Homozygous_Ref', 1: 'Control_Heterozygous', 2: 'Control_Homozygous_Alt'}, inplace=True)

final_zyg = ad_zyg.merge(rd_zyg, on="ID")
final_zyg = final_zyg.merge(control_zyg, on="ID")

display(ad_zyg.head())
display(final_zyg.head())

final_zyg.to_csv("final_zygosity.csv", index=False)
! dx upload final_zygosity.csv --path wgs_analysis/results/7_zygosity/final_zygosity.csv


# Merge annotations with allele frequency outputs

## Merge frequencies for all ancestries across each cohort

In [None]:
for cohort in ["AD","RD","Control"]:
    freq_files = glob.glob(f"../../mnt/project/wgs_analysis/results/6_frequencies/{cohort}*.afreq")

    df = pd.read_csv(freq_files[0], sep="\t")
    df = df[["ID","ALT_FREQS","OBS_CT"]]

    ancestry = freq_files[0].split("_")[-1].split(".")[0]
    df.rename({"ALT_FREQS":f"ALT_FREQS_{ancestry}_{cohort}", "OBS_CT":f"OBS_CT_{ancestry}_{cohort}"}, inplace=True, axis=1)

    for i in range(1, len(freq_files)):
        df_merge = pd.read_csv(freq_files[i], sep="\t")
        df_merge = df_merge[["ID","ALT_FREQS","OBS_CT"]]

        ancestry = freq_files[i].split("_")[-1].split(".")[0]
        df_merge.rename({"ALT_FREQS":f"ALT_FREQS_{ancestry}_{cohort}", "OBS_CT":f"OBS_CT_{ancestry}_{cohort}"}, inplace=True, axis=1)

        df = df.merge(df_merge, on="ID")

    df.to_csv(f"{cohort}.csv", index=False)


## Convert back to vcf format for external CADD calculation

In [None]:
ids_list = list(pd.read_csv("AD.csv")["ID"])
df_for_cadd = pd.DataFrame({"ID":ids_list})


In [None]:
df_for_cadd[["#CHROM","POS","REF","ALT"]] = df_for_cadd["ID"].str.split(':', expand=True)
df_for_cadd.drop("ID", axis=1, inplace=True)
df_for_cadd.insert(2, 'ID', '.')
df_for_cadd.to_csv("for_CADD.vcf", index=False, sep="\t")


In [None]:
! gzip -c for_CADD.vcf > for_CADD.vcf.gz


In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
---------------- DOWNLOAD OUTPUT FILE AND PASS THROUGH EXTERNAL CADD SCORE CALCULATOR ----------------
------------------------------ (https://cadd.gs.washington.edu/upload) -------------------------------
--------------------------- RENAME RESULT TO "CADD.tsv.gz" AND UPLOAD HERE ---------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [None]:
! gzip -d CADD.tsv.gz
! grep -v "##" CADD.tsv > CADD_prelim.tsv
cadd_df = pd.read_csv("CADD_prelim.tsv", sep="\t")
cadd_df["#Chrom"] = "chr" + cadd_df["#Chrom"].astype(str)
cadd_df["ID"] = cadd_df["#Chrom"] + ":" + cadd_df["Pos"].astype(str) + ":" + cadd_df["Ref"] + ":" + cadd_df["Alt"]
cadd_df = cadd_df[["ID","PHRED"]]
cadd_df.rename({"PHRED":"CADD"}, axis=1, inplace=True)
cadd_df.to_csv("CADD_final.csv", index=False)


## Merge CADD scores, frequency files, and annotations

In [None]:
annot_df = pd.read_csv(f"../../mnt/project/wgs_analysis/results/5_annotated/annotated.csv", low_memory=False)
annot_df.insert(1, "ID", annot_df[["Chr","Start","Ref","Alt"]].astype(str).agg(':'.join, axis=1))
filtered_columns = [col for col in annot_df.columns if "Otherinfo" not in col]
annot_df = annot_df[filtered_columns]

ad_freq_df = pd.read_csv("AD.csv")
ad_freq_df.drop(columns="ID", inplace=True)
rd_freq_df = pd.read_csv("RD.csv")
rd_freq_df.drop(columns="ID", inplace=True)
control_freq_df = pd.read_csv("Control.csv")
display(control_freq_df.head())
control_freq_df = control_freq_df.merge(final_zyg, on="ID")
display(control_freq_df.head())
control_freq_df = control_freq_df.merge(cadd_df, on="ID", how="left")
display(control_freq_df.head())
control_freq_df.drop(columns="ID", inplace=True)

merged_df = pd.concat([annot_df,ad_freq_df], axis=1)
merged_df = pd.concat([merged_df,rd_freq_df], axis=1)
merged_df = pd.concat([merged_df,control_freq_df], axis=1)

merged_df.to_csv("merged.csv", index=False)
!dx upload merged.csv --path wgs_analysis/results/8_merged/merged.csv


In [None]:
gene_info_dict = fetch_gene_info_ensembl(gene_names=gene_names, species='human', genome_version='GRCh38')
for gene in gene_info_dict:
    print(gene_info_dict[gene])


In [None]:
criteria_list = []
for gene in ["APOE", "APP", "GBA", "GRN", "MAPT", "PSEN1", "PSEN2", "SNCA", "TREM2"]:
    if gene == "GBA":
        gene = "GBA1"
    chrnum = gene_info_dict[gene]["chromosome"]
    start = gene_info_dict[gene]["start"]
    end = gene_info_dict[gene]["end"]
    criteria_list.append((merged_df["Chr"] == chrnum) 
                         & (merged_df["Start"] > start) 
                         & (merged_df["Start"] < end)
                         & (merged_df["Func.refGene"].isin(["exonic", "splicing"]))
                        )

filter_criteria = criteria_list[0]
for criterion in criteria_list[1:]:
    filter_criteria |= criterion

filtered_df = merged_df[filter_criteria]
filtered_df = filtered_df[filtered_df["Func.refGene"].isin(["exonic", "splicing"])]

print(filtered_df.shape)
display(filtered_df)


In [None]:
ancestries = ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]
ctrl_col_names = [f"ALT_FREQS_{ancestry}_Control" for ancestry in ancestries if f"ALT_FREQS_{ancestry}_Control" in filtered_df.columns.values]
ad_col_names = [f"ALT_FREQS_{ancestry}_AD" for ancestry in ancestries if f"ALT_FREQS_{ancestry}_AD" in filtered_df.columns.values]
rd_col_names = [f"ALT_FREQS_{ancestry}_RD" for ancestry in ancestries if f"ALT_FREQS_{ancestry}_RD" in filtered_df.columns.values]

final_filtered_df = filtered_df[(filtered_df[ad_col_names + rd_col_names]>0).any(axis=1)]

final_filtered_df["Disease"] = ""
final_filtered_df["Disease"][(final_filtered_df[ad_col_names]>0).any(axis=1)] = "AD"
final_filtered_df["Disease"][(final_filtered_df[rd_col_names]>0).any(axis=1)] = "RD"
final_filtered_df["Disease"][(final_filtered_df[ad_col_names]>0).any(axis=1) & (final_filtered_df[rd_col_names]>0).any(axis=1)] = "Both"
display(final_filtered_df.head())
final_filtered_df.to_csv("final_filtered.csv", index=False)


In [None]:
! dx upload final_filtered.csv --path wgs_analysis/results/8_merged/final_filtered.csv


# Remove variants expressed in controls

In [None]:
final_filtered_df = final_filtered_df[(final_filtered_df["Control_Heterozygous"] == 0) & (final_filtered_df["Control_Homozygous_Alt"] == 0)]
final_filtered_df.to_csv("final_filtered_onlycases.csv", index=False)


In [None]:
! dx upload final_filtered_onlycases.csv --path wgs_analysis/results/8_merged/final_filtered_onlycases.csv


# APOE Genotyping

## Calculate APOE Genotypes

In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
        -iin="/wgs_analysis/data/ID_Files/${cohort}_${ancestry}.txt" \
        -icmd="plink2 --vcf normalized.vcf --chr 19 --from-bp 44905791 --to-bp 44909393 --keep ${cohort}_${ancestry}.txt --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --make-bed --out ${cohort}_${ancestry}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/9_apoe_genotyping"
    done
done


In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/9_apoe_genotyping/${cohort}_${ancestry}.bim" \
        -iin="/wgs_analysis/results/9_apoe_genotyping/${cohort}_${ancestry}.bed" \
        -iin="/wgs_analysis/results/9_apoe_genotyping/${cohort}_${ancestry}.fam" \
        -iin="/wgs_analysis/data/apoe_variants.txt" \
        -icmd="plink --bfile ${cohort}_${ancestry} --extract apoe_variants.txt --make-bed --out apoe_snps_${cohort}_${ancestry}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/9_apoe_genotyping"
    done
done


In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/9_apoe_genotyping/apoe_snps_${cohort}_${ancestry}.bim" \
        -iin="/wgs_analysis/results/9_apoe_genotyping/apoe_snps_${cohort}_${ancestry}.bed" \
        -iin="/wgs_analysis/results/9_apoe_genotyping/apoe_snps_${cohort}_${ancestry}.fam" \
        -iin="/wgs_analysis/data/apoe_variants.txt" \
        -icmd="plink --bfile apoe_snps_${cohort}_${ancestry} --recode compound-genotypes --out apoe_snps_${cohort}_${ancestry}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/9_apoe_genotyping"
    done
done


In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [None]:
! dx download wgs_analysis/data/APOE_genotypes_PLINK_ped.py
for cohort in ["AD","RD","Control"]:
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        ! dx download wgs_analysis/results/9_apoe_genotyping/apoe_snps_{cohort}_{ancestry}.ped
        

In [None]:
for cohort in ["AD","RD","Control"]:
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        if os.path.exists(f"apoe_snps_{cohort}_{ancestry}.ped"):
            ! python APOE_genotypes_PLINK_ped.py -i apoe_snps_{cohort}_{ancestry}.ped -o apoe_final_{cohort}_{ancestry}
            ! dx upload apoe_final_{cohort}_{ancestry}.APOE_GENOTYPES.csv --path wgs_analysis/results/9_apoe_genotyping/apoe_final_{cohort}_{ancestry}.APOE_GENOTYPES.csv
        

## Combine these results into a table

In [None]:
for cohort in ["AD","RD","Control"]:
    cohort_counts = []
    included_ancestries = []
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        if os.path.exists(f"apoe_final_{cohort}_{ancestry}.APOE_GENOTYPES.csv"):
            apoe_geno = pd.read_csv(f"apoe_final_{cohort}_{ancestry}.APOE_GENOTYPES.csv")
            counts = apoe_geno['APOE_GENOTYPE'].value_counts()
            counts = counts.reindex(["e1/e1", "e1/e2", "e1/e4", "e2/e2", "e2/e3", "e2/e4 or e1/e3", "e3/e3", "e3/e4", "e4/e4"], fill_value=0)
            counts = counts.rename(ancestry)
            counts = pd.concat([counts, pd.Series([counts.sum()], index=['total'])])
            cohort_counts.append(counts)
            included_ancestries.append(ancestry)
    cohort_counts = pd.concat(cohort_counts, axis=1)
    cohort_counts.columns = included_ancestries
    display(cohort_counts)
    cohort_percentages = cohort_counts.div(cohort_counts.loc["total"], axis=1) * 100
    cohort_percentages.loc["total"] = cohort_counts.loc["total"]
    display(cohort_percentages)
    cohort_combined = cohort_counts.applymap(str) + ' (' + cohort_percentages.applymap(lambda x: f'{x:.2f}%') + ')'
    cohort_combined.loc["total"] = cohort_counts.loc["total"]
    display(cohort_combined)
    cohort_counts.to_csv(f"{cohort}_apoe_genotype_counts.csv")
    cohort_percentages.to_csv(f"{cohort}_apoe_genotype_percentages.csv")
    cohort_combined.to_csv(f"{cohort}_apoe_genotype_combined.csv")
    ! dx upload {cohort}_apoe_genotype_counts.csv --path wgs_analysis/results/9_apoe_genotyping/{cohort}_apoe_genotype_counts.csv
    ! dx upload {cohort}_apoe_genotype_percentages.csv --path wgs_analysis/results/9_apoe_genotyping/{cohort}_apoe_genotype_percentages.csv
    ! dx upload {cohort}_apoe_genotype_combined.csv --path wgs_analysis/results/9_apoe_genotyping/{cohort}_apoe_genotype_combined.csv
    

# Find number of controls in pathogenic variants

In [None]:
! dx download wgs_analysis/results/7_zygosity/counts_Control_AAC.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_AFR.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_AJ.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_AMR.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_CAH.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_CAS.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_EAS.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_EUR.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_FIN.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_MDE.tsv
! dx download wgs_analysis/results/7_zygosity/counts_Control_SAS.tsv


In [None]:
control_zyg = []
for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
    df = pd.read_csv(f"counts_Control_{ancestry}.tsv", sep="\t")
    variant_ids = df.columns.values
    rename_dict = {}
    for vid in variant_ids:
        rename_dict[vid] = vid.split("_")[0]
    df.rename(rename_dict, axis=1, inplace=True)
    df = df.T
    df = df.reset_index()
    df.rename(columns={'index': 'ID', 0: f'{ancestry}_Homozygous_Ref', 1: f'{ancestry}_Heterozygous', 2: f'{ancestry}_Homozygous_Alt'}, inplace=True)
    df[ancestry] = df[f'{ancestry}_Heterozygous'] + df[f'{ancestry}_Homozygous_Alt']
    df = df[["ID",ancestry]]
    control_zyg.append(df)


In [None]:
control_zyg_merged = reduce(lambda left, right: pd.merge(left, right, on="ID"), control_zyg)


In [None]:
pathogenic_vars = [
    "chr1:155235196",
    "chr1:155235217",
    "chr1:155235252",
    "chr1:155235727",
    "chr1:155235790",
    "chr1:155235823",
    "chr1:155235843",
    "chr1:155236277",
    "chr1:155237453",
    "chr1:155238174",
    "chr1:155238214",
    "chr1:155238215",
    "chr1:155238260",
    "chr1:155238630",
    "chr1:155240629",
    "chr17:44350262",
    "chr17:44350800",
    "chr17:44351409",
    "chr4:89828156",
]


In [None]:
filtered_control_var_counts = control_zyg_merged[control_zyg_merged['ID'].str.startswith(tuple(pathogenic_vars))]
filtered_control_var_counts.to_csv("filtered_control_var_counts.csv", index=False)


In [None]:
! dx upload filtered_control_var_counts.csv --path wgs_analysis/results/10_pathogenic_variants/filtered_control_var_counts.csv


# Resilience/protective variants

In [34]:
! dx download wgs_analysis/results/8_merged/final_filtered_onlycases.csv
! dx download wgs_analysis/results/8_merged/merged.csv
! dx download wgs_analysis/data/protein_var_map.csv --overwrite


Error: path "/opt/notebooks/final_filtered_onlycases.csv" already exists but
-f/--overwrite was not set
Error: path "/opt/notebooks/merged.csv" already exists but -f/--overwrite was
not set


In [50]:
filtered_var_counts = pd.read_csv("final_filtered_onlycases.csv")
filtered_var_counts = filtered_var_counts[filtered_var_counts["CADD"] >= 20]
filtered_var_ids = list(filtered_var_counts["ID"])


In [52]:
! dx download wgs_analysis/results/6_frequencies/Control_EUR.afreq --overwrite
df_freq = pd.read_csv("Control_EUR.afreq", sep="\t")
df_merged = pd.read_csv("merged.csv")
df_merged.insert(2, "VCF_ID", list(df_freq["ID"]))




In [53]:
df_merged_filtered = df_merged[df_merged["ID"].isin(filtered_var_ids)]
filtered_vcf_ids = list(df_merged_filtered["VCF_ID"])


In [54]:
file_path = 'variants_to_keep.txt'
with open(file_path, 'w') as file:
    for variant_id in filtered_vcf_ids:
        file.write(f"{variant_id}\n")
! dx upload variants_to_keep.txt --path wgs_analysis/results/11_phenotypic_data/variant_ids.txt
        

ID                                file-GpkGGXjJYBPjzXVJg5z7xfqf
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/11_phenotypic_data
Name                              variant_ids.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Wed Aug  7 00:56:43 2024
Created by                        spencermg3
 via the job                      job-GpkFP58JYBPf9gjx0J49ZvPG
Last modified                     Wed Aug  7 00:56:44 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"


In [None]:
%%bash

dx run swiss-army-knife \
-iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
-iin="/wgs_analysis/results/11_phenotypic_data/variant_ids.txt" \
-icmd="plink2 --vcf normalized.vcf --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --extract variant_ids.txt --freq --out full_cohort" \
--instance-type mem1_hdd1_v2_x16 \
--destination "${projectid}:/wgs_analysis/results/11_phenotypic_data"


In [None]:
%%bash 

dx run swiss-army-knife \
-iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
-iin="/wgs_analysis/results/11_phenotypic_data/full_cohort.afreq" \
-iin="/wgs_analysis/results/11_phenotypic_data/variant_ids.txt" \
-icmd="plink2 --vcf normalized.vcf --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --extract variant_ids.txt --read-freq full_cohort.afreq --export A --het --out full_cohort" \
--instance-type mem1_hdd1_v2_x16 \
--destination "${projectid}:/wgs_analysis/results/11_phenotypic_data"


In [55]:
! dx download wgs_analysis/results/11_phenotypic_data/full_cohort.raw --overwrite




In [56]:
df_raw = pd.read_csv("full_cohort.raw", sep="\t")
variant_ids = df_raw.columns.values
rename_dict = {}
for vid in variant_ids:
    rename_dict[vid] = vid.split("_")[0]
df_raw.rename(rename_dict, axis=1, inplace=True)
df_raw.drop(columns=["FID","PAT","MAT","SEX","PHENOTYPE"], inplace=True)
df_raw.fillna(2, inplace=True)


In [57]:
list_vars = []
list_vars_chrpos = []
list_ids = []
for variant in df_raw.columns[1:]:
    var_ids = df_raw[df_raw[variant] != 2]['IID'].tolist()
    for iid in var_ids:
        list_vars.append(variant)
        var_chrpos = ":".join(variant.split(":")[:2])
        list_vars_chrpos.append(var_chrpos)
        list_ids.append(iid)
    
#id_var_mapper = {}    
#for index, row in df_raw.iterrows():
#    iid = int(row['IID'])
#    
#    list_vars = [col for col in df_raw.columns[1:] if row[col] != 2]
#    if len(list_vars) > 0:
#        id_var_mapper[iid] = list_vars
#    

In [58]:
df_pheno = pd.DataFrame({
    "Variant_ID_Full":list_vars,
    "Variant_ID":list_vars_chrpos,
    "Participant_ID":list_ids,
})


In [59]:
protein_var_map = pd.read_csv("protein_var_map.csv")
df_pheno = df_pheno.merge(protein_var_map, on="Variant_ID", how="left")


In [60]:
display(df_pheno)

Unnamed: 0,Variant_ID_Full,Variant_ID,Participant_ID,Protein_change
0,chr1:155235769:G:A,chr1:155235769,3520686,p.R347C
1,chr1:155236249:A:G,chr1:155236249,3646248,p.I320T
2,chr1:155236249:A:G,chr1:155236249,2703400,p.I320T
3,chr1:155236262:T:G,chr1:155236262,2114999,p.S316R
4,chr1:155236262:T:G,chr1:155236262,5365927,p.S316R
...,...,...,...,...
125,chr21:25982424:G:A,chr21:25982424,1682723,p.P251S
126,chr21:26000138:T:C,chr21:26000138,2354670,p.I248V
127,chr21:26021858:C:G,chr21:26021858,3620157,p.V227L
128,chr21:26021912:C:T,chr21:26021912,4998524,p.A209T


In [45]:
! dx download wgs_analysis/results/AD_cases.csv
! dx download wgs_analysis/results/RD_cases.csv


Error: path "/opt/notebooks/AD_cases.csv" already exists but -f/--overwrite
was not set
Error: path "/opt/notebooks/RD_cases.csv" already exists but -f/--overwrite
was not set


In [61]:
df_ad = pd.read_csv("AD_cases.csv")
df_rd = pd.read_csv("RD_cases.csv")

df_ad.rename({"AD_DATE":"DATE_OF_ONSET", "label":"ANCESTRY", "ID":"Participant_ID"}, inplace=True, axis=1)
df_rd.rename({"DEM_DATE":"DATE_OF_ONSET", "label":"ANCESTRY", "ID":"Participant_ID"}, inplace=True, axis=1)

df_dem = pd.concat([df_ad, df_rd], axis=0)
df_dem.reset_index(inplace=True, drop=True)


In [62]:
df_dem["AGE_AT_ONSET"] = (pd.to_datetime(df_dem['DATE_OF_ONSET']) - pd.to_datetime(df_dem["BIRTH_YEAR"], format='%Y')).dt.days // 365.242374
df_dem["DAYS_SINCE_ONSET"] = (pd.to_datetime(df_dem['DATE_OF_DEATH'].fillna(datetime.today().strftime('%Y-%m-%d'))) - pd.to_datetime(df_dem['DATE_OF_ONSET'])).dt.days
df_dem["AGE"] = (pd.to_datetime(df_dem['DATE_OF_DEATH'].fillna(datetime.today().strftime('%Y-%m-%d'))) - pd.to_datetime(df_dem['BIRTH_YEAR'], format='%Y')).dt.days // 365.242374


In [63]:
print(df_pheno.shape)

df_pheno_1 = df_pheno.merge(df_dem[[
    "Participant_ID","GENETIC_SEX","AGE","DATE_OF_ONSET","DATE_OF_DEATH","AGE_AT_ONSET",
    "DAYS_SINCE_ONSET","ANCESTRY","COGNITIVE_SYMPTOMS_SEVERITY_PAST_WEEK", 
    "SPECIFIC_COGNITIVE_ABILITY_2014","SPECIFIC_COGNITIVE_ABILITY_2019", 
    "TOUCHSCREEN_COGNITIVE_DURATION_2014","TOUCHSCREEN_COGNITIVE_DURATION_2019", 
    "COGNITIVE_TEST_WILLINGNESS","COGNITIVE_TEST_WILLINGNESS", "COGNITIVE_TEST_WILLINGNESS",
]], on="Participant_ID", how="inner")


(130, 4)


In [64]:
display(df_pheno_1)
df_pheno_1.to_csv("pheno.csv")


Unnamed: 0,Variant_ID_Full,Variant_ID,Participant_ID,Protein_change,GENETIC_SEX,AGE,DATE_OF_ONSET,DATE_OF_DEATH,AGE_AT_ONSET,DAYS_SINCE_ONSET,ANCESTRY,COGNITIVE_SYMPTOMS_SEVERITY_PAST_WEEK,SPECIFIC_COGNITIVE_ABILITY_2014,SPECIFIC_COGNITIVE_ABILITY_2019,TOUCHSCREEN_COGNITIVE_DURATION_2014,TOUCHSCREEN_COGNITIVE_DURATION_2019,COGNITIVE_TEST_WILLINGNESS,COGNITIVE_TEST_WILLINGNESS.1,COGNITIVE_TEST_WILLINGNESS.2
0,chr1:155235769:G:A,chr1:155235769,3520686,p.R347C,Male,78.0,2017-03-23,2017-08-11,78.0,141,EUR,,,,,,Begin games,Begin games,Begin games
1,chr1:155236249:A:G,chr1:155236249,3646248,p.I320T,Male,77.0,2021-09-12,2021-10-06,77.0,24,EUR,,,,,,Begin games,Begin games,Begin games
2,chr1:155236249:A:G,chr1:155236249,2703400,p.I320T,Male,67.0,2013-05-04,2013-05-07,67.0,3,EUR,,,,,,Begin games,Begin games,Begin games
3,chr1:155236262:T:G,chr1:155236262,2114999,p.S316R,Female,73.0,2017-12-01,2018-05-24,72.0,174,EUR,,,,,,Begin games,Begin games,Begin games
4,chr1:155236262:T:G,chr1:155236262,5365927,p.S316R,Male,63.0,2014-12-22,2015-11-15,62.0,328,EUR,,,,,,Begin games,Begin games,Begin games
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,chr21:25982424:G:A,chr21:25982424,1682723,p.P251S,Male,83.0,2022-07-19,2022-07-19,83.0,0,EUR,,,,,,Begin games,Begin games,Begin games
126,chr21:26000138:T:C,chr21:26000138,2354670,p.I248V,Female,82.0,2022-07-26,,80.0,743,EUR,,,,,,Begin games,Begin games,Begin games
127,chr21:26021858:C:G,chr21:26021858,3620157,p.V227L,Female,59.0,2017-10-30,2017-12-07,59.0,38,EUR,,,,,,Begin games,Begin games,Begin games
128,chr21:26021912:C:T,chr21:26021912,4998524,p.A209T,Male,65.0,2020-10-06,2020-11-26,65.0,51,EUR,No problem,,,,,Begin games,Begin games,Begin games


## Find variants expressed by anyone in the dataset

In [None]:
%%bash

for cohort in {"AD","RD","Control"};
do
    for ancestry in {"AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"};
    do
        dx run swiss-army-knife \
        -iin="/wgs_analysis/data/protective_variants.txt" \
        -iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
        -iin="/wgs_analysis/data/ID_Files/${cohort}_${ancestry}.txt" \
        -iin="/wgs_analysis/results/6_frequencies/${cohort}_${ancestry}.afreq" \
        -icmd="plink2 --vcf normalized.vcf --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --extract protective_variants.txt --keep ${cohort}_${ancestry}.txt --read-freq ${cohort}_${ancestry}.afreq --export A --het --out ${cohort}_${ancestry}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/12_protective_variants"
    done
done


In [18]:
cohort = "AD"
ancestry = "AAC"
! dx download wgs_analysis/results/12_protective_variants/{cohort}_{ancestry}.raw --overwrite
df = pd.read_csv(f"{cohort}_{ancestry}.raw", sep="\t")
df.drop(["FID","PAT","MAT","SEX","PHENOTYPE"], axis=1, inplace=True)
df.columns = df.columns.str.split('_').str[0]
for var_id in df.columns[1:]:
    participant_ids = list(df["IID"][df[var_id] < 2])
    if len(participant_ids) > 0:
        with open(f"{cohort}_{ancestry}.txt", "w") as file:
            for item in participant_ids:
                file.write(f"{item}\n")
        ! dx upload {cohort}_{ancestry}.txt --path wgs_analysis/results/12_protective_variants/{var_id.replace(":","_")}/{cohort}_{ancestry}.txt
display(df)


chr19_44892887_C_T
ID                                file-Gq1zVg8JYBPv69029zV67Pjj
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44892887_C_T
Name                              AD_AAC.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Tue Aug 20 02:01:01 2024
Created by                        spencermg3
 via the job                      job-Gq1v4KjJYBPx11zB9b3F0BF3
Last modified                     Tue Aug 20 02:01:01 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
chr19_44905307_A_T
ID                                file-Gq1zVgQJYBPYF

Unnamed: 0,IID,chr19:44892887:C:T,chr19:44905307:A:T,chr19:44908756:C:A,chr19:44908756:C:T,chr21:25897620:C:T,chr21:26171645:A:G,chr21:26171723:T:C
0,2592890,1,2,2,2,2,2,2
1,3219025,1,2,2,2,2,2,2
2,5343805,2,1,2,2,2,2,2
3,5405583,2,1,2,2,2,2,2
4,1831922,1,1,2,2,2,2,2
5,1294579,2,0,2,2,2,2,2
6,3043640,2,1,2,2,2,1,1
7,4976837,2,1,2,2,2,2,2
8,4595192,2,2,2,2,2,2,2
9,5255423,1,1,2,2,2,2,2


In [None]:
for cohort in ["AD","RD","Control"]:
    for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
        ! dx download wgs_analysis/results/12_protective_variants/{cohort}_{ancestry}.raw --overwrite
        if os.path.exists(f"{cohort}_{ancestry}.raw"):
            df = pd.read_csv(f"{cohort}_{ancestry}.raw", sep="\t")
            df.drop(["FID","PAT","MAT","SEX","PHENOTYPE"], axis=1, inplace=True)
            df.columns = df.columns.str.split('_').str[0]
            for var_id in df.columns[1:]:
                participant_ids = list(df["IID"][df[var_id] < 2])
                if len(participant_ids) > 0:
                    with open(f"{cohort}_{ancestry}.txt", "w") as file:
                        for item in participant_ids:
                            file.write(f"{item}\n")
                    ! dx upload {cohort}_{ancestry}.txt --path wgs_analysis/results/12_protective_variants/{var_id.replace(":","_")}/{cohort}_{ancestry}.txt


ID                                file-Gq1zb2jJYBPYFf7xPX00QGV2
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44892887_C_T
Name                              AD_AAC.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Tue Aug 20 02:08:11 2024
Created by                        spencermg3
 via the job                      job-Gq1v4KjJYBPx11zB9b3F0BF3
Last modified                     Tue Aug 20 02:08:48 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq1zbG8JYBPq7J1ygzFB6vpv
Class                     

## APOE genotyping for each variant

In [9]:
%%bash

#for var_id in {"chr19_44892887_C_T","chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"};
for var_id in {"chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"};
do
    files=($(ls ../../mnt/project/wgs_analysis/results/12_protective_variants/${var_id}/*.txt 2>/dev/null))
    
    for file in "${files[@]}";
    do
        filename=$(basename "$file" .txt)
        
        dx run swiss-army-knife \
            -iin="/wgs_analysis/results/4_normalized/normalized.vcf" \
            -iin="/wgs_analysis/results/12_protective_variants/${var_id}/${filename}.txt" \
            -icmd="plink2 --vcf normalized.vcf --chr 19 --from-bp 44905791 --to-bp 44909393 --keep ${filename}.txt --set-all-var-ids 'chr@:#:\$r:\$a' --new-id-max-allele-len 999 --make-bed --out ${filename}" \
            --instance-type mem1_hdd1_v2_x16 \
            --destination "${projectid}:/wgs_analysis/results/12_protective_variants/${var_id}"
    done
done



Using input JSON:
{
    "cmd": "plink2 --vcf normalized.vcf --chr 19 --from-bp 44905791 --to-bp 44909393 --keep AD_AAC.txt --set-all-var-ids 'chr@:#:$r:$a' --new-id-max-allele-len 999 --make-bed --out AD_AAC",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gp9X55QJ8z7kk4qBFkKZv54p"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq1zbG8JYBPq7J1ygzFB6vpv"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_protective_variants/chr19_44905307_
  A_T

Job ID: job-Gq2Pp98JYBPbkJb00PzFjBpp

Using input JSON:
{
    "cmd": "plink2 --vcf normalized.vcf --chr 19 --from-bp 44905791 --to-bp 44909393 --keep AD_AFR.txt --set-all-var-ids 'chr@:#:$r:$a' --new-id-max-allele-len 999 --make-bed --o

In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [7]:
%%bash

for var_id in {"chr19_44892887_C_T","chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"};
do
    files=($(ls ../../mnt/project/wgs_analysis/results/12_protective_variants/${var_id}/*.bim 2>/dev/null))
    
    for file in "${files[@]}";
    do
        filename=$(basename "$file" .bim)
        
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/12_protective_variants/${var_id}/${filename}.bim" \
        -iin="/wgs_analysis/results/12_protective_variants/${var_id}/${filename}.bed" \
        -iin="/wgs_analysis/results/12_protective_variants/${var_id}/${filename}.fam" \
        -iin="/wgs_analysis/data/apoe_variants.txt" \
        -icmd="plink --bfile ${filename} --extract apoe_variants.txt --make-bed --out apoe_snps_${filename}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/12_protective_variants/${var_id}"
    done
done


dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_SAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile AD_AAC --extract apoe_variants.txt --make-bed --out apoe_snps_AD_AAC",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2K608JpbqXZkqZBz0vxBYb"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2K608JpbqxyPxGQyvQk75K"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2K608JpbqxKBqQvQ8kxk1Z"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_AAC.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile AD_MDE --extract apoe_variants.txt --make-bed --out apoe_snps_AD_MDE",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXXjJkzz9QgBbZjbyJK3X"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXXjJkzz2b7G635ZjF1pk"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXXjJkzz1pX90JQkxGV20"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_AFR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile AD_SAS --extract apoe_variants.txt --make-bed --out apoe_snps_AD_SAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2Vg30JZGV3kqZqgG1z6Vy9"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2Vg30JZGV98xZ7G59yG4X5"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2Vg30JZGV342XyZF6pp7gJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_AJ.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_AAC --extract apoe_variants.txt --make-bed --out apoe_snps_Control_AAC",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VG98JYzq3Z982v2vp66qg"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VG98JYzqFJ2vbvj5pQypJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VG98JYzq82XFpb3zZ2jB9"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_AMR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_AJ --extract apoe_variants.txt --make-bed --out apoe_snps_Control_AJ",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VjQjJygZKvbJkVK8gV7fG"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VjQjJygZGfXb0Gkxb67XB"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VjQjJygZ0qZp25bbqqGPf"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_CAH.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_AMR --extract apoe_variants.txt --make-bed --out apoe_snps_Control_AMR",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKqQJpB2bPq62qfxQjFk3"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKqQJpB2VJFY2f4jx69gJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKqQJpB2kb93K3fJZX9p5"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_CAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_CAH --extract apoe_variants.txt --make-bed --out apoe_snps_Control_CAH",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VjfjJz0P0YyBJFf7QQpfG"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VjjQJz0P2jQFqbfzb0Ybp"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VjfjJz0P7YbkxY88ZqJ8K"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_EAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_CAS --extract apoe_variants.txt --make-bed --out apoe_snps_Control_CAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGfQJK24f0x1yQ0BYjGkP"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGfQJK24VpX90JQkxGQPK"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGfQJK24qqJzZZVYv7bgZ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_EUR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_EAS --extract apoe_variants.txt --make-bed --out apoe_snps_Control_EAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ1QJGpYqk6ZPPJpypPfz"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ1QJGpYk614q5k5Xkxq3"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ1QJGpYp2Q641z0Q2v8v"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_MDE.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_EUR --extract apoe_variants.txt --make-bed --out apoe_snps_Control_EUR",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ6QJ28Zqx5Fy5gfQg958"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ6QJ28Zp2Q641z0Q2v93"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ6QJ28ZVfY2YYYBv7vPP"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_SAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_FIN --extract apoe_variants.txt --make-bed --out apoe_snps_Control_FIN",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VP00JY4fB2Q641z0Q2vPv"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VP20JY4f2Y98Bf9kpPF5z"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VP00JY4fJ6k0vvzJy72B3"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_EUR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_MDE --extract apoe_variants.txt --make-bed --out apoe_snps_Control_MDE",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJpjJQb4q06KvB1YYP23p"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJpjJQb4Q0JGvG9bYJV6P"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJpjJQb4bVg15VpYzxjpZ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_EUR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_SAS --extract apoe_variants.txt --make-bed --out apoe_snps_Control_SAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXx0JYB6ggz68F4gyVqJq"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXx0JYB6YKxfQzZXfvv0b"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXx0JYB6k4jXpj9YjzJZ7"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_AAC.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile RD_AAC --extract apoe_variants.txt --make-bed --out apoe_snps_RD_AAC",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VQFjJKQjX79pf2KzkGp7v"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VQFjJKQjp8Vj8gzyZZg69"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VQFjJKQjVBYZ4Gj8VPQY6"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_AFR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile RD_AFR --extract apoe_variants.txt --make-bed --out apoe_snps_RD_AFR",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ00J2F5v5vpXXfxxvYFZ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJ00J2F5v5vpXXfxxvYFX"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGzQJ2F5Z1f0VBZpgG241"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_AJ.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile RD_AJ --extract apoe_variants.txt --make-bed --out apoe_snps_RD_AJ",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKPjJ04yX7kVjq5Kykj57"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKX8J04ypvJk8k19yK0QJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKPjJ04yzJ0b43Z9kYk4V"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_protec

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_CAH.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile RD_AMR --extract apoe_variants.txt --make-bed --out apoe_snps_RD_AMR",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKVjJ7z6qG0JVbz0Z7152"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKVjJ7z6jjKj5Pj1gXq96"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VKVjJ7z6pFPj9f05zvFQ7"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_CAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile RD_CAH --extract apoe_variants.txt --make-bed --out apoe_snps_RD_CAH",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXjjJXKZ7gz68F4gyVqJV"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXjjJXKZ9yjqx4VGZkzqZ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VXjjJXKZFQ80vyVYbxK22"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_EAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile RD_EAS --extract apoe_variants.txt --make-bed --out apoe_snps_RD_EAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGg8J91JkYK3k1Y7qpG9q"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGg8J91JY42XyZF6pkY4V"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VGg8J91Jg2Y7YQfBqvf5B"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_EUR.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_CAH --extract apoe_variants.txt --make-bed --out apoe_snps_Control_CAH",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VYj0J83PQZ5kqBjvYVBpf"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VYj0J83PvqkgzzG9v609F"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VYj0J83PbPq62qfxQjGG8"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_SAS.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_AAC.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_AFR.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_AJ.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/Control_AMR.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input fie


Using input JSON:
{
    "cmd": "plink --bfile Control_CAS --extract apoe_variants.txt --make-bed --out apoe_snps_Control_CAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VQ80Jfvzv5vpXXfxxvZ3k"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VQ80Jfvzbxbx6kqYV3x27"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VQ80JfvzpyV38637pBkzV"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_CAH.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_EAS --extract apoe_variants.txt --make-bed --out apoe_snps_Control_EAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJyjJzyK39jV3VVFz7k6B"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJyjJzyK3yQ74Yf3q9yfJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VJyjJzyK7FyJK5zZvgZ7k"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_CAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_MDE --extract apoe_variants.txt --make-bed --out apoe_snps_Control_MDE",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VpJ8JXq6VK4YKgB3k4bbp"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VpJ8JXq6xGKKKYZ4Y7xVJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VpJ8JXq6YvV86yz4zPZyy"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_EAS.bim" to a name or ID



Using input JSON:
{
    "cmd": "plink --bfile Control_SAS --extract apoe_variants.txt --make-bed --out apoe_snps_Control_SAS",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VbfQJy3QG2Z68p7kpJgf8"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VbfQJy3QF0pKv7Y3ZjZV5"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VbfQJy3QPbjFQ20Bp7ZqJ"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/resul

dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_EUR.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/RD_SAS.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_AAC.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_AFR.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be parsed as file: could not resolve "/wgs_analysis/results/12_protective_variants/AD_AJ.bim" to a name or ID
dxpy.exceptions.DXCLIError: Value provided for input field "in" could not be


Using input JSON:
{
    "cmd": "plink --bfile RD_AAC --extract apoe_variants.txt --make-bed --out apoe_snps_RD_AAC",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VPyQJ3Y21BYZ4Gj8VPQQ3"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VPyQJ3Y2B63Pxq87jvJjp"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2VPyQJ3Y2PbjFQ20Bp7Z2x"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prot

In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [9]:
%%bash

for var_id in {"chr19_44892887_C_T","chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"};
do
    files=($(ls ../../mnt/project/wgs_analysis/results/12_protective_variants/${var_id}/*.bim 2>/dev/null))
    
    for file in "${files[@]}";
    do
        filename=$(basename "$file" .bim)
        
        dx run swiss-army-knife \
        -iin="/wgs_analysis/results/12_protective_variants/${var_id}/apoe_snps_${filename}.bim" \
        -iin="/wgs_analysis/results/12_protective_variants/${var_id}/apoe_snps_${filename}.bed" \
        -iin="/wgs_analysis/results/12_protective_variants/${var_id}/apoe_snps_${filename}.fam" \
        -iin="/wgs_analysis/data/apoe_variants.txt" \
        -icmd="plink --bfile apoe_snps_${filename} --recode compound-genotypes --out apoe_snps_${filename}" \
        --instance-type mem1_hdd1_v2_x16 \
        --destination "${projectid}:/wgs_analysis/results/12_protective_variants/${var_id}"
    done
done



Using input JSON:
{
    "cmd": "plink --bfile apoe_snps_AD_AAC --recode compound-genotypes --out apoe_snps_AD_AAC",
    "in": [
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2z68QJY6B8qY4VYKxv0Jzx"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2z68QJY6B8Yxjxz4gZJkxB"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gq2z68QJY6BPpx2fX06j8vK2"
            }
        },
        {
            "$dnanexus_link": {
                "project": "project-Gbgg70jJYBPf3yZ26F3GVFjx",
                "id": "file-Gky57B8JYBPy8ZV1GfFG444b"
            }
        }
    ]
}

Calling app-GkvkZK89gxxfJBYgQX76pY8g with output destination project-Gbgg70jJY
  BPf3yZ26F3GVFjx:/wgs_analysis/results/12_prote

In [None]:
"""
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------ PAUSE HERE UNTIL PREVIOUS STEP COMPLETES ------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------
"""

In [4]:
for var_id in ["chr19_44892887_C_T","chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"]:
    ! mkdir {var_id}
    
    ! dx download wgs_analysis/results/12_protective_variants/{var_id}/*.ped --overwrite
    ! mv *.ped {var_id}/
    



In [8]:
! dx download wgs_analysis/data/APOE_genotypes_PLINK_ped.py --overwrite
for var_id in ["chr19_44892887_C_T","chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"]:
    for ped_file in glob.glob(f"{var_id}/*"):
        ! python APOE_genotypes_PLINK_ped.py -i {ped_file} -o {ped_file.replace("snps","final")[:-4]}
        ! dx upload {ped_file.replace("snps","final")[:-4]}.APOE_GENOTYPES.csv --path wgs_analysis/results/12_protective_variants/{ped_file.replace("snps","final")[:-4]}.APOE_GENOTYPES.csv
        

Your complete genotype file has been saved here: chr19_44892887_C_T/apoe_final_AD_AAC.APOE_GENOTYPES.csv
The summary counts have been saved here: chr19_44892887_C_T/apoe_final_AD_AAC.APOE_SUMMARY.csv
Thanks!
ID                                file-Gq360BjJYBPk6P6YFQzGfQFq
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44892887_C_T
Name                              apoe_final_AD_AAC.APOE_GENOTYPES.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Wed Aug 21 21:46:19 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Wed Aug 21 21:4

In [13]:
for var_id in ["chr19_44892887_C_T","chr19_44905307_A_T","chr19_44908756_C_A","chr21_25897620_C_T","chr21_26171645_A_G","chr21_26171723_T_C"]:
    print(var_id)
    for cohort in ["AD","RD","Control"]:
        print(cohort)
        cohort_counts = []
        included_ancestries = []
        for ancestry in ["AAC","AFR","AJ","AMR","CAH","CAS","EAS","EUR","FIN","MDE","SAS"]:
            if os.path.exists(f"{var_id}/apoe_final_{cohort}_{ancestry}.APOE_GENOTYPES.csv"):
                apoe_geno = pd.read_csv(f"{var_id}/apoe_final_{cohort}_{ancestry}.APOE_GENOTYPES.csv")
                counts = apoe_geno['APOE_GENOTYPE'].value_counts()
                counts = counts.reindex(["e1/e1", "e1/e2", "e1/e4", "e2/e2", "e2/e3", "e2/e4 or e1/e3", "e3/e3", "e3/e4", "e4/e4"], fill_value=0)
                counts = counts.rename(ancestry)
                counts = pd.concat([counts, pd.Series([counts.sum()], index=['total'])])
                cohort_counts.append(counts)
                included_ancestries.append(ancestry)
        if len(cohort_counts) > 0:
            cohort_counts = pd.concat(cohort_counts, axis=1)
            cohort_counts.columns = included_ancestries
            display(cohort_counts)
            cohort_percentages = cohort_counts.div(cohort_counts.loc["total"], axis=1) * 100
            cohort_percentages.loc["total"] = cohort_counts.loc["total"]
            display(cohort_percentages)
            cohort_combined = cohort_counts.applymap(str) + ' (' + cohort_percentages.applymap(lambda x: f'{x:.2f}%') + ')'
            cohort_combined.loc["total"] = cohort_counts.loc["total"]
            display(cohort_combined)
            cohort_counts.to_csv(f"{var_id}/{cohort}_apoe_genotype_counts.csv")
            cohort_percentages.to_csv(f"{var_id}/{cohort}_apoe_genotype_percentages.csv")
            cohort_combined.to_csv(f"{var_id}/{cohort}_apoe_genotype_combined.csv")
            ! dx upload {var_id}/{cohort}_apoe_genotype_counts.csv --path wgs_analysis/results/12_protective_variants/{var_id}/{cohort}_apoe_genotype_counts.csv
            ! dx upload {var_id}/{cohort}_apoe_genotype_percentages.csv --path wgs_analysis/results/12_protective_variants/{var_id}/{cohort}_apoe_genotype_percentages.csv
            ! dx upload {var_id}/{cohort}_apoe_genotype_combined.csv --path wgs_analysis/results/12_protective_variants/{var_id}/{cohort}_apoe_genotype_combined.csv


chr19_44892887_C_T
AD


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,0,0,0
e2/e3,0,2,0,0,0,0,5,0,0
e2/e4 or e1/e3,0,2,0,0,0,0,78,0,0
e3/e3,0,0,1,0,0,0,85,0,3
e3/e4,1,4,11,1,2,2,1457,1,19
e4/e4,3,1,2,0,1,0,520,1,3
total,4,9,14,1,3,2,2145,2,25


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e3,0.0,22.222222,0.0,0.0,0.0,0.0,0.2331,0.0,0.0
e2/e4 or e1/e3,0.0,22.222222,0.0,0.0,0.0,0.0,3.636364,0.0,0.0
e3/e3,0.0,0.0,7.142857,0.0,0.0,0.0,3.962704,0.0,12.0
e3/e4,25.0,44.444444,78.571429,100.0,66.666667,100.0,67.925408,50.0,76.0
e4/e4,75.0,11.111111,14.285714,0.0,33.333333,0.0,24.242424,50.0,12.0
total,4.0,9.0,14.0,1.0,3.0,2.0,2145.0,2.0,25.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e3,0 (0.00%),2 (22.22%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),5 (0.23%),0 (0.00%),0 (0.00%)
e2/e4 or e1/e3,0 (0.00%),2 (22.22%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),78 (3.64%),0 (0.00%),0 (0.00%)
e3/e3,0 (0.00%),0 (0.00%),1 (7.14%),0 (0.00%),0 (0.00%),0 (0.00%),85 (3.96%),0 (0.00%),3 (12.00%)
e3/e4,1 (25.00%),4 (44.44%),11 (78.57%),1 (100.00%),2 (66.67%),2 (100.00%),1457 (67.93%),1 (50.00%),19 (76.00%)
e4/e4,3 (75.00%),1 (11.11%),2 (14.29%),0 (0.00%),1 (33.33%),0 (0.00%),520 (24.24%),1 (50.00%),3 (12.00%)
total,4,9,14,1,3,2,2145,2,25


ID                                file-Gq38548JYBPY9jV3VVF0111B
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44892887_C_T
Name                              AD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:05 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:06 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq3854jJYBPjp8Z6qvZ1B8q5
Class    

Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,0,0,0,0
e2/e3,1,2,0,0,1,0,14,0,0,1
e2/e4 or e1/e3,0,1,1,2,0,1,117,0,0,0
e3/e3,2,6,0,2,0,0,186,0,1,1
e3/e4,2,4,9,7,5,0,1386,1,0,14
e4/e4,3,0,1,1,1,0,359,0,0,4
total,8,13,11,12,7,1,2062,1,1,20


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e3,12.5,15.384615,0.0,0.0,14.285714,0.0,0.678952,0.0,0.0,5.0
e2/e4 or e1/e3,0.0,7.692308,9.090909,16.666667,0.0,100.0,5.674103,0.0,0.0,0.0
e3/e3,25.0,46.153846,0.0,16.666667,0.0,0.0,9.020369,0.0,100.0,5.0
e3/e4,25.0,30.769231,81.818182,58.333333,71.428571,0.0,67.216295,100.0,0.0,70.0
e4/e4,37.5,0.0,9.090909,8.333333,14.285714,0.0,17.410281,0.0,0.0,20.0
total,8.0,13.0,11.0,12.0,7.0,1.0,2062.0,1.0,1.0,20.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e3,1 (12.50%),2 (15.38%),0 (0.00%),0 (0.00%),1 (14.29%),0 (0.00%),14 (0.68%),0 (0.00%),0 (0.00%),1 (5.00%)
e2/e4 or e1/e3,0 (0.00%),1 (7.69%),1 (9.09%),2 (16.67%),0 (0.00%),1 (100.00%),117 (5.67%),0 (0.00%),0 (0.00%),0 (0.00%)
e3/e3,2 (25.00%),6 (46.15%),0 (0.00%),2 (16.67%),0 (0.00%),0 (0.00%),186 (9.02%),0 (0.00%),1 (100.00%),1 (5.00%)
e3/e4,2 (25.00%),4 (30.77%),9 (81.82%),7 (58.33%),5 (71.43%),0 (0.00%),1386 (67.22%),1 (100.00%),0 (0.00%),14 (70.00%)
e4/e4,3 (37.50%),0 (0.00%),1 (9.09%),1 (8.33%),1 (14.29%),0 (0.00%),359 (17.41%),0 (0.00%),0 (0.00%),4 (20.00%)
total,8,13,11,12,7,1,2062,1,1,20


ID                                file-Gq3855QJYBPzJ0b43Z9BKJ05
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44892887_C_T
Name                              RD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:10 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:11 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq38560JYBPjp8Z6qvZ1B8qX
Class    

Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0,0,0
e2/e2,0,1,0,0,0,0,0,0,0,0,0
e2/e3,5,12,3,0,1,0,0,242,0,0,0
e2/e4 or e1/e3,4,7,6,0,6,3,2,960,0,0,4
e3/e3,7,39,19,0,12,8,5,2322,1,3,59
e3/e4,11,23,48,8,21,22,23,8913,1,4,69
e4/e4,2,2,3,0,4,0,1,865,0,1,9
total,29,84,79,8,44,33,31,13302,2,8,141


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,1.190476,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e3,17.241379,14.285714,3.797468,0.0,2.272727,0.0,0.0,1.819275,0.0,0.0,0.0
e2/e4 or e1/e3,13.793103,8.333333,7.594937,0.0,13.636364,9.090909,6.451613,7.21696,0.0,0.0,2.836879
e3/e3,24.137931,46.428571,24.050633,0.0,27.272727,24.242424,16.129032,17.456022,50.0,37.5,41.843972
e3/e4,37.931034,27.380952,60.759494,100.0,47.727273,66.666667,74.193548,67.004962,50.0,50.0,48.93617
e4/e4,6.896552,2.380952,3.797468,0.0,9.090909,0.0,3.225806,6.502782,0.0,12.5,6.382979
total,29.0,84.0,79.0,8.0,44.0,33.0,31.0,13302.0,2.0,8.0,141.0


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),1 (1.19%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e3,5 (17.24%),12 (14.29%),3 (3.80%),0 (0.00%),1 (2.27%),0 (0.00%),0 (0.00%),242 (1.82%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e4 or e1/e3,4 (13.79%),7 (8.33%),6 (7.59%),0 (0.00%),6 (13.64%),3 (9.09%),2 (6.45%),960 (7.22%),0 (0.00%),0 (0.00%),4 (2.84%)
e3/e3,7 (24.14%),39 (46.43%),19 (24.05%),0 (0.00%),12 (27.27%),8 (24.24%),5 (16.13%),2322 (17.46%),1 (50.00%),3 (37.50%),59 (41.84%)
e3/e4,11 (37.93%),23 (27.38%),48 (60.76%),8 (100.00%),21 (47.73%),22 (66.67%),23 (74.19%),8913 (67.00%),1 (50.00%),4 (50.00%),69 (48.94%)
e4/e4,2 (6.90%),2 (2.38%),3 (3.80%),0 (0.00%),4 (9.09%),0 (0.00%),1 (3.23%),865 (6.50%),0 (0.00%),1 (12.50%),9 (6.38%)
total,29,84,79,8,44,33,31,13302,2,8,141


ID                                file-Gq3856jJYBPgp85fYz93yVBK
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44892887_C_T
Name                              Control_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:15 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:16 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq38578JYBPjp8Z6qvZ1B8qy
Clas

Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,8,0,0
e2/e3,1,0,0,0,0,0,141,0,1
e2/e4 or e1/e3,0,3,0,0,0,0,59,0,0
e3/e3,1,7,3,2,1,1,367,4,10
e3/e4,2,12,5,1,3,0,298,1,2
e4/e4,5,10,0,0,0,0,6,1,0
total,9,32,8,3,4,1,879,6,13


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.910125,0.0,0.0
e2/e3,11.111111,0.0,0.0,0.0,0.0,0.0,16.040956,0.0,7.692308
e2/e4 or e1/e3,0.0,9.375,0.0,0.0,0.0,0.0,6.712173,0.0,0.0
e3/e3,11.111111,21.875,37.5,66.666667,25.0,100.0,41.751991,66.666667,76.923077
e3/e4,22.222222,37.5,62.5,33.333333,75.0,0.0,33.902162,16.666667,15.384615
e4/e4,55.555556,31.25,0.0,0.0,0.0,0.0,0.682594,16.666667,0.0
total,9.0,32.0,8.0,3.0,4.0,1.0,879.0,6.0,13.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),8 (0.91%),0 (0.00%),0 (0.00%)
e2/e3,1 (11.11%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),141 (16.04%),0 (0.00%),1 (7.69%)
e2/e4 or e1/e3,0 (0.00%),3 (9.38%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),59 (6.71%),0 (0.00%),0 (0.00%)
e3/e3,1 (11.11%),7 (21.88%),3 (37.50%),2 (66.67%),1 (25.00%),1 (100.00%),367 (41.75%),4 (66.67%),10 (76.92%)
e3/e4,2 (22.22%),12 (37.50%),5 (62.50%),1 (33.33%),3 (75.00%),0 (0.00%),298 (33.90%),1 (16.67%),2 (15.38%)
e4/e4,5 (55.56%),10 (31.25%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),6 (0.68%),1 (16.67%),0 (0.00%)
total,9,32,8,3,4,1,879,6,13


ID                                file-Gq38580JYBPq1y2GX9bJ56Z8
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44905307_A_T
Name                              AD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:20 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:21 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq3858QJYBPjp8Z6qvZ1B8xf
Class    

Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,0,20,0,0
e2/e3,0,1,1,0,0,0,0,295,1,4
e2/e4 or e1/e3,3,3,1,0,0,0,0,105,0,0
e3/e3,3,11,14,1,4,7,1,670,6,12
e3/e4,5,14,2,1,2,3,0,277,1,3
e4/e4,5,8,0,0,0,0,0,6,0,1
total,16,37,18,2,6,10,1,1373,8,20


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.456664,0.0,0.0
e2/e3,0.0,2.702703,5.555556,0.0,0.0,0.0,0.0,21.485798,12.5,20.0
e2/e4 or e1/e3,18.75,8.108108,5.555556,0.0,0.0,0.0,0.0,7.647487,0.0,0.0
e3/e3,18.75,29.72973,77.777778,50.0,66.666667,70.0,100.0,48.798252,75.0,60.0
e3/e4,31.25,37.837838,11.111111,50.0,33.333333,30.0,0.0,20.1748,12.5,15.0
e4/e4,31.25,21.621622,0.0,0.0,0.0,0.0,0.0,0.436999,0.0,5.0
total,16.0,37.0,18.0,2.0,6.0,10.0,1.0,1373.0,8.0,20.0


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),20 (1.46%),0 (0.00%),0 (0.00%)
e2/e3,0 (0.00%),1 (2.70%),1 (5.56%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),295 (21.49%),1 (12.50%),4 (20.00%)
e2/e4 or e1/e3,3 (18.75%),3 (8.11%),1 (5.56%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),105 (7.65%),0 (0.00%),0 (0.00%)
e3/e3,3 (18.75%),11 (29.73%),14 (77.78%),1 (50.00%),4 (66.67%),7 (70.00%),1 (100.00%),670 (48.80%),6 (75.00%),12 (60.00%)
e3/e4,5 (31.25%),14 (37.84%),2 (11.11%),1 (50.00%),2 (33.33%),3 (30.00%),0 (0.00%),277 (20.17%),1 (12.50%),3 (15.00%)
e4/e4,5 (31.25%),8 (21.62%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),6 (0.44%),0 (0.00%),1 (5.00%)
total,16,37,18,2,6,10,1,1373,8,20


ID                                file-Gq3859QJYBPX6JX5yk31F4Fx
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44905307_A_T
Name                              RD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:26 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:26 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq3859jJYBPX6JX5yk31F4GB
Class    

Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0,0,0
e2/e2,0,0,2,0,1,1,1,299,0,0,1
e2/e3,11,18,21,2,6,11,0,4789,1,3,30
e2/e4 or e1/e3,6,17,7,0,2,2,0,748,0,0,3
e3/e3,20,83,146,12,34,65,6,9191,6,29,167
e3/e4,22,89,21,4,14,6,4,1810,1,2,25
e4/e4,5,15,0,0,0,0,1,12,0,0,1
total,64,222,197,18,57,85,12,16849,8,34,227


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,1.015228,0.0,1.754386,1.176471,8.333333,1.774586,0.0,0.0,0.440529
e2/e3,17.1875,8.108108,10.659898,11.111111,10.526316,12.941176,0.0,28.423052,12.5,8.823529,13.215859
e2/e4 or e1/e3,9.375,7.657658,3.553299,0.0,3.508772,2.352941,0.0,4.439433,0.0,0.0,1.321586
e3/e3,31.25,37.387387,74.111675,66.666667,59.649123,76.470588,50.0,54.549231,75.0,85.294118,73.568282
e3/e4,34.375,40.09009,10.659898,22.222222,24.561404,7.058824,33.333333,10.742477,12.5,5.882353,11.013216
e4/e4,7.8125,6.756757,0.0,0.0,0.0,0.0,8.333333,0.071221,0.0,0.0,0.440529
total,64.0,222.0,197.0,18.0,57.0,85.0,12.0,16849.0,8.0,34.0,227.0


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),2 (1.02%),0 (0.00%),1 (1.75%),1 (1.18%),1 (8.33%),299 (1.77%),0 (0.00%),0 (0.00%),1 (0.44%)
e2/e3,11 (17.19%),18 (8.11%),21 (10.66%),2 (11.11%),6 (10.53%),11 (12.94%),0 (0.00%),4789 (28.42%),1 (12.50%),3 (8.82%),30 (13.22%)
e2/e4 or e1/e3,6 (9.38%),17 (7.66%),7 (3.55%),0 (0.00%),2 (3.51%),2 (2.35%),0 (0.00%),748 (4.44%),0 (0.00%),0 (0.00%),3 (1.32%)
e3/e3,20 (31.25%),83 (37.39%),146 (74.11%),12 (66.67%),34 (59.65%),65 (76.47%),6 (50.00%),9191 (54.55%),6 (75.00%),29 (85.29%),167 (73.57%)
e3/e4,22 (34.38%),89 (40.09%),21 (10.66%),4 (22.22%),14 (24.56%),6 (7.06%),4 (33.33%),1810 (10.74%),1 (12.50%),2 (5.88%),25 (11.01%)
e4/e4,5 (7.81%),15 (6.76%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),1 (8.33%),12 (0.07%),0 (0.00%),0 (0.00%),1 (0.44%)
total,64,222,197,18,57,85,12,16849,8,34,227


ID                                file-Gq385BjJYBPq1y2GX9bJ56Zq
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44905307_A_T
Name                              Control_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:31 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:32 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385F8JYBPb05fV03x8z7bQ
Clas

Unnamed: 0,EUR
e1/e1,0
e1/e2,0
e1/e4,0
e2/e2,0
e2/e3,0
e2/e4 or e1/e3,0
e3/e3,3
e3/e4,1
e4/e4,0
total,4


Unnamed: 0,EUR
e1/e1,0.0
e1/e2,0.0
e1/e4,0.0
e2/e2,0.0
e2/e3,0.0
e2/e4 or e1/e3,0.0
e3/e3,75.0
e3/e4,25.0
e4/e4,0.0
total,4.0


Unnamed: 0,EUR
e1/e1,0 (0.00%)
e1/e2,0 (0.00%)
e1/e4,0 (0.00%)
e2/e2,0 (0.00%)
e2/e3,0 (0.00%)
e2/e4 or e1/e3,0 (0.00%)
e3/e3,3 (75.00%)
e3/e4,1 (25.00%)
e4/e4,0 (0.00%)
total,4


ID                                file-Gq385G0JYBPkBqx8Gk11gGJg
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr19_44908756_C_A
Name                              Control_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:36 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:37 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385GQJYBPYj1g24ZzB535f
Clas

Unnamed: 0,EUR
e1/e1,0
e1/e2,0
e1/e4,0
e2/e2,0
e2/e3,0
e2/e4 or e1/e3,0
e3/e3,1
e3/e4,0
e4/e4,0
total,1


Unnamed: 0,EUR
e1/e1,0.0
e1/e2,0.0
e1/e4,0.0
e2/e2,0.0
e2/e3,0.0
e2/e4 or e1/e3,0.0
e3/e3,100.0
e3/e4,0.0
e4/e4,0.0
total,1.0


Unnamed: 0,EUR
e1/e1,0 (0.00%)
e1/e2,0 (0.00%)
e1/e4,0 (0.00%)
e2/e2,0 (0.00%)
e2/e3,0 (0.00%)
e2/e4 or e1/e3,0 (0.00%)
e3/e3,1 (100.00%)
e3/e4,0 (0.00%)
e4/e4,0 (0.00%)
total,1


ID                                file-Gq385J8JYBPb05fV03x8z7bk
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_25897620_C_T
Name                              Control_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:41 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:42 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385JjJYBPgfJXKzpk98fFp
Clas

Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,1,0
e2/e3,0,2,0,0,0,0,8,0
e2/e4 or e1/e3,0,3,0,0,0,0,3,0
e3/e3,0,1,1,2,1,3,58,3
e3/e4,0,7,0,0,0,0,82,0
e4/e4,2,2,0,0,0,0,25,0
total,2,15,1,2,1,3,177,3


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.564972,0.0
e2/e3,0.0,13.333333,0.0,0.0,0.0,0.0,4.519774,0.0
e2/e4 or e1/e3,0.0,20.0,0.0,0.0,0.0,0.0,1.694915,0.0
e3/e3,0.0,6.666667,100.0,100.0,100.0,100.0,32.768362,100.0
e3/e4,0.0,46.666667,0.0,0.0,0.0,0.0,46.327684,0.0
e4/e4,100.0,13.333333,0.0,0.0,0.0,0.0,14.124294,0.0
total,2.0,15.0,1.0,2.0,1.0,3.0,177.0,3.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),1 (0.56%),0 (0.00%)
e2/e3,0 (0.00%),2 (13.33%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),8 (4.52%),0 (0.00%)
e2/e4 or e1/e3,0 (0.00%),3 (20.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),3 (1.69%),0 (0.00%)
e3/e3,0 (0.00%),1 (6.67%),1 (100.00%),2 (100.00%),1 (100.00%),3 (100.00%),58 (32.77%),3 (100.00%)
e3/e4,0 (0.00%),7 (46.67%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),82 (46.33%),0 (0.00%)
e4/e4,2 (100.00%),2 (13.33%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),25 (14.12%),0 (0.00%)
total,2,15,1,2,1,3,177,3


ID                                file-Gq385KQJYBPvJ3FY4Q1J5FFv
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_26171645_A_G
Name                              AD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:46 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:47 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385P0JYBPgfjQbQQp194PB
Class    

Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,3,0
e2/e3,1,3,0,0,1,1,17,2
e2/e4 or e1/e3,1,2,0,1,0,0,6,0
e3/e3,1,9,1,2,3,3,124,3
e3/e4,4,8,0,4,1,0,74,4
e4/e4,1,2,0,0,1,0,22,2
total,8,24,1,7,6,4,246,11


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,1.219512,0.0
e2/e3,12.5,12.5,0.0,0.0,16.666667,25.0,6.910569,18.181818
e2/e4 or e1/e3,12.5,8.333333,0.0,14.285714,0.0,0.0,2.439024,0.0
e3/e3,12.5,37.5,100.0,28.571429,50.0,75.0,50.406504,27.272727
e3/e4,50.0,33.333333,0.0,57.142857,16.666667,0.0,30.081301,36.363636
e4/e4,12.5,8.333333,0.0,0.0,16.666667,0.0,8.943089,18.181818
total,8.0,24.0,1.0,7.0,6.0,4.0,246.0,11.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),3 (1.22%),0 (0.00%)
e2/e3,1 (12.50%),3 (12.50%),0 (0.00%),0 (0.00%),1 (16.67%),1 (25.00%),17 (6.91%),2 (18.18%)
e2/e4 or e1/e3,1 (12.50%),2 (8.33%),0 (0.00%),1 (14.29%),0 (0.00%),0 (0.00%),6 (2.44%),0 (0.00%)
e3/e3,1 (12.50%),9 (37.50%),1 (100.00%),2 (28.57%),3 (50.00%),3 (75.00%),124 (50.41%),3 (27.27%)
e3/e4,4 (50.00%),8 (33.33%),0 (0.00%),4 (57.14%),1 (16.67%),0 (0.00%),74 (30.08%),4 (36.36%)
e4/e4,1 (12.50%),2 (8.33%),0 (0.00%),0 (0.00%),1 (16.67%),0 (0.00%),22 (8.94%),2 (18.18%)
total,8,24,1,7,6,4,246,11


ID                                file-Gq385PjJYBPb05fV03x8z7fP
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_26171645_A_G
Name                              RD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:51 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:52 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385Q8JYBPb05fV03x8z7fb
Class    

Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,0,19,0,0,0
e2/e3,5,25,0,0,2,3,3,309,0,0,6
e2/e4 or e1/e3,4,11,0,0,2,0,1,52,0,0,1
e3/e3,18,74,2,3,14,15,41,1546,1,1,73
e3/e4,7,36,1,0,7,7,9,535,0,0,15
e4/e4,1,9,0,0,2,1,1,35,0,0,2
total,35,155,3,3,27,26,55,2496,1,1,97


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.761218,0.0,0.0,0.0
e2/e3,14.285714,16.129032,0.0,0.0,7.407407,11.538462,5.454545,12.379808,0.0,0.0,6.185567
e2/e4 or e1/e3,11.428571,7.096774,0.0,0.0,7.407407,0.0,1.818182,2.083333,0.0,0.0,1.030928
e3/e3,51.428571,47.741935,66.666667,100.0,51.851852,57.692308,74.545455,61.939103,100.0,100.0,75.257732
e3/e4,20.0,23.225806,33.333333,0.0,25.925926,26.923077,16.363636,21.434295,0.0,0.0,15.463918
e4/e4,2.857143,5.806452,0.0,0.0,7.407407,3.846154,1.818182,1.402244,0.0,0.0,2.061856
total,35.0,155.0,3.0,3.0,27.0,26.0,55.0,2496.0,1.0,1.0,97.0


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),19 (0.76%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e3,5 (14.29%),25 (16.13%),0 (0.00%),0 (0.00%),2 (7.41%),3 (11.54%),3 (5.45%),309 (12.38%),0 (0.00%),0 (0.00%),6 (6.19%)
e2/e4 or e1/e3,4 (11.43%),11 (7.10%),0 (0.00%),0 (0.00%),2 (7.41%),0 (0.00%),1 (1.82%),52 (2.08%),0 (0.00%),0 (0.00%),1 (1.03%)
e3/e3,18 (51.43%),74 (47.74%),2 (66.67%),3 (100.00%),14 (51.85%),15 (57.69%),41 (74.55%),1546 (61.94%),1 (100.00%),1 (100.00%),73 (75.26%)
e3/e4,7 (20.00%),36 (23.23%),1 (33.33%),0 (0.00%),7 (25.93%),7 (26.92%),9 (16.36%),535 (21.43%),0 (0.00%),0 (0.00%),15 (15.46%)
e4/e4,1 (2.86%),9 (5.81%),0 (0.00%),0 (0.00%),2 (7.41%),1 (3.85%),1 (1.82%),35 (1.40%),0 (0.00%),0 (0.00%),2 (2.06%)
total,35,155,3,3,27,26,55,2496,1,1,97


ID                                file-Gq385V0JYBPgp85fYz93yVG4
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_26171645_A_G
Name                              Control_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:13:56 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:13:57 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385VQJYBPq1y2GX9bJ56bJ
Clas

Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,1,0
e2/e3,0,2,0,0,0,0,8,0
e2/e4 or e1/e3,0,3,0,0,0,0,3,0
e3/e3,0,1,1,2,1,3,58,3
e3/e4,0,7,0,0,0,0,83,0
e4/e4,2,2,0,0,0,0,26,0
total,2,15,1,2,1,3,179,3


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.558659,0.0
e2/e3,0.0,13.333333,0.0,0.0,0.0,0.0,4.469274,0.0
e2/e4 or e1/e3,0.0,20.0,0.0,0.0,0.0,0.0,1.675978,0.0
e3/e3,0.0,6.666667,100.0,100.0,100.0,100.0,32.402235,100.0
e3/e4,0.0,46.666667,0.0,0.0,0.0,0.0,46.368715,0.0
e4/e4,100.0,13.333333,0.0,0.0,0.0,0.0,14.52514,0.0
total,2.0,15.0,1.0,2.0,1.0,3.0,179.0,3.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),1 (0.56%),0 (0.00%)
e2/e3,0 (0.00%),2 (13.33%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),8 (4.47%),0 (0.00%)
e2/e4 or e1/e3,0 (0.00%),3 (20.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),3 (1.68%),0 (0.00%)
e3/e3,0 (0.00%),1 (6.67%),1 (100.00%),2 (100.00%),1 (100.00%),3 (100.00%),58 (32.40%),3 (100.00%)
e3/e4,0 (0.00%),7 (46.67%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),83 (46.37%),0 (0.00%)
e4/e4,2 (100.00%),2 (13.33%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),26 (14.53%),0 (0.00%)
total,2,15,1,2,1,3,179,3


ID                                file-Gq385X8JYBPq1y2GX9bJ56bP
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_26171723_T_C
Name                              AD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:14:01 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:14:02 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385XjJYBPgp85fYz93yVGB
Class    

Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,3,0
e2/e3,1,3,0,0,1,1,17,2
e2/e4 or e1/e3,1,2,0,1,0,0,6,0
e3/e3,1,9,1,2,3,3,124,3
e3/e4,5,8,0,4,1,0,74,4
e4/e4,1,3,0,0,1,0,23,2
total,9,25,1,7,6,4,247,11


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,1.214575,0.0
e2/e3,11.111111,12.0,0.0,0.0,16.666667,25.0,6.882591,18.181818
e2/e4 or e1/e3,11.111111,8.0,0.0,14.285714,0.0,0.0,2.42915,0.0
e3/e3,11.111111,36.0,100.0,28.571429,50.0,75.0,50.202429,27.272727
e3/e4,55.555556,32.0,0.0,57.142857,16.666667,0.0,29.959514,36.363636
e4/e4,11.111111,12.0,0.0,0.0,16.666667,0.0,9.311741,18.181818
total,9.0,25.0,1.0,7.0,6.0,4.0,247.0,11.0


Unnamed: 0,AAC,AFR,AJ,CAH,CAS,EAS,EUR,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),3 (1.21%),0 (0.00%)
e2/e3,1 (11.11%),3 (12.00%),0 (0.00%),0 (0.00%),1 (16.67%),1 (25.00%),17 (6.88%),2 (18.18%)
e2/e4 or e1/e3,1 (11.11%),2 (8.00%),0 (0.00%),1 (14.29%),0 (0.00%),0 (0.00%),6 (2.43%),0 (0.00%)
e3/e3,1 (11.11%),9 (36.00%),1 (100.00%),2 (28.57%),3 (50.00%),3 (75.00%),124 (50.20%),3 (27.27%)
e3/e4,5 (55.56%),8 (32.00%),0 (0.00%),4 (57.14%),1 (16.67%),0 (0.00%),74 (29.96%),4 (36.36%)
e4/e4,1 (11.11%),3 (12.00%),0 (0.00%),0 (0.00%),1 (16.67%),0 (0.00%),23 (9.31%),2 (18.18%)
total,9,25,1,7,6,4,247,11


ID                                file-Gq385YQJYBPvJ3FY4Q1J5FJ4
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_26171723_T_C
Name                              RD_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:14:06 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:14:07 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385Z0JYBPb05fV03x8z7gX
Class    

Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0,0,0,0,0,0,0,0,0,0,0
e1/e2,0,0,0,0,0,0,0,0,0,0,0
e1/e4,0,0,0,0,0,0,0,0,0,0,0
e2/e2,0,0,0,0,0,0,0,19,0,0,0
e2/e3,5,25,0,0,2,3,3,309,0,0,6
e2/e4 or e1/e3,4,11,0,0,2,0,1,52,0,0,1
e3/e3,18,74,2,3,15,15,41,1548,1,2,73
e3/e4,7,36,1,0,7,7,9,537,0,0,15
e4/e4,1,9,0,0,2,1,1,35,0,0,2
total,35,155,3,3,28,26,55,2500,1,2,97


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e1/e4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e2/e2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.76,0.0,0.0,0.0
e2/e3,14.285714,16.129032,0.0,0.0,7.142857,11.538462,5.454545,12.36,0.0,0.0,6.185567
e2/e4 or e1/e3,11.428571,7.096774,0.0,0.0,7.142857,0.0,1.818182,2.08,0.0,0.0,1.030928
e3/e3,51.428571,47.741935,66.666667,100.0,53.571429,57.692308,74.545455,61.92,100.0,100.0,75.257732
e3/e4,20.0,23.225806,33.333333,0.0,25.0,26.923077,16.363636,21.48,0.0,0.0,15.463918
e4/e4,2.857143,5.806452,0.0,0.0,7.142857,3.846154,1.818182,1.4,0.0,0.0,2.061856
total,35.0,155.0,3.0,3.0,28.0,26.0,55.0,2500.0,1.0,2.0,97.0


Unnamed: 0,AAC,AFR,AJ,AMR,CAH,CAS,EAS,EUR,FIN,MDE,SAS
e1/e1,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e1/e4,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e2,0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),0 (0.00%),19 (0.76%),0 (0.00%),0 (0.00%),0 (0.00%)
e2/e3,5 (14.29%),25 (16.13%),0 (0.00%),0 (0.00%),2 (7.14%),3 (11.54%),3 (5.45%),309 (12.36%),0 (0.00%),0 (0.00%),6 (6.19%)
e2/e4 or e1/e3,4 (11.43%),11 (7.10%),0 (0.00%),0 (0.00%),2 (7.14%),0 (0.00%),1 (1.82%),52 (2.08%),0 (0.00%),0 (0.00%),1 (1.03%)
e3/e3,18 (51.43%),74 (47.74%),2 (66.67%),3 (100.00%),15 (53.57%),15 (57.69%),41 (74.55%),1548 (61.92%),1 (100.00%),2 (100.00%),73 (75.26%)
e3/e4,7 (20.00%),36 (23.23%),1 (33.33%),0 (0.00%),7 (25.00%),7 (26.92%),9 (16.36%),537 (21.48%),0 (0.00%),0 (0.00%),15 (15.46%)
e4/e4,1 (2.86%),9 (5.81%),0 (0.00%),0 (0.00%),2 (7.14%),1 (3.85%),1 (1.82%),35 (1.40%),0 (0.00%),0 (0.00%),2 (2.06%)
total,35,155,3,3,28,26,55,2500,1,2,97


ID                                file-Gq385ZjJYBPgfJXKzpk98fKx
Class                             file
Project                           project-Gbgg70jJYBPf3yZ26F3GVFjx
Folder                            /wgs_analysis/results/12_protective_variants/chr21_26171723_T_C
Name                              Control_apoe_genotype_counts.csv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Thu Aug 22 00:14:11 2024
Created by                        spencermg3
 via the job                      job-Gq34GgQJYBPxJQFZ2Y16j8KX
Last modified                     Thu Aug 22 00:14:12 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
ID                                file-Gq385b8JYBPYj1g24ZzB537K
Clas