In [None]:
import os
import sys
import argparse 
import math
import time
# ! pip install h5py
# import h5py
import joblib
import subprocess
import numpy as np
import pandas as pd
import tables
import statsmodels.api as sm
from scipy import stats
# ! pip install --upgrade tables

# 0. Getting PRS and Generating --addit file 

In [None]:
# Attach the polygenic risk score (PRS) to the PPMI clinical/omics table.
# The PRS file keys samples by "IID"; it is renamed to "ID" so it can be joined.
PRS_df = pd.read_csv("output_prs_prs.csv").rename(columns={"IID": "ID"})
old_addit_df = pd.read_csv("OG_PPMI_Only_Combined_Clinical_Omics.csv")

# Inner join: only samples present in both tables are kept.
PRS_with_addits_df = pd.merge(PRS_df, old_addit_df, on="ID", how="inner")

# PRS.csv holds the renamed PRS table on its own; PRS_and_addits.csv holds the joined table.
PRS_df.to_csv("PRS.csv", index=False)
PRS_with_addits_df.to_csv("PRS_and_addits.csv", index=False)

# 1. Start the munging for different p-value thresholds plus PRS

In [None]:
%%bash
# Munge the PPMI genetics + PRS once per GWAS p-value cutoff (1E-2 down to 1E-8).
# Each entry is <--p value>:<label used in the output prefix>.
for pair in 0.01:p1E2 0.001:p1E3 0.0001:p1E4 0.00001:p1E5 0.000001:p1E6 0.0000001:p1E7 0.00000001:p1E8
do
    p_cutoff=${pair%%:*}   # text before the colon
    label=${pair##*:}      # text after the colon
    genoml discrete supervised munge --p ${p_cutoff} --prefix PPMI_Only_genetics_${label}_with_PRS \
        --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only \
        --pheno OG_PPMI_Only_PHENO.csv \
        --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv \
        --addit PRS.csv \
        --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 \
        --target_features SNPs_rsIDs_target_features.txt \
        --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv \
        --adjust_data yes --adjust_normalize yes --umap_reduce no
done

# 2. Train it

In [None]:
%%bash
# Train on each munged genetics + PRS dataset, one run per p-value label.
# The %%bash magic is required: these are shell commands, and without it the
# Python kernel would try to parse them as Python and raise a SyntaxError.
for label in p1E2 p1E3 p1E4 p1E5 p1E6 p1E7 p1E8
do
    genoml discrete supervised train --prefix PPMI_Only_genetics_${label}_with_PRS
done

# 3. Tune it

In [None]:
%%bash
# Tune the trained model for each genetics + PRS p-value label.
# The %%bash magic is required: these are shell commands, and without it the
# Python kernel would try to parse them as Python and raise a SyntaxError.
for label in p1E2 p1E3 p1E4 p1E5 p1E6 p1E7 p1E8
do
    genoml discrete supervised tune --prefix PPMI_Only_genetics_${label}_with_PRS
done

# 4. Some weird MAF issues (lower in reference data than in AMP-PD, MAF < 0.01)
Let's rerun this using the PRS and the SNPs filtered at MAF > 0.05

In [None]:
%%bash
# Write a new PPMI binary fileset (suffix -MAF05) filtered with --maf 0.05,
# i.e. variants with minor allele frequency below 0.05 are dropped.
# The %%bash magic is required: this is a shell command, not Python.
plink --bfile AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only --maf 0.05 --make-bed --out AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05

In [None]:
%%bash
# Genoml time again.
# (%%bash must be the first line of the cell; a comment above it stops IPython
# from recognising the cell magic.)
## Now with a different MAF.
# NOTE(review): the munge and train commands below are commented out, presumably
# because they had already been run once. The tune step at the bottom needs their
# outputs under ./p1E2/ ... ./p1E8/, so re-enable them when starting from scratch.
# Munge it.
# genoml discrete supervised munge --p 0.01 --prefix ./p1E2/PPMI_Only_genetics_p1E2_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no
# genoml discrete supervised munge --p 0.001 --prefix ./p1E3/PPMI_Only_genetics_p1E3_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no
# genoml discrete supervised munge --p 0.0001 --prefix ./p1E4/PPMI_Only_genetics_p1E4_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no
# genoml discrete supervised munge --p 0.00001 --prefix ./p1E5/PPMI_Only_genetics_p1E5_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no
# genoml discrete supervised munge --p 0.000001 --prefix ./p1E6/PPMI_Only_genetics_p1E6_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no
# genoml discrete supervised munge --p 0.0000001 --prefix ./p1E7/PPMI_Only_genetics_p1E7_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no
# genoml discrete supervised munge --p 0.00000001 --prefix ./p1E8/PPMI_Only_genetics_p1E8_with_PRS-MAF05 --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 --pheno OG_PPMI_Only_PHENO.csv --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv --addit PRS.csv --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 --target_features SNPs_rsIDs_target_features.txt --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv --adjust_data yes --adjust_normalize yes --umap_reduce no

# Train it.
# genoml discrete supervised train --prefix ./p1E2/PPMI_Only_genetics_p1E2_with_PRS-MAF05
# genoml discrete supervised train --prefix ./p1E3/PPMI_Only_genetics_p1E3_with_PRS-MAF05
# genoml discrete supervised train --prefix ./p1E4/PPMI_Only_genetics_p1E4_with_PRS-MAF05
# genoml discrete supervised train --prefix ./p1E5/PPMI_Only_genetics_p1E5_with_PRS-MAF05
# genoml discrete supervised train --prefix ./p1E6/PPMI_Only_genetics_p1E6_with_PRS-MAF05
# genoml discrete supervised train --prefix ./p1E7/PPMI_Only_genetics_p1E7_with_PRS-MAF05
# genoml discrete supervised train --prefix ./p1E8/PPMI_Only_genetics_p1E8_with_PRS-MAF05

# Tune it.
for label in p1E2 p1E3 p1E4 p1E5 p1E6 p1E7 p1E8
do
    genoml discrete supervised tune --prefix ./${label}/PPMI_Only_genetics_${label}_with_PRS-MAF05
done

# 5. Let's check thresholds on adjusted transcriptomic data
After this we'll have thresholds in genomics and genetics (right now it's looking like MAF > 0.05 & P < 1E-6 for genetics + PRS).

In [None]:
# Differential-expression results (one row per PREDICTOR with its P_VAL) and the
# adjusted transcript table whose columns are filtered below.
diffexp_df = pd.read_csv("diff_exp_results_11022020.csv")
old_addit_df = pd.read_csv("PDBP_and_OG_PPMI_Transcriptomics_ALLTRANSCRIPTS_diffExp_ProteinCoding_ADJUSTED10PCs.csv")

# Label -> inclusive P-value cutoff. The label is the suffix of the output file name.
P_VALUE_THRESHOLDS = {
    "p1E2": 1E-2,
    "p1E3": 1E-3,
    "p1E4": 1E-4,
    "p1E5": 1E-5,
    "p1E6": 1E-6,
    "p1E7": 1E-7,
    "p1E8": 1E-8,
}

# One filtered frame per threshold, kept in a dict so each stays inspectable by label.
addit_dfs = {}
for threshold_label, p_cutoff in P_VALUE_THRESHOLDS.items():
    # Predictors whose differential-expression P value is at or below the cutoff.
    passing_predictors = diffexp_df.loc[diffexp_df['P_VAL'] <= p_cutoff, 'PREDICTOR'].unique().tolist()
    keep_columns = ['ID'] + passing_predictors

    # Keep 'ID' plus the passing predictors that actually exist as columns,
    # in the column order of the transcript table.
    addit_dfs[threshold_label] = old_addit_df.loc[:, old_addit_df.columns.isin(keep_columns)]
    addit_dfs[threshold_label].to_csv("addit_" + threshold_label + ".csv", index=False)

# 6. Now we head back to just running GenoML!
Same directories different prefixes

In [None]:
%%bash

# Transcriptomics-only models: munge, train and tune once per omics P threshold.
for thr in p1E7 p1E6 p1E5 p1E4 p1E3
do
    # All three steps share one output prefix inside the threshold's directory.
    run_prefix=./${thr}/PPMI_Only_transcriptomics_only-${thr}

    echo "[UPDATE] MUNGING DATA AT P VAL LESS THAN --> " ${thr}
    genoml discrete supervised munge --pheno OG_PPMI_Only_PHENO.csv --addit addit_${thr}.csv --prefix ${run_prefix} --impute mean --feature_selection 500

    echo "[UPDATE] TRAINNING MODELS AND COMPETING ALGORITHMS AT P VAL LESS THAN --> " ${thr}
    genoml discrete supervised train --prefix ${run_prefix}

    echo "[UPDATE] TUNING THE WINNING MODEL AND ALGORITHM AT P VAL LESS THAN --> " ${thr}
    genoml discrete supervised tune --prefix ${run_prefix}
done

# 7. Combined model time
It is possible to fit these to the equivalent PDBP dataset, harmonize and test. The best one will be the production model to share with the community, as it has the best chance of working well on other datasets.

In [None]:
%%bash
# Make some directories for new models. This is all possible combos of genetic (with PRS) and genomic data P thresholds + clinico demographics.
# (%%bash must be the first line of the cell; a comment above it stops IPython
# from recognising the cell magic.)
for genetic in p1E2 p1E3 p1E4 p1E5 p1E6 p1E7 p1E8
do
    for omic in p1E2 p1E3 p1E4 p1E5 p1E6 p1E7 p1E8
    do
        # -p makes the cell safe to re-run: an existing directory is not an error.
        mkdir -p ./genetic_${genetic}_omic_${omic}/
    done
done

In [None]:
# Build one custom "addit" file per omics P threshold: the PRS and
# clinico-demographic columns joined to the transcripts kept at that threshold.
# Genetic data is not part of these files; it is supplied separately at munging.
PRS_clinical_df = pd.read_csv("PRS_and_addits.csv")
clinical_columns = ['ID','PRS90','AGE','MALE','EDUCATION','FAMILY_HISTORY','UPSIT','InfAJ']
PRS_clinical_reduced_df = PRS_clinical_df[clinical_columns]
PRS_clinical_reduced_df.head()  # not the cell's last expression, so nothing is rendered

# Most stringent threshold first; files are written to the working directory.
for omic_label in ("p1E8", "p1E7", "p1E6", "p1E5", "p1E4", "p1E3", "p1E2"):
    addit_df = pd.read_csv("addit_" + omic_label + ".csv")
    # Inner join: only samples present in both tables are kept.
    merged_df = pd.merge(PRS_clinical_reduced_df, addit_df, on='ID', how='inner')
    merged_df.to_csv("PRS_clinical_omics-" + omic_label + ".csv", index=False)

# 7. Big time GenoML runs
We have 7 P thresholds for genetics and 7 for genomics. That's a lot of modeling ... 49 to be exact.
Let's make these combined models and get it going.

In [None]:
%%bash
# Munge data.
# (%%bash must be the first line of the cell; a comment above it stops IPython
# from recognising the cell magic.)
# Outer loop: omics P threshold (selects the addit file).
# Inner loop: genetic P threshold, written as <--p value>:<label>.

for omic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for pair in 0.00000001:p1E8 0.0000001:p1E7 0.000001:p1E6 0.00001:p1E5 0.0001:p1E4 0.001:p1E3 0.01:p1E2
    do
        p_cutoff=${pair%%:*}   # text before the colon
        genetic=${pair##*:}    # text after the colon
        run=genetic_${genetic}_omic_${omic}
        genoml discrete supervised munge --p ${p_cutoff} --prefix ./${run}/PPMI-${run} \
            --addit PRS_clinical_omics-${omic}.csv \
            --geno AMP_Euro_sampleQC_variantQC_FINAL_OG_PPMI_Only-MAF05 \
            --pheno OG_PPMI_Only_PHENO.csv \
            --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv \
            --impute mean --skip_prune no --r2_cutoff 0.1 --feature_selection 500 \
            --target_features SNPs_rsIDs_target_features.txt \
            --confounders OG_PPMI_Only_WGS_PCs_Confounders.csv \
            --adjust_data yes --adjust_normalize yes --umap_reduce no
    done
done

In [None]:
%%bash
# Train it.
# (%%bash must be the first line of the cell; a comment above it stops IPython
# from recognising the cell magic.)

for omic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for genetic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
    do
        run=genetic_${genetic}_omic_${omic}
        genoml discrete supervised train --prefix ./${run}/PPMI-${run}
    done
done

In [None]:
%%bash
# Tune it.
# (%%bash must be the first line of the cell; a comment above it stops IPython
# from recognising the cell magic.)

for omic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for genetic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
    do
        run=genetic_${genetic}_omic_${omic}
        genoml discrete supervised tune --prefix ./${run}/PPMI-${run} --max_tune 25
    done
done

# 8. Harmonize the PDBP (aka TEST) dataset

In [None]:
%%bash
# First, make the MAF-05 PDBP plink files to harmonize.
# (%%bash must be the first line of the cell; a comment above it stops IPython
# from recognising the cell magic.)
# The fileset is written to the working directory rather than ./../../ because
# the harmonize and PDBP munge steps read
# AMP_Euro_sampleQC_variantQC_FINAL_PDBP_Only-MAF05 from the working directory,
# the same way the PPMI -MAF05 fileset is written and read.

plink --bfile AMP_Euro_sampleQC_variantQC_FINAL_PDBP_Only --maf 0.05 --make-bed --out AMP_Euro_sampleQC_variantQC_FINAL_PDBP_Only-MAF05

In [None]:
%%bash

# Harmonize the PDBP genotypes against each of the 49 PPMI reference models.
# Outer loop: omics P threshold; inner loop: genetic P threshold.
for omic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for genetic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
    do
        run=genetic_${genetic}_omic_${omic}
        genoml harmonize --test_prefix ./${run}/validate-PDBP-${run} \
            --test_geno_prefix AMP_Euro_sampleQC_variantQC_FINAL_PDBP_Only-MAF05 \
            --ref_model_prefix ./${run}/PPMI-${run} \
            --training_snps_alleles ./${run}/PPMI-${run}.variants_and_alleles.tab
    done
done

# 9. Munge the PDBP (aka TEST) dataset on the PPMI (aka REFERENCE) dataset columns

In [None]:
# Attach the polygenic risk score (PRS) to the PDBP clinical/omics table.
# NOTE(review): this reads the same output_prs_prs.csv as the PPMI merge;
# presumably that file covers both cohorts -- confirm.
# The PRS file keys samples by "IID"; it is renamed to "ID" so it can be joined.
PDBP_PRS_df = pd.read_csv("output_prs_prs.csv").rename(columns={"IID": "ID"})
PDBP_old_addit_df = pd.read_csv("PDBP_Only_Combined_Clinical_Omics.csv")

# Inner join: only samples present in both tables are kept.
PDBP_PRS_with_addits_df = pd.merge(PDBP_PRS_df, PDBP_old_addit_df, on="ID", how="inner")

PDBP_PRS_df.to_csv("PDBP_PRS.csv", index=False)
PDBP_PRS_with_addits_df.to_csv("PDBP_PRS_and_addits.csv", index=False)

In [None]:
# Build one custom PDBP "addit" file per omics P threshold: the PRS and
# clinico-demographic columns joined to the transcripts kept at that threshold.
# Genetic data is not part of these files; it is supplied separately at munging.
# The addit_<threshold>.csv files read here come from a larger table covering
# both PPMI and PDBP; the inner join on ID restricts them to the samples in
# PDBP_PRS_and_addits.csv.
PRS_clinical_df = pd.read_csv("PDBP_PRS_and_addits.csv")
clinical_columns = ['ID','PRS90','AGE','MALE','EDUCATION','FAMILY_HISTORY','UPSIT','InfAJ']
PRS_clinical_reduced_df = PRS_clinical_df[clinical_columns]
PRS_clinical_reduced_df.head()  # not the cell's last expression, so nothing is rendered

# Most stringent threshold first; files are written to the working directory.
for omic_label in ("p1E8", "p1E7", "p1E6", "p1E5", "p1E4", "p1E3", "p1E2"):
    addit_df = pd.read_csv("addit_" + omic_label + ".csv")
    merged_df = pd.merge(PRS_clinical_reduced_df, addit_df, on='ID', how='inner')
    merged_df.to_csv("PDBP_PRS_clinical_omics-" + omic_label + ".csv", index=False)

In [None]:
%%bash

# Munge the PDBP (test) data for each of the 49 models, passing the
# refColsHarmonize_toKeep.txt list found under the matching validate-PDBP prefix.
# Outer loop: omics P threshold (selects the addit file).
# Inner loop: genetic P threshold, written as <--p value>:<label>.
for omic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for pair in 0.00000001:p1E8 0.0000001:p1E7 0.000001:p1E6 0.00001:p1E5 0.0001:p1E4 0.001:p1E3 0.01:p1E2
    do
        p_cutoff=${pair%%:*}   # text before the colon
        genetic=${pair##*:}    # text after the colon
        run=genetic_${genetic}_omic_${omic}
        genoml discrete supervised munge --p ${p_cutoff} --prefix ./${run}/validate-PDBP-${run} \
            --addit PDBP_PRS_clinical_omics-${omic}.csv \
            --geno AMP_Euro_sampleQC_variantQC_FINAL_PDBP_Only-MAF05 \
            --pheno PDBP_Only_PHENO.csv \
            --gwas GenoML_mod_META5_p005_maf001_90hits_Nov17.csv \
            --impute mean --skip_prune no --r2_cutoff 0.1 \
            --target_features SNPs_rsIDs_target_features.txt \
            --confounders PDBP_Only_WGS_PCs_Confounders.csv \
            --adjust_data yes --adjust_normalize yes --umap_reduce no \
            --ref_cols_harmonize ./${run}/validate-PDBP-${run}.refColsHarmonize_toKeep.txt
    done
done


# 10. Re-train the reference dataset on the harmonized columns

In [None]:
%%bash
# Re-train each PPMI reference model, passing the finalHarmonizedCols_toKeep.txt
# list found under the matching validate-PDBP prefix as --matching_columns.
# Outer loop: omics P threshold; inner loop: genetic P threshold.
for omic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for genetic in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
    do
        run=genetic_${genetic}_omic_${omic}
        genoml discrete supervised train --prefix ./${run}/PPMI-${run} \
            --matching_columns ./${run}/validate-PDBP-${run}.finalHarmonizedCols_toKeep.txt
    done
done

# 12. Test the Re-trained Models

In [None]:
%%bash
# Run the re-trained PPMI models against the PDBP validation data, one
# `genoml ... test` call per genetic x omic p-value threshold combination.
genetic_cutoffs=(p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2)
for i in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for g in "${genetic_cutoffs[@]}"
    do
        # Every input and output for a combination lives in its own directory.
        run_dir="./genetic_${g}_omic_${i}"
        genoml discrete supervised test \
            --prefix "${run_dir}/validate-PPMImodel-onPDBPdata-trainedModel-${g}_omic_${i}" \
            --test_prefix "${run_dir}/validate-PDBP-genetic_${g}_omic_${i}" \
            --ref_model_prefix "${run_dir}/PPMI-genetic_${g}_omic_${i}.trainedModel"
    done
done

# 13. Tune the re-trained models 

In [None]:
%%bash
# Tune the re-trained PPMI model for every genetic x omic p-value threshold
# combination, using the same harmonized column list and --max_tune 25.

# Tune one combination. $1 = genetic threshold tag, $2 = omic threshold tag.
tune_combo() {
    local tag="genetic_${1}_omic_${2}"
    genoml discrete supervised tune --prefix "./${tag}/PPMI-${tag}" --matching_columns "./${tag}/validate-PDBP-${tag}.finalHarmonizedCols_toKeep.txt" --max_tune 25
}

for i in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for g in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
    do
        tune_combo "$g" "$i"
    done
done

# 14. Test the tuned models
This step applies only where a tuned model exists. If tuning does not improve on the trained model, no tuned model is generated, so there is nothing to test for that combination.

33 models improved with tuning, so only those have a tuned model to test.

In [None]:
%%bash
# Evaluate the tuned PPMI models on the PDBP validation data for every
# genetic x omic p-value threshold combination.
#
# A tuned model is only generated when tuning beat the trained model, so a
# combination without one is skipped up front instead of being handed to
# genoml to fail.
# NOTE(review): assumes genoml loads the model from "<ref_model_prefix>.joblib"
# -- confirm against the installed GenoML version.
tested=0
skipped=0
failed=0
for i in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
do
    for g in p1E8 p1E7 p1E6 p1E5 p1E4 p1E3 p1E2
    do
        run_dir="./genetic_${g}_omic_${i}"
        ref_model="${run_dir}/PPMI-genetic_${g}_omic_${i}.tunedModel"

        if [ ! -f "${ref_model}.joblib" ]
        then
            echo "Skipping genetic_${g}_omic_${i}: no tuned model at ${ref_model}.joblib" >&2
            skipped=$((skipped + 1))
            continue
        fi

        if genoml discrete supervised test \
            --prefix "${run_dir}/validate-PPMImodel-onPDBPdata-tunedModel-${g}_omic_${i}" \
            --test_prefix "${run_dir}/validate-PDBP-genetic_${g}_omic_${i}" \
            --ref_model_prefix "${ref_model}"
        then
            tested=$((tested + 1))
        else
            # Keep going so one bad combination does not block the rest.
            echo "FAILED: genetic_${g}_omic_${i}" >&2
            failed=$((failed + 1))
        fi
    done
done

echo "Tuned-model tests: ${tested} succeeded, ${skipped} skipped (no tuned model), ${failed} failed"
# Exit non-zero only for real failures; skipped combinations are expected.
[ "${failed}" -eq 0 ]