# Data Prep

In [None]:
# Optional environment setup: uncomment the lines below only if a package is
# missing from the kernel's environment (each package is listed once).
# !pip install statsmodels
# !pip install -U seaborn
# !pip install lifelines
# !pip install scikit-learn

In [None]:
# Imports here.
import numpy as np
import pandas as pd
import os
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

import scipy
from scipy import stats
from statsmodels.stats.weightstats import ztest as ztest

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import fdrcorrection

from lifelines import KaplanMeierFitter
import matplotlib.pyplot as plt

In [None]:
# Optional: fetch the input tables with the `dx` CLI (kept commented out).
# NOTE(review): the read_csv cells in this notebook open '*_april_30.csv' files,
# while the names listed here end in '_AUG_21_2023.csv' - confirm which set is
# current before re-running on a fresh workspace.
# ! dx download PD_interaction_analysis_AUG_21_2023.csv
# ! dx download AD_NO_APOE_interaction_analysis_AUG_21_2023.csv
# ! dx download AD_with_APOE_interaction_analysis_AUG_21_2023.csv
# ! dx download ALS_with_tenure.csv
# # ! dx download DEMENTIA_with_tenure.csv
# # ! dx download VASCULAR_with_tenure.csv

# PD

In [None]:
# Read the PD interaction-analysis table from the working directory and show it.
# NOTE(review): the commented-out `dx download` cell fetches a file ending in
# '_AUG_21_2023.csv', not this '_april_30.csv' name - confirm the file exists.
df_pd = pd.read_csv(
    filepath_or_buffer='PD_interaction_analysis_april_30.csv',
)
df_pd

## PD and F51 Nonorganic sleep disorders (not due to a substance or known physiological condition)

In [None]:
# Binomial GLM with the `PD` column as outcome; predictors are `F51`,
# `interactor_PD_F51`, `Z_score` and the covariate columns named in the formula.
ndd, variable = 'PD', 'F51'
model = f'{variable} and {ndd} PRS interaction'
data = df_pd

this_formula = (
    f"{ndd}~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
fitted = sm.formula.glm(formula=this_formula, data=data, family=sm.families.Binomial()).fit()

# Only these three terms are tabulated; the remaining covariates are fitted but not reported.
list_terms = [variable, f'interactor_{ndd}_{variable}', 'Z_score']
ci_bounds = fitted.conf_int()  # interval table on the coefficient scale
results = []
for term in list_terms:
    beta, se = fitted.params.loc[term], fitted.bse.loc[term]
    ci_low, ci_high = ci_bounds.loc[term]
    # OR is exp(beta); the interval columns are NOT exponentiated, so they bound Beta, not OR.
    results.append(
        (model, term, np.exp(beta), beta, se, ci_low, ci_high, beta / se, fitted.pvalues.loc[term])
    )

output1 = pd.DataFrame(
    results,
    columns=('Model', 'Parameter','OR', 'Beta','SE', '95% CI low', "95% CI high", 'z', "P-value"),
)
output1

## PD and G47 sleep disorders (includes sleep apnea and sleep related movement disorders)

In [None]:
# Binomial GLM with the `PD` column as outcome; predictors are `G47`,
# `interactor_PD_G47`, `Z_score` and the covariate columns named in the formula.
ndd, variable = 'PD', 'G47'
model = f'{variable} and {ndd} PRS interaction'
data = df_pd

this_formula = (
    f"{ndd}~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
fitted = sm.formula.glm(formula=this_formula, data=data, family=sm.families.Binomial()).fit()

# Only these three terms are tabulated; the remaining covariates are fitted but not reported.
list_terms = [variable, f'interactor_{ndd}_{variable}', 'Z_score']
ci_bounds = fitted.conf_int()  # interval table on the coefficient scale
results = []
for term in list_terms:
    beta, se = fitted.params.loc[term], fitted.bse.loc[term]
    ci_low, ci_high = ci_bounds.loc[term]
    # OR is exp(beta); the interval columns are NOT exponentiated, so they bound Beta, not OR.
    results.append(
        (model, term, np.exp(beta), beta, se, ci_low, ci_high, beta / se, fitted.pvalues.loc[term])
    )

output2 = pd.DataFrame(
    results,
    columns=('Model', 'Parameter','OR', 'Beta','SE', '95% CI low', "95% CI high", 'z', "P-value"),
)
output2

# AD without APOE

In [None]:
# Read the AD (PRS without APOE) interaction-analysis table and show it.
# The file name has no placeholders, so a plain string literal is used.
# NOTE(review): the commented-out `dx download` cell fetches a file ending in
# '_AUG_21_2023.csv', not this '_april_30.csv' name - confirm the file exists.
df_ad_no_apoe = pd.read_csv(
    filepath_or_buffer='AD_NO_APOE_interaction_analysis_april_30.csv',
)
df_ad_no_apoe

## AD (no APOE in PRS) and F51 Nonorganic sleep disorders (not due to a substance or known physiological condition)

In [None]:
# Binomial GLM on the no-APOE table with the `AD` column as outcome; predictors are
# `F51`, `interactor_AD_F51`, `Z_score` and the covariate columns named in the formula.
ndd, variable = 'AD', 'F51'
model = f'{variable} and {ndd} PRS interaction (excluding APOE4)'
data = df_ad_no_apoe

this_formula = (
    f"{ndd}~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
fitted = sm.formula.glm(formula=this_formula, data=data, family=sm.families.Binomial()).fit()

# Only these three terms are tabulated; the remaining covariates are fitted but not reported.
list_terms = [variable, f'interactor_{ndd}_{variable}', 'Z_score']
ci_bounds = fitted.conf_int()  # interval table on the coefficient scale
results = []
for term in list_terms:
    beta, se = fitted.params.loc[term], fitted.bse.loc[term]
    ci_low, ci_high = ci_bounds.loc[term]
    # OR is exp(beta); the interval columns are NOT exponentiated, so they bound Beta, not OR.
    results.append(
        (model, term, np.exp(beta), beta, se, ci_low, ci_high, beta / se, fitted.pvalues.loc[term])
    )

output3 = pd.DataFrame(
    results,
    columns=('Model', 'Parameter','OR', 'Beta','SE', '95% CI low', "95% CI high", 'z', "P-value"),
)
output3

## AD (no APOE in PRS) and G47 sleep disorders (includes sleep apnea and sleep related movement disorders)

In [None]:
# Binomial GLM on the no-APOE table with the `AD` column as outcome; predictors are
# `G47`, `interactor_AD_G47`, `Z_score` and the covariate columns named in the formula.
ndd, variable = 'AD', 'G47'
model = f'{variable} and {ndd} PRS interaction (excluding APOE4)'
data = df_ad_no_apoe

this_formula = (
    f"{ndd}~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
fitted = sm.formula.glm(formula=this_formula, data=data, family=sm.families.Binomial()).fit()

# Only these three terms are tabulated; the remaining covariates are fitted but not reported.
list_terms = [variable, f'interactor_{ndd}_{variable}', 'Z_score']
ci_bounds = fitted.conf_int()  # interval table on the coefficient scale
results = []
for term in list_terms:
    beta, se = fitted.params.loc[term], fitted.bse.loc[term]
    ci_low, ci_high = ci_bounds.loc[term]
    # OR is exp(beta); the interval columns are NOT exponentiated, so they bound Beta, not OR.
    results.append(
        (model, term, np.exp(beta), beta, se, ci_low, ci_high, beta / se, fitted.pvalues.loc[term])
    )

output4 = pd.DataFrame(
    results,
    columns=('Model', 'Parameter','OR', 'Beta','SE', '95% CI low', "95% CI high", 'z', "P-value"),
)
output4

# AD with APOE

In [None]:
# Read the AD (PRS including APOE) interaction-analysis table; nothing is displayed.
# The file name has no placeholders, so a plain string literal is used.
# NOTE(review): the commented-out `dx download` cell fetches a file ending in
# '_AUG_21_2023.csv', not this '_april_30.csv' name - confirm the file exists.
df_ad_with_apoe = pd.read_csv(
    filepath_or_buffer='AD_with_APOE_interaction_analysis_april_30.csv',
)

## AD (includes APOE in PRS) and F51 Nonorganic sleep disorders (not due to a substance or known physiological condition)

In [None]:
# Binomial GLM on the with-APOE table with the `AD` column as outcome; predictors are
# `F51`, `interactor_AD_F51`, `Z_score` and the covariate columns named in the formula.
ndd, variable = 'AD', 'F51'
model = f'{variable} and {ndd} PRS interaction (with APOE4)'
data = df_ad_with_apoe

this_formula = (
    f"{ndd}~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
fitted = sm.formula.glm(formula=this_formula, data=data, family=sm.families.Binomial()).fit()

# Only these three terms are tabulated; the remaining covariates are fitted but not reported.
list_terms = [variable, f'interactor_{ndd}_{variable}', 'Z_score']
ci_bounds = fitted.conf_int()  # interval table on the coefficient scale
results = []
for term in list_terms:
    beta, se = fitted.params.loc[term], fitted.bse.loc[term]
    ci_low, ci_high = ci_bounds.loc[term]
    # OR is exp(beta); the interval columns are NOT exponentiated, so they bound Beta, not OR.
    results.append(
        (model, term, np.exp(beta), beta, se, ci_low, ci_high, beta / se, fitted.pvalues.loc[term])
    )

output5 = pd.DataFrame(
    results,
    columns=('Model', 'Parameter','OR', 'Beta','SE', '95% CI low', "95% CI high", 'z', "P-value"),
)
output5

## AD (includes APOE in PRS) and G47 sleep disorders (includes sleep apnea and sleep related movement disorders)

In [None]:
# Binomial GLM on the with-APOE table with the `AD` column as outcome; predictors are
# `G47`, `interactor_AD_G47`, `Z_score` and the covariate columns named in the formula.
ndd, variable = 'AD', 'G47'
model = f'{variable} and {ndd} PRS interaction (with APOE4)'
data = df_ad_with_apoe

this_formula = (
    f"{ndd}~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
fitted = sm.formula.glm(formula=this_formula, data=data, family=sm.families.Binomial()).fit()

# Only these three terms are tabulated; the remaining covariates are fitted but not reported.
list_terms = [variable, f'interactor_{ndd}_{variable}', 'Z_score']
ci_bounds = fitted.conf_int()  # interval table on the coefficient scale
results = []
for term in list_terms:
    beta, se = fitted.params.loc[term], fitted.bse.loc[term]
    ci_low, ci_high = ci_bounds.loc[term]
    # OR is exp(beta); the interval columns are NOT exponentiated, so they bound Beta, not OR.
    results.append(
        (model, term, np.exp(beta), beta, se, ci_low, ci_high, beta / se, fitted.pvalues.loc[term])
    )

output6 = pd.DataFrame(
    results,
    columns=('Model', 'Parameter','OR', 'Beta','SE', '95% CI low', "95% CI high", 'z', "P-value"),
)
output6

In [None]:
# Final output: stack the six per-model result tables row-wise.
# The index is not reset, so each table's own 0..2 row labels repeat in the result.
final_output = pd.concat(
    [output1, output2, output3, output4, output5, output6],
    axis=0,
)
final_output

In [None]:
# Write the combined results table to the working directory, with a header row
# and without the (repeating) row index.
final_output.to_csv(
    'final_PRS_sleep_updated_interaction_may_2024.csv',
    index=False,
    header=True,
)

In [None]:
# Shell escape: send the results CSV written to the working directory to the
# remote location given by --path, using the `dx` command-line tool.
! dx upload final_PRS_sleep_updated_interaction_may_2024.csv --path /data/interaction/final_PRS_sleep_updated_interaction_may_2024.csv

# Age at Onset

In [None]:
# See what the models look like when we switch to a linear model and use age at onset (AAO) of PD or AD as the outcome.

In [None]:
# Select only cases: keep the rows whose diagnosis flag equals 1 in each analysis table.
pd_cases = df_pd[df_pd['PD']==1]
ad_cases_no_apoe = df_ad_no_apoe[df_ad_no_apoe['AD']==1]
# Filter the with-APOE table by its OWN `AD` flag. The previous version indexed
# df_ad_no_apoe with a mask built from df_ad_with_apoe, which returned rows of the
# no-APOE table, so the models fitted on `ad_cases_with_apoe` used the wrong table.
ad_cases_with_apoe = df_ad_with_apoe[df_ad_with_apoe['AD']==1]

## PD

In [None]:
# GLM of the `AAO` column, fitted on `pd_cases`, with the F51 terms as predictors.
# No `family` is passed, so statsmodels' default GLM family is used.
ndd, variable = 'PD', 'F51'

this_formula = (
    f"AAO~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
aao_model = sm.formula.glm(formula=this_formula, data=pd_cases)
fitted = aao_model.fit()
print(fitted.summary())

In [None]:
# GLM of the `AAO` column, fitted on `pd_cases`, with the G47 terms as predictors.
# No `family` is passed, so statsmodels' default GLM family is used.
ndd, variable = 'PD', 'G47'

this_formula = (
    f"AAO~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
aao_model = sm.formula.glm(formula=this_formula, data=pd_cases)
fitted = aao_model.fit()
print(fitted.summary())

## AD without APOE in PRS

In [None]:
# GLM of the `AAO` column, fitted on `ad_cases_no_apoe`, with the F51 terms as predictors.
# No `family` is passed, so statsmodels' default GLM family is used.
ndd, variable = 'AD', 'F51'

this_formula = (
    f"AAO~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
aao_model = sm.formula.glm(formula=this_formula, data=ad_cases_no_apoe)
fitted = aao_model.fit()
print(fitted.summary())

In [None]:
# GLM of the `AAO` column, fitted on `ad_cases_no_apoe`, with the G47 terms as predictors.
# No `family` is passed, so statsmodels' default GLM family is used.
ndd, variable = 'AD', 'G47'

this_formula = (
    f"AAO~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
aao_model = sm.formula.glm(formula=this_formula, data=ad_cases_no_apoe)
fitted = aao_model.fit()
print(fitted.summary())

## AD cases with APOE in PRS

In [None]:
# GLM of the `AAO` column, fitted on `ad_cases_with_apoe`, with the F51 terms as predictors.
# No `family` is passed, so statsmodels' default GLM family is used.
ndd, variable = 'AD', 'F51'

this_formula = (
    f"AAO~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
aao_model = sm.formula.glm(formula=this_formula, data=ad_cases_with_apoe)
fitted = aao_model.fit()
print(fitted.summary())

In [None]:
# GLM of the `AAO` column, fitted on `ad_cases_with_apoe`, with the G47 terms as predictors.
# No `family` is passed, so statsmodels' default GLM family is used.
ndd, variable = 'AD', 'G47'

this_formula = (
    f"AAO~ {variable} + interactor_{ndd}_{variable} + Z_score"
    " + Z_age + GENETIC_SEX + TOWNSEND + Z_PC1 +Z_PC2 +Z_PC3 +Z_PC4 +Z_PC5"
)
aao_model = sm.formula.glm(formula=this_formula, data=ad_cases_with_apoe)
fitted = aao_model.fit()
print(fitted.summary())