Investigating whether significant differences in AD biomarkers exist between fast and slow progressors.

In [None]:
import pandas as pd
import numpy as np
import os
from scipy.stats import chi2_contingency, mannwhitneyu
import statsmodels.stats.api as sms

### ADNI data prep

In [None]:
# Read the ADNI table and the ADNI label table; each uses its first CSV column
# as the index. (The label table presumably supplies the "Cluster" column
# used further down -- confirm against the input files.)
adni_features = pd.read_csv("", index_col=0)
a_labels = pd.read_csv("", index_col=0)

# Index-aligned left join: every labelled row is kept and the ADNI columns
# are attached to it.
adni = a_labels.join(adni_features, how="left")

### NACC data prep

In [None]:
# Read the NACC label table (first CSV column as index), then give the
# column named "0" the descriptive name "Cluster".
n_labels = pd.read_csv("", index_col=0)
n_labels = n_labels.rename(columns={"0": "Cluster"})

In [None]:
# Load the NACC table, using the first CSV column as the row index.
nacc = pd.read_csv("", index_col=0)

In [None]:
# Biomarker columns of interest in the NACC table.
nacc_feats = ["CSFABETA", "CSFPTAU", "CSFTTAU", "AMYLPET"]

# Keep only rows with NACCVNUM == 1 (presumably the first visit -- confirm
# against the NACC data dictionary) and only the columns needed below.
# The explicit .copy() makes this an independent frame, so the edits that
# follow are guaranteed to take effect instead of acting on a temporary
# slice (chained assignment).
nacc_reduced = nacc.loc[nacc["NACCVNUM"] == 1, ["NACCVNUM"] + nacc_feats].copy()

# Treat the AMYLPET code 8 as missing (presumably an "unknown / not assessed"
# code -- confirm). Assigning the result back, rather than calling
# replace(..., inplace=True) on the selected column, is what reliably
# updates the frame.
nacc_reduced["AMYLPET"] = nacc_reduced["AMYLPET"].replace(8, np.nan)

# Drop rows that have no biomarker value at all.
nacc_reduced = nacc_reduced.dropna(how="all", subset=nacc_feats)

# Attach the biomarkers to the cluster labels (index-aligned left join) and
# drop labelled rows for which no biomarker row was found.
nacc_joined = n_labels.join(nacc_reduced)
nacc_joined = nacc_joined.dropna(how="all", subset=nacc_feats)

In [None]:
def _print_group_comparison(fast_df, slow_df, features):
    """Print a Mann-Whitney U test and the mean difference for each feature.

    For every column name in ``features`` this prints the result of a
    two-sided Mann-Whitney U test between the two groups, followed by the
    difference in means (slow minus fast) and the t-based confidence
    interval of that same difference. Missing values are dropped per
    feature before any statistic is computed.

    Args:
        fast_df: DataFrame holding the rows of the "fast" group.
        slow_df: DataFrame holding the rows of the "slow" group.
        features: Iterable of column names present in both frames.
    """
    for feat in features:
        fast_vals = fast_df[feat].dropna()
        slow_vals = slow_df[feat].dropna()

        # Spell out the alternative so the reported p-value does not depend
        # on the default of the installed SciPy version.
        result = mannwhitneyu(fast_vals, slow_vals, alternative="two-sided")
        print(f"{feat}: {result}")

        # diff in means
        slow_stats = sms.DescrStatsW(slow_vals)
        fast_stats = sms.DescrStatsW(fast_vals)
        diff = slow_stats.mean - fast_stats.mean
        # CompareMeans(d1, d2) describes d1 - d2, so the slow group goes
        # first to make the interval refer to the same slow-minus-fast
        # difference that is printed next to it.
        low, upp = sms.CompareMeans(slow_stats, fast_stats).tconfint_diff()
        print(
            f"Diff in means: {np.round(diff, 2)} "
            f"[{np.round(low, 2)}, {np.round(upp, 2)}]"
        )


# test for biomarker difference in ADNI
fast = adni[adni["Cluster"] == 1]
slow = adni[adni["Cluster"] == 2]

print("ADNI")
_print_group_comparison(fast, slow, ["ABETA", "AV45", "TAU"])

# test for biomarker difference in NACC
fast = nacc_joined[nacc_joined["Cluster"] == 1]
slow = nacc_joined[nacc_joined["Cluster"] == 2]
print("\nNACC")
_print_group_comparison(fast, slow, ["CSFABETA", "CSFPTAU", "CSFTTAU"])

# categorical AMYLPET for NACC: chi-square test on the Cluster x AMYLPET
# contingency table; element [1] of the result is the p-value.
cr_table = pd.crosstab(nacc_joined["Cluster"], nacc_joined["AMYLPET"])
print(f"AMYLPET: {chi2_contingency(cr_table)[1]}")

In [None]:
# Contingency table of cluster membership (rows) against AMYLPET value
# (columns) for the joined NACC data.
cr_table = pd.crosstab(
    index=nacc_joined["Cluster"],
    columns=nacc_joined["AMYLPET"],
)