# Make ADNI data

In [1]:
import pandas as pd

In [2]:
# Should we include another test (e.g., MMSE, FAQ or ADASQ4)?
# https://www.nice.org.uk/guidance/ta217/documents/alzheimers-disease-mild-to-moderate-donepezil-galantamine-rivastigmine-and-memantine-part-review-draft-scope2

In [3]:
# Load the raw ADNI table. low_memory=False makes pandas parse the file in a
# single pass instead of in chunks, which avoids mixed-type column inference.
adni_csv_path = '../data/adni_data.csv'
adni = pd.read_csv(adni_csv_path, low_memory=False)

In [4]:
# Filter out visits without a CDR-SB measurement.
assert adni['CDRSB.bl'].notnull().all()
has_cdrsb = adni.CDRSB.notnull()
adni_filtered = adni[has_cdrsb]

# Keep, for each patient, only the leading run of visits that are at most
# 6 months apart from the previous one. The first visit of a patient has no
# predecessor (diff is NaN -> 0) and is always kept; cummin propagates the
# first False, so once a gap exceeds 6 months that visit and every later visit
# of the same patient are dropped.
# NOTE(review): diff() uses the current row order, so this assumes rows are
# ordered by M within each RID -- confirm against the input file.
months_between_visits = adni_filtered.groupby('RID')['M'].diff().fillna(0)
is_regular_and_continuous = (months_between_visits <= 6.0).groupby(adni_filtered.RID).cummin()
adni_filtered = adni_filtered[is_regular_and_continuous]

# Filter out patients with less than 2 visits.
visit_counts = adni_filtered.groupby('RID').size()
patients_with_at_least_two_visits = visit_counts[visit_counts >= 2].index
# Take an explicit copy: later cells add columns to adni_filtered, and doing
# that on a slice of `adni` triggers pandas' SettingWithCopyWarning.
adni_filtered = adni_filtered[adni_filtered.RID.isin(patients_with_at_least_two_visits)].copy()

# (Disabled) Keep only visits on or before 2020-10-02 (ICLR 2021 submission deadline).
#is_before_iclr_deadline = pd.to_datetime(adni_filtered.EXAMDATE) <= '2020-10-02'
#adni_filtered = adni_filtered[is_before_iclr_deadline]

In [5]:
# In Pace et al. (2022), 1626 patients were included in the dataset...
# Here: number of distinct patients (RID groups) left after filtering.

visits_by_patient = adni_filtered.groupby('RID')
visits_by_patient.ngroups

1605

In [6]:
# ... and the patients had a median of three visits.
# Index of the result = number of visits, value = number of patients with that many.

visits_per_patient = adni_filtered.groupby('RID').size()
visits_per_patient.value_counts()

3    1111
5     285
2     175
4      34
dtype: int64

In [7]:
# Let's have a look at the diagnosis distribution at baseline.
# - LMCI = late mild cognitive impairment
# - CN = cognitively normal
# - AD = Alzheimer's disease
# - EMCI = early mild cognitive impairment
# - SMC = subjective memory concerns
#
# Note: the counts are per visit row, not per patient, so a patient with
# several retained visits contributes several times.

baseline_diagnosis = adni_filtered['DX.bl']
baseline_diagnosis.value_counts()

LMCI    2185
CN      1157
AD       876
EMCI     810
SMC      216
Name: DX.bl, dtype: int64

In [8]:
# The task consists of predicting whether an MRI scan was ordered.
# A visit is flagged 1 when a Hippocampus value is present and 0 otherwise.

has_hippocampus_value = adni_filtered['Hippocampus'].notna()
adni_filtered['MRI_ordered'] = has_hippocampus_value.astype(int)

In [9]:
# From Pace et al. (2022): "Patient observations consist of CDR-SB 
# on a severity scale following O'Bryant et al (2008)..."

# From Hüyük et al. (2021): "The CDR-SB result is categorized as: 
# {'normal', 'questionable impairment', 'mild/severe dementia'}."

# From O'Bryant et al. (2008): "Optimal ranges of CDR-SOB scores corresponding
# to the global CDR scores were 0.5 to 4.0 for a global score of 0.5, 
# 4.5 to 9.0 for a global score of 1.0, 9.5 to 15.5 for a global score of 2.0,
# and 16.0 to 18.0 for a global score of 3.0."

# From https://en.wikipedia.org/wiki/Clinical_Dementia_Rating:
# global score of 0.5 = questionable impairment
# global score of 1.0 = mild impairment
# global score of 2.0 = moderate impairment
# global score of 3.0 = severe impairment

# Left-closed bins: [0, 0.5) -> normal, [0.5, 4.5) -> questionable,
# [4.5, 18.5) -> severe. Values outside [0, 18.5) become NaN.
cdrsb_bin_edges = [0, 0.5, 4.5, 18.5]
cdrsb_labels = ["CDR-SB normal", "CDR-SB questionable", "CDR-SB severe"]
adni_filtered['CDRSB_cat'] = pd.cut(
    adni_filtered['CDRSB'],
    bins=cdrsb_bin_edges,
    labels=cdrsb_labels,
    right=False,
)

In [10]:
# From Pace et al. (2022): "... and the MRI outcome of the previous visit, 
# categorized into four possibilities (no MRI scan; below average, average and 
# above average hippocampal volume)."

# Reference statistics over all retained visits; missing volumes are skipped
# (the pandas default for mean/std).
hippocampus_volumes = adni_filtered['Hippocampus']
hippocampus_mean = hippocampus_volumes.mean()
hippocampus_std = hippocampus_volumes.std()

def mri_outcome(volume, mean=None, std=None):
    """Categorize a hippocampal volume relative to a reference distribution.

    Parameters
    ----------
    volume : float or None
        Hippocampal volume of one visit; a null value (NaN/None) means there
        is no MRI result for that visit.
    mean, std : float, optional
        Mean and standard deviation of the reference distribution. When
        omitted, the notebook-level ``hippocampus_mean`` and
        ``hippocampus_std`` are used, so one-argument calls (for example via
        ``Series.apply``) behave exactly as before.

    Returns
    -------
    str
        ``"No MRI"`` for a null volume; ``"Vh low"`` / ``"Vh high"`` when the
        volume is more than half a standard deviation below / above the mean;
        ``"Vh average"`` otherwise (both bounds inclusive).
    """
    if pd.isnull(volume):
        return "No MRI"
    # Fall back to the notebook-level statistics only when actually needed,
    # so the function can be used (and tested) without those globals.
    if mean is None:
        mean = hippocampus_mean
    if std is None:
        std = hippocampus_std
    if volume < mean - 0.5 * std:
        return "Vh low"
    elif volume > mean + 0.5 * std:
        return "Vh high"
    else:
        return "Vh average"

# Categorize the hippocampal volume recorded at each visit.
adni_filtered["MRI_outcome"] = adni_filtered["Hippocampus"].apply(mri_outcome)

# Shift the outcome by one visit within each patient so every row carries the
# outcome of the preceding visit; a patient's first row gets "No MRI".
outcome_by_patient = adni_filtered.groupby('RID')["MRI_outcome"]
adni_filtered["MRI_previous_outcome"] = outcome_by_patient.shift(1, fill_value="No MRI")

In [11]:
# Keep only the columns that are written to the output file, in this order.
output_columns = [
    'RID',
    'CDRSB_cat',
    'MRI_previous_outcome',
    'MRI_ordered',
    'AGE',
    'PTGENDER',
    'PTMARRY',
    'PTEDUCAT',
    'APOE4',
]
adni_filtered = adni_filtered[output_columns]
adni_filtered.head()

Unnamed: 0,RID,CDRSB_cat,MRI_previous_outcome,MRI_ordered,AGE,PTGENDER,PTMARRY,PTEDUCAT,APOE4
0,2,CDR-SB normal,No MRI,1,74.3,Male,Married,16,0.0
1,2,CDR-SB normal,Vh high,0,74.3,Male,Married,16,0.0
16,3,CDR-SB severe,No MRI,1,81.3,Male,Married,18,1.0
17,3,CDR-SB severe,Vh low,1,81.3,Male,Married,18,1.0
18,3,CDR-SB questionable,Vh low,1,81.3,Male,Married,18,1.0


In [12]:
# Sanity check: no missing value may remain in any selected column.
assert not adni_filtered.isna().any().any()

In [13]:
# Write the prepared table to disk without the DataFrame index column.
output_csv_path = '../data/adni_Xgy.csv'
adni_filtered.to_csv(output_csv_path, index=False)