# Dementia Risk Prediction

## Imports & Setup

In [None]:
# Core libraries
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# ML utilities
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Evaluation
from sklearn.metrics import classification_report, roc_auc_score, RocCurveDisplay


## Load Dataset

In [None]:
# Load the raw dataset from CSV into a DataFrame.
df = pd.read_csv("../data/raw/nacc_hackathon.csv")

# Only the last expression of a notebook cell is echoed automatically, so the
# preview and the shape are printed explicitly; as bare expressions in the
# middle of the cell their results would be silently discarded.
print(df.head())
print(df.shape)

# DataFrame.info() writes its summary to stdout itself and returns None.
df.info()


## Target Variable

In [None]:
# Name of the column used as the prediction target throughout the notebook.
TARGET = "Dementia"
# Count how often each distinct target value occurs (class balance check).
df[TARGET].value_counts()


## Select a Valid Feature Set

In [None]:
# Columns treated as medical features; they are excluded when the
# non-medical feature list is derived from the DataFrame's columns.
medical_features = [
    "MMSE", "CDR", "ADAS13",
    "Diagnosis", "ClinicalScore", "BrainVolume",
]

In [None]:
# Every column of df that is neither a medical feature nor the target,
# kept in the DataFrame's original column order.
# The exclusion set is built once up front so each membership test is a
# hash lookup, instead of concatenating a fresh list for every column.
_excluded_columns = set(medical_features) | {TARGET}
non_medical_features = [
    col for col in df.columns
    if col not in _excluded_columns
]


In [None]:
# Number of columns remaining after excluding the medical features and the
# target; as the cell's last expression, the value is shown as its output.
len(non_medical_features)
