# Classification Models


In this notebook we will be building classification models on preprocessed data.

In [None]:
import os
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import warnings

from CogniPredictAD.visualization import Visualizer
from CogniPredictAD.classification import ADNIClassifier

# pandas display limits used for every frame rendered in this notebook.
for option_name, option_value in {
    'display.max_rows': 116,
    'display.max_columns': 40,
    'display.max_info_columns': 40,
}.items():
    pd.set_option(option_name, option_value)

# Hide every UserWarning, and the one DeprecationWarning whose message matches
# the L-BFGS-B `disp`/`iprint` pattern below.
warnings.simplefilter("ignore", category=UserWarning)
warnings.filterwarnings(
    "ignore",
    message=".*The `disp` and `iprint` options of the L-BFGS-B solver are deprecated.*",
    category=DeprecationWarning
)

# Load both splits; paths are relative to the notebook's working directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/train.csv", "../data/test.csv")
)

# Last expression of the cell: render the training frame.
train

In [None]:
# Count plot of the DX column of the training frame, one bar per class code.
dx_plot_options = dict(
    x="DX",
    figsize=(8, 5),
    palette="viridis",
    order=[0, 1, 2, 3],  # fixes the left-to-right order of the class codes
    xlabel="Diagnosis",
    ylabel="Number of Patients",
    title="Distribution of Patients by Diagnosis (DX)",
)

viz = Visualizer(train)
viz.count_plot(**dx_plot_options)

The count plot shows the following class distribution in the training set (count -> share of patients):
- CN:   717 -> 37.07%
- EMCI: 336 -> 17.37%
- LMCI: 548 -> 28.34%
- AD:   333 -> 17.22%

In [None]:
# Split the training frame into predictors and label.
target_column = 'DX'

# Features: every column except the target (drop returns a new frame).
X_train = train.drop(columns=[target_column])

# Label: the target column on its own.
y_train = train[target_column]

## Dataset with `CDRSB`, `LDELTOTAL`, and `mPACCdigit` with Standard Classification

### No Sampling

In [None]:
# "Standard1" configuration on the un-resampled training split.
# The call returns two objects, kept as result_df_1 and per_class_df_1
# (presumably overall and per-class result tables — confirm in ADNIClassifier).
classifier = ADNIClassifier(classifier="Standard1")
result_df_1, per_class_df_1 = classifier.fit_evaluate_store_models(
    X_train,
    y_train,
    output_dir="../results/all_models/1_NX_NS",
)

### Hybrid Sampling

... 

## Dataset with `CDRSB`, `LDELTOTAL`, and `mPACCdigit` with XAI Classification

### No Sampling

In [None]:
# "XAI1" configuration on the un-resampled training split.
# The call returns two objects, kept as result_df_3 and per_class_df_3
# (presumably overall and per-class result tables — confirm in ADNIClassifier).
classifier = ADNIClassifier(classifier="XAI1")
result_df_3, per_class_df_3 = classifier.fit_evaluate_store_models(
    X_train,
    y_train,
    output_dir="../results/all_models/1_X_NS",
)

### Hybrid Sampling

... 

## Dataset without `CDRSB`, `LDELTOTAL`, and `mPACCdigit` with Standard Classification

In [None]:
# Remove the three columns named in this section's heading from the training data.
#
# X_train was created earlier with train.drop(columns=['DX']), which returns a
# separate frame. Dropping columns from `train` alone therefore leaves them in
# X_train, and the models fitted below would still see them. X_train is rebuilt
# here so that the features actually passed to the classifiers exclude them.
#
# The drop rebinds `train` instead of mutating it in place, and errors="ignore"
# lets the cell be re-run after the columns are already gone.
excluded_columns = ['CDRSB', 'LDELTOTAL', 'mPACCdigit']
train = train.drop(columns=excluded_columns, errors="ignore")
X_train = train.drop(columns=['DX'])

### No Sampling

In [None]:
# "Standard2" configuration, no resampling, using the X_train / y_train
# currently in scope.
# The call returns two objects, kept as result_df_5 and per_class_df_5
# (presumably overall and per-class result tables — confirm in ADNIClassifier).
classifier = ADNIClassifier(classifier="Standard2")
result_df_5, per_class_df_5 = classifier.fit_evaluate_store_models(
    X_train,
    y_train,
    output_dir="../results/all_models/2_NX_NS",
)

### Hybrid Sampling

... 

## Dataset without `CDRSB`, `LDELTOTAL`, and `mPACCdigit` with XAI Classification

In [None]:
# Left commented out: `train` already had these columns removed in the preceding
# "Standard Classification" section, and dropping labels that are no longer
# present would raise a KeyError.
# train.drop(columns=['CDRSB', 'LDELTOTAL', 'mPACCdigit'], axis=1, inplace=True)

### No Sampling

In [None]:
# "XAI2" configuration, no resampling, using the X_train / y_train
# currently in scope.
# The call returns two objects, kept as result_df_7 and per_class_df_7
# (presumably overall and per-class result tables — confirm in ADNIClassifier).
classifier = ADNIClassifier(classifier="XAI2")
result_df_7, per_class_df_7 = classifier.fit_evaluate_store_models(
    X_train,
    y_train,
    output_dir="../results/all_models/2_X_NS",
)

### Hybrid Sampling

... 