In [None]:
# 3_final_implementation.ipynb (Automated Best Model)

# Import necessary libraries
import json
import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Import all possible model classes
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Load the processed data
# Stop right here when the file is missing: continuing with an empty
# DataFrame would only fail a few lines later with an unrelated KeyError
# on the 'CDR' column, hiding the real cause.
try:
    df = pd.read_csv('processed_data.csv')
except FileNotFoundError as err:
    print("Error: 'processed_data.csv' not found. Please run Notebook 1 first.")
    raise SystemExit(1) from err

# --- Prepare Data ---
# Binary target: 1 where CDR is greater than zero, 0 otherwise. The raw CDR
# column is removed afterwards so it cannot end up among the features.
df['Group'] = df['CDR'].gt(0).astype(int)
df.drop(columns='CDR', inplace=True)

# Features are every remaining column except the target and the subject id.
y = df['Group']
X = df.drop(columns=['Group', 'Subject ID'])

# Hold out 25% of the rows, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# --- Load the Best Model's Configuration ---
# The JSON document must provide the 'Model' and 'Best Parameters' keys that
# are read below.
try:
    with open('results/best_model_config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)
except FileNotFoundError as err:
    print("Error: 'best_model_config.json' not found. Please run Notebook 2 first.")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # meant for the interactive interpreter, is not guaranteed to be
    # available, and would report success (status 0) on this error path.
    raise SystemExit(1) from err

model_name = config['Model']
params = config['Best Parameters']

print(f"--- Final Implementation using the Best Model: {model_name} ---")
print(f"Using parameters: {params}")

# --- Dynamically Build and Train the Final Model ---
# Map every model name that may appear in the saved config to an estimator
# instance carrying its fixed (non-tuned) settings.
model_map = {
    'SVM': SVC(probability=True, random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'AdaBoost': AdaBoostClassifier(random_state=42),
    # NOTE(review): `use_label_encoder` may be deprecated or ignored by newer
    # XGBoost releases - confirm against the installed version.
    'XGBoost': XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'),
    'Bagging': BaggingClassifier(random_state=42)
}

# Fail with a readable message when the config names a model that is not in
# the map above (still a KeyError, as a plain failed lookup would be).
if model_name not in model_map:
    raise KeyError(
        f"Unsupported model {model_name!r} in best_model_config.json; "
        f"expected one of {sorted(model_map)}."
    )

# Create the final pipeline
final_model = model_map[model_name]
final_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', final_model)
])

# Set the best parameters found during tuning.
# The saved parameter names are expected to carry the tuning pipeline's step
# name as a prefix (e.g. 'svc__C'), while the estimator step here is called
# 'model'. Only the first '__'-separated segment is replaced, so nested names
# such as 'svc__estimator__max_depth' keep the rest of their path. A key with
# no prefix at all is treated as a direct parameter of the model.
formatted_params = {
    f"model__{key.split('__', 1)[-1]}": val
    for key, val in params.items()
}
final_pipeline.set_params(**formatted_params)

print("\nTraining the final model on the full training data...")
final_pipeline.fit(X_train, y_train)
print("Training complete.")

# --- Save the Final Trained Model ---
# Spaces in the model name are replaced by underscores for the file name.
# The name is built once and reused for both the dump and the message.
model_filename = f'final_model_{model_name.replace(" ", "_")}.pkl'
joblib.dump(final_pipeline, model_filename)
print(f"\nFinal model saved to '{model_filename}'")

# --- Final Evaluation ---
# Predict on the held-out split, then report accuracy and the per-class
# classification report.
print("\nEvaluating the model on the unseen test set...")
y_pred = final_pipeline.predict(X_test)
final_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"\nFinal Model Accuracy on Test Set: {final_accuracy:.4f}")
print("\nFinal Classification Report:")
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

# --- Generate and Save Confusion Matrix ---
# Pin the label order to [0, 1] so the matrix is always 2x2 and lines up with
# the two tick labels, even if one class is absent from the test predictions.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
class_labels = ['Non-Demented', 'Demented']

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title(f'Confusion Matrix for {model_name}')

# savefig does not create missing directories, so make sure the output folder
# exists before writing the image.
os.makedirs('final_results', exist_ok=True)
cm_path = f'final_results/confusion_matrix_{model_name.replace(" ", "_")}.png'
plt.savefig(cm_path)
plt.show()

print(f"\nConfusion matrix saved to '{cm_path}'")
print("\nFinal implementation and evaluation complete.")