# Multiclass Classification - Diabetes Stages

This notebook implements multiclass classification for diabetes stages with confidence estimation.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.calibration import CalibratedClassifierCV
import warnings
# Install a blanket "ignore" filter so no warning of any category is shown
# for the rest of the session.
# NOTE(review): this also hides library convergence and deprecation
# warnings raised by later cells — confirm that is intended.
warnings.filterwarnings("ignore")
print("Libraries loaded!")

## Load Data

In [None]:
# Read the multiclass dataset from a path relative to the current working
# directory.
df = pd.read_csv("../data/processed/diabetes_multiclass.csv")
print(f"Dataset shape: {df.shape}")

# Row count per stage, ordered by stage value rather than by frequency.
# Computed outside the f-string so no double quotes are nested inside it
# (nesting the same quote type is a syntax error before Python 3.12).
stage_counts = df["Stage"].value_counts().sort_index()
print(f"\nStage distribution:\n{stage_counts}")

# Last expression of the cell: the notebook renders the first rows.
df.head()

## Data Preparation

In [None]:
# The target is the stage label; the features are every column except the
# two label columns.
y = df["Stage"]
X = df.drop(columns=["Outcome", "Stage"])

# Hold out 20% of the rows, stratified on the stage label, with a fixed
# seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training partition only, then apply
# the same transform to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

## Model Training

In [None]:
# Candidate classifiers, each seeded with random_state=42.
# LogisticRegression is created without the `multi_class` argument: that
# parameter is deprecated in recent scikit-learn releases, and with the
# default solver a multinomial model is already what gets fitted when the
# target has more than two classes.
models = {
    "Logistic Regression": LogisticRegression(random_state=42, max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    # probability=True is required for SVC to expose predict_proba.
    "SVM": SVC(kernel="rbf", probability=True, random_state=42),
}

# Maps model name -> fitted estimator, weighted test-set metrics, and the
# raw test-set predictions / class probabilities used by later cells.
results = {}
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_proba = model.predict_proba(X_test_scaled)

    results[name] = {
        "model": model,
        "accuracy": accuracy_score(y_test, y_pred),
        # "weighted" averages the per-class scores by class support.
        "precision": precision_score(y_test, y_pred, average="weighted"),
        "recall": recall_score(y_test, y_pred, average="weighted"),
        "f1": f1_score(y_test, y_pred, average="weighted"),
        "y_pred": y_pred,
        "y_proba": y_proba,
    }

print("Training complete!")

In [None]:
# Tabulate only the scalar metrics; the fitted estimator and the prediction
# arrays stored alongside them are left out.
non_metric_keys = ("model", "y_pred", "y_proba")
perf_df = pd.DataFrame(
    {
        name: {k: v for k, v in data.items() if k not in non_metric_keys}
        for name, data in results.items()
    }
).T  # transpose so each row is a model and each column a metric

# The newline is written as an escape: a literal line break inside a
# single-quoted string is a syntax error.
print("\nModel Performance:")
print(perf_df.round(4))

## Confusion Matrices

In [None]:
# One heatmap per trained model, laid out on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# Display names for the stages, also reused by later cells.
# NOTE(review): assumes the sorted stage values correspond to these names
# in this order — confirm against the dataset encoding.
stage_labels = ["Normal", "Pre-diabetes", "T2D Moderate", "T2D Severe"]

for idx, (name, data) in enumerate(results.items()):
    # Row-major placement: models 0-1 on the top row, 2-3 on the bottom row.
    ax = axes[idx // 2, idx % 2]
    cm = confusion_matrix(y_test, data["y_pred"])
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        ax=ax,
        xticklabels=stage_labels,
        yticklabels=stage_labels,
    )
    # Two-line title; the line break is an escape so the f-string stays on
    # one physical line.
    ax.set_title(f"{name}\nConfusion Matrix")
    ax.set_ylabel("True Label")
    ax.set_xlabel("Predicted Label")

plt.tight_layout()
plt.show()

## Confidence Estimation

In [None]:
# Select the model with the highest test accuracy (the first one wins a tie).
best_model_name = max(results, key=lambda x: results[x]["accuracy"])
best_model = results[best_model_name]["model"]
print(f"Best model: {best_model_name}")

# Confidence of a prediction = the largest class probability for that sample.
proba = best_model.predict_proba(X_test_scaled)
confidence = np.max(proba, axis=1)

plt.figure(figsize=(12, 5))

# Left panel: distribution of confidence over the whole test set.
plt.subplot(1, 2, 1)
plt.hist(confidence, bins=30, edgecolor="black")
plt.xlabel("Confidence")
plt.ylabel("Frequency")
plt.title(f"{best_model_name}\nPrediction Confidence")

# Right panel: confidence of each test sample, coloured by its true stage.
# NOTE(review): assumes stages are encoded as 0..len(stage_labels)-1 in the
# order of stage_labels — confirm against the dataset.
plt.subplot(1, 2, 2)
for stage, stage_label in enumerate(stage_labels):
    # Convert to a plain boolean array so indexing `confidence` is purely
    # positional and does not involve the pandas index of y_test.
    mask = np.asarray(y_test == stage)
    plt.scatter(np.flatnonzero(mask), confidence[mask], label=stage_label, alpha=0.6)
plt.xlabel("Sample Index")
plt.ylabel("Confidence")
plt.title("Confidence by True Stage")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Mean confidence of the best model over the test samples of each true stage.
# NOTE(review): assumes stages are encoded as 0..len(stage_labels)-1 in the
# order of stage_labels; a stage with no test samples yields NaN.
avg_conf_per_class = [
    # Plain boolean array so the indexing is positional, independent of the
    # pandas index carried by y_test.
    confidence[np.asarray(y_test == stage)].mean()
    for stage in range(len(stage_labels))
]

plt.figure(figsize=(10, 6))
plt.bar(stage_labels, avg_conf_per_class, color=["#2ecc71", "#f39c12", "#e67e22", "#e74c3c"])
plt.ylabel("Average Confidence")
plt.title(f"{best_model_name}\nAverage Confidence by Stage")
plt.ylim([0, 1])
# Print each bar's value just above its top.
for i, v in enumerate(avg_conf_per_class):
    plt.text(i, v + 0.02, f"{v:.3f}", ha="center")
plt.show()

## Calibrated Predictions

In [None]:
# Wrap the best model in a sigmoid calibrator using 5-fold cross-validation
# on the training data.
calibrated_model = CalibratedClassifierCV(best_model, method="sigmoid", cv=5)
calibrated_model.fit(X_train_scaled, y_train)

# Calibrated confidence = the largest calibrated class probability per sample.
y_proba_cal = calibrated_model.predict_proba(X_test_scaled)
confidence_cal = np.max(y_proba_cal, axis=1)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Side-by-side histograms: uncalibrated confidence on the left (default
# colour), calibrated confidence on the right (green).
panels = (
    (axes[0], confidence, "Before Calibration", {}),
    (axes[1], confidence_cal, "After Calibration", {"color": "green"}),
)
for panel_ax, values, caption, extra_style in panels:
    panel_ax.hist(values, bins=30, alpha=0.7, label=caption, edgecolor="black", **extra_style)
    panel_ax.set_xlabel("Confidence")
    panel_ax.set_ylabel("Frequency")
    panel_ax.set_title(caption)
    panel_ax.legend()

plt.tight_layout()
plt.show()

## Summary

In this notebook, we:
1. Trained multiple multiclass classification models
2. Evaluated performance across all diabetes stages
3. Implemented confidence estimation techniques
4. Applied calibration for improved confidence estimates

The models can now predict diabetes stages with associated confidence levels.