# Implementation: The Diagnostic Playbook (Medical Scenario)

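We will visualize the decision boundaries of three models (one that underfits, one that overfits, and one that is well balanced) trying to classify patient risk on a synthetic, non-linear two-feature dataset.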

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 1. Build a synthetic, non-linearly separable dataset for the medical scenario
# Two features (framed as Age vs Biomarker) and two classes (High Risk vs Low Risk)
X, y = make_moons(
    n_samples=500,
    noise=0.3,
    random_state=42,
)
# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

def plot_boundary(model, title):
    """Shade the predicted class regions of ``model`` and overlay the training points.

    Drawing goes through ``plt``, so the output lands on whichever axes is
    current. The grid extent is taken from the module-level ``X`` (padded by 1
    on every side); the scatter uses the module-level ``X_train`` / ``y_train``.

    Args:
        model: Fitted estimator; its ``predict`` is called on a two-column
            array holding every grid point.
        title: Text set as the plot title.
    """
    step = .02  # spacing between neighbouring grid points
    pad = 1     # margin added around the data range
    first_col, second_col = X[:, 0], X[:, 1]
    grid_x = np.arange(first_col.min() - pad, first_col.max() + pad, step)
    grid_y = np.arange(second_col.min() - pad, second_col.max() + pad, step)
    xx, yy = np.meshgrid(grid_x, grid_y)

    # Predict a class for every grid point, then fold back to the grid's shape
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    regions = model.predict(grid_points).reshape(xx.shape)

    plt.contourf(xx, yy, regions, cmap=plt.cm.coolwarm, alpha=0.3)
    plt.scatter(
        X_train[:, 0],
        X_train[:, 1],
        c=y_train,
        cmap=plt.cm.coolwarm,
        edgecolors='k',
        alpha=0.6,
    )
    plt.title(title)

# The three models compared below, all fitted on the same training split.
# Both trees get a fixed random_state: scikit-learn permutes the features at
# every split, so without a seed tied splits can produce a different tree (and
# different accuracies in the titles) from one run to the next.

# Scenario A: Underfitting (Logistic Regression)
model_under = LogisticRegression()

# Scenario B: Overfitting (Deep Decision Tree)
model_over = DecisionTreeClassifier(max_depth=None, random_state=42)  # Unlimited depth

# Scenario C: Success (Pruned Tree)
model_right = DecisionTreeClassifier(max_depth=4, random_state=42)  # Constrained

# (model, title prefix) pairs in left-to-right panel order
scenarios = [
    (model_under, "Underfitting (Linear)"),
    (model_over, "Overfitting (Complex)"),
    (model_right, "Good Fit (Balanced)"),
]

plt.figure(figsize=(18, 5))

for panel, (model, label) in enumerate(scenarios, start=1):
    model.fit(X_train, y_train)
    # Train vs. test accuracy is the diagnostic shown in each panel title
    acc_train = accuracy_score(y_train, model.predict(X_train))
    acc_test = accuracy_score(y_test, model.predict(X_test))

    plt.subplot(1, 3, panel)
    plot_boundary(model, f"{label}\nTrain: {acc_train:.2f}, Test: {acc_test:.2f}")

plt.show()