- 🧠 [Understanding Logistic Regression](#logistic-understanding)
  - 🔢 [Binary Classification Motivation](#binary-motivation)
  - 📈 [Sigmoid Function & Probability Output](#sigmoid-probability)
  - 🚧 [Decision Boundary Interpretation](#decision-boundary)
- 🛠️ [Training the Model](#logistic-training)
  - 💸 [Cost Function for Logistic Regression](#logistic-cost)
  - 🔁 [Gradient Descent for Logistic Regression](#logistic-gd)
  - ⚖️ [Feature Scaling](#feature-scaling)
- 📊 [Evaluation & Performance](#logistic-evaluation)
  - 📏 [Accuracy, Precision, Recall, F1](#metrics)
  - 🧮 [Confusion Matrix](#confusion-matrix)
  - 📉 [ROC Curve & AUC](#roc-auc)
  - 🔥 [Overfitting in Classification Models](#overfitting-classification)

---

# 🧠 <a id="logistic-understanding"></a>**1. Understanding Logistic Regression**

---

# <a id="binary-motivation"></a>🔢 Binary Classification Motivation  


In [11]:
# Simulating and visualizing Binary Classification Motivation interactively

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import ipywidgets as widgets
from IPython.display import display

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Build a reproducible 2-D toy dataset for binary classification.

    Labels are derived from the noise-free points (1 when the two
    coordinates sum to a positive value). Gaussian jitter scaled by
    ``noise`` is then added to the returned features, so some points can
    end up on the "wrong" side of the line x1 + x2 = 0.

    Returns:
        A ``(features, labels)`` pair: a float array of shape
        ``(n_samples, 2)`` and an int array of shape ``(n_samples,)``.
    """
    np.random.seed(42)  # fixed seed: every call produces the same sample
    clean = np.random.randn(n_samples, 2)
    labels = (clean[:, 0] + clean[:, 1] > 0).astype(int)
    jitter = noise * np.random.randn(n_samples, 2)
    return clean + jitter, labels

def sigmoid(z):
    """Logistic function σ(z) = 1 / (1 + e^(-z)), applied element-wise."""
    decay = np.exp(-z)
    return 1 / (1 + decay)

def plot_binary_classification(threshold=0.5, noise=0.2):
    """Plot the toy dataset coloured by predicted probability, together with
    the decision boundary implied by ``threshold``.

    A fixed model (θ = [1, 1], bias = 0) scores every point with
    sigmoid(θᵀx + bias). The dashed line marks where that probability equals
    ``threshold``, i.e. where θᵀx + bias = log(threshold / (1 - threshold)).

    Args:
        threshold: Probability cut-off in [0, 1]. At exactly 0 or 1 there is
            no finite boundary, so none is drawn.
        noise: Jitter strength forwarded to ``generate_toy_dataset``.
    """
    X, _ = generate_toy_dataset(noise=noise)
    # True weights (simulated θ vector)
    theta = np.array([1.0, 1.0])
    bias = 0.0

    probs = sigmoid(X @ theta + bias)

    plt.figure(figsize=(8,6))

    scatter = plt.scatter(X[:, 0], X[:, 1], c=probs, cmap="coolwarm", edgecolor='k', alpha=0.8)
    plt.colorbar(scatter, label="Predicted Probability (hθ(x))")

    # Annotate every point with its predicted probability.
    for (x1_pt, x2_pt), prob in zip(X, probs):
        plt.text(x1_pt, x2_pt + 0.1, f"{prob:.2f}", fontsize=6, ha='center')

    x1 = np.linspace(X[:,0].min()-1, X[:,0].max()+1, 100)
    if not 0.0 < threshold < 1.0:
        # logit(0) = -inf and logit(1) = +inf (the latter used to raise
        # ZeroDivisionError): all points fall on one side, so there is no
        # line to draw. The empty artist keeps a legend entry.
        plt.plot([], [], 'k--', label=f"No finite boundary at threshold={threshold:.2f}")
    else:
        # sigmoid(z) = threshold  <=>  z = logit(threshold)
        target_logit = np.log(threshold / (1 - threshold))
        if theta[1] != 0:
            # θ₀x₁ + θ₁x₂ + b = logit  =>  x₂ = (logit - b - θ₀x₁) / θ₁
            x2 = (target_logit - bias - theta[0] * x1) / theta[1]
            plt.plot(x1, x2, 'k--', label=f"Decision Boundary at threshold={threshold:.2f}")
        else:
            # θ₁ = 0: the boundary is the vertical line θ₀x₁ + b = logit.
            plt.axvline(x=(target_logit - bias) / theta[0], linestyle='--', color='k', label="Decision Boundary")

    plt.title(f"Binary Classification Motivation\nNoise={noise}, Threshold={threshold}")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.grid(True)
    plt.show()

# 🕹️ Interactive controls: probability cut-off and feature-noise level
widgets.interact(
    plot_binary_classification,
    threshold=widgets.FloatSlider(value=0.5, min=0.0, max=1.0, step=0.01),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=0.5, description='threshold', max=1.0, step=0.01), FloatSlider(value=0…

<function __main__.plot_binary_classification(threshold=0.5, noise=0.2)>

# <a id="sigmoid-probability"></a>📈 Sigmoid Function & Probability Output  


In [12]:
# Simulating and visualizing Sigmoid Function & Probability Output interactively

import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

def sigmoid(z, temperature=1.0):
    """Temperature-scaled logistic function σ(z / temperature).

    Larger ``temperature`` values flatten the curve; smaller values make
    the transition around z = 0 steeper.
    """
    exponent = -z / temperature
    return 1 / (1 + np.exp(exponent))

def plot_sigmoid_function(temperature=1.0, shift=0.0):
    """Draw the curve sigmoid(z - shift, temperature) for z in [-10, 10].

    Dashed guides mark the 0.5 output level and the horizontal position
    of the curve's centre (``shift``).
    """
    logits = np.linspace(-10, 10, 1000)
    curve = sigmoid(logits - shift, temperature)

    _, ax = plt.subplots(figsize=(8,6))
    ax.plot(logits, curve, color="purple", lw=2)
    ax.set_title(f"Sigmoid Function\nTemperature={temperature:.2f}, Shift={shift:.2f}")
    ax.set_xlabel("Input Logit (z)")
    ax.set_ylabel("Sigmoid Output σ(z)")
    ax.grid(True)

    # Reference guides: half-probability level and the shifted centre
    ax.axhline(0.5, linestyle='--', color='gray')
    ax.axvline(shift, linestyle='--', color='gray', label=f"Center Shift: {shift:.2f}")
    ax.legend()
    plt.show()

# 🕹️ Interactive controls: curve steepness (temperature) and horizontal shift
widgets.interact(
    plot_sigmoid_function,
    temperature=widgets.FloatSlider(value=1.0, min=0.1, max=5.0, step=0.1),
    shift=widgets.FloatSlider(value=0.0, min=-5.0, max=5.0, step=0.1),
)


interactive(children=(FloatSlider(value=1.0, description='temperature', max=5.0, min=0.1), FloatSlider(value=0…

<function __main__.plot_sigmoid_function(temperature=1.0, shift=0.0)>

# <a id="decision-boundary"></a>🚧 Decision Boundary Interpretation  


In [13]:
# Simulating and visualizing Decision Boundary Interpretation interactively

import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Create a seeded 2-D point cloud with binary labels.

    A point is labelled 1 when its two noise-free coordinates sum to a
    positive number. The returned features carry additional Gaussian
    jitter scaled by ``noise``.

    Returns:
        ``(features, labels)`` with shapes ``(n_samples, 2)`` and
        ``(n_samples,)``.
    """
    np.random.seed(42)  # same data on every call
    shape = (n_samples, 2)
    base = np.random.randn(*shape)
    first, second = base[:, 0], base[:, 1]
    labels = (first + second > 0).astype(int)
    noisy = base + noise * np.random.randn(*shape)
    return noisy, labels

def plot_decision_boundary(theta0=1.0, theta1=1.0, bias=0.0, noise=0.2):
    """Plot the toy dataset and the line θ₀x₁ + θ₁x₂ + bias = 0.

    Args:
        theta0: Weight on feature 1.
        theta1: Weight on feature 2.
        bias: Intercept term.
        noise: Jitter strength forwarded to ``generate_toy_dataset``.

    When ``theta1`` is 0 the boundary is vertical; when both weights are 0
    the linear score is the constant ``bias`` and no boundary exists.
    """
    X, y = generate_toy_dataset(noise=noise)
    
    plt.figure(figsize=(8,6))
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap="bwr", edgecolor='k', alpha=0.7)

    # Plot decision boundary: θ₀x₀ + θ₁x₁ + bias = 0
    x1_range = np.linspace(X[:, 0].min()-1, X[:, 0].max()+1, 100)
    
    if theta1 != 0:
        # Solve for the second feature along the line.
        x2_boundary = -(theta0 * x1_range + bias) / theta1
        plt.plot(x1_range, x2_boundary, 'k--', label=f"Boundary: θ₀={theta0:.2f}, θ₁={theta1:.2f}, bias={bias:.2f}")
    elif theta0 != 0:
        plt.axvline(x=-bias/theta0, linestyle='--', color='k', label="Vertical Boundary (θ₁=0)")
    else:
        # Both weights are zero: -bias/theta0 would raise ZeroDivisionError,
        # and there is no line to draw. The empty artist keeps a legend entry.
        plt.plot([], [], 'k--', label="No boundary (θ₀=θ₁=0)")

    plt.title("Decision Boundary Interpretation (Live)")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.grid(True)
    plt.show()

# 🕹️ Interactive controls: both weights, the bias, and the feature noise
widgets.interact(
    plot_decision_boundary,
    theta0=widgets.FloatSlider(value=1.0, min=-5.0, max=5.0, step=0.1),
    theta1=widgets.FloatSlider(value=1.0, min=-5.0, max=5.0, step=0.1),
    bias=widgets.FloatSlider(value=0.0, min=-5.0, max=5.0, step=0.1),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=1.0, description='theta0', max=5.0, min=-5.0), FloatSlider(value=1.0, …

<function __main__.plot_decision_boundary(theta0=1.0, theta1=1.0, bias=0.0, noise=0.2)>

---

# 🛠️ <a id="logistic-training"></a>**2. Training the Model**
 
---

# <a id="logistic-cost"></a>💸 Cost Function for Logistic Regression  


In [14]:
# Simulating and visualizing Cost Function for Logistic Regression interactively

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import ipywidgets as widgets
from IPython.display import display

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Return seeded 2-D features and binary labels for the loss demo.

    Labels are 1 where the noise-free coordinates sum to more than zero;
    the returned features are those coordinates plus Gaussian jitter
    multiplied by ``noise``.
    """
    np.random.seed(42)  # deterministic data across slider updates
    points = np.random.randn(n_samples, 2)
    coordinate_sum = points[:, 0] + points[:, 1]
    targets = (coordinate_sum > 0).astype(int)
    perturbation = np.random.randn(*points.shape)
    return points + noise * perturbation, targets

def sigmoid(z):
    """Map logits to probabilities with the logistic function 1 / (1 + e^(-z))."""
    return 1 / (np.exp(-z) + 1)

def compute_loss(X, y, theta0, theta1, bias):
    """Mean binary cross-entropy of a two-feature logistic model.

    The linear score is ``X[:, 0] * theta0 + X[:, 1] * theta1 + bias``; it is
    passed through ``sigmoid`` and compared with the 0/1 labels ``y``.
    """
    eps = 1e-8  # keeps log() finite when a probability saturates at 0 or 1
    p = sigmoid(X[:,0] * theta0 + X[:,1] * theta1 + bias)
    positive_term = y * np.log(p + eps)
    negative_term = (1 - y) * np.log(1 - p + eps)
    return -np.mean(positive_term + negative_term)

def plot_loss_surface(theta0_val=1.0, theta1_val=1.0, bias=0.0, noise=0.2):
    """Show the cross-entropy loss over a grid of (θ₀, θ₁) values.

    The left panel is a 3-D surface and the right panel a filled contour
    plot; a red marker shows the loss at ``(theta0_val, theta1_val)``.
    ``bias`` is held fixed for the whole grid.
    """
    features, labels = generate_toy_dataset(noise=noise)

    def loss_at(t0, t1):
        """Loss for one weight pair at the fixed bias."""
        return compute_loss(features, labels, t0, t1, bias)

    # 50 x 50 grid covering [-5, 5] on both weight axes
    axis_values = np.linspace(-5, 5, 50)
    Theta0, Theta1 = np.meshgrid(axis_values, axis_values)
    Loss = np.array([
        [loss_at(t0, t1) for t0, t1 in zip(row0, row1)]
        for row0, row1 in zip(Theta0, Theta1)
    ])

    fig = plt.figure(figsize=(14,6))

    # Left panel: 3D surface
    ax3d = fig.add_subplot(1,2,1, projection='3d')
    surface = ax3d.plot_surface(Theta0, Theta1, Loss, cmap=cm.viridis, alpha=0.8)
    ax3d.scatter(theta0_val, theta1_val, loss_at(theta0_val, theta1_val), color='red', s=50)
    ax3d.set_xlabel('θ₀')
    ax3d.set_ylabel('θ₁')
    ax3d.set_zlabel('Loss')
    ax3d.set_title('Loss Surface (3D View)')
    fig.colorbar(surface, ax=ax3d, shrink=0.5, aspect=5)

    # Right panel: filled contours of the same grid
    ax_flat = fig.add_subplot(1,2,2)
    filled = ax_flat.contourf(Theta0, Theta1, Loss, 50, cmap=cm.viridis)
    ax_flat.scatter(theta0_val, theta1_val, color='red', s=50, label="Current θ")
    ax_flat.set_xlabel('θ₀')
    ax_flat.set_ylabel('θ₁')
    ax_flat.set_title('Loss Surface (Contour View)')
    ax_flat.legend()
    fig.colorbar(filled, ax=ax_flat)

    plt.tight_layout()
    plt.show()

# 🕹️ Interactive controls: marker position (θ₀, θ₁), fixed bias, feature noise
widgets.interact(
    plot_loss_surface,
    theta0_val=widgets.FloatSlider(value=1.0, min=-5.0, max=5.0, step=0.1),
    theta1_val=widgets.FloatSlider(value=1.0, min=-5.0, max=5.0, step=0.1),
    bias=widgets.FloatSlider(value=0.0, min=-5.0, max=5.0, step=0.1),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=1.0, description='theta0_val', max=5.0, min=-5.0), FloatSlider(value=1…

<function __main__.plot_loss_surface(theta0_val=1.0, theta1_val=1.0, bias=0.0, noise=0.2)>

# <a id="logistic-gd"></a>🔁 Gradient Descent for Logistic Regression  


In [15]:
# Simulating and visualizing Gradient Descent for Logistic Regression (Fixed Version)

import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Produce the seeded 2-D training set used by the gradient-descent demo.

    Returns ``(features, labels)``: labels are 1 where the noise-free
    coordinates sum to a positive value, and the features include extra
    Gaussian jitter scaled by ``noise``.
    """
    np.random.seed(42)  # reproducible draw
    raw = np.random.randn(n_samples, 2)
    is_positive = raw[:, 0] + raw[:, 1] > 0
    features = raw + noise * np.random.randn(*raw.shape)
    return features, is_positive.astype(int)

def sigmoid(z):
    """Element-wise logistic activation: 1 / (1 + e^(-z))."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def compute_loss(X_aug, y, theta):
    """Mean binary cross-entropy for weights ``theta`` on design matrix ``X_aug``.

    Probabilities are ``sigmoid(X_aug @ theta)``; a small constant inside
    each logarithm keeps the result finite when a probability hits 0 or 1.
    """
    tiny = 1e-8
    predicted = sigmoid(X_aug @ theta)
    log_likelihood = y * np.log(predicted + tiny) + (1 - y) * np.log(1 - predicted + tiny)
    return -np.mean(log_likelihood)

def gradient_descent_with_visualization(learning_rate=0.1, steps=50, noise=0.2):
    """Fit logistic regression by batch gradient descent with live plots.

    Starting from all-zero weights, each step applies one gradient update
    and records the post-update loss. Roughly every tenth of the run, and
    on the final step, the output is cleared and redrawn with the current
    decision boundary (left) and the loss history so far (right).
    """
    X, y = generate_toy_dataset(noise=noise)
    n_samples, n_features = X.shape
    # Trailing column of ones lets the last weight act as the bias.
    design = np.hstack((X, np.ones((n_samples, 1))))
    weights = np.zeros(n_features + 1)
    losses = []

    boundary_x1 = np.linspace(X[:, 0].min()-1, X[:, 0].max()+1, 100)
    redraw_interval = steps // 10 + 1
    final_step = steps-1

    def redraw(step):
        """Replace the current output with a fresh two-panel snapshot."""
        clear_output(wait=True)
        fig, (boundary_ax, loss_ax) = plt.subplots(1, 2, figsize=(14,6))

        # Left panel: data plus the current boundary w0*x1 + w1*x2 + b = 0
        w0, w1, b = weights
        boundary_ax.scatter(X[:, 0], X[:, 1], c=y, cmap="bwr", edgecolor='k', alpha=0.7)
        if w1 != 0:
            boundary_ax.plot(boundary_x1, -(w0*boundary_x1 + b)/w1, 'k--')
        else:
            boundary_ax.axvline(x=-b/w0, linestyle='--', color='k')

        boundary_ax.set_title(f"Step {step+1}/{steps}\nDecision Boundary Movement")
        boundary_ax.set_xlabel("Feature 1")
        boundary_ax.set_ylabel("Feature 2")
        boundary_ax.grid(True)

        # Right panel: loss recorded after each update so far
        loss_ax.plot(np.arange(1, len(losses)+1), losses, marker='o')
        loss_ax.set_title("Loss Over Training Steps")
        loss_ax.set_xlabel("Step")
        loss_ax.set_ylabel("Loss")
        loss_ax.grid(True)
        loss_ax.set_xlim(0, steps)
        loss_ax.set_ylim(0, max(losses)+0.1)

        plt.tight_layout()
        plt.show()

    for step in range(steps):
        # Forward pass, then one gradient step on the mean cross-entropy
        residual = sigmoid(design @ weights) - y
        weights -= learning_rate * ((1/n_samples) * (design.T @ residual))

        losses.append(compute_loss(design, y, weights))

        if step % redraw_interval == 0 or step == final_step:
            redraw(step)

# 🕹️ Interactive controls: step size, number of updates, and feature noise
widgets.interact(
    gradient_descent_with_visualization,
    learning_rate=widgets.FloatSlider(value=0.1, min=0.001, max=1.0, step=0.01),
    steps=widgets.IntSlider(value=50, min=10, max=200, step=10),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=0.1, description='learning_rate', max=1.0, min=0.001, step=0.01), IntS…

<function __main__.gradient_descent_with_visualization(learning_rate=0.1, steps=50, noise=0.2)>

# <a id="feature-scaling"></a>⚖️ Feature Scaling  


In [None]:
# Simulating and visualizing Feature Scaling effects interactively

import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

def generate_toy_dataset(n_samples=100, feature_scale=20, noise=0.2):
    """Build a seeded 2-D dataset whose first feature is stretched.

    Feature 1 is a standard-normal draw multiplied by ``feature_scale``;
    feature 2 is an unscaled standard-normal draw. Labels are 1 where the
    two noise-free features sum to a positive value, and Gaussian jitter
    scaled by ``noise`` is added to the returned feature matrix.

    Returns:
        ``(features, labels)`` with shapes ``(n_samples, 2)`` and
        ``(n_samples,)``.
    """
    np.random.seed(42)  # same data on every call
    stretched = feature_scale * np.random.randn(n_samples)
    regular = np.random.randn(n_samples)
    labels = (stretched + regular > 0).astype(int)
    features = np.column_stack((stretched, regular))
    features = features + noise * np.random.randn(n_samples, 2)
    return features, labels

def standardize_features(X):
    """Centre each column of ``X`` on zero and divide by its spread.

    The divisor is the column standard deviation plus 1e-8, so a constant
    column maps to zeros instead of dividing by zero.
    """
    column_mean = np.mean(X, axis=0)
    column_scale = np.std(X, axis=0) + 1e-8
    centered = X - column_mean
    return centered / column_scale

def sigmoid(z):
    """Numerically stable sigmoid activation, 1 / (1 + e^(-z)).

    Evaluating ``np.exp(-z)`` directly overflows once ``z`` is a large
    negative number. This form only exponentiates non-positive values, so
    it cannot overflow, and uses the algebraically equivalent expression
    e^z / (1 + e^z) for negative inputs.

    Args:
        z: Scalar or array of logits.

    Returns:
        Probabilities with the same shape as ``z`` (a scalar for scalar input).
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in (0, 1] and therefore never overflows.
    decay = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays as-is.
    return out[()]

def compute_loss(X_aug, y, theta):
    """Average cross-entropy between labels ``y`` and sigmoid(X_aug @ theta).

    1e-8 is added inside both logarithms so a probability of exactly 0 or 1
    does not produce an infinite loss.
    """
    guard = 1e-8
    p = sigmoid(X_aug @ theta)
    hit = y * np.log(p + guard)          # contribution of positive labels
    miss = (1 - y) * np.log(1 - p + guard)  # contribution of negative labels
    return -np.mean(hit + miss)

def gradient_descent(X, y, learning_rate=0.1, steps=100):
    """Run batch gradient descent for logistic regression from zero weights.

    A column of ones is appended to ``X`` so the last weight acts as the
    bias.

    Returns:
        A list holding the loss measured after each of the ``steps`` updates.
    """
    n_rows, n_cols = X.shape
    design = np.hstack((X, np.ones((n_rows,1))))
    weights = np.zeros(n_cols+1)

    losses = []
    for _ in range(steps):
        residual = sigmoid(design @ weights) - y
        weights -= learning_rate * ((1/n_rows) * (design.T @ residual))
        losses.append(compute_loss(design, y, weights))
    return losses

def plot_scaling_effects(feature_scale=20, noise=0.2, learning_rate=0.1, steps=100):
    """Plot gradient-descent loss curves for raw versus standardized features.

    Both runs use the same labels, learning rate, and number of steps; only
    the feature matrix differs.
    """
    X, y = generate_toy_dataset(feature_scale=feature_scale, noise=noise)
    runs = (
        ("Without Scaling", 'o', X),
        ("With Scaling", 'x', standardize_features(X)),
    )

    plt.figure(figsize=(8,6))
    for label, marker, features in runs:
        history = gradient_descent(features, y, learning_rate, steps)
        plt.plot(history, label=label, marker=marker)
    plt.title("Gradient Descent Convergence: Scaling vs No Scaling")
    plt.xlabel("Step")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.show()

# 🕹️ Interactive controls: feature stretch, noise, step size, and step count
widgets.interact(
    plot_scaling_effects,
    feature_scale=widgets.FloatSlider(value=20.0, min=1.0, max=50.0, step=1.0),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
    learning_rate=widgets.FloatSlider(value=0.1, min=0.001, max=1.0, step=0.01),
    steps=widgets.IntSlider(value=100, min=10, max=300, step=10),
)


interactive(children=(FloatSlider(value=20.0, description='feature_scale', max=50.0, min=1.0, step=1.0), Float…

<function __main__.plot_scaling_effects(feature_scale=20, noise=0.2, learning_rate=0.1, steps=100)>

---

# 📊 <a id="logistic-evaluation"></a>**3. Evaluation & Performance**

---

# <a id="metrics"></a>📏 Accuracy, Precision, Recall, F1  


In [17]:
# Simulating and visualizing Accuracy, Precision, Recall, F1 interactively

import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Return seeded 1-D classifier scores and their true binary labels.

    The true label is 1 where the sum of two standard-normal draws is
    positive. The score is that same sum plus Gaussian noise scaled by
    ``noise``, so larger ``noise`` makes the scores less reliable.
    """
    np.random.seed(42)  # same data on every call
    first = np.random.randn(n_samples)
    second = np.random.randn(n_samples)
    signal = first + second
    labels = (signal > 0).astype(int)
    return signal + noise * np.random.randn(n_samples), labels

def compute_metrics(y_true, y_pred):
    """Compute accuracy, precision, recall and F1 from 0/1 label arrays.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_pred: Predicted labels (0 or 1), same length as ``y_true``.

    Returns:
        ``(accuracy, precision, recall, f1)`` as floats. A ratio whose
        denominator is zero (e.g. precision when nothing is predicted
        positive) is reported as 0.0.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    tp = int(np.sum((y_pred == 1) & (y_true == 1)))
    tn = int(np.sum((y_pred == 0) & (y_true == 0)))
    fp = int(np.sum((y_pred == 1) & (y_true == 0)))
    fn = int(np.sum((y_pred == 0) & (y_true == 1)))

    def ratio(numerator, denominator):
        # Exact division with an explicit zero guard. Adding an epsilon to
        # every denominator would bias all results (a perfect classifier
        # would never score exactly 1.0).
        return numerator / denominator if denominator else 0.0

    accuracy = ratio(tp + tn, tp + tn + fp + fn)
    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1 = ratio(2 * precision * recall, precision + recall)

    return accuracy, precision, recall, f1

def animate_metrics(noise=0.2):
    """Plot accuracy, precision, recall and F1 against the decision threshold.

    Scores from ``generate_toy_dataset`` are converted to probabilities
    with a sigmoid, thresholded at 100 evenly spaced cut-offs in [0, 1],
    and the four metrics are computed at each cut-off.
    """
    scores, y_true = generate_toy_dataset(noise=noise)
    probs = 1 / (1 + np.exp(-scores))

    thresholds = np.linspace(0, 1, 100)
    per_threshold = [
        compute_metrics(y_true, (probs >= cutoff).astype(int))
        for cutoff in thresholds
    ]
    # Transpose the list of 4-tuples into one series per metric
    accuracies, precisions, recalls, f1s = zip(*per_threshold)

    # Plotting
    plt.figure(figsize=(10,6))
    curves = (
        (accuracies, "Accuracy", 'o'),
        (precisions, "Precision", 'x'),
        (recalls, "Recall", 's'),
        (f1s, "F1 Score", '^'),
    )
    for values, label, marker in curves:
        plt.plot(thresholds, values, label=label, marker=marker)

    plt.title(f"Metrics vs Threshold\nNoise={noise}")
    plt.xlabel("Threshold")
    plt.ylabel("Score")
    plt.ylim(0, 1.05)
    plt.grid(True)
    plt.legend()
    plt.show()

# 🕹️ Interactive control: noise level of the classifier scores
widgets.interact(
    animate_metrics,
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=0.2, description='noise', max=1.0, step=0.05), Output()), _dom_classes…

<function __main__.animate_metrics(noise=0.2)>

# <a id="confusion-matrix"></a>🧮 Confusion Matrix  


In [18]:
# Simulating and visualizing Confusion Matrix interactively

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Generate seeded classifier scores with matching true labels.

    Two standard-normal vectors are summed; the label is 1 where that sum
    is positive, and the score is the sum plus Gaussian noise multiplied
    by ``noise``.
    """
    np.random.seed(42)  # reproducible across slider updates
    component_a = np.random.randn(n_samples)
    component_b = np.random.randn(n_samples)
    truth = (component_a + component_b > 0).astype(int)
    disturbance = noise * np.random.randn(n_samples)
    noisy_scores = component_a + component_b + disturbance
    return noisy_scores, truth

def compute_confusion_matrix(y_true, y_pred):
    """Count prediction outcomes for 0/1 labels.

    Returns:
        A 2x2 array ``[[TP, FN], [FP, TN]]``: the first row holds the
        samples whose true label is 1, the second row those whose true
        label is 0.
    """
    actual_pos = y_true == 1
    actual_neg = y_true == 0
    predicted_pos = y_pred == 1
    predicted_neg = y_pred == 0

    true_pos = np.sum(predicted_pos & actual_pos)
    false_neg = np.sum(predicted_neg & actual_pos)
    false_pos = np.sum(predicted_pos & actual_neg)
    true_neg = np.sum(predicted_neg & actual_neg)
    return np.array([[true_pos, false_neg], [false_pos, true_neg]])

def plot_confusion(threshold=0.5, noise=0.2):
    """Draw the confusion matrix obtained at a given probability threshold.

    Args:
        threshold: Probability cut-off; samples with probability >= threshold
            are predicted positive.
        noise: Noise level forwarded to ``generate_toy_dataset``.

    ``compute_confusion_matrix`` returns ``[[TP, FN], [FP, TN]]``: rows are
    the actual class and columns the predicted class. The heatmap rows
    (y axis) are therefore labelled "Actual" and the columns (x axis)
    "Predicted"; the reverse labelling would present FN as FP and vice versa.
    """
    scores, y_true = generate_toy_dataset(noise=noise)
    probs = 1 / (1 + np.exp(-scores))
    y_pred = (probs >= threshold).astype(int)

    cm = compute_confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", cbar=False,
                xticklabels=["Predicted 1", "Predicted 0"],
                yticklabels=["Actual 1", "Actual 0"])
    plt.title(f"Confusion Matrix\nThreshold={threshold:.2f}, Noise={noise}")
    plt.xlabel("Predicted Label")
    plt.ylabel("Actual Label")
    plt.show()

# 🕹️ Interactive controls: classification threshold and score noise
widgets.interact(
    plot_confusion,
    threshold=widgets.FloatSlider(value=0.5, min=0.0, max=1.0, step=0.01),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=0.5, description='threshold', max=1.0, step=0.01), FloatSlider(value=0…

<function __main__.plot_confusion(threshold=0.5, noise=0.2)>

# <a id="roc-auc"></a>📉 ROC Curve & AUC  




In [19]:
# Simulating and visualizing ROC Curve & AUC interactively

import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from sklearn.metrics import roc_curve, auc
from IPython.display import display

def generate_toy_dataset(n_samples=100, noise=0.2):
    """Create seeded scores and true labels for the ROC demonstration.

    The label is 1 where the sum of two standard-normal draws exceeds
    zero; the score is that sum with Gaussian noise (scaled by ``noise``)
    added on top.
    """
    np.random.seed(42)  # same data on every call
    u = np.random.randn(n_samples)
    v = np.random.randn(n_samples)
    total = u + v
    scores = total + noise * np.random.randn(n_samples)
    labels = (total > 0).astype(int)
    return scores, labels

def plot_roc_curve(noise=0.2):
    """Plot the ROC curve of the noisy scores and report its AUC.

    Scores are mapped to probabilities with a sigmoid and passed to
    ``roc_curve``; the diagonal is drawn as the random-classifier baseline.
    """
    scores, y_true = generate_toy_dataset(noise=noise)
    probabilities = 1 / (1 + np.exp(-scores))

    # Operating points along the curve and the area underneath it
    fpr, tpr, _ = roc_curve(y_true, probabilities)
    area = auc(fpr, tpr)

    _, ax = plt.subplots(figsize=(8,6))
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f"ROC Curve (AUC = {area:.2f})")
    ax.plot([0, 1], [0, 1], color='navy', linestyle='--', lw=2, label="Random Classifier")
    ax.set_xlabel('False Positive Rate (FPR)')
    ax.set_ylabel('True Positive Rate (TPR)')
    ax.set_title(f"ROC Curve\nNoise={noise}")
    ax.legend(loc="lower right")
    ax.grid(True)
    plt.show()

# 🕹️ Interactive control: noise level of the classifier scores
widgets.interact(
    plot_roc_curve,
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
)


interactive(children=(FloatSlider(value=0.2, description='noise', max=1.0, step=0.05), Output()), _dom_classes…

<function __main__.plot_roc_curve(noise=0.2)>

# <a id="overfitting-classification"></a>🔥 Overfitting in Classification Models


In [20]:
# Simulating and visualizing Overfitting in Classification Models interactively

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
import ipywidgets as widgets
from IPython.display import display

def generate_toy_dataset(n_samples=300, noise=0.2, complex=False):
    """Generate a seeded 2-D dataset with a linear or a wavy class rule.

    With ``complex=False`` the label is 1 where the noise-free coordinates
    sum to a positive value. With ``complex=True`` the label is 1 where
    ``sin(1.5 * x1) + cos(1.5 * x2)`` is positive. Gaussian jitter scaled by
    ``noise`` is added to the returned features after labelling.
    """
    # NOTE: the ``complex`` parameter shadows the builtin of the same name;
    # it is kept because callers pass it by keyword.
    np.random.seed(42)  # same data on every call
    points = np.random.randn(n_samples, 2)
    x1, x2 = points[:, 0], points[:, 1]
    rule = np.sin(1.5*x1) + np.cos(1.5*x2) if complex else x1 + x2
    labels = (rule > 0).astype(int)
    return points + noise * np.random.randn(*points.shape), labels

def plot_overfitting(degree=1, noise=0.2, complex_data=False):
    """Fit polynomial logistic regression and compare train vs validation accuracy.

    Args:
        degree: Degree of the polynomial feature expansion.
        noise: Jitter strength forwarded to ``generate_toy_dataset``.
        complex_data: Use the non-linear labelling rule when True.

    The left panel shows the predicted-probability surface with the train
    (circles) and validation (crosses) points; the right panel shows both
    accuracies as annotated bars. A growing gap between the two bars as
    ``degree`` increases is the overfitting signal.
    """
    X, y = generate_toy_dataset(noise=noise, complex=complex_data)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

    # Create polynomial logistic regression model
    model = make_pipeline(
        PolynomialFeatures(degree=degree),
        LogisticRegression(max_iter=1000)
    )
    model.fit(X_train, y_train)
    
    train_accuracy = model.score(X_train, y_train)
    val_accuracy = model.score(X_val, y_val)
    
    # Evaluate the model on a dense grid covering the data, padded by 1
    x_min, x_max = X[:,0].min() - 1, X[:,0].max() + 1
    y_min, y_max = X[:,1].min() - 1, X[:,1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                         np.linspace(y_min, y_max, 300))
    grid = np.c_[xx.ravel(), yy.ravel()]
    
    probs = model.predict_proba(grid)[:,1].reshape(xx.shape)

    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(14,6))
    
    # Decision Boundary Plot
    ax1.contourf(xx, yy, probs, 25, cmap="bwr", alpha=0.6)
    ax1.scatter(X_train[:,0], X_train[:,1], c=y_train, cmap="bwr", edgecolor='k', label='Train')
    ax1.scatter(X_val[:,0], X_val[:,1], c=y_val, cmap="bwr", marker='x', label='Validation')
    ax1.set_title(f"Decision Boundary\nDegree={degree}, Complex={complex_data}, Noise={noise}")
    ax1.legend()
    ax1.grid(True)

    # Train vs Validation Accuracy
    accuracies = [train_accuracy, val_accuracy]
    ax2.bar(["Train Accuracy", "Validation Accuracy"], accuracies, color=["green", "blue"])
    # Headroom above 1.0 keeps the value labels (drawn at accuracy + 0.02)
    # inside the axes even when accuracy is close to 100%.
    ax2.set_ylim(0, 1.1)
    ax2.set_title("Train vs Validation Accuracy")
    for i, v in enumerate(accuracies):
        ax2.text(i, v+0.02, f"{v:.2f}", ha='center', fontsize=12)

    plt.tight_layout()
    plt.show()

# 🕹️ Interactive controls: polynomial degree, feature noise, and data complexity
widgets.interact(
    plot_overfitting,
    degree=widgets.IntSlider(value=1, min=1, max=20, step=1),
    noise=widgets.FloatSlider(value=0.2, min=0.0, max=1.0, step=0.05),
    complex_data=widgets.Checkbox(description="Complex Data?", value=False),
)


interactive(children=(IntSlider(value=1, description='degree', max=20, min=1), FloatSlider(value=0.2, descript…

<function __main__.plot_overfitting(degree=1, noise=0.2, complex_data=False)>