# Artificial Vision & Feature Separability — 04 · MNIST Logistic Regression Baseline

**Goal.** Establish a strong linear baseline on MNIST digits with **multinomial logistic regression**, including **regularization tuning**, **calibration**, and **error analysis**.  
**Outputs.** Confusion matrix, log-loss, ROC-AUC (OvR), calibration plot, and misclassified examples.  
**Data.** Loaded via `torchvision.datasets.MNIST` (offline-capable if pre-downloaded under `./data`).

In [None]:
# --- Reproducibility & Environment (with SSL fix) ---
import os
import random

import numpy as np

try:
    import certifi
except ImportError:
    # certifi is only used to point SSL at a CA bundle for the dataset download;
    # without it the notebook can still run (e.g. with data already under ./data).
    certifi = None

# Single seed shared by Python's and NumPy's global RNGs (and reused by later cells).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Output folders for figures and the dataset cache.
os.makedirs("results", exist_ok=True)
os.makedirs("data", exist_ok=True)

# SSL fix so dataset downloads don't fail on some systems
if certifi is not None:
    os.environ["SSL_CERT_FILE"] = certifi.where()
    print("SSL_CERT_FILE set to:", os.environ["SSL_CERT_FILE"])
else:
    print("certifi not installed; leaving SSL_CERT_FILE unchanged")

print("Seed set to", SEED)

In [None]:
# --- Imports ---
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import torch
import torchvision
from torchvision import transforms

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, classification_report, accuracy_score, log_loss, roc_auc_score
)
from sklearn.calibration import calibration_curve

## 1. Data — MNIST (28×28 grayscale)
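We load the standard MNIST training and test splits. Each 28×28 image is flattened to a 784-dimensional vector for the linear model; `ToTensor` already maps pixel values to [0, 1], and per-feature standardization follows in Section 2.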

In [None]:
def _dataset_to_arrays(dataset):
    """Convert an indexable dataset of ``(image_tensor, label)`` pairs to NumPy arrays.

    Every sample is fetched exactly once, so the dataset's transform runs a single
    time per item instead of once for the image and again for the label.

    Returns:
        A tuple ``(X, y)`` where ``X`` stacks one flattened image per row and
        ``y`` holds the labels as Python-int-derived integers.
    """
    images, labels = [], []
    # Index-based access: only ``__len__``/``__getitem__`` are required of the dataset.
    for idx in range(len(dataset)):
        image, label = dataset[idx]
        images.append(image.numpy().reshape(-1))
        labels.append(int(label))
    return np.stack(images), np.array(labels)


transform = transforms.Compose([transforms.ToTensor()])
trainset = torchvision.datasets.MNIST(root="./data", train=True, download=True, transform=transform)
testset  = torchvision.datasets.MNIST(root="./data", train=False, download=True, transform=transform)

X_train, y_train = _dataset_to_arrays(trainset)
X_test, y_test = _dataset_to_arrays(testset)

print("Train:", X_train.shape, " Test:", X_test.shape)

## 2. Standardization

In [None]:
# Learn the per-feature mean/std from the training split only, then apply that
# same transform to both splits so test data never influences the statistics.
scaler = StandardScaler(with_std=True, with_mean=True)
scaler.fit(X_train)
Xz_train = scaler.transform(X_train)
Xz_test = scaler.transform(X_test)

## 3. Logistic Regression — Regularization Sweep (CV by Log-Loss)

In [None]:
# Only the inverse regularization strength C is swept; settings that never vary
# belong on the estimator rather than in the grid.
# `multi_class` is deliberately not passed: it is deprecated in recent scikit-learn
# releases, and the lbfgs solver already fits a multinomial model for multiclass targets.
param_grid = {"C": np.logspace(-3, 2, 8)}
base = LogisticRegression(solver="lbfgs", max_iter=200, random_state=SEED)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

# Model selection by cross-validated log-loss; refit=True retrains the best C on all training data.
gs = GridSearchCV(base, param_grid, scoring="neg_log_loss", cv=cv, n_jobs=-1, refit=True, verbose=0)
gs.fit(Xz_train, y_train)

best_lr = gs.best_estimator_
print("Best params:", gs.best_params_)
# The scorer is *negated* log-loss, so flip the sign for reporting.
print("Best CV log-loss:", -gs.best_score_)

# Held-out evaluation of the refit model.
pred_test = best_lr.predict(Xz_test)
proba_test = best_lr.predict_proba(Xz_test)

acc = accuracy_score(y_test, pred_test)
ll  = log_loss(y_test, proba_test, labels=list(range(10)))
cm  = confusion_matrix(y_test, pred_test, labels=list(range(10)))
print(f"Test Accuracy: {acc:.4f} | Test Log-loss: {ll:.4f}")
print(classification_report(y_test, pred_test))

In [None]:
# Heatmap of the test confusion matrix (y-axis: true digit, x-axis: predicted digit).
fig, ax = plt.subplots()
heatmap = ax.imshow(cm, aspect="auto")
ax.set_title("MNIST — Confusion Matrix (Logistic)")
ax.set_xlabel("Pred")
ax.set_ylabel("True")
digit_ticks = range(10)
ax.set_xticks(digit_ticks)
ax.set_yticks(digit_ticks)
fig.colorbar(heatmap)
fig.tight_layout()
fig.savefig("results/04_confusion_logreg_mnist.png", dpi=150)
plt.show()

## 4. ROC-AUC (One-vs-Rest macro)

In [None]:
# Macro-averaged one-vs-rest ROC-AUC over the ten digit classes,
# computed from the predicted class probabilities on the test split.
auc = roc_auc_score(
    y_test,
    proba_test,
    labels=list(range(10)),
    multi_class="ovr",
    average="macro",
)
print(f"ROC-AUC (OvR, macro): {auc:.4f}")

## 5. Calibration Curve

In [None]:
# One-vs-rest reliability diagram for a single digit class; change `cls` to inspect another.
cls = 0
y_true_bin = (y_test == cls).astype(int)  # 1 where the true digit is `cls`, else 0
p_cls = proba_test[:, cls]                # predicted probability of `cls`
frac_pos, mean_pred = calibration_curve(y_true_bin, p_cls, n_bins=15, strategy="uniform")

plt.figure()
plt.plot(mean_pred, frac_pos, marker="o")
plt.plot([0, 1], [0, 1], "--", alpha=0.5)  # diagonal = perfect calibration
plt.xlabel("Mean predicted probability")
plt.ylabel("Fraction of positives")
plt.title(f"Calibration — class {cls}")
plt.tight_layout()
# File name follows `cls`, so plotting another class does not overwrite the class-0 figure.
plt.savefig(f"results/04_calibration_mnist_cls{cls}.png", dpi=150)
plt.show()

## 6. Error Analysis — Most Confused Pairs & Examples

In [None]:
import math

# Zero the diagonal so only misclassifications compete for the arg-max.
cm_off = cm.copy()
np.fill_diagonal(cm_off, 0)
i, j = np.unravel_index(np.argmax(cm_off), cm_off.shape)
print(f"Most confused pair: true={i}, pred={j}, count={cm_off[i,j]}")

max_examples = 32
if cm_off[i, j] > 0:
    # Vectorized selection of test samples with true label i that were predicted as j.
    mis_idx = np.flatnonzero((y_test == i) & (pred_test == j))[:max_examples].tolist()
else:
    # No off-diagonal errors at all: argmax falls back to (0, 0), and selecting on it
    # would display correctly classified samples as if they were mistakes.
    mis_idx = []

cols = 8
rows = math.ceil(len(mis_idx) / cols) if mis_idx else 1
plt.figure(figsize=(cols * 1.2, rows * 1.2))
for idx, k in enumerate(mis_idx):
    plt.subplot(rows, cols, idx + 1)
    # Plot the unstandardized pixels so the digit is shown as in the dataset.
    plt.imshow(X_test[k].reshape(28, 28), cmap="gray")
    plt.axis("off")
    plt.title(f"{i}→{j}", fontsize=8)
plt.suptitle(f"Misclassified {i}→{j}")
plt.tight_layout()
plt.savefig("results/04_misclassified_pair_grid.png", dpi=150)
plt.show()

## 7. Takeaways

- Multinomial logistic regression provides a **strong linear baseline** on MNIST with proper scaling and regularization.
- **Log-loss CV** offers a robust tuning criterion beyond raw accuracy.
- Calibration and confusion analysis give insight into **where** linear models struggle — context for CNN gains later.