# Artificial Vision & Feature Separability — 02 · Colors (Advanced Baselines & Log-Loss)

**Goal.** Strengthen baselines for color categorization with **log-loss**, **regularization**, **class imbalance handling**, and **calibration**.  
**Models.** Multinomial Logistic Regression (C & penalty sweeps), Prototype, k-NN, and GCM.  
**Evaluation.** Accuracy, **log-loss**, confusion matrices, **calibration curves**, and **ROC-AUC (OvR)**.

In [None]:
# --- Reproducibility & Environment ---
import os, random
import numpy as np

# One seed shared by every stochastic step in this notebook.
SEED = 42
for _seed_fn in (random.seed, np.random.seed):
    _seed_fn(SEED)

# Folders used for saved figures ("results") and the optional input CSV ("data").
for _folder in ("results", "data"):
    os.makedirs(_folder, exist_ok=True)

print("Seed set to", SEED)

In [None]:
# --- Imports ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (
    confusion_matrix, classification_report, accuracy_score, log_loss,
    roc_auc_score, RocCurveDisplay
)
from sklearn.calibration import calibration_curve
from sklearn.decomposition import PCA

## 1. Data
Load from `data/colors.csv` (columns like `R,G,B,label` or `L,a,b,label`) **or** generate an imbalanced toy RGB set to test robustness.

In [None]:
def make_toy_colors_imbalanced(seed=SEED):
    """Generate an imbalanced three-class toy RGB data set.

    Each class is a Gaussian cloud (standard deviation 30 per channel) around
    a fixed RGB centre, clipped to [0, 255]. "red" has 600 samples, "green"
    and "blue" have 200 each.

    Parameters
    ----------
    seed : int
        Seed for ``numpy.random.default_rng``.

    Returns
    -------
    pandas.DataFrame
        Columns ``R``, ``G``, ``B`` (float32) and ``label`` (str).
    """
    rng = np.random.default_rng(seed)
    centers = {
        "red":   (np.array([220, 40, 40]), 600),
        "green": (np.array([40, 220, 40]), 200),
        "blue":  (np.array([40, 40, 220]), 200),
    }
    X_parts, y_parts = [], []
    for label, (center, n) in centers.items():
        X_parts.append(rng.normal(center, 30, size=(n, 3)).clip(0, 255))
        y_parts.append(np.full(n, label))
    X = np.vstack(X_parts).astype(np.float32)
    y = np.concatenate(y_parts)
    return pd.DataFrame({"R": X[:, 0], "G": X[:, 1], "B": X[:, 2], "label": y})


# Option A: CSV loader
csv_path = Path("data/colors.csv")
if csv_path.exists():
    df = pd.read_csv(csv_path)
    # Later cells read df["label"], so fail here with a clear message instead.
    if "label" not in df.columns:
        raise ValueError(f"{csv_path} must contain a 'label' column.")
    # Case-insensitive match on RGB / Lab channel names; keep the first three hits.
    channel_names = {"r", "g", "b", "l", "a"}
    feat_cols = [c for c in df.columns if c.lower() in channel_names][:3]
    # Explicit raise rather than assert: asserts are stripped under `python -O`.
    if len(feat_cols) != 3:
        raise ValueError("Expect 3 feature columns (RGB or Lab).")
else:
    # Option B: imbalanced toy RGB data (red is majority)
    df = make_toy_colors_imbalanced()
    feat_cols = ["R", "G", "B"]

df.head()

## 2. Split, Scale, and Encodings

In [None]:
# Feature matrix and target vector taken from the loaded frame.
X = df[feat_cols].values
y = df["label"].values
# np.unique already returns the distinct classes in sorted order.
labels = list(np.unique(y))

# Stratified 70/30 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=SEED,
)

# Standardize with statistics estimated on the training split only.
scaler = StandardScaler().fit(X_train)
Xz_train, Xz_test = scaler.transform(X_train), scaler.transform(X_test)

print("Train size:", len(X_train), " Test size:", len(X_test), " Classes:", labels)

## 3. Multinomial Logistic Regression — **C** & Penalty Sweep
We sweep **C** (inverse regularization strength) with the `l2` penalty; `l1` can be added to the grid by switching to `solver="saga"`.  
We track **accuracy** and **log-loss**.

In [None]:
# Hyper-parameter grid for the logistic-regression sweep.
# `multi_class` is intentionally not set: it is deprecated in recent
# scikit-learn releases, and the lbfgs solver already fits a multinomial
# (softmax) model when there are three or more classes.
param_grid = {
    "C": np.logspace(-3, 2, 8),
    "penalty": ["l2"],  # can add "l1" with solver="saga"
    "solver": ["lbfgs"],  # "saga" supports l1+l2 but is slower
    "max_iter": [1000],
}

# 5-fold stratified CV; the model is selected by (negated) log-loss and then
# refit on the whole training split.
base = LogisticRegression(random_state=SEED)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
gs = GridSearchCV(base, param_grid, scoring="neg_log_loss", cv=cv, n_jobs=-1, refit=True, verbose=0)
gs.fit(Xz_train, y_train)

best_lr = gs.best_estimator_
print("Best params:", gs.best_params_)
# best_score_ holds the negated log-loss, so flip the sign for reporting.
print("Best CV log-loss:", -gs.best_score_)

# Held-out evaluation of the refit best model.
pred = best_lr.predict(Xz_test)
proba = best_lr.predict_proba(Xz_test)
acc = accuracy_score(y_test, pred)
ll = log_loss(y_test, proba, labels=labels)
cm = confusion_matrix(y_test, pred, labels=labels)
print(f"Test Accuracy: {acc:.3f}  |  Test Log-loss: {ll:.3f}")
print(classification_report(y_test, pred))

In [None]:
# Heat-map of the best logistic model's confusion matrix
# (rows: true class, columns: predicted class).
_fig, _ax = plt.subplots()
_im = _ax.imshow(cm, aspect="auto")
_ax.set_title("Confusion — Best Logistic Regression")
_ax.set_xlabel("Pred")
_ax.set_ylabel("True")
_ticks = range(len(labels))
_ax.set_xticks(_ticks)
_ax.set_xticklabels(labels, rotation=45)
_ax.set_yticks(_ticks)
_ax.set_yticklabels(labels)
_fig.colorbar(_im, ax=_ax)
_fig.tight_layout()
_fig.savefig("results/02_confusion_logreg_best.png", dpi=150)
plt.show()

## 4. Prototype & Exemplar Revisited (for comparison)
Prototype classifier on standardized space; k-NN and GCM exemplar models.

In [None]:
# Prototype: one mean vector per class, computed on the standardized training split
centroids = {}
for lab in labels:
    class_rows = Xz_train[y_train == lab]
    centroids[lab] = class_rows.mean(axis=0)
def proto_predict(Xz, prototypes=None):
    """Assign each sample to the class whose prototype is nearest (Euclidean).

    Parameters
    ----------
    Xz : array-like of shape (n_samples, n_features)
        Points to classify. A single 1-D sample is treated as one row.
    prototypes : mapping of label -> 1-D array, optional
        Class prototypes. When omitted, the module-level ``centroids``
        mapping is used.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Predicted label per row. Exact distance ties go to the prototype
        that comes first in the mapping's iteration order.
    """
    protos = centroids if prototypes is None else prototypes
    names = np.asarray(list(protos))
    Xz = np.atleast_2d(np.asarray(Xz, dtype=float))
    if Xz.size == 0:
        # Nothing to classify: return an empty label array.
        return names[:0]
    mus = np.stack([np.asarray(protos[key], dtype=float) for key in protos])
    # Pairwise distances, shape (n_samples, n_prototypes), in one broadcasted pass.
    dist = np.linalg.norm(Xz[:, None, :] - mus[None, :, :], axis=2)
    return names[np.argmin(dist, axis=1)]

# Held-out evaluation of the prototype classifier.
pred_proto = proto_predict(Xz_test)
cm_proto = confusion_matrix(y_test, pred_proto, labels=labels)
acc_proto = accuracy_score(y_test, pred_proto)
print(f"Prototype — Acc: {acc_proto:.3f}")

# k-NN (k = 5) fitted on the standardized training split
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(Xz_train, y_train)
proba_knn = knn.predict_proba(Xz_test)
pred_knn = knn.predict(Xz_test)
ll_knn = log_loss(y_test, proba_knn, labels=labels)
acc_knn = accuracy_score(y_test, pred_knn)
print(f"k-NN — Acc: {acc_knn:.3f} | Log-loss: {ll_knn:.3f}")

# GCM exemplar
def gcm_predict_proba(Xz, Xz_train, y_train, labels, c=2.0):
    """Class probabilities from summed exponential similarity to exemplars.

    For each query row, the similarity to every training exemplar is
    ``exp(-c * euclidean_distance)``. Similarities are summed per class and
    normalized so that each row sums to 1.

    Parameters
    ----------
    Xz : array-like of shape (n_queries, n_features)
        Points to score.
    Xz_train : array-like of shape (n_exemplars, n_features)
        Stored exemplars.
    y_train : sequence of length n_exemplars
        Exemplar labels; every value must appear in ``labels``.
    labels : sequence
        Class order of the output columns.
    c : float, default 2.0
        Sensitivity of the similarity to distance.

    Returns
    -------
    numpy.ndarray of shape (n_queries, len(labels))
        Row-normalized class probabilities. With no exemplars the rows are
        all zero (the original behaviour for that case).

    Raises
    ------
    KeyError
        If ``y_train`` contains a label that is not in ``labels``.
    """
    Xz = np.asarray(Xz, dtype=float)
    Xz_train = np.asarray(Xz_train, dtype=float)
    n_classes = len(labels)
    proba = np.zeros((Xz.shape[0], n_classes), dtype=float)
    if Xz_train.shape[0] == 0:
        return proba

    lab2idx = {lab: i for i, lab in enumerate(labels)}
    class_idx = np.fromiter(
        (lab2idx[lab] for lab in y_train), dtype=np.intp, count=len(y_train)
    )

    for i, row in enumerate(Xz):
        dist = np.linalg.norm(Xz_train - row, axis=1)
        log_sim = -c * dist
        # Shift so the largest exponent is 0. The shift cancels in the
        # normalization but stops exp() underflowing to an all-zero row
        # when the query is far from every exemplar.
        sim = np.exp(log_sim - log_sim.max())
        # Per-class similarity totals without a Python loop over exemplars.
        class_sim = np.bincount(class_idx, weights=sim, minlength=n_classes)
        proba[i] = class_sim / class_sim.sum()
    return proba

# Held-out evaluation of the GCM exemplar model (c = 2.0).
proba_gcm = gcm_predict_proba(Xz_test, Xz_train, y_train, labels, c=2.0)
_gcm_top = np.argmax(proba_gcm, axis=1)  # column index of the most probable class
pred_gcm = np.array([labels[k] for k in _gcm_top])
ll_gcm = log_loss(y_test, proba_gcm, labels=labels)
acc_gcm = accuracy_score(y_test, pred_gcm)
print(f"GCM — Acc: {acc_gcm:.3f} | Log-loss: {ll_gcm:.3f}")

In [None]:
# Save confusion matrices: one heat-map per baseline, written to results/
_baseline_preds = {
    "prototype": pred_proto,
    "knn": pred_knn,
    "gcm": pred_gcm,
}
for name, yhat in _baseline_preds.items():
    cm_ = confusion_matrix(y_test, yhat, labels=labels)
    _fig, _ax = plt.subplots()
    _im = _ax.imshow(cm_, aspect="auto")
    _ax.set_title(f"Confusion — {name}")
    _ax.set_xlabel("Pred")
    _ax.set_ylabel("True")
    _ticks = range(len(labels))
    _ax.set_xticks(_ticks)
    _ax.set_xticklabels(labels, rotation=45)
    _ax.set_yticks(_ticks)
    _ax.set_yticklabels(labels)
    _fig.colorbar(_im, ax=_ax)
    _fig.tight_layout()
    _fig.savefig(f"results/02_confusion_{name}.png", dpi=150)
    plt.show()

## 5. Calibration Analysis
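Plot reliability curves (observed fraction of positives vs. mean predicted probability) for the best logistic regression and k-NN models, treating the first class in `labels` as the positive class (one-vs-rest).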

In [None]:
def plot_calibration(y_true, proba, labels, title, path, ref_idx=0, n_bins=10):
    """Plot and save a one-vs-rest reliability curve for one reference class.

    The multi-class problem is reduced to "reference class vs. everything
    else": the binary target is ``y_true == labels[ref_idx]`` and the score
    is column ``ref_idx`` of ``proba``.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True class labels.
    proba : array-like of shape (n_samples, n_classes)
        Predicted probabilities; column ``j`` must correspond to ``labels[j]``.
    labels : sequence
        Class labels in the column order of ``proba``.
    title : str
        Figure title.
    path : str
        Where the figure is saved.
    ref_idx : int, default 0
        Index of the reference (positive) class.
    n_bins : int, default 10
        Number of uniform-width probability bins.
    """
    # Coerce to arrays so plain lists work with the comparison and slicing below.
    y_true = np.asarray(y_true)
    proba = np.asarray(proba)
    y_true_bin = (y_true == labels[ref_idx]).astype(int)
    p_ref = proba[:, ref_idx]
    frac_pos, mean_pred = calibration_curve(y_true_bin, p_ref, n_bins=n_bins, strategy="uniform")
    plt.figure()
    plt.plot(mean_pred, frac_pos, marker="o")
    # Diagonal reference line: points on it are perfectly calibrated.
    plt.plot([0,1],[0,1], "--", alpha=0.5)
    plt.xlabel("Mean predicted probability")
    plt.ylabel("Fraction of positives")
    plt.title(title)
    plt.tight_layout(); plt.savefig(path, dpi=150); plt.show()

# Logistic probabilities on the test split (k-NN probabilities were computed earlier)
proba_lr = best_lr.predict_proba(Xz_test)

# One reliability plot per model: logistic first, then k-NN.
for _probs, _title, _path in (
    (proba_lr, "Calibration — Logistic (class 0 ref)", "results/02_calibration_logreg.png"),
    (proba_knn, "Calibration — k-NN (class 0 ref)", "results/02_calibration_knn.png"),
):
    plot_calibration(y_test, _probs, labels, _title, _path)

## 6. ROC-AUC (One-vs-Rest)
Compute macro-average ROC-AUC using OvR.

In [None]:
# Integer-encode the test labels by their position in `labels`.
# NOTE(review): neither lab2idx nor y_test_bin is used by the AUC calls below,
# which take the string labels directly.
lab2idx = dict(zip(labels, range(len(labels))))
y_test_bin = np.array([lab2idx[name] for name in y_test])

# One-vs-rest ROC-AUC per model, computed from the predicted probabilities
auc_lr, auc_knn, auc_gcm = (
    roc_auc_score(y_test, probs, multi_class="ovr", labels=labels)
    for probs in (proba_lr, proba_knn, proba_gcm)
)

print(f"ROC-AUC (OvR) — Logistic: {auc_lr:.3f} | k-NN: {auc_knn:.3f} | GCM: {auc_gcm:.3f}")

## 7. Decision Illustrations (2D PCA)

In [None]:
# Fit a 2-component PCA on the standardized training features only, then apply
# the same projection to the standardized test features.
p2 = PCA(n_components=2, random_state=SEED)
Z_train = p2.fit_transform(Xz_train)
# NOTE(review): Z_test is not referenced by the plotting calls in this section.
Z_test  = p2.transform(Xz_test)

def plot_boundary(model, Z_tr, y_tr, title, outpath, labels=None):
    """Fit ``model`` on 2-D points and plot its decision regions.

    Parameters
    ----------
    model : estimator with ``fit(X, y)`` and ``predict(X)``
        Fitted in place on ``Z_tr`` / ``y_tr`` (the passed object is modified).
    Z_tr : array-like of shape (n_samples, 2)
        2-D training coordinates.
    y_tr : array-like of shape (n_samples,)
        Training labels.
    title : str
        Figure title.
    outpath : str
        Where the figure is saved.
    labels : sequence, optional
        Class order used for colour coding. Defaults to the sorted unique
        values of ``y_tr``.

    Raises
    ------
    KeyError
        If a training or predicted label is missing from ``labels``.
    """
    Z_tr = np.asarray(Z_tr)
    y_tr = np.asarray(y_tr)
    class_names = list(np.unique(y_tr)) if labels is None else list(labels)
    code_of = {lab: i for i, lab in enumerate(class_names)}

    def label_to_int(arr):
        # Map labels to integer codes; an unknown label raises KeyError.
        arr = np.asarray(arr)
        codes = [code_of[value] for value in arr.ravel()]
        return np.array(codes, dtype=int).reshape(arr.shape)

    # Fit on 2D to draw clean boundaries
    model.fit(Z_tr, y_tr)
    xmin, ymin = Z_tr.min(axis=0) - 1
    xmax, ymax = Z_tr.max(axis=0) + 1
    xx, yy = np.meshgrid(np.linspace(xmin, xmax, 200), np.linspace(ymin, ymax, 200))
    grid = np.c_[xx.ravel(), yy.ravel()]
    pred = label_to_int(model.predict(grid)).reshape(xx.shape)

    # Half-integer levels put each integer class code in its own band; the
    # same vmin/vmax on the scatter keeps points and regions on one colour scale.
    levels = np.arange(len(class_names) + 1) - 0.5

    plt.figure()
    plt.contourf(xx, yy, pred, levels=levels, alpha=0.3)
    plt.scatter(Z_tr[:,0], Z_tr[:,1], c=label_to_int(y_tr), s=10, edgecolor='k', linewidth=0.2,
                vmin=levels[0], vmax=levels[-1])
    plt.title(title); plt.tight_layout(); plt.savefig(outpath, dpi=150); plt.show()

# Logistic regression refit on the 2-D PCA coordinates. `multi_class` is not
# passed: it is deprecated in recent scikit-learn releases, and the default
# lbfgs solver already fits a multinomial model for three or more classes.
plot_boundary(LogisticRegression(max_iter=1000, random_state=SEED),
              Z_train, y_train, "Decision — Logistic (2D PCA)", "results/02_boundary_logistic_2d.png")

# k-NN (k = 5) refit on the same 2-D coordinates.
plot_boundary(KNeighborsClassifier(n_neighbors=5),
              Z_train, y_train, "Decision — k-NN (2D PCA)", "results/02_boundary_knn_2d.png")

## 8. Takeaways
- **Log-loss** is a stricter metric than accuracy and is improved via **regularization tuning** (C-sweep).
- Prototype and exemplar models provide interpretable baselines; exemplar (k-NN/GCM) often improves log-loss on complex boundaries.
- **Calibration** matters when comparing probabilistic models; k-NN can be overconfident/underconfident depending on k.
- Macro **ROC-AUC (OvR)** provides class-balanced performance insight for imbalanced data.