# Artificial Vision & Feature Separability — 05 · Perceptron & Linear Separability

**Goal.** Implement the classic **Perceptron** algorithm, study **linear separability**, and compare against **logistic regression** on toy and real-ish datasets.  
**Outputs.** Decision boundaries, learning curves, margin diagnostics, and confusion matrices.

In [None]:
# --- Reproducibility & Environment ---
import os, random, numpy as np
# One shared seed for both the stdlib and the NumPy global generators; the
# "results" folder is created up front so that saved figures have a target.
SEED = 42
for _seed_rng in (random.seed, np.random.seed):
    _seed_rng(SEED)
os.makedirs("results", exist_ok=True)
print("Seed set to", SEED)

In [None]:
# --- Imports ---
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.datasets import make_blobs, make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.decomposition import PCA

## 1. Data — Linearly Separable vs Non-separable
We generate a clearly separable set and a slightly overlapping set.

In [None]:
# Separable case: two blobs with a small spread.
X_sep, y_sep = make_blobs(n_samples=600, centers=2, cluster_std=0.7, random_state=SEED)
y_sep = 2 * y_sep - 1  # relabel for the perceptron: 0 -> -1, 1 -> +1

# Non-separable case: low class separation plus label noise (flip_y).
X_non, y_non = make_classification(
    n_samples=600,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    class_sep=0.6,
    flip_y=0.1,
    random_state=SEED,
)
y_non = 2 * y_non - 1

# Standardize each dataset with its own scaler (for stability); the fitted
# scalers stay available as sc_sep / sc_non.
sc_sep = StandardScaler().fit(X_sep)
sc_non = StandardScaler().fit(X_non)
Z_sep = sc_sep.transform(X_sep)
Z_non = sc_non.transform(X_non)

print("Shapes:", Z_sep.shape, Z_non.shape)

## 2. Perceptron (from scratch)
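We implement the online Perceptron rule: whenever a point is misclassified, i.e. \( y_i (w^\top x_i + b) \le 0 \), we update \( w \leftarrow w + \eta\, y_i x_i \) and \( b \leftarrow b + \eta\, y_i \), where \(\eta\) is the learning rate (`lr`, 1.0 by default).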

In [None]:
def perceptron_train(X, y, epochs=20, lr=1.0, shuffle=True, seed=SEED, *, early_stop=False):
    """Train an online perceptron and record the mistakes made in each epoch.

    Every sample is visited once per epoch. Whenever
    ``y_i * (w . x_i + b) <= 0`` the parameters are updated with
    ``w += lr * y_i * x_i`` and ``b += lr * y_i``.

    Args:
        X: Array-like of shape (n_samples, n_features).
        y: Array-like holding n_samples labels; each label must be -1 or +1.
        epochs: Maximum number of passes over the data.
        lr: Step size applied to every update.
        shuffle: If True, visit the samples in a new random order each epoch.
        seed: Seed of the generator that draws the visiting order.
        early_stop: If True, stop right after the first epoch in which no
            mistake was made; ``history`` is then shorter than ``epochs``.
            The default (False) always runs all ``epochs`` passes.

    Returns:
        Tuple ``(w, b, history)``: the weight vector, the bias, and a list
        with the number of mistakes made in each completed epoch.

    Raises:
        ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` disagree on the
            number of samples, or if a label is neither -1 nor +1.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y).ravel()
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        raise ValueError(f"X has {n_samples} samples but y has {y.shape[0]} labels")
    # Labels such as 0/1 would not raise on their own: a 0 label always counts
    # as a mistake yet produces a zero update, so the result would be garbage.
    if not np.isin(y, (-1, 1)).all():
        raise ValueError("perceptron labels must be -1 or +1")

    rng = np.random.default_rng(seed)
    w = np.zeros(n_features)
    b = 0.0
    history = []
    for _ in range(epochs):
        # Start from a fresh 0..n-1 index array each epoch before shuffling.
        order = np.arange(n_samples)
        if shuffle:
            rng.shuffle(order)
        mistakes = 0
        for i in order:
            if y[i] * (np.dot(w, X[i]) + b) <= 0:
                w += lr * y[i] * X[i]
                b += lr * y[i]
                mistakes += 1
        history.append(mistakes)
        # A mistake-free epoch means (w, b) did not change during the pass and
        # every sample had a strictly positive margin: further passes are no-ops.
        if early_stop and mistakes == 0:
            break
    return w, b, history

def perceptron_predict(X, w, b):
    """Return the predicted label (+1 or -1) for every row of ``X``.

    Args:
        X: Array of samples, one per row.
        w: Weight vector of the separating hyperplane.
        b: Bias of the separating hyperplane.

    Returns:
        Array with the sign of ``X @ w + b`` per sample. A score of exactly
        zero is mapped to +1, so the output never contains the non-label 0
        that ``np.sign`` alone would produce for points on the boundary.
    """
    labels = np.sign(X @ w + b)
    # np.sign(0) == 0, which is not a valid class; break ties towards +1.
    return np.where(labels == 0, 1, labels)

### 2.1 Train on separable vs non-separable data

In [None]:
# Fit one perceptron per dataset with perceptron_train's default settings.
w_s, b_s, hist_s = perceptron_train(Z_sep, y_sep)
w_n, b_n, hist_n = perceptron_train(Z_non, y_non)

# Tail of each mistake history (the last five recorded epochs).
print("Final mistakes per epoch (separable):", hist_s[-5:])
print("Final mistakes per epoch (non-separable):", hist_n[-5:])

# Both learning curves in a single figure, saved to disk and then shown.
plt.figure()
for curve, name in ((hist_s, "separable"), (hist_n, "non-separable")):
    plt.plot(curve, label=name)
plt.xlabel("epoch")
plt.ylabel("# mistakes")
plt.legend()
plt.title("Perceptron mistakes per epoch")
plt.tight_layout()
plt.savefig("results/05_perceptron_mistakes.png", dpi=150)
plt.show()

## 3. Decision Boundaries

In [None]:
def plot_boundary(ax, X, y, w, b, title):
    """Scatter a 2-D dataset on ``ax`` and overlay the line ``w . x + b = 0``.

    Args:
        ax: Axes-like object providing ``scatter``, ``plot`` and ``set_title``.
        X: Array of shape (n_samples, 2); columns are the two plotted features.
        y: Label array; points are coloured by whether ``y > 0``.
        w: Weight vector with two components.
        b: Bias term.
        title: Title set on ``ax``.

    The boundary is drawn over the x-range of the data padded by 1 on each
    side. When ``|w[1]| <= 1e-8`` the boundary is vertical and is drawn at
    ``x = -b / w[0]`` over the padded y-range of the data instead of being
    skipped. If both weights are (near) zero there is no boundary to draw.
    """
    ax.scatter(X[:, 0], X[:, 1], c=(y > 0).astype(int), s=12, edgecolor='k', linewidth=0.2)
    if abs(w[1]) > 1e-8:
        xs = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200)
        ys = -(w[0] * xs + b) / w[1]
        ax.plot(xs, ys, 'k-')
    elif abs(w[0]) > 1e-8:
        # Vertical boundary: solving for y would divide by ~0, so solve for x.
        x0 = -b / w[0]
        ax.plot([x0, x0], [X[:, 1].min() - 1, X[:, 1].max() + 1], 'k-')
    ax.set_title(title)

# Side-by-side panels: the learned perceptron boundary on each dataset.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
panels = (
    (Z_sep, y_sep, w_s, b_s, "Separable (Perceptron)"),
    (Z_non, y_non, w_n, b_n, "Non-separable (Perceptron)"),
)
for panel_ax, panel_args in zip(axes, panels):
    plot_boundary(panel_ax, *panel_args)
plt.tight_layout()
plt.savefig("results/05_perceptron_boundaries.png", dpi=150)
plt.show()

## 4. Compare with Logistic Regression

In [None]:
# The logistic models are trained and scored on 0/1 targets, so map the
# -1/+1 perceptron labels back first.
t_sep = (y_sep > 0).astype(int)
t_non = (y_non > 0).astype(int)

log_sep = LogisticRegression()
log_sep.fit(Z_sep, t_sep)
log_non = LogisticRegression()
log_non.fit(Z_non, t_non)

# Training-set predictions and their accuracy.
pred_sep_lr = log_sep.predict(Z_sep)
pred_non_lr = log_non.predict(Z_non)
acc_sep_lr = accuracy_score(t_sep, pred_sep_lr)
acc_non_lr = accuracy_score(t_non, pred_non_lr)

print(f"Logistic Acc — separable: {acc_sep_lr:.3f}, non-separable: {acc_non_lr:.3f}")

In [None]:
def decision_line_logreg(clf, X):
    """Return 200 points ``(xs, ys)`` on the decision line of a 2-D linear model.

    Args:
        clf: Fitted model exposing ``coef_`` (first row holds the two
            weights) and ``intercept_`` (first entry is the bias).
        X: Array of shape (n_samples, 2) used only to choose the plot range.

    Returns:
        Tuple ``(xs, ys)`` of arrays with 200 entries each, satisfying
        ``w[0] * xs + w[1] * ys + b = 0``. Normally ``xs`` spans the x-range
        of the data padded by 1. If ``|w[1]| <= 1e-8`` the line is vertical:
        ``xs`` is constant at ``-b / w[0]`` and ``ys`` spans the padded
        y-range. If both weights are (near) zero no line exists and ``ys``
        is all NaN.
    """
    xs = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200)
    w = clf.coef_[0]
    b = clf.intercept_[0]
    if abs(w[1]) > 1e-8:
        return xs, -(w[0] * xs + b) / w[1]
    if abs(w[0]) > 1e-8:
        # Vertical line: dividing by w[1] would give inf/NaN, so fix x instead.
        x0 = -b / w[0]
        ys = np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200)
        # linspace with equal endpoints yields a constant array of the same length.
        return np.linspace(x0, x0, 200), ys
    # Degenerate weights: no finite point lies on a boundary.
    return xs, xs * np.nan

# Side-by-side panels: the logistic-regression decision line on each dataset.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
cases = (
    (log_sep, Z_sep, y_sep, "Separable (Logistic)"),
    (log_non, Z_non, y_non, "Non-separable (Logistic)"),
)
for panel_ax, (model, Z_cur, y_cur, caption) in zip(axes, cases):
    panel_ax.scatter(Z_cur[:, 0], Z_cur[:, 1], c=(y_cur > 0).astype(int),
                     s=12, edgecolor='k', linewidth=0.2)
    xs, ys = decision_line_logreg(model, Z_cur)
    panel_ax.plot(xs, ys, 'r-')
    panel_ax.set_title(caption)
plt.tight_layout()
plt.savefig("results/05_logreg_boundaries.png", dpi=150)
plt.show()

## 5. Margin Diagnostics (optional)
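The Perceptron is guaranteed to converge when the data are linearly separable with some margin \(\gamma > 0\). As a proxy for that margin we compute each point's signed distance to the learned hyperplane, \( y_i (w^\top x_i + b) / \lVert w \rVert \), and report its minimum and mean; a negative minimum means at least one training point is misclassified.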

In [None]:
def margin_proxy(X, y, w, b):
    """Return the smallest and the average signed margin of ``(w, b)`` on ``X``.

    Each sample's margin is ``y_i * (x_i . w + b)`` divided by
    ``||w|| + 1e-8``; the small constant keeps the division defined when
    ``w`` is all zeros. A negative margin marks a misclassified sample.

    Returns:
        Tuple ``(min_margin, mean_margin)``.
    """
    scale = np.linalg.norm(w) + 1e-8
    signed_scores = y * (X @ w + b)
    per_sample = signed_scores / scale
    return per_sample.min(), per_sample.mean()

# Evaluate the margin proxy of each trained perceptron on its own training set.
sep_stats = margin_proxy(Z_sep, y_sep, w_s, b_s)
non_stats = margin_proxy(Z_non, y_non, w_n, b_n)
m_min_sep, m_mean_sep = sep_stats
m_min_non, m_mean_non = non_stats
print(f"Margin proxy — separable: min={m_min_sep:.3f}, mean={m_mean_sep:.3f}")
print(f"Margin proxy — non-separable: min={m_min_non:.3f}, mean={m_mean_non:.3f}")

## 6. Takeaways
- Perceptron **converges** on linearly separable data; on non-separable data, mistakes persist.
- Logistic regression provides a **probabilistic linear** baseline and often handles slight overlap better.
- Visualizing boundaries highlights **linear separability limits** — sets up the move to MLP/CNN.