# Assignment 1: Perceptron vs Adaline and the XOR Problem

Foundational Models – Perceptron vs. Adaline and the XOR Problem.  
All implementations use **NumPy only**; data in **bipolar format** (-1 and +1).

## Step 1: Dataset creation (bipolar format)

AND, OR, and XOR truth tables with inputs and targets in **bipolar** form: -1 and +1 only.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Every gate is encoded in bipolar form: inputs and targets are -1 or +1, never 0.
# The four input patterns are identical for all gates; only the targets differ.

# AND is +1 only for the pattern (1, 1).
X_and = np.array([[1, 1], [1, -1], [-1, 1], [-1, -1]], dtype=np.float64)
y_and = np.array([1, -1, -1, -1], dtype=np.float64)

# OR is +1 for every pattern except (-1, -1).
X_or = X_and.copy()
y_or = np.array([1, 1, 1, -1], dtype=np.float64)

# XOR is +1 only when the two inputs differ.
X_xor = X_and.copy()
y_xor = np.array([-1, 1, 1, -1], dtype=np.float64)

# Echo each truth table; the later headers start with a blank line as a separator.
for header, inputs, targets in (
    ("AND:", X_and, y_and),
    ("\nOR:", X_or, y_or),
    ("\nXOR:", X_xor, y_xor),
):
    print(header)
    print("X =", inputs)
    print("y =", targets)

AND:
X = [[ 1.  1.]
 [ 1. -1.]
 [-1.  1.]
 [-1. -1.]]
y = [ 1. -1. -1. -1.]

OR:
X = [[ 1.  1.]
 [ 1. -1.]
 [-1.  1.]
 [-1. -1.]]
y = [ 1.  1.  1. -1.]

XOR:
X = [[ 1.  1.]
 [ 1. -1.]
 [-1.  1.]
 [-1. -1.]]
y = [-1.  1.  1. -1.]


## Step 2: EDA – Linear (in)separability plots

2D scatter: x1 vs x2, coloured by target (+1 vs -1). AND and OR are linearly separable; XOR is not.

In [2]:
def plot_dataset(X, y, title, ax):
    """Draw the rows of X on ``ax`` as one scatter series per bipolar class.

    Column 0 of X is plotted on the horizontal axis (x1) and column 1 on the
    vertical axis (x2). Rows whose target equals +1 are drawn as 'C0'
    circles and rows whose target equals -1 as 'C1' squares. The axes then
    get labels, the given title, a legend, a faint grid, an equal aspect
    ratio and the fixed window [-1.5, 1.5] on both axes.
    """
    # (target value, colour, marker, legend label) for each class, +1 first.
    series = (
        (1, 'C0', 'o', '+1'),
        (-1, 'C1', 's', '-1'),
    )
    for target, colour, marker, label in series:
        members = y == target
        ax.scatter(X[members, 0], X[members, 1], c=colour, marker=marker, s=80,
                   label=label, edgecolors='k')

    ax.set_xlabel('x1')
    ax.set_ylabel('x2')
    ax.set_title(title)
    ax.legend()

    # Fixed window with margin around the +/-1 corners, drawn undistorted.
    ax.set_xlim(-1.5, 1.5)
    ax.set_ylim(-1.5, 1.5)
    ax.grid(True, alpha=0.3)
    ax.set_aspect('equal')

# One row of three panels, left to right: AND, OR, XOR.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
plot_dataset(X=X_and, y=y_and, title='AND – linearly separable', ax=axes[0])
plot_dataset(X=X_or, y=y_or, title='OR – linearly separable', ax=axes[1])
plot_dataset(X=X_xor, y=y_xor, title='XOR – not linearly separable', ax=axes[2])
plt.tight_layout()
plt.show()

## Step 3: Perceptron (from scratch)

- **Update rule:** Δw_i = α * x_i * t only when predicted output ≠ target t.
- **Activation:** binary step: output = +1 if net ≥ 0, else -1.

In [3]:
class Perceptron:
    """Single-layer perceptron for bipolar (-1/+1) targets.

    The output is a hard threshold on the net input. Weights change only
    for misclassified samples, using the rule Δw_i = α * x_i * t.

    Attributes:
        w: weight vector of length n_features + 1; w[0] is the bias weight.
        alpha: learning rate α.
        max_epochs: maximum number of passes over the training data.
    """

    def __init__(self, n_features, learning_rate=0.1, max_epochs=100):
        self.alpha = learning_rate
        self.max_epochs = max_epochs
        # Slot 0 holds the bias; inputs are prefixed with a constant 1 to match.
        self.w = np.zeros(n_features + 1)

    @staticmethod
    def step(net):
        """Return +1.0 where net is >= 0 and -1.0 everywhere else."""
        return np.where(net >= 0, 1.0, -1.0)

    def fit(self, X, y):
        """Train the weights in place, one sample at a time.

        Training stops after the first epoch with no misclassification, or
        after ``max_epochs`` epochs, whichever comes first.

        Returns:
            Array holding the number of misclassified samples in each epoch
            that was run.
        """
        samples = np.column_stack([np.ones(len(X)), X])  # leading 1 feeds the bias
        errors_per_epoch = []
        for _ in range(self.max_epochs):
            n_wrong = 0
            for i, row in enumerate(samples):
                target = y[i]
                if self.step(np.dot(self.w, row)) == target:
                    continue  # correctly classified: weights stay as they are
                # The update uses the target t itself, not the error (t - output).
                self.w += self.alpha * target * row
                n_wrong += 1
            errors_per_epoch.append(n_wrong)
            if n_wrong == 0:
                break
        return np.array(errors_per_epoch)

    def predict(self, X):
        """Return the thresholded output (+1.0 or -1.0) for every row of X."""
        samples = np.column_stack([np.ones(len(X)), X])
        return self.step(samples @ self.w)

In [4]:
# Fit one perceptron per linearly separable gate (AND, OR), then report how many
# epochs each run took and what each model predicts on its training inputs.
perc_and = Perceptron(2, learning_rate=0.1, max_epochs=100)
perc_or = Perceptron(2, learning_rate=0.1, max_epochs=100)
hist_and = perc_and.fit(X=X_and, y=y_and)
hist_or = perc_or.fit(X=X_or, y=y_or)

# The history holds one entry per epoch, so its size is the epoch count.
print('Perceptron AND: converged in', hist_and.size, 'epochs')
print('Perceptron OR: converged in', hist_or.size, 'epochs')
print('AND predictions:', perc_and.predict(X=X_and))
print('OR predictions:', perc_or.predict(X=X_or))

Perceptron AND: converged in 3 epochs
Perceptron OR: converged in 3 epochs
AND predictions: [ 1. -1. -1. -1.]
OR predictions: [ 1.  1.  1. -1.]


In [5]:
def plot_decision_boundary(ax, w, xlim=(-1.5, 1.5), ylim=(-1.5, 1.5)):
    """Draw the line w0 + w1*x1 + w2*x2 = 0 on ``ax`` as a dashed black line.

    Args:
        ax: axes-like object; only its ``plot`` method is used.
        w: weights ordered (bias w0, w1, w2).
        xlim: (min, max) range of x1 over which a non-vertical boundary is
            sampled.
        ylim: (min, max) range of x2 spanned by a vertical boundary.

    When w2 is not ~0 the boundary is drawn as x2 = -(w0 + w1*x1) / w2.
    When w2 is ~0 the boundary cannot be written as a function of x1: if w1
    is non-zero it is the vertical line x1 = -w0 / w1, drawn across ``ylim``;
    if w1 is ~0 as well, the weights define no line and nothing is drawn.
    """
    w0, w1, w2 = w[0], w[1], w[2]
    eps = 1e-9  # magnitudes below this are treated as zero

    if np.abs(w2) < eps:
        if np.abs(w1) < eps:
            return  # degenerate weights: no boundary to draw
        x_vertical = -w0 / w1
        ax.plot([x_vertical, x_vertical], [ylim[0], ylim[1]],
                'k--', linewidth=2, label='Decision boundary')
        return

    x1 = np.linspace(xlim[0], xlim[1], 100)
    x2 = -(w0 + w1 * x1) / w2
    ax.plot(x1, x2, 'k--', linewidth=2, label='Decision boundary')

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# Left panel: AND samples with the line learned by the AND perceptron.
plot_dataset(X=X_and, y=y_and, title='AND – Perceptron decision boundary', ax=axes[0])
plot_decision_boundary(ax=axes[0], w=perc_and.w)
axes[0].legend()  # rebuild the legend so it also lists the boundary line

# Right panel: the same for OR.
plot_dataset(X=X_or, y=y_or, title='OR – Perceptron decision boundary', ax=axes[1])
plot_decision_boundary(ax=axes[1], w=perc_or.w)
axes[1].legend()

plt.tight_layout()
plt.show()

## Step 4: Adaline (from scratch)

- **Update rule:** Δw_i = α * (t - y_in) * x_i, where y_in = w^T x (net input, no step in update).
- **Training:** linear activation (output = y_in); MSE computed on y_in vs t.
- **Classification:** threshold: class +1 if y_in ≥ 0, else -1.

In [6]:
class Adaline:
    """Adaline trained with the Widrow-Hoff (delta) rule.

    During training the unit is linear: the error is measured on the net
    input y_in = w^T x, not on a thresholded output. Thresholding is applied
    only in ``predict``.

    Attributes:
        w: weight vector of length n_features + 1; w[0] is the bias weight.
        alpha: learning rate α.
        max_epochs: maximum number of passes over the training data.
    """

    def __init__(self, n_features, learning_rate=0.1, max_epochs=100, random_state=None):
        n_weights = n_features + 1
        if random_state is not None:
            # Seeded start: small Gaussian weights (standard normal scaled by 0.01).
            rng = np.random.RandomState(random_state)
            self.w = rng.randn(n_weights) * 0.01
        else:
            # No seed given: start from all-zero weights.
            self.w = np.zeros(n_weights)
        self.alpha = learning_rate
        self.max_epochs = max_epochs

    def fit(self, X, y):
        """Train the weights in place with per-sample (online) updates.

        Each sample applies Δw_i = α * (t - y_in) * x_i. After every epoch
        the mean squared error between y and the net inputs is recorded;
        training stops early once that value drops below 1e-6.

        Returns:
            Array with the MSE measured at the end of each epoch that ran.
        """
        samples = np.column_stack([np.ones(len(X)), X])  # leading 1 feeds the bias
        mse_history = []
        for _ in range(self.max_epochs):
            for i, row in enumerate(samples):
                residual = y[i] - np.dot(self.w, row)
                self.w += self.alpha * residual * row
            # Score the epoch with the weights as they stand after the last sample.
            epoch_mse = np.mean((y - samples @ self.w) ** 2)
            mse_history.append(epoch_mse)
            if epoch_mse < 1e-6:
                break
        return np.array(mse_history)

    def predict(self, X):
        """Classify every row of X: +1.0 where the net input is >= 0, else -1.0."""
        samples = np.column_stack([np.ones(len(X)), X])
        net_input = samples @ self.w
        return np.where(net_input >= 0, 1.0, -1.0)

In [7]:
# Fit Adaline on AND and OR from all-zero weights (random_state is left at None)
# and keep the per-epoch MSE curves for plotting. A linear output cannot hit all
# four +/-1 targets exactly, so the MSE settles above zero and the 1e-6 early
# stop is not reached.
ada_and = Adaline(2, learning_rate=0.1, max_epochs=500)
ada_or = Adaline(2, learning_rate=0.1, max_epochs=500)
mse_and = ada_and.fit(X=X_and, y=y_and)
mse_or = ada_or.fit(X=X_or, y=y_or)

# One MSE entry is stored per epoch, so the history size is the epoch count.
print('Adaline AND: final MSE =', mse_and[-1], ', epochs =', mse_and.size)
print('Adaline OR: final MSE =', mse_or[-1], ', epochs =', mse_or.size)
print('AND predictions:', ada_and.predict(X=X_and))
print('OR predictions:', ada_or.predict(X=X_or))

Adaline AND: final MSE = 0.25432525951557095 , epochs = 500
Adaline OR: final MSE = 0.25432525951557095 , epochs = 500
AND predictions: [ 1. -1. -1. -1.]
OR predictions: [ 1.  1.  1. -1.]


In [8]:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# (MSE history, line format, panel title) for the left and right panels.
curves = (
    (mse_and, 'C0-', 'Adaline on AND – MSE vs epoch'),
    (mse_or, 'C1-', 'Adaline on OR – MSE vs epoch'),
)
for panel, (curve, line_format, heading) in zip(axes, curves):
    panel.plot(curve, line_format)
    panel.set_xlabel('Epoch')
    panel.set_ylabel('MSE')
    panel.set_title(heading)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Step 5: XOR – demonstrating the limitation

Train the Perceptron and Adaline on XOR. No single line separates the two XOR classes, so neither model can fit the data. This section records their training behaviour; the report covers the inequality contradictions and the decision-boundary equation.

In [9]:
# XOR with a perceptron: training only stops after an epoch with zero mistakes,
# so the per-epoch mistake counts show whether it ever got there.
perc_xor = Perceptron(2, learning_rate=0.1, max_epochs=500)
hist_xor = perc_xor.fit(X=X_xor, y=y_xor)

print('Perceptron on XOR: misclassifications per epoch (last 10):', hist_xor[-10:])
print('Never converges: misclassifications remain > 0.')

Perceptron on XOR: misclassifications per epoch (last 10): [4 4 4 4 4 4 4 4 4 4]
Never converges: misclassifications remain > 0.


In [10]:
# Misclassification count per epoch for the perceptron trained on XOR.
plt.figure(figsize=(6, 4))
plt.plot(hist_xor)
plt.gca().set(
    xlabel='Epoch',
    ylabel='Misclassifications',
    title='Perceptron on XOR – does not converge',
)
plt.grid(True, alpha=0.3)
plt.show()

In [11]:
# XOR with Adaline: a single linear unit cannot separate the classes, so the MSE
# stays well above zero. A seed is passed so training starts from small random
# weights rather than from zeros.
ada_xor = Adaline(2, learning_rate=0.1, max_epochs=500, random_state=44)
mse_xor = ada_xor.fit(X=X_xor, y=y_xor)

print('Adaline on XOR: final weights (bias, w1, w2) =', ada_xor.w)
# The three weights are unpacked in order: bias, w1, w2.
print('Decision boundary: {:.4f} + {:.4f}*x1 + {:.4f}*x2 = 0'.format(*ada_xor.w))
print('Final MSE =', mse_xor[-1])

Adaline on XOR: final weights (bias, w1, w2) = [-1.38777878e-17  5.88235294e-02  1.17647059e-01]
Decision boundary: -0.0000 + 0.0588*x1 + 0.1176*x2 = 0
Final MSE = 1.0173010380622838


In [12]:
# First figure: MSE per epoch for Adaline trained on XOR.
plt.figure(figsize=(6, 4))
plt.plot(mse_xor)
plt.gca().set(
    xlabel='Epoch',
    ylabel='MSE',
    title='Adaline on XOR – MSE does not converge to zero',
)
plt.grid(True, alpha=0.3)
plt.show()

# Second figure: the XOR samples with the line Adaline ended up with, which
# cannot put the two classes on opposite sides.
fig, ax = plt.subplots(figsize=(5, 5))
plot_dataset(X=X_xor, y=y_xor, title='XOR – Adaline decision boundary (cannot separate)', ax=ax)
plot_decision_boundary(ax=ax, w=ada_xor.w)
ax.legend()  # rebuild the legend so it also lists the boundary line
plt.tight_layout()
plt.show()

## Step 6: Multi-layer network for XOR (conceptual + forward pass)

2–2–1 architecture: 2 inputs + bias, 2 hidden (step), 1 output (step). Weights set manually so the network computes XOR (no learning). See report for diagram and weight justification.

In [13]:
def step(z):
    """Bipolar step activation: +1.0 where z is >= 0, -1.0 everywhere else."""
    fires = z >= 0
    return np.where(fires, 1.0, -1.0)

# Hand-picked weights; nothing here is trained. See report for full derivation.
# Each hidden row is ordered (bias, weight on x1, weight on x2):
#   H1 acts as AND: its net input is >= 0 only for the pattern (1, 1).
#   H2 acts as OR:  its net input is <  0 only for the pattern (-1, -1).
W_h = np.array(
    [
        [-1.5, 1.0, 1.0],  # H1: AND
        [0.5, 1.0, 1.0],   # H2: OR
    ],
    dtype=np.float64,
)
# Output unit, ordered (bias, H1, H2): step(-1 - H1 + H2) is +1 only when
# H1 = -1 and H2 = +1, i.e. OR fires while AND does not, which is XOR.
W_out = np.array([-1.0, -1.0, 1.0], dtype=np.float64)

def xor_forward(X):
    """Run the fixed 2-2-1 step network on every row of X.

    Each layer prepends a constant-1 bias column to its input, multiplies by
    the module-level weights (``W_h`` for the hidden layer, ``W_out`` for the
    output unit) and applies ``step``. Returns one +1.0/-1.0 value per row.
    """
    bias = np.ones((len(X), 1))
    # Hidden layer: one column per hidden unit, shape (n, 2).
    hidden = step(np.column_stack([bias, X]) @ W_h.T)
    # Output unit sees (bias, H1, H2).
    return step(np.column_stack([bias, hidden]) @ W_out)

# Run the hand-wired network on the four XOR patterns and compare with the targets.
xor_pred = xor_forward(X=X_xor)
print('Multi-layer XOR predictions:', xor_pred)
print('Targets:              ', y_xor)
print('Match:', np.allclose(xor_pred, y_xor))

Multi-layer XOR predictions: [-1.  1.  1. -1.]
Targets:               [-1.  1.  1. -1.]
Match: True
