Task 2: Implement a multi-class perceptron algorithm
Implement (from scratch) a multi-class perceptron training algorithm. Essentially, a multi-class perceptron
represents 10 independent linear classifiers that will each try to predict whether a digit belongs to a particular
class. The weight update rules can be found in Chapter 6.1 of the "Understanding Deep Learning" textbook.
Specifically, refer to equations 6.3 and 6.7.

In [1]:
from threading import activeCount

# Import packages

import numpy as np
import pandas as pd

In [2]:
# Load in data
#
# All four CSV files are read without a header row (header=None).
# The two training frames are transposed right after loading, the two test
# frames are not, so the splits end up in different orientations:
#   - train_in is (256, 1707) according to the shape displayed further down.
#   - NOTE(review): test_in / test_out keep the on-disk layout; they presumably
#     need a transpose before being used with code written for the training
#     layout (Perceptron.predict transposes its input itself) - confirm.

train_in = pd.read_csv("../../data/train_in.csv", header=None).T
test_in = pd.read_csv("../../data/test_in.csv", header=None)
train_out = pd.read_csv("../../data/train_out.csv", header=None).T
test_out = pd.read_csv("../../data/test_out.csv", header=None)

In [3]:
# Activation functions (ReLu and softmax are defined below)

# Empty dict used only by the commented-out grouping loop at the end of this
# cell; no active code in this cell writes to it.
combined = {}

def ReLu(x):
    """Rectified linear unit.

    Entries of ``x`` that are strictly greater than zero are kept, every
    other entry is replaced by 0. The result has the same shape as ``x``.
    """
    is_positive = x > 0
    rectified = np.where(is_positive, x, 0)
    return rectified

def softmax(a, axis=None):
    """Numerically stable softmax.

    Args:
        a: array-like of scores.
        axis: axis along which the outputs should sum to 1. The default
            ``None`` normalises over the whole array.

    Returns:
        Array with the same shape as ``a`` whose entries along ``axis``
        are non-negative and sum to 1.
    """
    a = np.asarray(a)
    if a.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(a)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps np.exp from overflowing to inf (which would turn the output into NaN).
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

train_in.shape  # last expression of the cell, so the notebook displays it
# Disabled: would group the input vectors by digit label into `combined`
# (digit -> list of vectors).
# NOTE(review): train_in is loaded transposed, shape (256, 1707), so iterating
# train_in.values walks the 256 rows rather than the 1707 samples - fix the
# orientation before re-enabling this loop.
# for matrix, digit in zip(train_in.values, train_out.values.ravel()):
#     combined.setdefault(digit, []).append(matrix)

(256, 1707)

In [3]:
class Perceptron():
    """Multi-class linear classifier trained one sample at a time.

    One linear scorer per class is stored as a column of ``weights`` together
    with one entry of ``bias``. Training one-hot encodes the labels and, for
    every sample, moves the parameters against the gradient of the squared
    error ``0.5 * sum((scores - one_hot) ** 2)``. Prediction returns the class
    whose scorer gives the highest value.

    Data layout: ``X`` is feature-major, i.e. shape ``(n_features, n_samples)``
    with one sample per column, for both ``fit`` and ``predict``.

    Attributes:
        lr: step size of each update.
        iterations: number of passes over the training set.
        activation: unused placeholder, kept so existing code that reads it
            keeps working.
        weights: ``(n_features, n_classes)`` array, ``None`` until ``fit`` runs.
        bias: ``(n_classes,)`` array, ``None`` until ``fit`` runs.
        classes_: sorted unique labels seen by the last ``fit``; column ``k``
            of ``weights`` scores ``classes_[k]``.
    """

    def __init__(self, lr=0.05, iterations=10):
        self.lr = lr
        self.iterations = iterations
        self.activation = None
        self.weights = None
        self.bias = None
        self.classes_ = None

    def fit(self, X, y):
        """Train on ``X`` (``n_features x n_samples``) with labels ``y``.

        Args:
            X: 2-D array-like, one sample per column.
            y: array-like holding one label per sample. Any shape is accepted
                as long as it flattens to ``n_samples`` entries (for example a
                ``(1, n_samples)`` frame). Labels do not have to be ``0..K-1``.

        Raises:
            ValueError: if ``X`` is not 2-D or the number of labels does not
                match the number of columns of ``X``.

        Progress (loss summed over the epoch and training accuracy) is printed
        on every 10th epoch and on the last one.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_features, n_samples), got shape {X.shape}")
        n_features, n_samples = X.shape
        print(f"Interpreted as: {n_samples} samples, {n_features} features")

        labels = np.asarray(y).ravel()
        if labels.shape[0] != n_samples:
            raise ValueError(
                f"got {labels.shape[0]} labels for {n_samples} samples"
            )
        # Map every label to its position among the sorted unique labels so the
        # one-hot encoding works for any label set, not just 0..K-1.
        self.classes_, class_idx = np.unique(labels, return_inverse=True)
        class_idx = class_idx.reshape(-1)
        n_classes = len(self.classes_)
        Y = np.zeros((n_samples, n_classes))
        Y[np.arange(n_samples), class_idx] = 1

        print(X.shape)
        # Existing weights are kept so that calling fit again continues training.
        if self.weights is None:
            self.weights = np.random.randn(n_features, n_classes) * 0.01
            self.bias = np.zeros(n_classes)
        elif self.bias is None:
            self.bias = np.zeros(n_classes)

        for epoch in range(self.iterations):
            epoch_loss = 0.0
            # Samples are the columns of X, so index by column.
            for idx in range(n_samples):
                y_true = Y[idx]
                x_i = X[:, idx]
                scores = np.dot(x_i, self.weights) + self.bias
                error = scores - y_true
                epoch_loss += 0.5 * np.sum(error**2)

                # Gradient of the squared error w.r.t. weights is outer(x, error),
                # w.r.t. bias it is error itself.
                self.weights -= self.lr * np.outer(x_i, error)
                self.bias -= self.lr * error

            if epoch % 10 == 0 or epoch == self.iterations - 1:
                preds = self.predict(X)
                acc = np.mean(preds == labels)
                print(f"Epoch {epoch+1}/{self.iterations} loss={epoch_loss:.4f} acc={acc:.4f}")

    def predict(self, X):
        """Return the predicted label for every column of ``X``.

        Args:
            X: 2-D array-like of shape ``(n_features, n_samples)``.

        Returns:
            1-D array with one label per sample. Labels come from ``classes_``
            when available, otherwise the raw column index is returned.

        Raises:
            RuntimeError: if the model has no weights yet.
        """
        if self.weights is None:
            raise RuntimeError("Perceptron.predict called before fit: weights are not initialised")
        scores = np.dot(np.asarray(X).T, self.weights) + self.bias
        best = np.argmax(scores, axis=1)
        if self.classes_ is None:
            return best
        return self.classes_[best]

In [5]:
# Seed NumPy's global RNG: Perceptron.fit draws its initial weights from
# np.random, so without a seed the training log changes on every run.
np.random.seed(42)

# NOTE: despite the variable name, this is the single-layer Perceptron class
# defined above, not a multi-layer network.
mlp = Perceptron(lr=0.0005, iterations=100)
# train_out was transposed to a single-row frame on load; pass the labels as a
# flat 1-D array so label indexing and the accuracy comparison are elementwise.
mlp.fit(train_in.values, train_out.values.ravel())


Interpreted as: 1707 samples, 256 features
(256, 1707)
Epoch 1/100 loss=372.5967 acc=0.8992
Epoch 11/100 loss=232.0332 acc=0.9315
Epoch 21/100 loss=222.6196 acc=0.9391
Epoch 31/100 loss=218.0155 acc=0.9426
Epoch 41/100 loss=215.2326 acc=0.9455
Epoch 51/100 loss=213.3646 acc=0.9467
Epoch 61/100 loss=212.0248 acc=0.9467
Epoch 71/100 loss=211.0182 acc=0.9479
Epoch 81/100 loss=210.2354 acc=0.9479
Epoch 91/100 loss=209.6103 acc=0.9490
Epoch 100/100 loss=209.1471 acc=0.9496
