In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Read the four CSV files: inputs and labels for the train and test splits.
# header=None: the first row of every file is data, not column names.
train_in, test_in, train_out, test_out = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        "../../data/train_in.csv",
        "../../data/test_in.csv",
        "../../data/train_out.csv",
        "../../data/test_out.csv",
    )
)

In [None]:
# Convert the loaded frames to NumPy arrays: float64 feature matrices and
# flattened (1-D) int64 label vectors.
X_train, X_test = (
    features.to_numpy(dtype=np.float64) for features in (train_in, test_in)
)
y_train, y_test = (
    labels.to_numpy(dtype=np.int64).ravel() for labels in (train_out, test_out)
)

In [None]:
# --- Model and training configuration ---------------------------------------
num_classes = 10  # number of output units (one per class)

# Dataset dimensions read off the training matrix: rows are examples,
# columns are features.
no_of_examples, num_features = X_train.shape

# Hyperparameters of the gradient-descent loop.
epochs = 100
lr = 0.01

# Parameters start as small Gaussian noise (mean 0, std 0.01). The fixed seed
# makes the initialisation reproducible; W is drawn before b so the order in
# which the random stream is consumed stays the same.
rng = np.random.default_rng(2)
W = rng.normal(loc=0, scale=0.01, size=(num_features, num_classes)).astype(np.float64)
b = rng.normal(loc=0, scale=0.01, size=(num_classes,)).astype(np.float64)

# Per-epoch training metrics; the training loop appends one value per epoch.
train_acc_hist = []
train_loss_hist = []

def softmax(x, axis=0):
    """Numerically stable softmax along ``axis``.

    Parameters
    ----------
    x : array_like
        Scores. A 1-D vector is normalised as a whole; for higher-rank input
        each slice along ``axis`` is normalised independently.
    axis : int, default 0
        Axis over which the outputs sum to 1. The default matches the
        previous behaviour of this function.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift by the maximum of each slice (not the global maximum): softmax is
    # invariant to a per-slice shift, and this keeps the largest exponent at
    # exactly 0 in every slice, so no slice can underflow to all zeros and
    # produce 0/0.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)

# Full-batch gradient descent for a single-layer softmax classifier.
# Per-example loss: sum over classes of (softmax probability - one-hot target)^2.
# Every epoch sweeps all training examples with the parameters held fixed,
# accumulates the per-example gradients, and then takes ONE step along the
# averaged gradient. The logged loss/accuracy therefore describe the parameters
# as they were *before* that epoch's update.
for epoch in range(1, epochs + 1):

    total_epoch_loss = 0.0
    correct_per_epoch = 0

    # Gradient accumulators, shaped like the parameters they update.
    sum_grad_W = np.zeros_like(W)
    sum_grad_b = np.zeros_like(b)

    for example in range(no_of_examples):
        x = X_train[example]        # (num_features,)
        y_true = y_train[example]   # integer class label

        scores = b + W.T @ x                # (num_classes,)
        probabilities = softmax(scores)     # (num_classes,), sums to 1

        prediction = np.argmax(probabilities)
        if prediction == y_true:
            correct_per_epoch += 1

        # One-hot encoding of the target label.
        y_vec = np.zeros(num_classes, dtype=np.float64)
        y_vec[y_true] = 1.0

        class_wise_error = probabilities - y_vec

        example_loss = np.sum(class_wise_error ** 2)
        total_epoch_loss += float(example_loss)

        # Backpropagate the squared-error loss through the softmax.
        #   dL/dp_k   = 2 * (p_k - y_k)
        #   dp_k/dz_j = p_k * (delta_kj - p_j)          (softmax Jacobian)
        #   dL/dz_j   = p_j * (dL/dp_j - sum_k p_k * dL/dp_k)
        # Using dL/dp directly as the score gradient would skip the Jacobian
        # and no longer be the gradient of the loss that is being reported.
        grad_probs = 2 * class_wise_error
        grad_scores = probabilities * (grad_probs - probabilities @ grad_probs)

        # scores = b + W.T @ x  =>  dz_j/db_j = 1 and dz_j/dW_ij = x_i.
        grad_b = grad_scores
        grad_W = np.outer(x, grad_scores)

        sum_grad_W += grad_W
        sum_grad_b += grad_b

    # One parameter update per epoch, using the mean gradient over the dataset.
    # NOTE(review): the exact squared-error gradient is typically smaller in
    # magnitude than 2 * (p - y); lr / epochs may need retuning.
    W -= lr * (sum_grad_W / no_of_examples)
    b -= lr * (sum_grad_b / no_of_examples)

    mean_loss = total_epoch_loss / no_of_examples
    epoch_accuracy = correct_per_epoch / no_of_examples

    train_acc_hist.append(epoch_accuracy)
    train_loss_hist.append(mean_loss)

    # Log before the early-stop check so the final epoch is reported too.
    print(f"Epoch {epoch:02d} | train loss: {mean_loss:.4f} | train acc: {epoch_accuracy:.4f}")

    # Stop once every training example is classified correctly (integer
    # comparison avoids relying on exact float equality).
    if correct_per_epoch == no_of_examples:
        break

In [None]:
# Training curves, one figure per metric. The x-axis is numbered from 1 so it
# lines up with the epoch numbers printed by the training loop, which counts
# epochs starting at 1.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(range(1, len(train_acc_hist) + 1), train_acc_hist, label="train")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_title("Accuracy per epoch")
ax_acc.grid(True)
ax_acc.legend()

fig_loss, ax_loss = plt.subplots()
ax_loss.plot(range(1, len(train_loss_hist) + 1), train_loss_hist, label="train")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("MSE (probs vs one-hot)")
ax_loss.set_title("Loss per epoch")
ax_loss.grid(True)
ax_loss.legend()

plt.show()