In [7]:
import pickle
import os
import pandas as pd
import numpy as np

In [8]:
# Pickled train/test splits, addressed relative to the working directory.
train_file = "./kaggle/input/fii-nn-2025-homework-2/extended_mnist_train.pkl"
test_file = "./kaggle/input/fii-nn-2025-homework-2/extended_mnist_test.pkl"


def _read_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it."""
    # NOTE(review): unpickling can execute code embedded in the file, so this
    # should only ever be pointed at trusted data.
    with open(path, "rb") as fp:
        return pickle.load(fp)


train = _read_pickle(train_file)
test = _read_pickle(test_file)

In [9]:
# Walk the (image, label) pairs once, flattening each image, then split the
# result into two parallel lists: flattened images and their labels.
flattened_pairs = [(image.flatten(), label) for image, label in train]
train_data = [pixels for pixels, _ in flattened_pairs]
train_labels = [digit for _, digit in flattened_pairs]

In [10]:
# Only the flattened images are kept for the test split; the second element
# of each pair is ignored.
test_data = [image.flatten() for image, _ in test]

In [None]:
# Divisor used to rescale pixel intensities (assumes 0-255 grayscale values,
# which would map the features into the [0, 1] range).
PIXEL_MAX = 255.0

# Turn the Python lists into NumPy arrays, scaling the image data on the way.
X_train = np.array(train_data) / PIXEL_MAX
y_train = np.array(train_labels)
X_test = np.array(test_data) / PIXEL_MAX

def one_hot(y, num_classes):
    """One-hot encode a 1-D sequence of integer class labels.

    For label ``n`` the corresponding row is all zeros except for a 1 at
    column ``n``.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Class indices in ``[0, num_classes)``. Integer arrays, plain lists and
        float arrays holding whole numbers are accepted.
    num_classes : int
        Width of the encoding (number of columns).

    Returns
    -------
    numpy.ndarray of shape (n_samples, num_classes), dtype float64

    Raises
    ------
    ValueError
        If ``y`` is not one-dimensional or contains non-integral values.
    IndexError
        If any label lies outside ``[0, num_classes)``. Negative labels are
        rejected explicitly because NumPy indexing would otherwise wrap them
        around to the last columns silently.
    """
    labels = np.asarray(y)
    if labels.ndim != 1:
        raise ValueError("y must be a 1-D sequence of class labels")

    if not np.issubdtype(labels.dtype, np.integer):
        # Allow e.g. float labels such as 3.0, but refuse to truncate 3.5.
        as_int = labels.astype(np.intp)
        if not np.array_equal(as_int, labels):
            raise ValueError("y must contain integer class labels")
        labels = as_int

    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise IndexError(
            f"labels must lie in [0, {num_classes}); "
            f"got values from {labels.min()} to {labels.max()}"
        )

    one_hot_y = np.zeros((labels.shape[0], num_classes))
    # row = sample, col = class index; one vectorised assignment for all rows
    one_hot_y[np.arange(labels.shape[0]), labels] = 1
    return one_hot_y

class Perceptron:
    """Single-layer softmax classifier trained with full-batch gradient steps.

    The model computes ``softmax(X @ W + b)`` and, on every epoch, moves ``W``
    and ``b`` in the direction of ``(target - prediction)`` averaged over all
    samples.

    Attributes
    ----------
    W : numpy.ndarray of shape (n_inputs, n_outputs)
        Weight matrix.
    b : numpy.ndarray of shape (n_outputs,)
        Bias vector.
    lr : float
        Learning rate applied to every update.
    """

    def __init__(self, n_inputs, n_outputs, learning_rate=0.1):
        """Create the model with small random weights and biases.

        Values are drawn from NumPy's global random state (``np.random``),
        so seed it beforehand for reproducible initialisation.
        """
        # weight initialized with small random values
        self.W = np.random.randn(n_inputs, n_outputs) * 0.01
        # bias initialized with small random values
        self.b = np.random.randn(n_outputs) * 0.01
        # learning rate
        self.lr = learning_rate

    def _softmax(self, z):
        """Row-wise softmax of a 2-D array of logits.

        The row maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        to ``inf`` (and the division from producing NaN) for large logits.
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, X):
        """Return class probabilities for ``X``, one row per sample."""
        # @ = matrix multiplication
        # z = XW + b
        z = X @ self.W + self.b
        # apply softmax activation
        return self._softmax(z)

    def fit(self, X, y, epochs=150):
        """Train on ``X`` / ``y`` for ``epochs`` full passes over the data.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, n_inputs)
        y : array of integer class labels, one per row of ``X``
        epochs : int
            Number of full-batch updates to perform.

        Raises
        ------
        ValueError
            If ``X`` contains no samples (the averaged gradient would be a
            division by zero).

        Progress (cross-entropy loss) is printed every 10th epoch.
        """
        # number of samples
        num_samples = X.shape[0]
        if num_samples == 0:
            raise ValueError("X must contain at least one sample")
        # The encoding width must match the model's output layer. Counting the
        # distinct labels in y would be wrong whenever a class is missing
        # from the training labels.
        num_classes = self.W.shape[1]
        y_one_hot = one_hot(y, num_classes)
        print("TRAINING: START!")
        # pass through the dataset EPOCHS times
        for epoch in range(epochs):
            # forward pass, predicted probabilities
            y_predicted_probabilities = self.forward(X)
            # y_one_hot is the expected output (Target)
            # y_predicted_probabilities is the predicted output (y)
            error_term = y_one_hot - y_predicted_probabilities  # Target - y
            grad_W = X.T @ error_term  # Transpose(X) @ (Target - y)
            grad_b = np.sum(error_term, axis=0)  # sum of errors (Target - y)

            # learning rate is the percent of the "correction" we apply to W and b
            self.W += self.lr * (grad_W / num_samples)  # W = W + lr * gradient_W
            self.b += self.lr * (grad_b / num_samples)  # b = b + lr * gradient_b
            if (epoch + 1) % 10 == 0:
                # cross-entropy loss, computed from the probabilities predicted
                # before this epoch's update; clipping avoids log(0) = -inf
                # (and 0 * -inf = NaN) when a probability underflows to zero
                safe_probabilities = np.clip(y_predicted_probabilities, 1e-12, 1.0)
                loss = -np.sum(y_one_hot * np.log(safe_probabilities)) / num_samples
                print(f"EPOCH {epoch + 1}/{epochs} WITH LOSS: {loss:.5f}")
        print("TRAINING: COMPLETE!")

    def predict(self, X):
        """Return, for each row of ``X``, the index of the most probable class."""
        # get predicted probabilities: one row per sample, one column per class
        y_probabilities = self.forward(X)
        # return the label with highest probability for each sample
        return np.argmax(y_probabilities, axis=1)


In [12]:
# Perceptron draws its initial weights from NumPy's global random state, so
# seed it here to make the training run repeatable.
SEED = 42
np.random.seed(SEED)

n_inputs = X_train.shape[1] # features per sample (784 for 28x28 images flattened)
n_outputs = 10 # digits 0-9
learning_rate = 0.8
epochs = 300

model = Perceptron(n_inputs=n_inputs, n_outputs=n_outputs, learning_rate=learning_rate)
model.fit(X_train, y_train, epochs=epochs)


TRAINING: START!
EPOCH 10/300 WITH LOSS: 0.80810
EPOCH 20/300 WITH LOSS: 0.55922
EPOCH 30/300 WITH LOSS: 0.46085
EPOCH 40/300 WITH LOSS: 0.43039
EPOCH 50/300 WITH LOSS: 0.41013
EPOCH 60/300 WITH LOSS: 0.39517
EPOCH 70/300 WITH LOSS: 0.38353
EPOCH 80/300 WITH LOSS: 0.37414
EPOCH 90/300 WITH LOSS: 0.36635
EPOCH 100/300 WITH LOSS: 0.35975
EPOCH 110/300 WITH LOSS: 0.35405
EPOCH 120/300 WITH LOSS: 0.34908
EPOCH 130/300 WITH LOSS: 0.34467
EPOCH 140/300 WITH LOSS: 0.34074
EPOCH 150/300 WITH LOSS: 0.33721
EPOCH 160/300 WITH LOSS: 0.33400
EPOCH 170/300 WITH LOSS: 0.33107
EPOCH 180/300 WITH LOSS: 0.32838
EPOCH 190/300 WITH LOSS: 0.32591
EPOCH 200/300 WITH LOSS: 0.32361
EPOCH 210/300 WITH LOSS: 0.32148
EPOCH 220/300 WITH LOSS: 0.31949
EPOCH 230/300 WITH LOSS: 0.31763
EPOCH 240/300 WITH LOSS: 0.31589
EPOCH 250/300 WITH LOSS: 0.31424
EPOCH 260/300 WITH LOSS: 0.31269
EPOCH 270/300 WITH LOSS: 0.31123
EPOCH 280/300 WITH LOSS: 0.30984
EPOCH 290/300 WITH LOSS: 0.30852
EPOCH 300/300 WITH LOSS: 0.30726
TRAINING: COMPLETE!

In [13]:
# Class index with the highest predicted probability for each row of X_test
# (Perceptron.predict takes the arg-max over the class probabilities).
predictions = model.predict(X_test)

In [14]:
# Build the submission table: a running ID (0, 1, 2, ...) per prediction next
# to the predicted class, then write it out without the DataFrame index.
row_ids = list(range(len(predictions)))
predictions_csv = {"ID": row_ids, "target": predictions}

df = pd.DataFrame(data=predictions_csv)
df.to_csv("submission.csv", index=False)

print("\nSubmission file 'submission.csv' created successfully.")


Submission file 'submission.csv' created successfully.
