In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from IPython.display import HTML

import sys

sys.path.append("../")
import loss_functions as ls
import utils as ut
from models import Model
from layers import Linear, Flatten
from activation_functions import Sigmoid, ReLU, Tanh

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm
from collections import Counter

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, ConcatDataset, Subset

# Convert each image to a float tensor, then normalise with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

trainset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
testset = datasets.MNIST(root="./data", train=False, download=True, transform=transform)
# NOTE(review): concatset is not read anywhere in the visible cells, and it keeps
# references to trainset/testset, so the `del` statements at the end of this cell
# do not release them while it exists. Drop it if nothing else needs it.
concatset = ConcatDataset([trainset, testset])

# Per-class cap on the number of training samples kept.
# 10000 is larger than any digit's count in the MNIST training split, so with this
# value every training image is kept (the recorded 42000/9000/9000 split sums to
# 60000) and the subset is only as balanced as MNIST itself. Lower it (e.g. 500)
# to get a smaller, truly balanced subset.
samples_per_class = 10000

# Read the labels straight from the dataset's label tensor. Iterating the dataset
# (`for _, label in trainset`) would decode and transform every image just to
# throw it away, which makes this cell needlessly slow.
labels = torch.as_tensor(trainset.targets)

# Group sample indices by label. Keys are inserted in order of first appearance,
# which fixes the order of subset_indices and therefore the seeded splits below.
indices_by_label = {}
for i, label_idx in enumerate(labels.tolist()):
    indices_by_label.setdefault(label_idx, []).append(i)

# Keep at most samples_per_class indices for every label.
subset_indices = []
for label, indices in indices_by_label.items():
    subset_indices.extend(indices[:samples_per_class])

# Create the subset
balanced_subset = Subset(trainset, subset_indices)

# Materialise the subset as flat feature vectors (x) and integer labels (y).
x, y = [], []
for i in range(len(balanced_subset)):
    image, target = balanced_subset[i]
    x.append(image.numpy().flatten())
    y.append(target)

# Stratified 70 / 15 / 15 split into train / validation / test.
x_train, x_temp, y_train, y_temp = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Convert to numpy arrays
x_train = np.array(x_train)
x_val = np.array(x_val)
y_train = np.array(y_train)
y_val = np.array(y_val)
x_test = np.array(x_test)
y_test = np.array(y_test)

print(f"Train set: {x_train.shape}, {y_train.shape}")
print(f"Validation set: {x_val.shape}, {y_val.shape}")
print(f"Test set: {x_test.shape}, {y_test.shape}")
print(f"Class distribution in train set: {Counter(y_train)}")

# Clean up (see the note on concatset above: these names go away, but the
# underlying datasets stay alive for as long as concatset references them).
del trainset
del testset
del balanced_subset

KeyboardInterrupt: 

In [3]:
# Network for the flattened 28x28 inputs: 784 -> 256 with ReLU, then 256 -> 10
# with no activation object on the output layer.
model = Model(ls.multi_cross_entropy)

# (input width, output width, activation) for each Linear layer, in stacking order.
layer_specs = [
    (28 * 28, 256, ReLU()),
    (256, 10, None),
]
for n_in, n_out, activation in layer_specs:
    model.add(Linear(n_in, n_out, activation))

print(model)

Linear Layer (in 784, out 256), (activation relu)
Linear Layer (in 256, out 10), (activation None)


In [4]:
# Sanity-check the split sizes before starting the training run.
print(
    f"Train shape: {x_train.shape}, Val shape: {x_val.shape}, Test shape: {x_test.shape}"
)

# Fit for 20 epochs with learning rate 0.01; the value returned by fit is kept
# in `loss` and later indexed with "train" / "val" for plotting.
# NOTE(review): in the recorded run the training loss stays at ~378.9 from
# epoch 1 onward, i.e. the model does not appear to be learning - investigate.
train_pair = (x_train, y_train)
val_pair = (x_val, y_val)
loss = model.fit(
    train_data=train_pair,
    validation_data=val_pair,
    epochs=20,
    lr=0.01,
)

Train shape: (42000, 784), Val shape: (9000, 784), Test shape: (9000, 784)
Training epoch 0
Epoch 0 loss -> 380.43445086449145
Validation loss -> 81.74822977367633
Training epoch 1
Epoch 1 loss -> 378.91472165580586
Validation loss -> 81.72693963673268
Training epoch 2
Epoch 2 loss -> 378.87361654541166
Validation loss -> 81.42972852493388
Training epoch 3
Epoch 3 loss -> 378.91581451766524
Validation loss -> 81.73908196161653
Training epoch 4
Epoch 4 loss -> 378.9276456396509
Validation loss -> 81.76164968707792
Training epoch 5
Epoch 5 loss -> 378.8733027002945
Validation loss -> 81.72292723393392
Training epoch 6
Epoch 6 loss -> 378.949467733321
Validation loss -> 81.7901134645553
Training epoch 7
Epoch 7 loss -> 378.9319647778343
Validation loss -> 81.78696064857444
Training epoch 8


KeyboardInterrupt: 

In [None]:
# Plot the training and validation loss curves collected by model.fit.
fig, ax = plt.subplots()
ax.plot(loss["train"], label="Train Loss")
ax.plot(loss["val"], label="Validation Loss")

# NOTE(review): the x-axis is assumed to be the epoch index (fit logs one train
# and one validation loss per epoch) - confirm against Model.fit.
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training vs. validation loss")

# The label= arguments above are only rendered once a legend is requested.
ax.legend()
plt.show()

In [None]:
print(f"Test Size: {len(x_test)}")

# Run the model on the test set one sample (one row of x_test) at a time, then
# flatten each prediction so `predictions` has one row per test sample.
predictions = np.array([model.predict(sample) for sample in x_test])
predictions = predictions.reshape((predictions.shape[0], -1))

# One-hot encode the integer test labels (10 classes). y_test is overwritten in
# place, so only encode while it is still the 1-D label vector; this keeps the
# cell safe to re-run instead of encoding an already-encoded array.
if y_test.ndim == 1:
    y_test = ut.one_hot_target(y_test, (y_test.shape[0], 10))
print(predictions.shape, y_test.shape)

In [None]:
# Reduce each row to the index of its largest entry: the predicted class for
# the model outputs, the true class for the one-hot targets.
predictions_idx = np.argmax(predictions, axis=1)
y_test_idx = np.argmax(y_test, axis=1)

# Percentage of test samples whose predicted class equals the true class.
is_correct = predictions_idx == y_test_idx
accuracy = is_correct.sum() / y_test_idx.shape[0] * 100

print(accuracy)