In [1]:
#imports 
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn

In [2]:
# Load the two MNIST CSV exports. In each row the first column is used as the
# label and every remaining column as image data (see the split below).
train = pd.read_csv('./mnist_train.csv')
test = pd.read_csv('./mnist_test.csv')

# Continue with plain NumPy arrays instead of DataFrames.
train1, test1 = train.to_numpy(), test.to_numpy()

# Column 0 -> labels, columns 1.. -> flattened image values.
X_train, y_train = train1[:, 1:], train1[:, 0]
X_test, y_test = test1[:, 1:], test1[:, 0]

In [3]:
# Turn the raw arrays into LeNet-5-ready tensors.
#
# Everything below is rebuilt from `train1` / `test1` (the untouched arrays from
# the loading cell) instead of from the previous value of `X_train` / `X_test`.
# That keeps this cell idempotent: re-running it no longer divides by 255 a
# second time, and the reshape no longer fails on already-padded data.

# Normalize: divide the image columns by 255 (assumes 8-bit pixel values).
X_train = train1[:, 1:] / 255.
X_test = test1[:, 1:] / 255.

# Shape: one grayscale channel of 28 x 28 per image -> (n, 1, 28, 28).
X_train = X_train.reshape(-1, 1, 28, 28)
X_test = X_test.reshape(-1, 1, 28, 28)

# Size: LeNet-5 assumes 32 x 32 inputs. The layout is
# (amount of data, channels, height, width), so pad 2 on each side of height
# and width only: 28 + 4 = 32 -> (n, 1, 32, 32).
X_train = np.pad(X_train, pad_width=((0, 0), (0, 0), (2, 2), (2, 2)))
X_test = np.pad(X_test, pad_width=((0, 0), (0, 0), (2, 2), (2, 2)))

# Convert to torch; images are cast to float32, labels keep their integer dtype.
xTrain = torch.from_numpy(X_train).to(torch.float32)
xTest = torch.from_numpy(X_test).to(torch.float32)
yTrain = torch.from_numpy(y_train)
yTest = torch.from_numpy(y_test)

# One-hot encoded copies of the labels (10 classes).
yTrainOH = F.one_hot(yTrain, num_classes=10)
yTestOH = F.one_hot(yTest, num_classes=10)

In [4]:
# Sanity check: the first ten test labels, their one-hot rows (each row should
# have its single 1 at the label's index), then the shape of the padded
# training images.
print(yTest[:10])
print(yTestOH[:10])

print(X_train.shape)

tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9])
tensor([[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])
(60000, 1, 32, 32)


In [5]:
#model
class Lenet5(nn.Module):
    """LeNet-5 style CNN: two conv -> tanh -> avg-pool stages, then three linear layers.

    With the default arguments a ``(N, 1, 32, 32)`` batch is mapped to
    ``(N, 10)`` raw class scores. ``forward`` applies no softmax; the scores
    are meant to be fed to a loss such as ``nn.CrossEntropyLoss``.

    Args:
        num_classes: Number of scores returned per sample (width of the last
            linear layer). Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 1
            (grayscale).
    """

    def __init__(self, num_classes: int = 10, in_channels: int = 1) -> None:
        super().__init__()

        # kernel 5, stride 1, no padding: each conv shrinks height/width by 4
        # (32 -> 28 for conv1, 14 -> 10 for conv2).
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)

        # After the second pooling a 32 x 32 input is 16 channels of 5 x 5 = 400 features.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

        # Parameter-free layers, shared by both conv stages.
        self.tanh = nn.Tanh()
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores for a batch of images.

        Args:
            x: Image batch of shape ``(N, in_channels, 32, 32)``; other spatial
                sizes do not match the 400 input features of ``fc1``.

        Returns:
            Tensor of shape ``(N, num_classes)`` with un-normalised scores.
        """
        Z1 = self.pool(self.tanh(self.conv1(x)))
        Z2 = self.pool(self.tanh(self.conv2(Z1)))
        flatten = torch.flatten(Z2, start_dim=1)  # keep the batch dim, flatten the rest
        x1 = self.tanh(self.fc1(flatten))
        x2 = self.tanh(self.fc2(x1))
        # No softmax here: CrossEntropyLoss applies it implicitly.
        return self.fc3(x2)

In [6]:
# Build the model and move it, together with all data tensors, to the GPU when
# one is available (CPU otherwise).

# Seed torch's global RNG before any parameters are created so a fresh
# Restart-and-Run-All starts from the same initial weights (and, per the
# DataLoader docs, the same shuffle order). Some GPU kernels may still
# introduce small run-to-run differences.
SEED = 0
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Lenet5().to(device)

# Images
xTrain = xTrain.to(device)
xTest = xTest.to(device)
# Integer labels
yTrain = yTrain.to(device)
yTest = yTest.to(device)
# One-hot labels
yTrainOH = yTrainOH.to(device)
yTestOH = yTestOH.to(device)

print(device)

cuda


In [7]:
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch iterators over (image, integer label) pairs, 64 samples per batch.
# Shuffling is requested for the training loader only; the test loader keeps
# the DataLoader default.
train_loader = DataLoader(
    dataset=TensorDataset(xTrain, yTrain),
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=TensorDataset(xTest, yTest),
    batch_size=64,
)

In [8]:
#vars/rest of functions 
def calculate_accuracy(X, Y):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        X: Tensor of per-class scores with the classes along dim 1.
        Y: Tensor of class indices, compared element-wise with the arg-max of ``X``.

    Returns:
        The number of matching predictions divided by ``len(Y)``, as a Python float.
    """
    hits = X.argmax(dim=1).eq(Y).sum()
    return hits.item() / len(Y)
# Training configuration: number of passes over the data, loss, and optimiser.
num_epochs = 30

# reduction="mean" is the default, spelled out because the training loop
# multiplies each batch loss by the batch size to get a per-sample average.
criterion = nn.CrossEntropyLoss(reduction="mean")

# Plain SGD (no momentum) over all model parameters.
# NOTE(review): in the logged run test accuracy is still only ~0.81 after 11
# epochs with this setting; a larger lr or momentum may be worth trying.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In [9]:
for epoch in range(num_epochs):
    # ---- training pass: one optimiser step per mini-batch ----
    model.train()
    train_loss = correct = total = 0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # Weight each batch loss by its size so the epoch figure is averaged
        # per sample rather than per batch (the last batch may be smaller).
        total += len(batch_x)
        train_loss += len(batch_x) * loss.item()
        correct += outputs.argmax(dim=1).eq(batch_y).sum().item()

    train_acc = correct / total
    avg_train_loss = train_loss / total

    # ---- evaluation pass: same bookkeeping, no gradients, no updates ----
    model.eval()
    test_loss, test_correct, test_total = 0.0, 0, 0

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            test_total += len(batch_x)
            test_loss += len(batch_x) * loss.item()
            test_correct += outputs.argmax(dim=1).eq(batch_y).sum().item()

    test_acc = test_correct / test_total
    avg_test_loss = test_loss / test_total

    # One summary line per epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f}, Test Loss: {avg_test_loss:.4f}, Test Acc: {test_acc:.4f}")

Epoch 1/30, Train Loss: 2.2959, Train Acc: 0.1148, Test Loss: 2.2901, Test Acc: 0.1289
Epoch 2/30, Train Loss: 2.2835, Train Acc: 0.1472, Test Loss: 2.2749, Test Acc: 0.1832
Epoch 3/30, Train Loss: 2.2640, Train Acc: 0.2557, Test Loss: 2.2485, Test Acc: 0.3552
Epoch 4/30, Train Loss: 2.2268, Train Acc: 0.4591, Test Loss: 2.1948, Test Acc: 0.5548
Epoch 5/30, Train Loss: 2.1456, Train Acc: 0.5787, Test Loss: 2.0728, Test Acc: 0.6065
Epoch 6/30, Train Loss: 1.9623, Train Acc: 0.5932, Test Loss: 1.8131, Test Acc: 0.5854
Epoch 7/30, Train Loss: 1.6536, Train Acc: 0.5886, Test Loss: 1.4781, Test Acc: 0.6141
Epoch 8/30, Train Loss: 1.3487, Train Acc: 0.6418, Test Loss: 1.2075, Test Acc: 0.6894
Epoch 9/30, Train Loss: 1.1147, Train Acc: 0.7094, Test Loss: 1.0062, Test Acc: 0.7470
Epoch 10/30, Train Loss: 0.9446, Train Acc: 0.7584, Test Loss: 0.8625, Test Acc: 0.7846
Epoch 11/30, Train Loss: 0.8242, Train Acc: 0.7892, Test Loss: 0.7604, Test Acc: 0.8076
Epoch 12/30, Train Loss: 0.7373, Train Ac