Goal: Add convolutional layers to the MNIST classifier and see how the results change.

In [1]:
import torch

In [2]:
import torchvision.datasets as datasets
from torchvision import transforms

# Single-step pipeline that turns each image into a tensor; kept inside a
# Compose so more transforms can be appended later.
transform = transforms.Compose([transforms.ToTensor()])

# Both MNIST splits share the storage location, download flag and transform;
# only the `train` flag differs.
mnist_kwargs = dict(root="./data", download=True, transform=transform)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

In [3]:
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Fraction of the official training set used for fitting; the remainder is
# held out for validation.
train_split = 0.8
train_size = int(train_split * len(train_data))
val_size = len(train_data) - train_size

# Seed the split so the same examples end up in the validation subset on
# every run; with an unseeded split, validation losses from different runs
# are measured on different data and cannot be compared.
split_seed = 0
split_generator = torch.Generator().manual_seed(split_seed)
train_subset, val_subset = random_split(
    train_data, [train_size, val_size], generator=split_generator
)

batch_size = 64

# Only the training loader shuffles. Validation and test keep dataset order;
# the test predictions are later compared position by position against
# `test_data.targets`, so that order must not change.
train_loader = DataLoader(dataset=train_subset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_subset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)


In [4]:
import torch.nn as nn
import torch.optim as optim

class Skynet(nn.Module):
    """Small convolutional classifier for single-channel square images.

    Three 3x3 convolutions (the last two with stride 2) feed one
    fully connected hidden layer with dropout before and after it, followed
    by a linear layer that emits one raw score per class.

    Args:
        input_size: Number of pixels in one square image (e.g. ``28 ** 2``).
            Used to size the first fully connected layer.
        hidden_size: Width of the fully connected hidden layers.
        output_size: Number of classes, i.e. the width of the output layer.

    Raises:
        ValueError: If ``input_size`` is not a perfect square, or the image
            side is smaller than the 3x3 kernel of the first convolution.
    """

    @staticmethod
    def _conv_out(size, kernel_size, stride, padding):
        """Return the output side length of a convolution along one axis."""
        return (size + 2 * padding - kernel_size) // stride + 1

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        dropout_rate = 0.5

        # Recover the image side length from the flattened pixel count.
        side = round(input_size ** 0.5)
        if side * side != input_size:
            raise ValueError(
                f"input_size must be a perfect square, got {input_size}"
            )
        if side < 3:
            raise ValueError(
                f"images must be at least 3x3 pixels, got {side}x{side}"
            )

        # Attribute names and creation order are unchanged, so state_dict keys
        # and the order in which weights are randomly initialised stay the same.
        self.conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.drop = nn.Dropout(p=dropout_rate)  # registered, not used in forward
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.relu2 = nn.ReLU()
        self.drop0 = nn.Dropout(p=dropout_rate)  # registered, not used in forward
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.relu0 = nn.ReLU()
        self.flatten = nn.Flatten()
        self.drop2 = nn.Dropout(p=dropout_rate)

        # Walk the side length through the conv stack instead of hard-coding
        # it; for 28x28 inputs this gives 26 -> 13 -> 7, i.e. 7 * 7 * 64 features.
        for layer in (self.conv, self.conv2, self.conv3):
            side = self._conv_out(
                side, layer.kernel_size[0], layer.stride[0], layer.padding[0]
            )
        flat_features = side * side * self.conv3.out_channels

        self.fc = nn.Linear(flat_features, hidden_size)
        self.relu3 = nn.ReLU()
        self.drop3 = nn.Dropout(p=dropout_rate)
        # Second hidden block: registered but currently skipped in forward.
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu4 = nn.ReLU()
        self.drop4 = nn.Dropout(p=dropout_rate)
        self.output = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Batch of single-channel images; the first convolution expects
                exactly one input channel.

        Returns:
            Raw (un-normalised) scores with ``output_size`` values per example.
        """
        # Convolutional feature extractor. Dropout after the first two
        # convolutions (self.drop / self.drop0) is intentionally not applied.
        x = self.conv(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.conv3(x)
        x = self.relu0(x)

        # Classifier head. The fc2 / relu4 / drop4 block is intentionally skipped.
        x = self.flatten(x)
        x = self.drop2(x)
        x = self.fc(x)
        x = self.relu3(x)
        x = self.drop3(x)
        x = self.output(x)
        return x

In [5]:
# Hyperparameters handed to the network constructor.
input_size = 28 * 28
hidden_size = 64
output_size = 10

model = Skynet(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)


In [6]:
# Cross-entropy criterion applied to the network's raw class scores.
loss_func = nn.CrossEntropyLoss()

In [7]:
# Plain stochastic gradient descent over every registered model parameter.
optimizer = optim.SGD(params=model.parameters(), lr=0.2)

In [8]:
def _run_pass(loader, update_weights):
    """Run the global `model` over every batch in `loader`; return the mean batch loss.

    Args:
        loader: Iterable yielding ``(features, labels)`` batches.
        update_weights: When true, back-propagate each batch loss and step
            the global `optimizer`; when false, only evaluate.

    Returns:
        The arithmetic mean of the per-batch losses as a Python float, or
        ``nan`` when `loader` yields no batches.
    """
    total_loss = 0.0
    num_batches = 0
    for features, labels in loader:
        if update_weights:
            optimizer.zero_grad()
        loss = loss_func(model(features), labels)
        if update_weights:
            loss.backward()
            optimizer.step()

        # .item() detaches the value so no autograd history is carried
        # from one batch to the next.
        total_loss += loss.item()
        num_batches += 1

    # Divide by the number of batches seen, not by the last zero-based index.
    return total_loss / num_batches if num_batches else float("nan")


def train_one_epoch():
    """Train for one pass over `train_loader`, then evaluate on `val_loader`.

    Uses the notebook-level `model`, `optimizer`, `loss_func`,
    `train_loader` and `val_loader`. Prints both average losses and leaves
    the model in evaluation mode.

    Returns:
        ``(train_loss, val_loss)``: the mean per-batch loss of each pass.
    """
    model.train(True)
    train_loss = _run_pass(train_loader, update_weights=True)
    print(f"Average loss for training batches in this epoch: {train_loss}")

    model.train(False)
    # No gradients are needed for validation, so skip building the graph.
    with torch.no_grad():
        val_loss = _run_pass(val_loader, update_weights=False)
    print(f"Average loss for validation batches in this epoch: {val_loss}")

    return train_loss, val_loss


In [9]:
# Run a fixed number of train-then-validate epochs, with a blank line
# separating the log output of consecutive epochs.
model.train(True)
for epoch in range(30):
    print(f"Beginning epoch {epoch}...")
    train_one_epoch()
    print()

Beginning epoch 0...
Average loss for training batches in this epoch: 0.5897470712661743
Average loss for validation batches in this epoch: 0.13148629665374756

Beginning epoch 1...
Average loss for training batches in this epoch: 0.22160571813583374
Average loss for validation batches in this epoch: 0.09065211564302444

Beginning epoch 2...
Average loss for training batches in this epoch: 0.17408297955989838
Average loss for validation batches in this epoch: 0.0690009742975235

Beginning epoch 3...
Average loss for training batches in this epoch: 0.1491249054670334
Average loss for validation batches in this epoch: 0.0682399794459343

Beginning epoch 4...
Average loss for training batches in this epoch: 0.12904539704322815
Average loss for validation batches in this epoch: 0.05282985046505928

Beginning epoch 5...
Average loss for training batches in this epoch: 0.12277815490961075
Average loss for validation batches in this epoch: 0.050439637154340744

Beginning epoch 6...
Average lo

In [10]:
# Switch to evaluation mode and gather the raw class scores for every test
# example, in loader order.
model.eval()
preds = []
with torch.no_grad():  # gradients are not needed for inference
    for features, labels in test_loader:
        batch_preds = model(features)
        preds += batch_preds.tolist()

In [11]:
# Rebuild a tensor from the collected score lists, then take the index of the
# highest score in each row as the predicted class.
preds_tensor = torch.tensor(preds)
category_preds = preds_tensor.argmax(dim=1)

In [12]:
def check_accuracy(preds, actual):
    """Return the fraction of predictions that equal the true labels.

    Args:
        preds: Sequence of predicted labels.
        actual: Sequence of true labels, compared with `preds` position by
            position.

    Returns:
        The share of matching positions as a float, or ``-1`` when accuracy
        cannot be computed: the inputs differ in length or are empty.
    """
    total = len(actual)
    # An empty input would otherwise divide by zero; report it with the same
    # sentinel already used for mismatched lengths.
    if total != len(preds) or total == 0:
        return -1
    correct = sum(int(truth == guess) for truth, guess in zip(actual, preds))
    return correct / total

In [13]:
# Compare the predicted classes against the test set's stored labels.
accuracy = check_accuracy(category_preds, test_data.targets)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9911


0.983

0.9842

0.9846

0.9865

0.9888

0.9902

99.11% accuracy is awesome! I think I'll stop here for now.