In [88]:
import numpy as np
import torch
from torch.nn import Sequential, Conv2d, MaxPool2d, Dropout2d, CrossEntropyLoss, ReLU, Linear, Flatten, Module
import torch.optim as optim

from torch.utils.data import DataLoader

import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [89]:
#if torch.cuda.is_available():
    #device = 'cuda'
#else:
    #device = 'cpu'

In [90]:
class ASL(Module):
    def __init__(self, input_size):
        super().__init__()
    
    def forward(self, x):
        return x

In [91]:
batch_size = 100

# transform images into normalized tensors
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

train_dataset = datasets.MNIST(
    "./",
    download=True,
    train=True,
    transform=transform,
)

test_dataset = datasets.MNIST(
    "./",
    download=True,
    train=False,
    transform=transform,
)

train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)

In [92]:
p_drop = 0.05

NN = Sequential(Conv2d(1, 32, (5, 5)),
                MaxPool2d(2,2),
                ASL(32),
                Dropout2d(p_drop),
                Conv2d(32, 64, (5, 5)),
                MaxPool2d(2,2),
                Dropout2d(p_drop),
                Conv2d(64, 128, (2, 2)),
                MaxPool2d(2,2),
                Dropout2d(p_drop),
                ReLU(),
                Flatten(),
                Linear(128, 10)
                )

NN.modules


<bound method Module.modules of Sequential(
  (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): ASL()
  (3): Dropout2d(p=0.05, inplace=False)
  (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Dropout2d(p=0.05, inplace=False)
  (7): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (9): Dropout2d(p=0.05, inplace=False)
  (10): ReLU()
  (11): Flatten(start_dim=1, end_dim=-1)
  (12): Linear(in_features=128, out_features=10, bias=True)
)>

In [93]:
criterion = CrossEntropyLoss()
optimizer = optim.SGD(NN.parameters(), lr=0.001, momentum=0.9)

In [94]:
def test_loss(NN):
    correct = 0
    total = 0
    total2 = 0
    loss = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in test_dataloader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = NN(images)
            loss += criterion(outputs, labels).item()
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            total2 += 1
            correct += (predicted == labels).sum().item()
    
    return loss/total2, 100 * correct / total

In [95]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = NN(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 0:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] train_loss: {running_loss / 100:.3f}')
            running_loss = 0.0
            print(test_loss(NN))


[1,     1] train_loss: 0.023
(2.3088919878005982, 10.25)
[1,   101] train_loss: 2.232
(2.115778603553772, 53.38)
[1,   201] train_loss: 1.888
(1.5047953057289123, 68.36)
[1,   301] train_loss: 1.105
(0.7493386021256447, 81.19)
[1,   401] train_loss: 0.634
(0.5087035059928894, 85.93)
[1,   501] train_loss: 0.483
(0.4115734979510307, 88.32)
[2,     1] train_loss: 0.004
(0.3572144535556436, 89.48)
[2,   101] train_loss: 0.340
(0.32170737579464914, 90.45)
[2,   201] train_loss: 0.321
(0.2905134744942188, 91.15)
[2,   301] train_loss: 0.306
(0.2700541178137064, 91.89)
[2,   401] train_loss: 0.275
(0.25317953728139403, 92.3)
[2,   501] train_loss: 0.257
(0.24296134492382407, 92.38)
