In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
dtype = torch.float32
device = torch.device('cuda:0')

In [4]:
transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.4), (0.2))
])

# MNIST has 70,000 images
# We use 60,000 images. At test time, we will use the other 10,000 images.
NUM_TRAIN = 57000

mnist_train = dset.MNIST('./datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(mnist_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

mnist_val = dset.MNIST('./datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(mnist_val, batch_size=64, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 60000)))

mnist_test = dset.MNIST('./datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(mnist_test, batch_size=64)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets\MNIST\raw\train-images-idx3-ubyte.gz


100.1%

Extracting ./datasets\MNIST\raw\train-images-idx3-ubyte.gz to ./datasets\MNIST\raw


113.5%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets\MNIST\raw\train-labels-idx1-ubyte.gz
Extracting ./datasets\MNIST\raw\train-labels-idx1-ubyte.gz to ./datasets\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets\MNIST\raw\t10k-images-idx3-ubyte.gz


0.0%4%

Extracting ./datasets\MNIST\raw\t10k-images-idx3-ubyte.gz to ./datasets\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets\MNIST\raw\t10k-labels-idx1-ubyte.gz


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Extracting ./datasets\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./datasets\MNIST\raw
Processing...
Done!


In [5]:
model_conv = nn.Sequential(
    nn.Conv2d(1, 32, kernel_size=3, padding=1),
    nn.LeakyReLU(),
    nn.Conv2d(32,64,kernel_size=3, padding=1),
    nn.LeakyReLU(),
    nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)), # (N, 64, 14, 14)
    
    nn.BatchNorm2d(64),
    nn.LeakyReLU(),
    nn.Conv2d(64, 64, kernel_size=3, padding=1),
    nn.LeakyReLU(),
    nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)), # (N, 64, 7, 7)
    
    nn.Flatten(),
    nn.Linear(64*7*7, 64*4*4),
    nn.ReLU(),
    nn.Linear(64*4*4, 128),
    nn.ReLU(),
    nn.Linear(128,10),
    nn.Dropout(0.3)    
).to(device)

conv_optim = optim.Adam(model_conv.parameters(), weight_decay = 1e-2)

In [6]:
model_fc = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 10)
).to(device=device, dtype=dtype)

fc_optim = optim.Adam(model_fc.parameters(), weight_decay = 1e-3)

In [7]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print(f'Got {num_correct} / {num_samples} correct (%.2f)' % (100 * acc))

In [8]:
def train_model(model, optimizer, loadert, loaderv, epochs=1, print_every=100):
    for e in range(epochs):
        print(f"----------  Epoch {e}  ----------\n")
        for t, (x,y) in enumerate(loadert):
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            
            scores = model(x)
            loss = F.cross_entropy(scores, y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if t % print_every == 0:
                print(f"Iteration {t}: Loss = {loss.item()}")
                check_accuracy(loaderv, model)
                print()
            

In [9]:
train_model(model_fc, fc_optim, loader_train, loader_val, epochs=5, print_every=500)

----------  Epoch 0  ----------

Iteration 0: Loss = 2.334533929824829
Checking accuracy on validation set
Got 589 / 3000 correct (19.63)

Iteration 500: Loss = 0.17414768040180206
Checking accuracy on validation set
Got 2892 / 3000 correct (96.40)

----------  Epoch 1  ----------

Iteration 0: Loss = 0.238621324300766
Checking accuracy on validation set
Got 2909 / 3000 correct (96.97)

Iteration 500: Loss = 0.10675705224275589
Checking accuracy on validation set
Got 2911 / 3000 correct (97.03)

----------  Epoch 2  ----------

Iteration 0: Loss = 0.13637706637382507
Checking accuracy on validation set
Got 2920 / 3000 correct (97.33)

Iteration 500: Loss = 0.12428975105285645
Checking accuracy on validation set
Got 2906 / 3000 correct (96.87)

----------  Epoch 3  ----------

Iteration 0: Loss = 0.0475778728723526
Checking accuracy on validation set
Got 2941 / 3000 correct (98.03)

Iteration 500: Loss = 0.09662456065416336
Checking accuracy on validation set
Got 2920 / 3000 correct (97

In [10]:
check_accuracy(loader_test, model_fc)

Checking accuracy on test set
Got 9534 / 10000 correct (95.34)


In [11]:
train_model(model_conv, conv_optim, loader_train, loader_val, epochs=5, print_every=500)

----------  Epoch 0  ----------

Iteration 0: Loss = 2.317458152770996
Checking accuracy on validation set
Got 309 / 3000 correct (10.30)

Iteration 500: Loss = 0.06568978726863861
Checking accuracy on validation set
Got 2914 / 3000 correct (97.13)

----------  Epoch 1  ----------

Iteration 0: Loss = 0.08501437306404114
Checking accuracy on validation set
Got 2936 / 3000 correct (97.87)

Iteration 500: Loss = 0.04274098202586174
Checking accuracy on validation set
Got 2921 / 3000 correct (97.37)

----------  Epoch 2  ----------

Iteration 0: Loss = 0.07485563308000565
Checking accuracy on validation set
Got 2937 / 3000 correct (97.90)

Iteration 500: Loss = 0.06528503447771072
Checking accuracy on validation set
Got 2948 / 3000 correct (98.27)

----------  Epoch 3  ----------

Iteration 0: Loss = 0.03686854615807533
Checking accuracy on validation set
Got 2944 / 3000 correct (98.13)

Iteration 500: Loss = 0.08982988446950912
Checking accuracy on validation set
Got 2948 / 3000 correct 

In [12]:
check_accuracy(loader_test, model_conv)

Checking accuracy on test set
Got 9826 / 10000 correct (98.26)


In [13]:
transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.4), (0.2))
])

# Fashion MNIST has 70,000 images
# We use 60,000 images. At test time, we will use the remaining 10,000 images.
NUM_TRAIN = 57000

fash_train = dset.FashionMNIST('./datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(fash_train, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

fash_val = dset.FashionMNIST('./datasets', train=True, download=True,
                           transform=transform)
loader_val = DataLoader(fash_val, batch_size=64, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 60000)))

fash_test = dset.FashionMNIST('./datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(fash_test, batch_size=64)

model_fc.reset_parameters()
model_conv.reset_parameters()

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./datasets\FashionMNIST\raw\train-images-idx3-ubyte.gz


100.0%

Extracting ./datasets\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./datasets\FashionMNIST\raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./datasets\FashionMNIST\raw\train-labels-idx1-ubyte.gz


111.0%

Extracting ./datasets\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./datasets\FashionMNIST\raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./datasets\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100.0%

Extracting ./datasets\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./datasets\FashionMNIST\raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./datasets\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


159.1%

Extracting ./datasets\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./datasets\FashionMNIST\raw
Processing...
Done!


In [14]:
fc_optim = optim.SGD(model_fc.parameters(), lr=1e-3, momentum=0.9, nesterov=True)
train_model(model_fc, fc_optim, loader_train, loader_val, epochs=10, print_every=500)

----------  Epoch 0  ----------

Iteration 0: Loss = 6.890028476715088
Checking accuracy on validation set
Got 166 / 3000 correct (5.53)

Iteration 500: Loss = 0.6413515210151672
Checking accuracy on validation set
Got 2353 / 3000 correct (78.43)

----------  Epoch 1  ----------

Iteration 0: Loss = 0.6066540479660034
Checking accuracy on validation set
Got 2436 / 3000 correct (81.20)

Iteration 500: Loss = 0.40940040349960327
Checking accuracy on validation set
Got 2486 / 3000 correct (82.87)

----------  Epoch 2  ----------

Iteration 0: Loss = 0.43081191182136536
Checking accuracy on validation set
Got 2530 / 3000 correct (84.33)

Iteration 500: Loss = 0.38758134841918945
Checking accuracy on validation set
Got 2545 / 3000 correct (84.83)

----------  Epoch 3  ----------

Iteration 0: Loss = 0.373351126909256
Checking accuracy on validation set
Got 2572 / 3000 correct (85.73)

Iteration 500: Loss = 0.4051193296909332
Checking accuracy on validation set
Got 2563 / 3000 correct (85.43

In [15]:
check_accuracy(loader_test, model_fc)

Checking accuracy on test set
Got 8631 / 10000 correct (86.31)


In [16]:
conv_optim = optim.SGD(model_conv.parameters(), lr=1e-3, momentum=0.9, nesterov=True)
train_model(model_conv, conv_optim, loader_train, loader_val, epochs=10, print_every=500)

----------  Epoch 0  ----------

Iteration 0: Loss = 3.7345099449157715
Checking accuracy on validation set
Got 209 / 3000 correct (6.97)

Iteration 500: Loss = 0.6412071585655212
Checking accuracy on validation set
Got 2371 / 3000 correct (79.03)

----------  Epoch 1  ----------

Iteration 0: Loss = 0.45955002307891846
Checking accuracy on validation set
Got 2489 / 3000 correct (82.97)

Iteration 500: Loss = 0.3712809979915619
Checking accuracy on validation set
Got 2535 / 3000 correct (84.50)

----------  Epoch 2  ----------

Iteration 0: Loss = 0.31323274970054626
Checking accuracy on validation set
Got 2522 / 3000 correct (84.07)

Iteration 500: Loss = 0.3540203273296356
Checking accuracy on validation set
Got 2586 / 3000 correct (86.20)

----------  Epoch 3  ----------

Iteration 0: Loss = 0.4157051742076874
Checking accuracy on validation set
Got 2592 / 3000 correct (86.40)

Iteration 500: Loss = 0.30157551169395447
Checking accuracy on validation set
Got 2607 / 3000 correct (86.

In [17]:
check_accuracy(loader_test, model_conv)

Checking accuracy on test set
Got 8852 / 10000 correct (88.52)
