In [2]:
import os
import time
import copy

import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt
from PIL import Image

In [3]:
# File that train() writes the best checkpoint to and validate() reads it back from.
WEIGHTS_PATH = "weights.pt"

In [4]:
# Preprocessing pipeline applied to every sample: only the image -> tensor conversion.
to_tensor = transforms.ToTensor()
transform = transforms.Compose([to_tensor])

In [None]:
# MNIST training and test splits, stored under ./mnist and downloaded on first use.
train_dataset, test_dataset = (
  datasets.MNIST('mnist', transform=transform, train=is_train, download=True)
  for is_train in (True, False)
)

In [6]:
# Mini-batch loaders over the two MNIST splits (64 samples per batch, 4 worker processes).
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
# Accuracy does not depend on sample order, so the test split is read sequentially;
# this keeps evaluation deterministic and avoids a pointless permutation.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

In [7]:
# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

In [8]:
class Model(nn.Module):
  """Small CNN classifier: one convolution, 2x2 max-pooling and three dense layers.

  ``fc1`` takes 32 * 14 * 14 features, so the network expects single-channel
  28x28 inputs: the padded 3x3 convolution keeps the spatial size and the
  pooling halves it. The output is 10 unnormalised class scores per sample.
  """

  def __init__(self):
    super().__init__()
    # Feature extractor.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
    self.pool = nn.MaxPool2d(kernel_size=(2, 2))
    # Classifier head.
    self.fc1 = nn.Linear(in_features=32 * 14 * 14, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, x):
    """Return one row of 10 class scores for every image in the batch ``x``."""
    features = self.pool(F.relu(self.conv1(x)))
    # Collapse channels and spatial dimensions into one feature vector per sample.
    hidden = features.view(features.size(0), -1)
    for dense in (self.fc1, self.fc2):
      hidden = F.relu(dense(hidden))
    # The last layer is left without activation: it produces raw scores.
    return self.fc3(hidden)





In [9]:
def train(model, train_loader, criterion, optimizer, epochs=10):
  """Train ``model`` and checkpoint the weights of the best epoch seen so far.

  Args:
    model: network to optimise; its parameters are updated in place.
    train_loader: iterable of batches where ``batch[0]`` holds the inputs and
      ``batch[1]`` the targets. It must support ``len()``.
    criterion: loss called as ``criterion(output, labels)``.
    optimizer: optimiser bound to ``model``'s parameters.
    epochs: number of passes over ``train_loader``.

  Returns:
    A list with the mean per-batch training loss of every epoch.

  Side effects:
    Whenever an epoch's summed loss is lower than every previous one, the
    model's ``state_dict`` is written to ``WEIGHTS_PATH``. The first epoch
    always qualifies, so a checkpoint exists after any run of at least one epoch.
    One progress line is printed per epoch.
  """
  losses = []
  # Start from +inf so the very first epoch is always checkpointed.
  best_loss = float('inf')

  # Make sure layers such as dropout / batch-norm are in training mode.
  model.train()

  for e in range(epochs):
    running_loss = 0

    for data in train_loader:
      images, labels = data[0].to(device), data[1].to(device)

      # Gradients accumulate across backward() calls, so clear them first.
      optimizer.zero_grad()

      output = model(images)
      loss = criterion(output, labels)

      # Back-propagate, then let the optimiser update the weights.
      loss.backward()
      optimizer.step()

      running_loss += loss.item()

    # Compare once per epoch, after all batches have been accumulated.
    if running_loss < best_loss:
      best_loss = running_loss
      torch.save(model.state_dict(), WEIGHTS_PATH)

    epoch_loss = running_loss / len(train_loader)
    print("Epoch {} - Training loss: {}".format(e, epoch_loss))
    losses.append(epoch_loss)
  return losses

In [12]:
def validate(test_loader):
  """Print the accuracy of the checkpoint stored at ``WEIGHTS_PATH``.

  A fresh ``Model`` is built, the saved ``state_dict`` is loaded into it and
  every batch of ``test_loader`` is classified without tracking gradients.

  Args:
    test_loader: iterable of batches where ``batch[0]`` holds the inputs and
      ``batch[1]`` the integer class labels. It must expose ``.dataset`` with
      a length, which is used as the denominator of the accuracy.

  Returns:
    None. The fraction of correctly classified samples is printed.
  """
  model = Model().to(device)
  # map_location lets a checkpoint written on a GPU be restored on a CPU-only
  # machine (and the other way round).
  model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
  # Inference mode for layers whose behaviour differs between training and evaluation.
  model.eval()

  correct = 0
  with torch.no_grad():
    for data in test_loader:
      images, labels = data[0].to(device), data[1].to(device)

      # The predicted class is the index of the highest score in each row.
      preds = model(images).argmax(dim=1)
      correct += (preds == labels).sum().item()

  print(correct / len(test_loader.dataset))

In [None]:
# Build the network on the selected device and fit it with cross-entropy loss
# and Adam (learning rate 0.01); `losses` receives the per-epoch mean training loss.
model = Model().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
losses = train(
  model=model,
  train_loader=train_loader,
  criterion=criterion,
  optimizer=optimizer,
)

In [None]:
# Training-loss curve: one point per epoch, taken from `losses`.
fig, ax = plt.subplots()

ax.set_title('Adam and learning rate 0.01. Accuracy ~97%')
ax.set_xlabel('Epochs')
ax.set_ylabel('Training Loss')

# Derive the x axis from the data so the plot keeps working when the number of
# epochs changes; the trailing semicolon hides the Line2D repr in the cell output.
ax.plot(range(len(losses)), losses);

In [None]:
# Evaluate the checkpoint stored at WEIGHTS_PATH on the test split; validate() prints the accuracy.
validate(test_loader)