In [1]:
### The goal of this file:
### (1) download the MNIST dataset if it does not exist
### (2) pretrain the model (set n_epochs=0 to skip pretraining)

import os

import torch
import torchvision

In [2]:
### parameters ###

# Number of training epochs; with 0 the training loop below never runs.
n_epochs = 0

# Mini-batch sizes handed to the training and test data loaders.
batch_size_train, batch_size_test = 64, 1000

# SGD hyperparameters.
learning_rate, momentum = 0.01, 0.5

# The training loss is printed/recorded once every `log_interval` batches.
log_interval = 10

# Reproducibility: turn cuDNN off and seed PyTorch's global RNG.
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x28d5f835050>

In [3]:
### download dataset ###

# Both loaders read MNIST from ./files/ (downloading it when missing) and apply
# the same preprocessing: convert to tensor, then normalise with the constants
# (0.1307, 0.3081) -- presumably the MNIST pixel mean/std; TODO confirm.

# Training split, reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    dataset=torchvision.datasets.MNIST(
        root='./files/',
        train=True,
        download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size_train,
    shuffle=True,
)

# Test split, also shuffled.
test_loader = torch.utils.data.DataLoader(
    dataset=torchvision.datasets.MNIST(
        root='./files/',
        train=False,
        download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=batch_size_test,
    shuffle=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./files/MNIST\raw\train-images-idx3-ubyte.gz


9920512it [00:00, 15974886.53it/s]                                                                                     


Extracting ./files/MNIST\raw\train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./files/MNIST\raw\train-labels-idx1-ubyte.gz


32768it [00:00, 46452.31it/s]                                                                                          


Extracting ./files/MNIST\raw\train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./files/MNIST\raw\t10k-images-idx3-ubyte.gz


1654784it [00:06, 247355.71it/s]                                                                                       


Extracting ./files/MNIST\raw\t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./files/MNIST\raw\t10k-labels-idx1-ubyte.gz


8192it [00:00, 16850.61it/s]                                                                                           


Extracting ./files/MNIST\raw\t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [4]:
# Pull the first batch out of the (shuffled) test loader: `batch_idx` is the
# enumerate index (0), `example_data` / `example_targets` are that batch's
# inputs and labels. Presumably kept for inspection/plotting -- none of these
# names are read again in the visible cells.
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [5]:
import matplotlib.pyplot as plt
import numpy as np

In [6]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [7]:
### Define a two-layer ReLU neural network ###

class Flatten(nn.Module):
    """Reshape a batch so that each sample becomes a single flat vector.

    The leading (batch) dimension is kept; every remaining dimension is
    collapsed into one via ``Tensor.view``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        n_samples = x.size(0)
        return x.view(n_samples, -1)

# Fully connected classifier: flatten -> 784x200 linear -> ReLU -> 200x10 linear
# -> ReLU -> log-softmax. The output is log-probabilities, matching the
# F.nll_loss calls used for training and evaluation.
network = nn.Sequential(
    Flatten(),
    nn.Linear(784, 200),
    nn.ReLU(),
    nn.Linear(200, 10),
    # NOTE(review): this ReLU clamps negative class scores to zero before the
    # log-softmax. It is kept so the architecture (and anything that consumes
    # the saved weights) is unchanged -- confirm that it is intentional.
    nn.ReLU(),
    # dim is given explicitly: the implicit-dim form is deprecated and emits a
    # warning on every forward pass. For the 2-D (batch, classes) input
    # produced here, dim=1 is what the implicit rule selected, so outputs are
    # unchanged.
    nn.LogSoftmax(dim=1)
)

In [8]:
# Plain SGD with momentum over every parameter of the network.
optimizer = optim.SGD(
    network.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

In [9]:
# Weights of the two loss terms used in train(): `mu` scales the NLL term and
# `lamb` scales the L1 penalty on the parameters.
mu = 1
lamb = 1e-3

# Histories filled in by train() / test().
train_losses, train_counter, test_losses = [], [], []

# Number of training samples seen at each evaluation point (0, 1, ..., n_epochs
# full passes over the training set).
test_counter = [
    epoch_idx * len(train_loader.dataset)
    for epoch_idx in range(n_epochs + 1)
]

In [10]:
def train(epoch):
    """Run one pass over ``train_loader`` and update ``network`` in place.

    The optimised objective is ``mu * NLL(output, target)`` plus an L1 penalty
    ``lamb * sum(|param|)`` taken over every network parameter. Only the
    weighted NLL term is logged.

    Every ``log_interval`` batches the current NLL term is printed and appended
    to ``train_losses``, and the number of training samples seen so far is
    appended to ``train_counter``.

    Args:
        epoch: 1-based epoch number; used in the log line and to offset the
            sample counter by the epochs already completed.
    """
    network.train()
    dataset_size = len(train_loader.dataset)
    # Use the loader's own batch size rather than a hard-coded 64, so the
    # sample counter stays correct when the batch size is changed.
    samples_per_batch = train_loader.batch_size
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = network(data)
        err_loss = mu * F.nll_loss(output, target)
        # L1 penalty over all parameters (weights and biases alike).
        regularization_loss = sum(
            lamb * param.abs().sum() for param in network.parameters())
        loss = err_loss + regularization_loss
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), dataset_size,
                100. * batch_idx / len(train_loader), err_loss.item()))
            train_losses.append(err_loss.item())
            train_counter.append(
                batch_idx * samples_per_batch + (epoch - 1) * dataset_size)

In [11]:
def test():
  network.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      output = network(data)
      test_loss += F.nll_loss(output, target, size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [12]:
# Evaluate once before any training, then alternate one training epoch with
# one evaluation. With n_epochs = 0 the loop body never runs and the freshly
# initialised model is what gets saved.
test()
for epoch in range(1, n_epochs + 1):
    train(epoch)
    test()

# torch.save does not create missing directories, so make sure the output
# folder exists before writing the checkpoints.
os.makedirs('./results', exist_ok=True)
torch.save(network.state_dict(), './results/model.pth')
torch.save(optimizer.state_dict(), './results/optimizer.pth')

  input = module(input)



Test set: Avg. loss: 2.2936, Accuracy: 1268/10000 (12%)

