In [2]:
%matplotlib inline
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt

### Importing Dataset

In [4]:
# Both MNIST splits share the same storage location and the same image -> tensor conversion.
mnist_root = '../data'
to_tensor = torchvision.transforms.ToTensor()

# Held-out split, served in shuffled mini-batches of 100 images.
mnist_test = torchvision.datasets.MNIST(mnist_root, train=False, download=True, transform=to_tensor)
dataset_test = torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=True)

# Training split, served in shuffled mini-batches of 100 images.
mnist_train = torchvision.datasets.MNIST(mnist_root, train=True, download=True, transform=to_tensor)
dataset_train = torch.utils.data.DataLoader(mnist_train, batch_size=100, shuffle=True)

In [13]:
# Peek at a single training batch to check the tensor layout
# (nothing is printed if the loader yields no batches).
first_batch = next(iter(dataset_train), None)
if first_batch is not None:
    sample_images, _sample_labels = first_batch
    print(sample_images.shape)

torch.Size([100, 1, 28, 28])


### Implementing the `accuracy` function

We will evaluate the quality of a classifier by its accuracy on the test set.

In [None]:
def accuracy(predicted_logits, reference):
    """
    Fraction of samples whose highest-scoring class matches the reference label.

    @param predicted_logits: float32 tensor of shape (batch size, num classes)
    @param reference: int64 tensor of shape (batch_size) with the class number
    @return: 0-dim float tensor holding (number of correct predictions) / (number of labels)
    """
    # The predicted class is the index of the largest logit in each row.
    predicted_classes = predicted_logits.argmax(dim=1)
    # Integer count of matches; dividing by a Python int promotes it to a float tensor.
    num_correct = (predicted_classes == reference).sum()
    return num_correct / reference.numel()

In [None]:
def train(model, criterion, dataset_train, dataset_test, optimizer, num_epochs):
  """
  Train `model` for `num_epochs` epochs and print the test accuracy after each epoch.

  Batches are moved to the device that holds the model's first parameter (CPU when
  the model has no parameters), so the function does not depend on a notebook-level
  `device` variable having been defined by an earlier cell.

  The reported accuracy is weighted by batch size, so a smaller final batch does not
  skew the result. An empty test loader is reported as `nan` instead of raising.

  @param model: torch.nn.Module
  @param criterion: torch.nn.modules.loss._Loss
  @param dataset_train: torch.utils.data.DataLoader
  @param dataset_test: torch.utils.data.DataLoader
  @param optimizer: torch.optim.Optimizer
  @param num_epochs: int
  """
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  print("Starting training")
  for epoch in range(num_epochs):
    # Train an epoch (train mode enables dropout and similar layers)
    model.train()
    for batch_x, batch_y in dataset_train:
      batch_x, batch_y = batch_x.to(device), batch_y.to(device)

      # Clear gradients left over from the previous step, then run the forward pass
      optimizer.zero_grad()
      output = model(batch_x)

      # Compute the loss and back-propagate to obtain the gradients
      loss = criterion(output, batch_y)
      loss.backward()

      # Update the parameters of the model with a gradient step
      optimizer.step()

    # Test the quality on the test set (eval mode disables dropout)
    model.eval()
    weighted_accuracy = 0.0
    num_samples = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
      for batch_x, batch_y in dataset_test:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        # Evaluate the network (forward pass)
        prediction = model(batch_x)
        batch_len = batch_y.numel()
        weighted_accuracy = weighted_accuracy + accuracy(prediction, batch_y) * batch_len
        num_samples += batch_len

    test_accuracy = float(weighted_accuracy) / num_samples if num_samples else float("nan")
    print("Epoch {} | Test accuracy: {:.5f}".format(epoch, test_accuracy))

In [None]:
num_epochs = 10
learning_rate = 1e-2
# NOTE(review): `batch_size` is not read by the DataLoader cell visible in this notebook,
# which passes batch_size=100 directly -- confirm which value is intended.
batch_size = 1000

# Use a GPU when one is available (it should be on Colab). Otherwise print a notice and
# fall back to the CPU instead of aborting, so the notebook still runs end to end.
if not torch.cuda.is_available():
  print("Things will go much quicker if you enable a GPU in Colab under 'Runtime / Change Runtime Type'")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Multi-class classification loss. CrossEntropyLoss applies LogSoftmax followed by the
# negative log-likelihood loss, so it accepts raw scores (logits) from the model.
criterion = torch.nn.CrossEntropyLoss()


### A convolutional network

We use the tools you built before to train this simple convolutional network:
* first convolutional layer: 5x5 convolutions and 32 feature maps,
* ReLU activation,
* max-pooling: 2x2 (with a stride of 2x2),
* second convolutional layer: 5x5 convolutions and 64 feature maps,
* ReLU activation,
* max-pooling: 2x2 (with a stride of 2x2),
* first fully-connected layer: 512 hidden units,
* dropout probability during training: 50 %,
* ReLU activation,
* output layer: one unit per class (10 for MNIST digits), with log-softmax activation.

In [None]:
class CNN_Model(torch.nn.Module):
  """
  Small LeNet-style CNN for image classification.

  Layout: two stages of (5x5 convolution -> ReLU -> 2x2 max-pool with stride 2) with
  32 and 64 feature maps, then a 512-unit fully-connected layer with ReLU and 50 %
  dropout, then an output layer with log-softmax.

  The defaults match MNIST (1 channel, 28x28 pixels, 10 classes).
  """

  def __init__(self, in_channels=1, image_size=28, num_classes=10):
    """
    @param in_channels: number of channels of the input images
    @param image_size: height (= width) of the input images; square inputs are assumed
    @param num_classes: number of output classes
    """
    super().__init__()

    final_channels = 64
    # 'same' padding keeps the spatial size through each convolution; each 2x2 / stride-2
    # max-pool then halves it (rounding down), so two pools leave image_size // 2 // 2.
    pooled_size = image_size // 2 // 2
    self.fc_in_size = pooled_size * pooled_size * final_channels

    self.conv1 = torch.nn.Conv2d(in_channels, 32, 5, padding='same')
    self.conv2 = torch.nn.Conv2d(32, final_channels, 5, padding='same')

    self.fc1 = torch.nn.Linear(self.fc_in_size, 512)
    self.fc2 = torch.nn.Linear(512, num_classes)

  def forward(self, x):
    """
    @param x: float tensor of shape (batch size, in_channels, image_size, image_size)
    @return: float tensor of shape (batch size, num_classes) holding log-probabilities
    """
    relu = torch.nn.functional.relu
    max_pool2d = torch.nn.functional.max_pool2d

    x = max_pool2d(relu(self.conv1(x)), kernel_size=2, stride=2)
    x = max_pool2d(relu(self.conv2(x)), kernel_size=2, stride=2)
    # Keep the batch dimension fixed so a feature-size mismatch fails loudly in fc1
    # instead of being silently absorbed into the batch dimension.
    x = x.view(x.size(0), -1)
    x = relu(self.fc1(x))
    x = torch.nn.functional.dropout(x, p=0.5, training=self.training)
    x = self.fc2(x)
    # Log-probabilities. Applying log-softmax again (as CrossEntropyLoss does internally)
    # leaves them unchanged, so this output works with both CrossEntropyLoss and NLLLoss.
    return torch.nn.functional.log_softmax(x, dim=1)

In [None]:
# Build the network, move it to the selected device and optimise it with Adam
# (weight_decay adds an L2 penalty on the parameters).
model_lenet = CNN_Model()
model_lenet = model_lenet.to(device)
optimizer = torch.optim.Adam(
  model_lenet.parameters(),
  lr=learning_rate,
  weight_decay=5e-4,
)

# Optional exponential learning-rate decay, currently disabled:
# decayRate = 0.95
# my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decayRate)

train(
  model=model_lenet,
  criterion=criterion,
  dataset_train=dataset_train,
  dataset_test=dataset_test,
  optimizer=optimizer,
  num_epochs=num_epochs,
)

# TODO: record the expected test accuracy (roughly X%) once measured.
# TODO: record the approximate training time (around x minutes) once measured.