In [1]:
import torch
import torchvision
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from time import time

# set seed for the random number generations (to be able to replicate the results)
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x28f0b8d1cd0>

In [2]:
# loading and transforming the data

# transform
transform = transforms.Compose([transforms.ToTensor(), #transforms image into colour matrices with values ranging from 0 -1 (becomes a Torch Tensor)
                              transforms.Normalize((0.5,), (0.5,)), # normalizes tensor
                              ])

# loading the data
trainset = datasets.EMNIST('\Datasets\MNIST\Trainset', download=True, split='letters', train=True, transform=transform)
valset = datasets.EMNIST('\Datasets\MNIST\Valset', download=True, split='letters', train=False, transform=transform)

# creating the batches 
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) 
test_loader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True) 

In [3]:
# defining the neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1,10,3) # for convolution: imput size has to match number of channels for the image
        self.conv2 = nn.Conv2d(10,20,3)
        self.conv3 = nn.Conv2d(20,30,3) # for convolution: imput size has to match number of channels for the image
        self.conv4 = nn.Conv2d(30,40,3)
        self.conv5 = nn.Conv2d(40,50,3) 
        self.lin1 = nn.Linear(200, 27) # 5760 is calculated by: print(x.view(64, -1).shape)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(F.max_pool2d(F.dropout(self.conv2(x), training=self.training),2)) # defined in forward part, because hear during training can be specified (not done with test)
        x = F.relu(self.conv3(x))
        x = F.relu(F.max_pool2d(F.dropout(self.conv4(x), training=self.training),2))
        x = F.relu(self.conv5(x))
        x = x.view(64, -1) # transforms to 2D. Returns a new tensor with the same data as the self tensor but of a different shape.Keep 64 batches, calculate / infer other dimension
        x = self.lin1(x) # requires 2D input
        return F.log_softmax(x)
        
model = Net()
print(model)

Net(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(20, 30, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(30, 40, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(40, 50, kernel_size=(3, 3), stride=(1, 1))
  (lin1): Linear(in_features=200, out_features=27, bias=True)
)


### Notes
- network has relatively small input (28x28), therefore the network is limited:
    - for multiple conv layers, pooling cannot be added to each layer (otherwise dimensions get too small)

In [4]:
# # Print shape of images at different points in the model
# print(images.shape)

# model0 = nn.Dropout2d()

# model = nn.Conv2d(1,10,3)
# output_pooled = model(images)
# print(output_pooled.shape)

# model2 = nn.Conv2d(10,20,3)
# output2_pooled = F.max_pool2d(model2(output_pooled),2)
# print(output2_pooled.shape)

# model3 = nn.Conv2d(20,30,3)
# output3_pooled = model3(output2_pooled)
# print(output3_pooled.shape)

# model4 = nn.Conv2d(30,40,3)
# output4_pooled =F.max_pool2d(model4(output3_pooled),2)
# print(output4_pooled.shape)

# model5 = nn.Conv2d(40,50,3)
# output5_pooled = model5(output4_pooled)
# print(output5_pooled.shape)

# print(output5_pooled.view(64, -1).shape)

In [5]:
# defining the optimization method
learning_rate = 0.3
momentum = 0.9
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                      momentum=momentum)

In [6]:
# setting the image to the correct size
images, labels = next(iter(train_loader))
#images = images.view(images.shape[0], -1)
print(images.shape)
# images = images.transpose(1,3)

# criterion = nn.NLLLoss()
# logps = model(images) #log probabilities
# loss = criterion(logps, labels) #calculate the NLL loss

torch.Size([64, 1, 28, 28])


In [7]:
# training the neural network
# core training process (NN iterates over the training set and updates the weights))
# time0 = time()
epochs = 5
def train(epochs):
    #for e in range(epochs):
#     time0 = time()
        running_loss = 0
        correct = 0
        for images, labels in train_loader:
            # Training pass
            optimizer.zero_grad() # sets all optimized gradients to zero

            output = model(images)
            
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(labels.data.view_as(pred)).sum()
            
            #loss 
            loss = F.nll_loss(output, labels)

            #This is where the model learns by backpropagating
            loss.backward()

            #And optimizes its weights here
            optimizer.step()

            running_loss += loss.item()
        else: 
            print("Epoch {} - Training loss: {:.4f} Train Accuracy: ({:.2f}%)".format(
                i+1, running_loss/len(train_loader),
                100. * correct / len(train_loader.dataset)))
#     print("\nTraining Time (in minutes) =",(time()-time0)/60)

In [8]:
test_losses = []
def test():
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for images, labels in test_loader:
      output = model(images)
      test_loss += F.nll_loss(output, labels, size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(labels.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print("Test los: {:.4f}, Accuracy: {}/{} ({:.2f}%)".format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))
# test()

In [9]:
time0 = time()
test()
for i in range(epochs):
  train(i)
  test()
print("\nTraining Time (in minutes) =",(time()-time0)/60)

  return F.log_softmax(x)


Test los: 3.2944, Accuracy: 800/20800 (3.85%)
Epoch 1 - Training loss: 3.2707 Train Accuracy: (3.83%)
Test los: 3.2676, Accuracy: 800/20800 (3.85%)
Epoch 2 - Training loss: 3.2692 Train Accuracy: (3.87%)
Test los: 3.2686, Accuracy: 800/20800 (3.85%)
Epoch 3 - Training loss: 3.2697 Train Accuracy: (3.98%)
Test los: 3.2676, Accuracy: 800/20800 (3.85%)
Epoch 4 - Training loss: 3.2693 Train Accuracy: (3.88%)
Test los: 3.2794, Accuracy: 800/20800 (3.85%)
Epoch 5 - Training loss: 3.2700 Train Accuracy: (3.82%)
Test los: 3.2643, Accuracy: 800/20800 (3.85%)

Training Time (in minutes) = 102.68746958971023


In [10]:
# save the model
torch.save(model, './05e_model.pt') 
#  save the trained model