In [1]:
import torch
import torchvision
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from time import time

# set seed for the random number generations (to be able to replicate the results)
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x19aec075cd0>

In [2]:
# loading and transforming the data

# transform
transform = transforms.Compose([transforms.ToTensor(), #transforms image into colour matrices with values ranging from 0 -1 (becomes a Torch Tensor)
                              transforms.Normalize((0.5,), (0.5,)), # normalizes tensor
                              ])

# loading the data
trainset = datasets.EMNIST('\Datasets\MNIST\Trainset', download=True, split='letters', train=True, transform=transform)
valset = datasets.EMNIST('\Datasets\MNIST\Valset', download=True, split='letters', train=False, transform=transform)

# creating the batches 
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) 
test_loader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True) 

In [3]:
# defining the neural network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1,5,3) # for convolution: imput size has to match number of channels for the image
        self.conv2 = nn.Conv2d(5,10,3)
        self.lin1 = nn.Linear(250, 27) # 5760 is calculated by: print(x.view(64, -1).shape)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(F.dropout(self.conv2(x), training=self.training), 2)) # defined in forward part, because hear during training can be specified (not done with test)
        x = x.view(64, -1) # transforms to 2D. Returns a new tensor with the same data as the self tensor but of a different shape.Keep 64 batches, calculate / infer other dimension
        x = F.relu(self.lin1(x)) # requires 2D input
        return F.log_softmax(x)
        
model = Net()
print(model)

Net(
  (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
  (lin1): Linear(in_features=250, out_features=27, bias=True)
)


In [4]:
# defining the optimization method
learning_rate = 0.003
momentum = 0.9
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                      momentum=momentum)

In [5]:
# setting the image to the correct size
images, labels = next(iter(train_loader))
#images = images.view(images.shape[0], -1)
print(images.shape)
# images = images.transpose(1,3)

# criterion = nn.NLLLoss()
# logps = model(images) #log probabilities
# loss = criterion(logps, labels) #calculate the NLL loss

torch.Size([64, 1, 28, 28])


In [6]:
# # core training process (NN iterates over the training set and updates the weights))
# time0 = time()
# epochs = 15
# for e in range(epochs):
#     running_loss = 0
#     for images, labels in train_loader:
#         # Training pass
#         optimizer.zero_grad() # sets all optimized gradients to zero
        
#         output = model(images)
#         #loss = criterion(output, labels)
#         loss = F.nll_loss(output, labels)
        
#         #This is where the model learns by backpropagating
#         loss.backward()
        
#         #And optimizes its weights here
#         optimizer.step()
        
#         running_loss += loss.item()
#     else:
#         print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_loader)))
# print("\nTraining Time (in minutes) =",(time()-time0)/60)

  return F.log_softmax(x)


Epoch 0 - Training loss: 1.3590104275789017
Epoch 1 - Training loss: 0.5644566162503682
Epoch 2 - Training loss: 0.49902214093086045
Epoch 3 - Training loss: 0.47444882661868365
Epoch 4 - Training loss: 0.45830427191196343
Epoch 5 - Training loss: 0.4462468508268014
Epoch 6 - Training loss: 0.43445130755503975
Epoch 7 - Training loss: 0.42441312162539896
Epoch 8 - Training loss: 0.4169119238012876
Epoch 9 - Training loss: 0.4095271796026291
Epoch 10 - Training loss: 0.40561118542001795
Epoch 11 - Training loss: 0.3991884300724054
Epoch 12 - Training loss: 0.39260446388752035
Epoch 13 - Training loss: 0.39244213270071227
Epoch 14 - Training loss: 0.3882146206345314

Training Time (in minutes) = 31.731059296925864


In [29]:
# correct_count, all_count = 0, 0
# for images,labels in test_loader:
#   for i in range(len(labels)):
#     images = images[i].view(1,1,28,28)
#     with torch.no_grad():
#         logps = model(images)

    
#     ps = torch.exp(logps)
#     probab = list(ps.numpy()[0])
#     pred_label = probab.index(max(probab))
#     true_label = labels.numpy()[i]
#     if(true_label == pred_label):
#       correct_count += 1
#     all_count += 1

# print("Number Of Images Tested =", all_count)
# print("\nModel Accuracy =", (correct_count/all_count))

In [6]:
# training the neural network
# core training process (NN iterates over the training set and updates the weights))
# time0 = time()
epochs = 15
def train(epochs):
    #for e in range(epochs):
#     time0 = time()
        running_loss = 0
        correct = 0
        for images, labels in train_loader:
            # Training pass
            optimizer.zero_grad() # sets all optimized gradients to zero

            output = model(images)
            
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(labels.data.view_as(pred)).sum()
            
            #loss 
            loss = F.nll_loss(output, labels)

            #This is where the model learns by backpropagating
            loss.backward()

            #And optimizes its weights here
            optimizer.step()

            running_loss += loss.item()
        else: 
            print("Epoch {} - Training loss: {:.4f} Train Accuracy: ({:.2f}%)".format(
                i+1, running_loss/len(train_loader),
                100. * correct / len(train_loader.dataset)))
#     print("\nTraining Time (in minutes) =",(time()-time0)/60)

In [7]:
test_losses = []
def test():
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for images, labels in test_loader:
      output = model(images)
      test_loss += F.nll_loss(output, labels, size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(labels.data.view_as(pred)).sum()
  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)
  print("Test los: {:.4f}, Accuracy: {}/{} ({:.2f}%)".format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))
# test()

In [8]:
time0 = time()
test()
for i in range(epochs):
  train(i)
  test()
print("\nTraining Time (in minutes) =",(time()-time0)/60)

  return F.log_softmax(x)


Test los: 3.3024, Accuracy: 131/20800 (0.63%)
Epoch 1 - Training loss: 1.6517 Train Accuracy: (56.64%)
Test los: 0.9994, Accuracy: 15091/20800 (72.55%)
Epoch 2 - Training loss: 0.9203 Train Accuracy: (74.11%)
Test los: 0.8541, Accuracy: 15818/20800 (76.05%)
Epoch 3 - Training loss: 0.8293 Train Accuracy: (76.31%)
Test los: 0.8059, Accuracy: 15989/20800 (76.87%)
Epoch 4 - Training loss: 0.7933 Train Accuracy: (77.20%)
Test los: 0.7784, Accuracy: 16133/20800 (77.56%)
Epoch 5 - Training loss: 0.7707 Train Accuracy: (77.74%)
Test los: 0.7720, Accuracy: 16111/20800 (77.46%)
Epoch 6 - Training loss: 0.7567 Train Accuracy: (78.12%)
Test los: 0.7531, Accuracy: 16254/20800 (78.14%)
Epoch 7 - Training loss: 0.7469 Train Accuracy: (78.29%)
Test los: 0.7571, Accuracy: 16255/20800 (78.15%)
Epoch 8 - Training loss: 0.7387 Train Accuracy: (78.48%)
Test los: 0.7582, Accuracy: 16267/20800 (78.21%)
Epoch 9 - Training loss: 0.7319 Train Accuracy: (78.62%)
Test los: 0.7519, Accuracy: 16264/20800 (78.19%)


In [12]:
# Print shape of images at different points in the model
print(images.shape)
model = nn.Conv2d(1,5,3)
model1b = nn.Dropout2d()
output_pooled = F.max_pool2d(model1b(model(images)),2)
print(output_pooled.shape)

model2 = nn.Conv2d(5,10,3)

output2_pooled = F.max_pool2d(model1b(model2(output_pooled)),2)
print(output2_pooled.shape)

print(output2_pooled.view(64, -1).shape)

torch.Size([64, 1, 28, 28])
torch.Size([64, 5, 13, 13])
torch.Size([64, 10, 5, 5])
torch.Size([64, 250])


In [9]:
# save the model
torch.save(model, './04_model.pt') 
#  save the trained model