In [33]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib.pyplot import *
from torch.optim import Adam
from tensorboardX import SummaryWriter

In [35]:
# Load the MNIST train and test splits.
# Every image is converted to a tensor and then standardized as
# (x - 0.1307) / 0.3081; note that this does NOT rescale pixels to [-1, 1].
# (0.1307 / 0.3081 look like the usual MNIST mean / std -- confirm if changed.)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split, served as shuffled mini-batches of 4 samples.
# NOTE(review): the standard MNIST training split has 60000 images, i.e.
# 15000 batches of 4 per epoch; the training log recorded in this notebook
# stops at step 900 per epoch, so it was presumably produced with a larger
# batch size -- confirm before comparing numbers.
trainset = torchvision.datasets.MNIST(
    root='./mnist', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split (10000 images), served in dataset order as mini-batches of 4
# samples, i.e. 2500 batches.
testset = torchvision.datasets.MNIST(
    root='./mnist', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Class labels are simply the digits 0..9.
classes = list(range(10))

In [36]:
# CNN definition
class Net(nn.Module):
    """Small convolutional classifier: 1x28x28 image in, 10 class logits out.

    Spatial sizes through the feature extractor (3x3 convolutions without
    padding, 2x2 max-pooling with stride 2):
        28 -> conv1 -> 26 -> pool -> 13 -> conv2 -> 11 -> pool -> 5
    which is why the first fully connected layer expects 16 * 5 * 5 inputs.
    """

    def __init__(self):
        """Create the layers; the connections are defined in `forward`."""
        super().__init__()
        # Convolutional part. Conv2d defaults: stride=1, padding=0, dilation=1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        # A single pooling module is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

        # Fully connected part; each Linear computes y = x @ A.T + b.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch `x`."""
        # conv -> ReLU -> pool, twice
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Collapse channel and spatial axes into one feature axis per sample.
        flat = features.view(-1, 16 * 5 * 5)
        # Two ReLU-activated hidden layers, then a linear output layer.
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample of `x`.

        This is the product of every dimension except the first (batch) one.
        """
        per_sample = 1
        for extent in x.size()[1:]:
            per_sample *= extent
        return per_sample


net = Net()

In [37]:
# Loss function and optimizer used for training.
# Loss: cross entropy computed from the network's raw class scores.
# Optimizer: SGD with momentum over all parameters of `net`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)

In [38]:
# Train the network for `epochs` passes over the training set.
# Every epoch logs to its own TensorBoard run directory
# (Net_MNIST/training_epoch<N>). The writer of an epoch is closed as soon as
# that epoch ends (even if it fails), so no event file is left open or
# unflushed; previously only the writer of the last epoch was closed.
epochs = 16
log_every = 300  # number of mini-batches between two log points

net.train()  # make training mode explicit before optimizing

for epoch in range(epochs):  # loop over the dataset multiple times
    writer = SummaryWriter(logdir='Net_MNIST/training_epoch%d' % (epoch))
    running_loss = 0.0  # summed loss since the last log point
    total = 0           # samples seen since the start of this epoch
    correct = 0         # correctly classified samples since the start of this epoch

    try:
        for i, data in enumerate(trainloader, 0):
            # data is a pair [inputs, labels]
            inputs, labels = data

            # Gradients accumulate by default: clear them before each step.
            optimizer.zero_grad()

            # forward + backward + parameter update
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predicted class = index of the largest score along dim 1.
            # detach() keeps this bookkeeping out of the autograd graph.
            _, predicted = torch.max(outputs.detach(), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            running_loss += loss.item()

            if i % log_every == log_every - 1:  # every `log_every` mini-batches
                mean_loss = running_loss / log_every
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, mean_loss))
                # The loss is averaged over the last window, whereas the
                # accuracy is cumulative since the beginning of the epoch.
                writer.add_scalar('loss_train', mean_loss, i)
                writer.add_scalar('accuracy_train', 100*correct/total, i)
                running_loss = 0.0
    finally:
        writer.close()


print('Finished Training')

[1,   300] loss: 2.139
[1,   600] loss: 0.598
[1,   900] loss: 0.347
[2,   300] loss: 0.263
[2,   600] loss: 0.225
[2,   900] loss: 0.191
[3,   300] loss: 0.166
[3,   600] loss: 0.151
[3,   900] loss: 0.135
[4,   300] loss: 0.125
[4,   600] loss: 0.120
[4,   900] loss: 0.114
[5,   300] loss: 0.098
[5,   600] loss: 0.100
[5,   900] loss: 0.100
[6,   300] loss: 0.091
[6,   600] loss: 0.085
[6,   900] loss: 0.087
[7,   300] loss: 0.076
[7,   600] loss: 0.079
[7,   900] loss: 0.075
[8,   300] loss: 0.067
[8,   600] loss: 0.068
[8,   900] loss: 0.070
[9,   300] loss: 0.061
[9,   600] loss: 0.067
[9,   900] loss: 0.064
[10,   300] loss: 0.057
[10,   600] loss: 0.060
[10,   900] loss: 0.057
[11,   300] loss: 0.051
[11,   600] loss: 0.053
[11,   900] loss: 0.055
[12,   300] loss: 0.049
[12,   600] loss: 0.047
[12,   900] loss: 0.048
[13,   300] loss: 0.043
[13,   600] loss: 0.046
[13,   900] loss: 0.051
[14,   300] loss: 0.044
[14,   600] loss: 0.042
[14,   900] loss: 0.044
[15,   300] loss: 0

In [39]:
# Persist the trained parameters (the state dict of `net`) to disk.
PATH = './MNIST_net_16epochs.pth'
torch.save(obj=net.state_dict(), f=PATH)

In [40]:
# Evaluate the saved network on the test set.
PATH = './MNIST_net_16epochs.pth'
# Rebuild the architecture and load the weights saved after training.
net = Net()
net.load_state_dict(torch.load(PATH))
net.eval()  # switch to inference mode for evaluation

log_every = 300  # number of mini-batches between two log points
writer = SummaryWriter(logdir='Net_MNIST/test')
correct = 0        # correct predictions in the current logging window
total = 0          # samples in the current logging window
total_final = 0    # samples over the whole test set
correct_final = 0  # correct predictions over the whole test set
test_loss = 0.0    # summed loss in the current logging window

try:
    # No gradients are needed for evaluation. The optimizer is deliberately
    # NOT touched here: it is still bound to the parameters of the previously
    # trained `net` object, and stepping it would be meaningless for
    # evaluation (and could keep modifying that old model through momentum).
    with torch.no_grad():
        for i, data in enumerate(testloader, 0):
            images, labels = data

            outputs = net(images)
            loss = criterion(outputs, labels)

            # Predicted class = index of the largest score along dim 1.
            _, predicted = torch.max(outputs, 1)
            batch_total = labels.size(0)
            batch_correct = (predicted == labels).sum().item()

            total += batch_total
            correct += batch_correct
            total_final += batch_total
            correct_final += batch_correct
            test_loss += loss.item()

            if i % log_every == log_every - 1:  # every `log_every` mini-batches
                window_loss = test_loss / log_every
                window_accuracy = 100*correct/total
                print('[%5d] loss: %.3f, accuracy: %.3f' % (i + 1, window_loss, window_accuracy))

                writer.add_scalar('accuracy_test', window_accuracy, i)
                writer.add_scalar('loss_test', window_loss, i)
                # Both statistics are per-window: reset them after logging.
                # Batches after the last full window are only reflected in
                # the overall accuracy printed below.
                test_loss = 0.0
                correct = 0
                total = 0
finally:
    writer.close()

# '%d' truncates the percentage to its integer part (e.g. 98.9 prints as 98).
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * float(correct_final) / float(total_final)))

Accuracy of the network on the 10000 test images: 98 %
