In [14]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib.pyplot import *
from torch.optim import Adam
from tensorboardX import SummaryWriter

In [15]:
# Load the MNIST train and test splits.
# Every image is turned into a tensor and then standardized as
# (pixel - MNIST_MEAN) / MNIST_STD; this centres the values around zero,
# it does not clamp them to a fixed interval such as [-1, 1].
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)
DATA_ROOT = './mnist'
BATCH_SIZE = 64
NUM_WORKERS = 2

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# Training split (downloaded into DATA_ROOT when missing), served in
# reshuffled mini-batches of BATCH_SIZE images.
trainset = torchvision.datasets.MNIST(
    root=DATA_ROOT, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# Test split, served in a fixed order with the same batch size.
testset = torchvision.datasets.MNIST(
    root=DATA_ROOT, train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Class labels are simply the digits 0..9.
classes = [digit for digit in range(10)]

In [16]:
# CNN definition
class Net(nn.Module):
    """Small convolutional classifier preceded by a spatial transformer.

    The spatial transformer (``localization`` followed by ``fc_loc``)
    predicts one 2x3 affine matrix per image and resamples the input with
    it. The resampled image then passes through two conv/pool stages and
    three fully connected layers that output 10 class scores.

    The hard-coded flatten sizes (``16 * 5 * 5`` and ``10 * 3 * 3``) are the
    feature-map sizes this layer stack produces for 1x28x28 inputs.
    """

    def __init__(self):
        """Create the layers and start the transformer at the identity map."""
        super(Net, self).__init__()

        # Classifier trunk. Spatial size for a 28x28 input (stride 1, no
        # padding in the convolutions):
        #   28 -> conv 3x3 -> 26 -> pool -> 13 -> conv 3x3 -> 11 -> pool -> 5
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

        # Fully connected head: 16*5*5 features -> 120 -> 84 -> 10 scores.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

        # Spatial transformer localization network. Spatial size for 28x28:
        #   28 -> conv 7x7 -> 22 -> pool -> 11 -> conv 5x5 -> 7 -> pool -> 3
        localization_layers = [
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=7),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=8, out_channels=10, kernel_size=5),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
        ]
        self.localization = nn.Sequential(*localization_layers)

        # Regressor mapping the localization features to the six entries of
        # the 2x3 affine matrix.
        regressor_layers = [
            nn.Linear(in_features=10 * 3 * 3, out_features=32),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=32, out_features=3 * 2),
        ]
        self.fc_loc = nn.Sequential(*regressor_layers)

        # With zero weights and this bias the regressor outputs
        # [[1, 0, 0], [0, 1, 0]] for every input, i.e. the identity transform.
        identity_theta = torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)
        last_regressor = self.fc_loc[2]
        last_regressor.weight.data.zero_()
        last_regressor.bias.data.copy_(identity_theta)

    def stn(self, x):
        """Resample ``x`` with the affine transform predicted from ``x``.

        Returns a tensor with the same size as ``x``.
        """
        features = self.localization(x)
        flat_dim = features.size(1) * features.size(2) * features.size(3)
        features = features.view(-1, flat_dim)

        # One row of six numbers per image, reshaped into a 2x3 matrix.
        theta = self.fc_loc(features).view(-1, 2, 3)

        # Build the sampling grid for the predicted transform, then sample
        # the input image on that grid.
        sampling_grid = F.affine_grid(theta, x.size())
        return F.grid_sample(x, sampling_grid)

    def forward(self, x):
        """Return the 10 raw class scores for each image in ``x``."""
        aligned = self.stn(x)

        stage1 = self.pool(F.relu(self.conv1(aligned)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))

        # Flatten each feature map into one row for the linear layers.
        flat = stage2.view(-1, 16 * 5 * 5)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


net = Net()

In [17]:
# Training objective and optimizer.
# Loss: cross-entropy, computed directly from the network's raw class scores.
criterion = nn.CrossEntropyLoss()
# Optimizer: SGD with momentum over all parameters of `net`.
# Adam(net.parameters()) is the alternative that was left disabled here.
optimizer = optim.SGD(params=net.parameters(), lr=1e-3, momentum=0.9)

In [18]:
# Train the network for `epochs` full passes over the training set.
epochs = 16
TRAIN_LOG_EVERY = 100  # number of mini-batches averaged into each logged point

for epoch in range(epochs):  # loop over the dataset multiple times
    # Each epoch writes to its own TensorBoard run directory, so the step
    # axis (the mini-batch index i) restarts at 0 for every epoch.
    writer = SummaryWriter(logdir='TransfNet_MNIST_batch64/training_epoch%d'%(epoch))
    try:
        running_loss = 0.0   # summed loss over the current logging window
        total = 0            # samples seen in the current logging window
        correct = 0          # correct predictions in the current logging window
        total_final = 0      # samples seen in the whole epoch
        correct_final = 0    # correct predictions in the whole epoch

        for i, data in enumerate(trainloader, 0):
            # data is a pair [inputs, labels]
            inputs, labels = data

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            # forward + backward + parameter update
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # The predicted class is the index of the largest score per row.
            _, predicted = torch.max(outputs.data, 1)
            n_samples = labels.size(0)
            n_correct = (predicted == labels).sum().item()

            total += n_samples
            correct += n_correct
            total_final += n_samples
            correct_final += n_correct
            running_loss += loss.item()

            if i % TRAIN_LOG_EVERY == TRAIN_LOG_EVERY - 1:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / TRAIN_LOG_EVERY))
                writer.add_scalar('loss_train', running_loss / TRAIN_LOG_EVERY, i)
                writer.add_scalar('accuracy_train', 100*correct/total, i)
                # Start a new window so loss and accuracy cover the same
                # mini-batches; the *_final counters keep the epoch totals.
                running_loss = 0.0
                total = 0
                correct = 0
    finally:
        # Close this epoch's writer before the next one is opened so every
        # event file is flushed, not only the last epoch's.
        writer.close()

print('Finished Training')

[1,   100] loss: 2.290
[1,   200] loss: 2.218
[1,   300] loss: 1.815
[1,   400] loss: 0.805
[1,   500] loss: 0.483
[1,   600] loss: 0.374
[1,   700] loss: 0.305
[1,   800] loss: 0.269
[1,   900] loss: 0.242
[2,   100] loss: 0.202
[2,   200] loss: 0.183
[2,   300] loss: 0.165
[2,   400] loss: 0.170
[2,   500] loss: 0.156
[2,   600] loss: 0.153
[2,   700] loss: 0.130
[2,   800] loss: 0.150
[2,   900] loss: 0.135
[3,   100] loss: 0.109
[3,   200] loss: 0.113
[3,   300] loss: 0.117
[3,   400] loss: 0.113
[3,   500] loss: 0.108
[3,   600] loss: 0.099
[3,   700] loss: 0.103
[3,   800] loss: 0.091
[3,   900] loss: 0.100
[4,   100] loss: 0.091
[4,   200] loss: 0.088
[4,   300] loss: 0.088
[4,   400] loss: 0.083
[4,   500] loss: 0.078
[4,   600] loss: 0.084
[4,   700] loss: 0.081
[4,   800] loss: 0.081
[4,   900] loss: 0.081
[5,   100] loss: 0.064
[5,   200] loss: 0.068
[5,   300] loss: 0.069
[5,   400] loss: 0.071
[5,   500] loss: 0.075
[5,   600] loss: 0.072
[5,   700] loss: 0.073
[5,   800] 

In [19]:
# Persist the trained weights (the state dict only, not the whole module).
PATH = './MNIST_transf.net_16epochs.pth'
torch.save(obj=net.state_dict(), f=PATH)

In [20]:
# Evaluate the saved checkpoint on the test set.
PATH = './MNIST_transf.net_16epochs.pth'
# Rebuild the network and load the weights saved after training.
net = Net()
net.load_state_dict(torch.load(PATH))
net.eval()  # evaluation mode; the optimizer is deliberately not used below

TEST_LOG_EVERY = 500  # number of mini-batches averaged into each logged point
num_batches = len(testloader)

writer = SummaryWriter(logdir='TransfNet_MNIST_batch64/test')
correct = 0            # correct predictions in the current logging window
total = 0              # samples seen in the current logging window
total_final = 0        # samples seen over the whole test set
correct_final = 0      # correct predictions over the whole test set
test_loss = 0.0        # summed loss over the current logging window
batches_in_window = 0

try:
    with torch.no_grad():  # no gradients are needed for evaluation
        for i, data in enumerate(testloader, 0):
            images, labels = data

            outputs = net(images)
            loss = criterion(outputs, labels)

            # The predicted class is the index of the largest score per row.
            _, predicted = torch.max(outputs.data, 1)
            n_samples = labels.size(0)
            n_correct = (predicted == labels).sum().item()

            total += n_samples
            correct += n_correct
            total_final += n_samples
            correct_final += n_correct
            test_loss += loss.item()
            batches_in_window += 1

            # Log at every full window and once more after the last batch:
            # with fewer than TEST_LOG_EVERY batches in the loader a
            # full-window-only check would never report anything.
            window_full = i % TEST_LOG_EVERY == TEST_LOG_EVERY - 1
            last_batch = i == num_batches - 1
            if window_full or last_batch:
                print('[%5d] loss: %.3f, accuracy: %.3f' %( i + 1, (test_loss / batches_in_window), (100*correct/total) ))

                writer.add_scalar('accuracy_test', 100*correct/total, i)
                writer.add_scalar('loss_test', test_loss / batches_in_window, i)
                test_loss = 0.0
                correct = 0
                total = 0
                batches_in_window = 0
finally:
    writer.close()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * float(correct_final) / float(total_final)))

Accuracy of the network on the 10000 test images: 98 %
