<a href="https://colab.research.google.com/github/Oteranga/VAE-valid-input/blob/main/MNIST/MNIST-models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [79]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import os


In [126]:
# Define relevant variables for the ML task
num_classes = 10
learning_rate = 0.001
num_epochs = 10
batch_size = 64

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### MNIST Dataset

In [133]:
already_downloaded = os.path.isdir('/content/data/MNIST/raw')
download = already_downloaded == False

#Loading the dataset and preprocessing
train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))
                                                  ]),
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1325,), std = (0.3105,))
                                                  ]),
                                          download = True)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

## Main

### Training method

In [84]:
def trainModel(model, train_loader, num_epochs, learning_rate, cost):
    optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate)
    total_step = len(train_loader)

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):  
            images = images.to(device)
            labels = labels.to(device)

            #Forward pass
            outputs = model(images)
            loss = cost(outputs, labels)
                
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
                    
            if (i+1) % (total_step//2) == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                            .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

### Testing Method

In [5]:
def evaluateModel(model, test_loader):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


## Model 1: LeNet1

In [100]:
class LeNet1(nn.Module):
    def __init__(self, num_classes):
        super(LeNet1, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size = 5, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.block2 = nn.Sequential(
            nn.Conv2d(4, 12, kernel_size = 5, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.dense = nn.Linear(768, num_classes)

    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)
        
        out = out.reshape(out.size(0), -1) #Flatten
        out = self.dense(out)
        return out

### Info

In [131]:
model1 = LeNet1(num_classes).to(device)
x = torch.randn(64,1,32,32).to(device)
output = model1(x)

#print(model1)
summary(model1, (1,32,32))
print("output.shape : ",output.shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 32, 32]             104
              ReLU-2            [-1, 4, 32, 32]               0
         MaxPool2d-3            [-1, 4, 16, 16]               0
            Conv2d-4           [-1, 12, 16, 16]           1,212
              ReLU-5           [-1, 12, 16, 16]               0
         MaxPool2d-6             [-1, 12, 8, 8]               0
            Linear-7                   [-1, 10]           7,690
Total params: 9,006
Trainable params: 9,006
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.12
Params size (MB): 0.03
Estimated Total Size (MB): 0.16
----------------------------------------------------------------
output.shape :  torch.Size([64, 10])


### Training

In [134]:
#Training
cost = nn.CrossEntropyLoss()

trainModel(model1, train_loader, num_epochs, learning_rate, cost)

Epoch [1/10], Step [469/938], Loss: 2.2571
Epoch [1/10], Step [938/938], Loss: 2.2687
Epoch [2/10], Step [469/938], Loss: 2.1904
Epoch [2/10], Step [938/938], Loss: 2.0965
Epoch [3/10], Step [469/938], Loss: 2.0621
Epoch [3/10], Step [938/938], Loss: 1.9139
Epoch [4/10], Step [469/938], Loss: 1.8862
Epoch [4/10], Step [938/938], Loss: 1.8336
Epoch [5/10], Step [469/938], Loss: 1.5671
Epoch [5/10], Step [938/938], Loss: 1.4023
Epoch [6/10], Step [469/938], Loss: 1.3950
Epoch [6/10], Step [938/938], Loss: 1.4551
Epoch [7/10], Step [469/938], Loss: 1.0663
Epoch [7/10], Step [938/938], Loss: 0.8699
Epoch [8/10], Step [469/938], Loss: 1.0166
Epoch [8/10], Step [938/938], Loss: 0.7602
Epoch [9/10], Step [469/938], Loss: 0.8093
Epoch [9/10], Step [938/938], Loss: 0.6372
Epoch [10/10], Step [469/938], Loss: 0.6974
Epoch [10/10], Step [938/938], Loss: 0.5074


### Testing

In [136]:
evaluateModel(model1, test_loader)

Accuracy of the network on the 10000 test images: 82.79 %


### Save

In [21]:
model1_scripted = torch.jit.script(model1) # Export to TorchScript
model1_scripted.save('/content/Models/model1.pt') # Save

## Modelo 2: LeNet4

In [135]:
class LeNet4(nn.Module):
    def __init__(self, num_classes):
        super(LeNet4, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size = 5, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.block2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size = 5, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.dense = nn.Sequential(
            nn.Linear(1024, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes)
        )

    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)
        
        out = out.reshape(out.size(0), -1) #Flatten
        out = self.dense(out)
        return out

### Info

In [137]:
model2 = LeNet4(num_classes).to(device)
x2 = torch.randn(64,1,32,32).to(device)
output2 = model2(x)

#print(model2)
summary(model2, (1,32,32))
print("output.shape : ",output2.shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 32, 32]             156
              ReLU-2            [-1, 6, 32, 32]               0
         MaxPool2d-3            [-1, 6, 16, 16]               0
            Conv2d-4           [-1, 16, 16, 16]           2,416
              ReLU-5           [-1, 16, 16, 16]               0
         MaxPool2d-6             [-1, 16, 8, 8]               0
            Linear-7                   [-1, 84]          86,100
              ReLU-8                   [-1, 84]               0
            Linear-9                   [-1, 10]             850
Total params: 89,522
Trainable params: 89,522
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.18
Params size (MB): 0.34
Estimated Total Size (MB): 0.52
---------------------------------------------

### Training

In [138]:
#Training
cost2 = nn.CrossEntropyLoss()
trainModel(model2, train_loader, num_epochs, learning_rate, cost2)

Epoch [1/10], Step [469/938], Loss: 2.2892
Epoch [1/10], Step [938/938], Loss: 2.2316
Epoch [2/10], Step [469/938], Loss: 2.2384
Epoch [2/10], Step [938/938], Loss: 2.1648
Epoch [3/10], Step [469/938], Loss: 2.0878
Epoch [3/10], Step [938/938], Loss: 1.9468
Epoch [4/10], Step [469/938], Loss: 1.7474
Epoch [4/10], Step [938/938], Loss: 1.6890
Epoch [5/10], Step [469/938], Loss: 1.3032
Epoch [5/10], Step [938/938], Loss: 1.2832
Epoch [6/10], Step [469/938], Loss: 1.0309
Epoch [6/10], Step [938/938], Loss: 0.8622
Epoch [7/10], Step [469/938], Loss: 0.8975
Epoch [7/10], Step [938/938], Loss: 0.8561
Epoch [8/10], Step [469/938], Loss: 0.7380
Epoch [8/10], Step [938/938], Loss: 0.4932
Epoch [9/10], Step [469/938], Loss: 0.4870
Epoch [9/10], Step [938/938], Loss: 0.3934
Epoch [10/10], Step [469/938], Loss: 0.4804
Epoch [10/10], Step [938/938], Loss: 0.4292


### Testing

In [139]:
evaluateModel(model2, test_loader)

Accuracy of the network on the 10000 test images: 89.03 %


### Save

In [140]:
model2_scripted = torch.jit.script(model2) # Export to TorchScript
model2_scripted.save('/content/Models/model2.pt') # Save

## Model 3: LeNet5

In [141]:
class LeNet5(nn.Module):
    def __init__(self, num_classes):
        super(LeNet5, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size = 5, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.block2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size = 5, padding = 'same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.dense = nn.Sequential(
            nn.Linear(1024, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes)
        )

    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)

        out = out.reshape(out.size(0), -1) #Flatten
        out = self.dense(out)
        return out

### Info

In [152]:
model3 = LeNet5(num_classes).to(device)
output3 = model3(x)

#print(model3)
summary(model3, (1,32,32))
print("output.shape : ",output3.shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 32, 32]             156
              ReLU-2            [-1, 6, 32, 32]               0
         MaxPool2d-3            [-1, 6, 16, 16]               0
            Conv2d-4           [-1, 16, 16, 16]           2,416
              ReLU-5           [-1, 16, 16, 16]               0
         MaxPool2d-6             [-1, 16, 8, 8]               0
            Linear-7                  [-1, 120]         123,000
              ReLU-8                  [-1, 120]               0
            Linear-9                   [-1, 84]          10,164
             ReLU-10                   [-1, 84]               0
           Linear-11                   [-1, 10]             850
Total params: 136,586
Trainable params: 136,586
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/

### Teaining

In [143]:
#Training
cost3 = nn.CrossEntropyLoss()
trainModel(model3, train_loader, num_epochs, learning_rate, cost3)

Epoch [1/10], Step [469/938], Loss: 2.2988
Epoch [1/10], Step [938/938], Loss: 2.2736
Epoch [2/10], Step [469/938], Loss: 2.2870
Epoch [2/10], Step [938/938], Loss: 2.2750
Epoch [3/10], Step [469/938], Loss: 2.2670
Epoch [3/10], Step [938/938], Loss: 2.2576
Epoch [4/10], Step [469/938], Loss: 2.2230
Epoch [4/10], Step [938/938], Loss: 2.1866
Epoch [5/10], Step [469/938], Loss: 2.1223
Epoch [5/10], Step [938/938], Loss: 2.0400
Epoch [6/10], Step [469/938], Loss: 1.9428
Epoch [6/10], Step [938/938], Loss: 1.6458
Epoch [7/10], Step [469/938], Loss: 1.5152
Epoch [7/10], Step [938/938], Loss: 1.3499
Epoch [8/10], Step [469/938], Loss: 1.1509
Epoch [8/10], Step [938/938], Loss: 0.9618
Epoch [9/10], Step [469/938], Loss: 1.0151
Epoch [9/10], Step [938/938], Loss: 0.9745
Epoch [10/10], Step [469/938], Loss: 0.7184
Epoch [10/10], Step [938/938], Loss: 0.7064


### Testing

In [144]:
evaluateModel(model3, test_loader)

Accuracy of the network on the 10000 test images: 84.2 %


### Save

In [32]:
model3_scripted = torch.jit.script(model3) # Export to TorchScript
model3_scripted.save('/content/Models/model3.pt') # Save

## Custom Model

In [145]:
class CustomNet(nn.Module):
    def __init__(self, num_classes):
        super(CustomNet, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 32,  kernel_size = 3, padding = 'valid'),
            nn.ReLU(),
            nn.Conv2d(32, 32,  kernel_size = 3, padding = 'valid'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.block2 = nn.Sequential(
            nn.Conv2d(32, 64,  kernel_size = 3, padding = 'valid'),
            nn.ReLU(),
            nn.Conv2d(64, 64,  kernel_size = 3, padding = 'valid'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.dense = nn.Sequential(
            nn.Linear(1600, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, num_classes)
        )

    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)
        
        out = out.reshape(out.size(0), -1) #Flatten
        out = self.dense(out)
        return out

### Info

In [155]:
model4 = CustomNet(num_classes).to(device)
x = torch.randn(64,1,32,32).to(device)
output4 = model4(x)

summary(model4, (1,32,32))
#print(model4)
print("output.shape : ", output4.shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 30, 30]             320
              ReLU-2           [-1, 32, 30, 30]               0
            Conv2d-3           [-1, 32, 28, 28]           9,248
              ReLU-4           [-1, 32, 28, 28]               0
         MaxPool2d-5           [-1, 32, 14, 14]               0
            Conv2d-6           [-1, 64, 12, 12]          18,496
              ReLU-7           [-1, 64, 12, 12]               0
            Conv2d-8           [-1, 64, 10, 10]          36,928
              ReLU-9           [-1, 64, 10, 10]               0
        MaxPool2d-10             [-1, 64, 5, 5]               0
           Linear-11                  [-1, 200]         320,200
             ReLU-12                  [-1, 200]               0
           Linear-13                  [-1, 200]          40,200
             ReLU-14                  [

In [None]:
model4.

### Training

In [149]:
#Training
cost = nn.CrossEntropyLoss()

trainModel(model4, train_loader, num_epochs, learning_rate, cost)

Epoch [1/9], Step [469/938], Loss: 0.2739
Epoch [1/9], Step [938/938], Loss: 0.5004
Epoch [2/9], Step [469/938], Loss: 0.1327
Epoch [2/9], Step [938/938], Loss: 0.2999
Epoch [3/9], Step [469/938], Loss: 0.2895
Epoch [3/9], Step [938/938], Loss: 0.2537
Epoch [4/9], Step [469/938], Loss: 0.2480
Epoch [4/9], Step [938/938], Loss: 0.1805
Epoch [5/9], Step [469/938], Loss: 0.3096
Epoch [5/9], Step [938/938], Loss: 0.5341
Epoch [6/9], Step [469/938], Loss: 0.3130
Epoch [6/9], Step [938/938], Loss: 0.6519
Epoch [7/9], Step [469/938], Loss: 0.4142
Epoch [7/9], Step [938/938], Loss: 0.2463
Epoch [8/9], Step [469/938], Loss: 0.3347
Epoch [8/9], Step [938/938], Loss: 0.2070
Epoch [9/9], Step [469/938], Loss: 0.0769
Epoch [9/9], Step [938/938], Loss: 0.0839


### Testing

In [154]:
evaluateModel(model4, test_loader)

Accuracy of the network on the 10000 test images: 94.21 %


### Save

In [151]:
model4_scripted = torch.jit.script(model4) # Export to TorchScript
model4_scripted.save('/content/Models/model4.pt') # Save