# Install packages and import dependencies

In [1]:
try:
    import torch
    from torch import nn 
    from torchvision import datasets, transforms 

except ImportError:
    %pip install torch torchvision torchviz
    import torch
    from torch import nn 
    from torchvision import datasets, transforms

In [2]:
import time

# define transformation of the dataset

In [3]:
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

# load the dataset

In [4]:
train_set = datasets.MNIST('data/PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform)
test_set = datasets.MNIST('data/PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)

In [5]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True)

# Model construction

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


Fully Connected Layers but no Convolutional layers

In [7]:
class ImageClassifier_NC(nn.Module):
    def __init__(self):
        super(ImageClassifier_NC, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.fc_layers(x)
        return x
    
classifier_NC = ImageClassifier_NC().to(device)

CLassifier with 1 convolutation layer

In [8]:
class ImageClassifier_1C(nn.Module):
    def __init__(self):
        super(ImageClassifier_1C, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3),
            nn.ReLU()
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 26 * 26, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x
    
classifier_1C = ImageClassifier_1C().to(device)

CLassifier with 2 convolutation layer

In [9]:
class ImageClassifier_2C(nn.Module):
    def __init__(self):
        super(ImageClassifier_2C, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(),
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 24 * 24, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

classifier_2C = ImageClassifier_2C().to(device)

CLassifier with 3 convolutation layer

In [10]:
class ImageClassifier_3C(nn.Module):
    def __init__(self):
        super(ImageClassifier_3C, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3),
            nn.ReLU()
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 22 * 22, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

classifier_3C = ImageClassifier_3C().to(device)

Training and testing Models

In [11]:
def train_test (model, loss_function, optimizer, train_loader, test_loader, epoch=10):
    print(f"Training \"{model.__class__.__name__}\": \n")
    total_time = time.time()
    for epoch in range(10):  # Train for 10 epochs
        epoch_time = time.time()
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()                   # Reset gradients
            outputs = model(images)                 # Forward pass
            loss = loss_function(outputs, labels)   # Compute loss
            loss.backward()                         # Backward pass
            optimizer.step()                        # Update weights

        print(f"Epoch:{epoch} loss is {loss.item()} in time {time.time() - epoch_time} s")

    print(f"Total Time: {time.time() - total_time} s")

    torch.save(model.state_dict(), f'{model.__class__.__name__}.pt')

    loss = 0
    correct = 0

    for (images, labels) in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        
        loss += loss_function(outputs, labels)


        pred = outputs.data.max(1, keepdim=True)[1]
        correct += pred.eq(labels.data.view_as(pred)).cpu().sum()
        
    loss /= len(test_loader.dataset)
        
    print('\nAverage Val Loss: {:.4f}, Val Accuracy: {}/{} ({:.3f}%)\n\n\n'.format(
        loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [12]:
loss_func = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(classifier_NC.parameters(), lr=0.001)
train_test(classifier_NC, loss_func, optimizer, train_loader, test_loader)

optimizer = torch.optim.Adam(classifier_1C.parameters(), lr=0.001)
train_test(classifier_1C, loss_func, optimizer, train_loader, test_loader)

optimizer = torch.optim.Adam(classifier_2C.parameters(), lr=0.001)
train_test(classifier_2C, loss_func, optimizer, train_loader, test_loader)

optimizer = torch.optim.Adam(classifier_3C.parameters(), lr=0.001)
train_test(classifier_3C, loss_func, optimizer, train_loader, test_loader)

Training "ImageClassifier_NC": 

Epoch:0 loss is 0.37323614954948425 in time 10.333244323730469 s
Epoch:1 loss is 0.3413008153438568 in time 9.686987400054932 s
Epoch:2 loss is 0.19373536109924316 in time 9.72487187385559 s
Epoch:3 loss is 0.44032177329063416 in time 9.53880786895752 s
Epoch:4 loss is 0.3355546295642853 in time 9.600624322891235 s
Epoch:5 loss is 0.46244385838508606 in time 10.086587190628052 s
Epoch:6 loss is 0.5230308771133423 in time 10.000236511230469 s
Epoch:7 loss is 0.3308674991130829 in time 9.869951725006104 s
Epoch:8 loss is 0.2868594229221344 in time 10.101918697357178 s
Epoch:9 loss is 0.33047857880592346 in time 9.964967012405396 s
Total Time: 98.90920519828796 s

Average Val Loss: 0.0025, Val Accuracy: 9094/10000 (90.940%)



Training "ImageClassifier_1C": 

Epoch:0 loss is 0.06258121877908707 in time 9.90872836112976 s
Epoch:1 loss is 0.06430584192276001 in time 10.147522926330566 s
Epoch:2 loss is 0.025894837453961372 in time 10.241491556167603 s
Epoch: