# Install packages and Import Dependencies

In [1]:
try:
    import torch
    from torch import nn 
    from torchvision import datasets, transforms
    # from torchview import draw_graph

except ImportError:
    !python.exe -m pip install --upgrade pip
    %pip install torch torchvision graphviz torchview
    import torch
    from torch import nn 
    from torchvision import datasets, transforms
    # from torchview import draw_graph


In [2]:
import time

# Define transformation of the dataset

In [3]:
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

# Load the Dataset

In [4]:
train_set = datasets.MNIST('data/PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform)
test_set = datasets.MNIST('data/PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)

In [5]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True)

# Model Construction

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


## Classifier with Fully Connected Layers but no Convolutional layers

In [7]:
class ImageClassifier_NC(nn.Module):
    def __init__(self):
        super(ImageClassifier_NC, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.fc_layers(x)
        return x
    
classifier_NC = ImageClassifier_NC().to(device)
# classifier_NC_graph = draw_graph(classifier_NC, input_size=(128, 28, 28), graph_name='classifier_NC')
# classifier_NC_graph.visual_graph
print(classifier_NC)

ImageClassifier_NC(
  (fc_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=128, bias=True)
    (2): Dropout(p=0, inplace=25)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)


## Classifier with Fully Connected Layers and 1 Convolutation layer

In [8]:
class ImageClassifier_1C(nn.Module):
    def __init__(self):
        super(ImageClassifier_1C, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=5),
            nn.ReLU()
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 24 * 24, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x
    
classifier_1C = ImageClassifier_1C().to(device)
print(classifier_1C)

ImageClassifier_1C(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
  )
  (fc_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=36864, out_features=128, bias=True)
    (2): Dropout(p=0, inplace=25)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)


## Classifier with Fully Connected Layers and 2 Convolutation layer

In [9]:
class ImageClassifier_2C(nn.Module):
    def __init__(self):
        super(ImageClassifier_2C, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5),
            nn.ReLU(),
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 20 * 20, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

classifier_2C = ImageClassifier_2C().to(device)
print(classifier_2C)

ImageClassifier_2C(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
  (fc_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=25600, out_features=128, bias=True)
    (2): Dropout(p=0, inplace=25)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)


## CLassifier with Fully Connected Layers and 3 Convolutation layer

In [10]:
class ImageClassifier_3C(nn.Module):
    def __init__(self):
        super(ImageClassifier_3C, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=5),
            nn.ReLU()
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 16 * 16, 128),
            nn.Dropout(0,25),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

classifier_3C = ImageClassifier_3C().to(device)
print(classifier_3C)

ImageClassifier_3C(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
    (5): ReLU()
  )
  (fc_layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=16384, out_features=128, bias=True)
    (2): Dropout(p=0, inplace=25)
    (3): Linear(in_features=128, out_features=10, bias=True)
  )
)


# Training and Testing Models

In [11]:
def train_test (model, loss_function, optimizer, train_loader, test_loader, epoch=10):
    print(f"Training \"{model.__class__.__name__}\": \n")
    total_time = time.time()
    for epoch in range(10):  # Train for 10 epochs
        epoch_time = time.time()
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()                   # Reset gradients
            outputs = model(images)                 # Forward pass
            loss = loss_function(outputs, labels)   # Compute loss
            loss.backward()                         # Backward pass
            optimizer.step()                        # Update weights

        print(f"Epoch:{epoch} loss is {loss.item()} in time {time.time() - epoch_time} s")

    print(f"Total Time: {time.time() - total_time} s")

    torch.save(model.state_dict(), f'{model.__class__.__name__}.pt')

    loss = 0
    correct = 0

    for (images, labels) in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        
        loss += loss_function(outputs, labels)


        pred = outputs.data.max(1, keepdim=True)[1]
        correct += pred.eq(labels.data.view_as(pred)).cpu().sum()
        
    loss /= len(test_loader.dataset)
        
    print('\nAverage Val Loss: {:.4f}, Val Accuracy: {}/{} ({:.3f}%)\n\n\n'.format(
        loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [12]:
loss_func = nn.CrossEntropyLoss()

In [13]:
optimizer = torch.optim.Adam(classifier_NC.parameters(), lr=0.001)
train_test(classifier_NC, loss_func, optimizer, train_loader, test_loader)


Training "ImageClassifier_NC": 



Epoch:0 loss is 0.6718680262565613 in time 9.651752948760986 s
Epoch:1 loss is 0.19072626531124115 in time 9.713176250457764 s
Epoch:2 loss is 0.31419822573661804 in time 9.654514789581299 s
Epoch:3 loss is 0.1851852387189865 in time 9.542652606964111 s
Epoch:4 loss is 0.5819029808044434 in time 9.592023611068726 s
Epoch:5 loss is 0.19526368379592896 in time 9.49001932144165 s
Epoch:6 loss is 0.38364362716674805 in time 9.709181547164917 s
Epoch:7 loss is 0.35233238339424133 in time 9.46996283531189 s
Epoch:8 loss is 0.313971608877182 in time 9.433669567108154 s
Epoch:9 loss is 0.42868658900260925 in time 9.481484413146973 s
Total Time: 95.73944139480591 s

Average Val Loss: 0.0024, Val Accuracy: 9114/10000 (91.140%)





In [14]:
optimizer = torch.optim.Adam(classifier_1C.parameters(), lr=0.001)
train_test(classifier_1C, loss_func, optimizer, train_loader, test_loader)

Training "ImageClassifier_1C": 

Epoch:0 loss is 0.0852247104048729 in time 9.702810049057007 s
Epoch:1 loss is 0.07627455145120621 in time 9.352596998214722 s
Epoch:2 loss is 0.0751468762755394 in time 9.330811262130737 s
Epoch:3 loss is 0.02063712850213051 in time 9.367757797241211 s
Epoch:4 loss is 0.03853776678442955 in time 9.386722564697266 s
Epoch:5 loss is 0.11105111241340637 in time 9.410910367965698 s
Epoch:6 loss is 0.02509750984609127 in time 9.420533418655396 s
Epoch:7 loss is 0.016959430649876595 in time 9.539626598358154 s
Epoch:8 loss is 0.0015376415103673935 in time 9.724543571472168 s
Epoch:9 loss is 0.015790818259119987 in time 9.372647523880005 s
Total Time: 94.61096024513245 s

Average Val Loss: 0.0007, Val Accuracy: 9800/10000 (98.000%)





In [15]:
optimizer = torch.optim.Adam(classifier_2C.parameters(), lr=0.001)
train_test(classifier_2C, loss_func, optimizer, train_loader, test_loader)

Training "ImageClassifier_2C": 

Epoch:0 loss is 0.01912803389132023 in time 9.763724565505981 s
Epoch:1 loss is 0.06677054613828659 in time 9.737218379974365 s
Epoch:2 loss is 0.03951314464211464 in time 9.683794736862183 s
Epoch:3 loss is 0.03499991074204445 in time 9.782262802124023 s
Epoch:4 loss is 0.001068312325514853 in time 9.704952001571655 s
Epoch:5 loss is 0.0159052275121212 in time 9.685008764266968 s
Epoch:6 loss is 0.0020952646154910326 in time 9.676980257034302 s
Epoch:7 loss is 0.0019339787540957332 in time 9.732737064361572 s
Epoch:8 loss is 0.0011260228930041194 in time 9.656583547592163 s
Epoch:9 loss is 0.0001701136789051816 in time 9.66437029838562 s
Total Time: 97.08763241767883 s

Average Val Loss: 0.0003, Val Accuracy: 9904/10000 (99.040%)





In [16]:
optimizer = torch.optim.Adam(classifier_3C.parameters(), lr=0.001)
train_test(classifier_3C, loss_func, optimizer, train_loader, test_loader)

Training "ImageClassifier_3C": 

Epoch:0 loss is 0.07090054452419281 in time 9.743892669677734 s
Epoch:1 loss is 0.06147100403904915 in time 9.585543632507324 s
Epoch:2 loss is 0.08262497186660767 in time 10.15094780921936 s
Epoch:3 loss is 0.0025818538852036 in time 9.969598293304443 s
Epoch:4 loss is 0.006347530987113714 in time 9.783902645111084 s
Epoch:5 loss is 0.00021773595653939992 in time 9.744743347167969 s
Epoch:6 loss is 0.000452357780886814 in time 9.555383443832397 s
Epoch:7 loss is 0.0166932325810194 in time 9.543778419494629 s
Epoch:8 loss is 0.0005363160162232816 in time 9.559114217758179 s
Epoch:9 loss is 0.0008319418993778527 in time 9.563751220703125 s
Total Time: 97.20065569877625 s

Average Val Loss: 0.0003, Val Accuracy: 9890/10000 (98.900%)



