**Importing Necessary Libraries**

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


**Defining the Convolutional Neural Network Architecture**

In [2]:
# Define a CNN class which is a subclass of nn.Module
class Net(nn.Module):
    """Small two-convolution CNN that returns log-probabilities over 10 classes.

    The fully connected head takes 320 features per sample. For a 1-channel
    28x28 input the two 5x5 convolutions, each followed by a 2x2 max-pool,
    produce 20 channels of 4x4, i.e. 20 * 4 * 4 = 320.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)  # 1 input channel, 10 output channels
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)  # 10 input channels, 20 output channels
        self.conv2_drop = nn.Dropout2d()  # Channel-wise dropout applied to conv2's output
        # Fully connected classifier head
        self.fc1 = nn.Linear(320, 50)  # 320 input features, 50 output features
        self.fc2 = nn.Linear(50, 10)  # 50 input features, 10 output features (for 10 classes)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Batch of images; the first convolution requires 1 channel, and
               the spatial size must flatten to 320 features per sample.

        Returns:
            Tensor with one row of 10 log-probabilities per input sample.

        Raises:
            RuntimeError: if the per-sample feature count after the
                convolutional stages does not match what ``fc1`` expects.
        """
        # conv1 -> 2x2 max-pool -> ReLU
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # conv2 -> channel dropout -> 2x2 max-pool -> ReLU
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten everything except the batch dimension. Unlike view(-1, 320),
        # this never re-batches samples when the input size is unexpected;
        # fc1 raises a shape error instead of silently producing wrong rows.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)  # Active only in training mode
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)  # Log-probabilities over the 10 classes


# Seed the global RNG before the weights are initialised so that a fresh
# Restart & Run All starts from the same random state every time.
torch.manual_seed(0)

# Create an instance of the Net class
net = Net()


**Loading the Data and Defining Hyperparameters**

In [3]:
# Hyperparameters: mini-batch size used by both loaders, optimizer learning rate,
# and the number of passes over the training set
batch_size, learning_rate, epochs = 64, 0.01, 5

# Preprocessing applied to every image: convert to a tensor, then normalize the
# single channel (0.1307 / 0.3081 are presumably the MNIST mean / std - confirm)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST training and test splits, downloaded into ./data on first use
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Batch iterators: training batches are reshuffled every epoch, test batches keep
# the dataset order
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 11549421.44it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 344177.54it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3164023.72it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3099158.74it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






**Training the Network**

In [4]:
# Define the loss function and optimizer.
# Net.forward already returns log-probabilities (F.log_softmax), so the matching
# criterion is NLLLoss. CrossEntropyLoss expects raw logits and would apply
# log-softmax a second time on top of the model's own.
criterion = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)  # SGD with momentum

# Put the model in training mode explicitly so dropout is active even when this
# cell is re-run after the model has been switched to eval mode.
net.train()

# Training loop
for epoch in range(epochs):  # Loop over the dataset multiple times
    running_loss = 0.0  # Sum of batch losses since the last report
    for i, (inputs, labels) in enumerate(trainloader):
        optimizer.zero_grad()  # Clear gradients left over from the previous step

        outputs = net(inputs)  # Forward pass: log-probabilities per class
        loss = criterion(outputs, labels)  # Negative log-likelihood of the true labels
        loss.backward()  # Backward pass: gradients w.r.t. the model's weights
        optimizer.step()  # Update the weights

        running_loss += loss.item()
        if i % 100 == 99:    # Report the mean loss of every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')


[1,   100] loss: 1.671
[1,   200] loss: 0.730
[1,   300] loss: 0.538
[1,   400] loss: 0.444
[1,   500] loss: 0.414
[1,   600] loss: 0.367
[1,   700] loss: 0.344
[1,   800] loss: 0.318
[1,   900] loss: 0.319
[2,   100] loss: 0.278
[2,   200] loss: 0.275
[2,   300] loss: 0.282
[2,   400] loss: 0.273
[2,   500] loss: 0.265
[2,   600] loss: 0.253
[2,   700] loss: 0.254
[2,   800] loss: 0.234
[2,   900] loss: 0.222
[3,   100] loss: 0.203
[3,   200] loss: 0.220
[3,   300] loss: 0.206
[3,   400] loss: 0.203
[3,   500] loss: 0.221
[3,   600] loss: 0.202
[3,   700] loss: 0.189
[3,   800] loss: 0.201
[3,   900] loss: 0.203
[4,   100] loss: 0.196
[4,   200] loss: 0.185
[4,   300] loss: 0.186
[4,   400] loss: 0.181
[4,   500] loss: 0.189
[4,   600] loss: 0.194
[4,   700] loss: 0.192
[4,   800] loss: 0.197
[4,   900] loss: 0.194
[5,   100] loss: 0.173
[5,   200] loss: 0.190
[5,   300] loss: 0.183
[5,   400] loss: 0.167
[5,   500] loss: 0.174
[5,   600] loss: 0.176
[5,   700] loss: 0.163
[5,   800] 