In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader


In [2]:
transform = transforms.Compose([
  transforms.Resize((224, 224)),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.5], std=[0.5])
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4561875.06it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 135125.51it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:06<00:00, 245377.91it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2821882.50it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [3]:
class MNISTResNet(nn.Module):
    def __init__(self):
        super(MNISTResNet, self).__init__()
        self.resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_ftrs, 10)

    def forward(self, x):
        x = self.resnet(x)
        return x

class MNISTAlexNet(nn.Module):
    def __init__(self):
        super(MNISTAlexNet, self).__init__()
        self.alexnet = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1)
        self.alexnet.features[0] = nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2)
        num_ftrs = self.alexnet.classifier[6].in_features
        self.alexnet.classifier[6] = nn.Linear(num_ftrs, 10)

    def forward(self, x):
        x = self.alexnet(x)
        return x

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

resnet_model = MNISTResNet().to(device)
alexnet_model = MNISTAlexNet().to(device)

criterion = nn.CrossEntropyLoss()
resnet_optimizer = optim.Adam(resnet_model.parameters(), lr=0.001)
alexnet_optimizer = optim.Adam(alexnet_model.parameters(), lr=0.001)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 183MB/s]
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 171MB/s]


In [5]:
def train_model(model, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            print(f"internel Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

def evaluate_model(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the model on the test images: {100 * correct / total} %')

train_model(resnet_model, resnet_optimizer)
evaluate_model(resnet_model)

train_model(alexnet_model, alexnet_optimizer)
evaluate_model(alexnet_model)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
internel Epoch [8/10], Loss: 0.0039
internel Epoch [8/10], Loss: 0.0039
internel Epoch [8/10], Loss: 0.0039
internel Epoch [8/10], Loss: 0.0039
internel Epoch [8/10], Loss: 0.0039
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0040
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041
internel Epoch [8/10], Loss: 0.0041

  return F.conv2d(input, weight, bias, self.stride,


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
internel Epoch [8/10], Loss: 0.7734
internel Epoch [8/10], Loss: 0.7746
internel Epoch [8/10], Loss: 0.7758
internel Epoch [8/10], Loss: 0.7770
internel Epoch [8/10], Loss: 0.7782
internel Epoch [8/10], Loss: 0.7795
internel Epoch [8/10], Loss: 0.7807
internel Epoch [8/10], Loss: 0.7819
internel Epoch [8/10], Loss: 0.7831
internel Epoch [8/10], Loss: 0.7844
internel Epoch [8/10], Loss: 0.7856
internel Epoch [8/10], Loss: 0.7868
internel Epoch [8/10], Loss: 0.7881
internel Epoch [8/10], Loss: 0.7893
internel Epoch [8/10], Loss: 0.7905
internel Epoch [8/10], Loss: 0.7917
internel Epoch [8/10], Loss: 0.7930
internel Epoch [8/10], Loss: 0.7942
internel Epoch [8/10], Loss: 0.7954
internel Epoch [8/10], Loss: 0.7966
internel Epoch [8/10], Loss: 0.7978
internel Epoch [8/10], Loss: 0.7991
internel Epoch [8/10], Loss: 0.8003
internel Epoch [8/10], Loss: 0.8015
internel Epoch [8/10], Loss: 0.8028
internel Epoch [8/10], Loss: 0.8040

**Part 2**

In [12]:
# Define the dataset and dataloaders
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [13]:
# Define Model A
class ModelA(nn.Module):
    def __init__(self):
        super(ModelA, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(8 * 14 * 14, 64)
        self.fc2 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = x.view(-1, 8 * 14 * 14)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model_a = ModelA().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_a = optim.Adam(model_a.parameters(), lr=0.001)

Using device: cuda:0


In [16]:
# Train Model A
def train_model(model, trainloader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(trainloader)
        epoch_acc = 100 * correct / total
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")
    print('Finished Training')

train_model(model_a, trainloader, criterion, optimizer_a)

Epoch 1/10, Loss: 0.3241, Accuracy: 90.90%
Epoch 2/10, Loss: 0.1162, Accuracy: 96.56%
Epoch 3/10, Loss: 0.0828, Accuracy: 97.53%
Epoch 4/10, Loss: 0.0674, Accuracy: 97.92%
Epoch 5/10, Loss: 0.0542, Accuracy: 98.35%
Epoch 6/10, Loss: 0.0463, Accuracy: 98.62%
Epoch 7/10, Loss: 0.0383, Accuracy: 98.80%
Epoch 8/10, Loss: 0.0327, Accuracy: 99.02%
Epoch 9/10, Loss: 0.0278, Accuracy: 99.13%
Epoch 10/10, Loss: 0.0250, Accuracy: 99.21%
Finished Training


In [17]:
# Define Model B
class ModelB(nn.Module):
    def __init__(self):
        super(ModelB, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(16 * 14 * 14, 64)
        self.fc2 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 16 * 14 * 14)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [18]:
model_b = ModelB().to(device)

# Define optimizer for Model B
optimizer_b = optim.Adam(model_b.parameters(), lr=0.001)

# Train Model B
print("Training Model B")
train_model(model_b, trainloader, criterion, optimizer_b)

Training Model B
Epoch 1/10, Loss: 0.2501, Accuracy: 92.75%
Epoch 2/10, Loss: 0.0741, Accuracy: 97.77%
Epoch 3/10, Loss: 0.0527, Accuracy: 98.38%
Epoch 4/10, Loss: 0.0407, Accuracy: 98.76%
Epoch 5/10, Loss: 0.0318, Accuracy: 98.99%
Epoch 6/10, Loss: 0.0261, Accuracy: 99.13%
Epoch 7/10, Loss: 0.0206, Accuracy: 99.36%
Epoch 8/10, Loss: 0.0171, Accuracy: 99.45%
Epoch 9/10, Loss: 0.0146, Accuracy: 99.52%
Epoch 10/10, Loss: 0.0111, Accuracy: 99.64%
Finished Training


In [26]:
def evaluate_model(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')

print("Evaluating Model A")
evaluate_model(model_a, testloader)

print("Evaluating Model B")
evaluate_model(model_b, testloader)

Evaluating Model A
Accuracy: 98.01%
Evaluating Model B
Accuracy: 98.40%


4. Explore the effect of different activation functions


*   a. Train model A with all nonlinear activation functions set to ReLu
*   b. Train the model A with all nonlinear activation functions set to Sigmoid
*   c. Train the model A with all nonlinear activation functions set to tanh
*   d. Observe and discuss the differences between changing activation functions and trained model performance

In [22]:
import torch.nn.functional as F

In [29]:
# Model Definition
class ModelA(nn.Module):
    def __init__(self, activation_fn):
        super(ModelA, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(8 * 14 * 14, 64)
        self.fc2 = nn.Linear(64, 10)
        self.activation_fn = activation_fn

    def forward(self, x):
        x = self.activation_fn(self.conv1(x))
        x = self.pool(x)
        x = x.view(-1, 8 * 14 * 14)
        x = self.activation_fn(self.fc1(x))
        x = self.fc2(x)
        return x

# Training function
def train_model(model, trainloader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch + 1}, Loss: {running_loss / len(trainloader)}')

# Evaluation function
def evaluate_model(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

In [28]:
def train_and_evaluate(activation_fn_name):
    activation_fn_map = {
        'relu': F.relu,
        'sigmoid': torch.sigmoid,
        'tanh': torch.tanh
    }

    if activation_fn_name not in activation_fn_map:
        raise ValueError(f"Invalid activation function name: {activation_fn_name}")

    activation_fn = activation_fn_map[activation_fn_name]

    model = ModelA(activation_fn).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    print(f"Training Model A with {activation_fn_name}")
    train_model(model, trainloader, criterion, optimizer)

    print(f"Evaluating Model A with {activation_fn_name}")
    accuracy = evaluate_model(model, testloader)

    return accuracy

In [30]:
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    accuracies = {}

    for activation_fn_name in ['relu', 'sigmoid', 'tanh']:
        accuracies[activation_fn_name] = train_and_evaluate(activation_fn_name)

    for activation_fn_name, accuracy in accuracies.items():
        print(f'Accuracy with {activation_fn_name}: {accuracy:.2f}%')

if __name__ == "__main__":
    main()

Training Model A with relu
Epoch 1, Loss: 0.2622549746896444
Epoch 2, Loss: 0.09237140592665219
Epoch 3, Loss: 0.06379774169795818
Epoch 4, Loss: 0.05083773753616705
Epoch 5, Loss: 0.04050432891907123
Epoch 6, Loss: 0.03209748219024501
Epoch 7, Loss: 0.02623323303318944
Epoch 8, Loss: 0.02103983305241001
Epoch 9, Loss: 0.018867505782821613
Epoch 10, Loss: 0.015745794736647552
Evaluating Model A with relu
Accuracy: 98.38%
Training Model A with sigmoid
Epoch 1, Loss: 0.6959881792857703
Epoch 2, Loss: 0.24261725960430433
Epoch 3, Loss: 0.18063185149942762
Epoch 4, Loss: 0.14713584512734273
Epoch 5, Loss: 0.12255552371761311
Epoch 6, Loss: 0.10429592034034828
Epoch 7, Loss: 0.09021611512898764
Epoch 8, Loss: 0.07939458834622969
Epoch 9, Loss: 0.07006490364053182
Epoch 10, Loss: 0.0618295467762487
Evaluating Model A with sigmoid
Accuracy: 97.47%
Training Model A with tanh
Epoch 1, Loss: 0.30177286642589674
Epoch 2, Loss: 0.1058166246396551
Epoch 3, Loss: 0.06893190059032459
Epoch 4, Loss: 0

5.
Effect of the optimizer learning rate

a. Trained the Model B on Adam optimizer with a learning rate of 0.1

b. Trained the Model B on Adam optimizer with a learning rate of 0.01

c. Trained the Model B on Adam optimizer with a learning rate of 0.001

d. Observe and discuss the effect of learning rate on model performance

In [31]:
# Model B definition
class ModelB(nn.Module):
    def __init__(self, activation_fn):
        super(ModelB, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(16 * 14 * 14, 64)
        self.fc2 = nn.Linear(64, 10)
        self.activation_fn = activation_fn

    def forward(self, x):
        x = self.activation_fn(self.conv1(x))
        x = self.activation_fn(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 16 * 14 * 14)
        x = self.activation_fn(self.fc1(x))
        x = self.fc2(x)
        return x

# Training function
def train_model(model, trainloader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch + 1}, Loss: {running_loss / len(trainloader)}')

# Evaluation function
def evaluate_model(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy


In [32]:
def train_and_evaluate_model_b(learning_rate):
    activation_fn = F.relu  # We'll use ReLU as the activation function for this experiment

    model = ModelB(activation_fn).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    print(f"Training Model B with learning rate {learning_rate}")
    train_model(model, trainloader, criterion, optimizer)

    print(f"Evaluating Model B with learning rate {learning_rate}")
    accuracy = evaluate_model(model, testloader)

    return accuracy

In [33]:
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    learning_rates = [0.1, 0.01, 0.001]
    accuracies = {}

    for lr in learning_rates:
        accuracies[lr] = train_and_evaluate_model_b(lr)

    for lr, accuracy in accuracies.items():
        print(f'Accuracy with learning rate {lr}: {accuracy:.2f}%')

if __name__ == "__main__":
    main()

Training Model B with learning rate 0.1
Epoch 1, Loss: 3.1682685369621715
Epoch 2, Loss: 2.3112466470010755
Epoch 3, Loss: 2.3099096639832455
Epoch 4, Loss: 2.3100080756998773
Epoch 5, Loss: 2.3091255126477304
Epoch 6, Loss: 2.309664592559912
Epoch 7, Loss: 2.310169122112331
Epoch 8, Loss: 2.310427940730601
Epoch 9, Loss: 2.309945506327696
Epoch 10, Loss: 2.309613952250369
Evaluating Model B with learning rate 0.1
Accuracy: 10.28%
Training Model B with learning rate 0.01
Epoch 1, Loss: 0.14332583316774078
Epoch 2, Loss: 0.058663648607739025
Epoch 3, Loss: 0.044932348150624656
Epoch 4, Loss: 0.04377097321344808
Epoch 5, Loss: 0.037517758690681996
Epoch 6, Loss: 0.04166975125700623
Epoch 7, Loss: 0.035174594889222734
Epoch 8, Loss: 0.03148056521214544
Epoch 9, Loss: 0.036009359429970925
Epoch 10, Loss: 0.03161515637489379
Evaluating Model B with learning rate 0.01
Accuracy: 98.07%
Training Model B with learning rate 0.001
Epoch 1, Loss: 0.2649895834363203
Epoch 2, Loss: 0.076625481695194