In [2]:
import torch
print(torch.__version__)
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import wandb
import os
import cv2

2.1.0+cpu


In [3]:
source_directory = './double_mnist_seed_123_image_size_64_64/train/'

x_train = []
y_train = []

for root, dirs, files in os.walk(source_directory):
    for file in files:
        subdirectory_name = os.path.basename(root)
        image_path = os.path.join(root, file)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        x_train.append([image])
        y_train.append(subdirectory_name)

In [4]:
source_directory = './double_mnist_seed_123_image_size_64_64/val/'

x_val = []
y_val = []

for root, dirs, files in os.walk(source_directory):
    for file in files:
        subdirectory_name = os.path.basename(root)
        image_path = os.path.join(root, file)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        x_val.append([image])
        y_val.append(subdirectory_name)

In [5]:
source_directory = './double_mnist_seed_123_image_size_64_64/test/'

x_test = []
y_test = []

for root, dirs, files in os.walk(source_directory):
    for file in files:
        subdirectory_name = os.path.basename(root)
        image_path = os.path.join(root, file)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        x_test.append([image])
        y_test.append(subdirectory_name)

In [6]:
from torch.utils.data import TensorDataset

x_train = np.array(x_train)
x_val = np.array(x_val)
x_test = np.array(x_test)

y_train = np.array(y_train).astype(np.int64)
y_val = np.array(y_val).astype(np.int64)
y_test = np.array(y_test).astype(np.int64)

x_train = torch.from_numpy(x_train)
y_train = torch.from_numpy(y_train).long()
x_test = torch.from_numpy(x_test)
y_test = torch.from_numpy(y_test).long()
x_val = torch.from_numpy(x_val)
y_val = torch.from_numpy(y_val).long()

x_train = x_train.to(torch.float32)
x_test = x_test.to(torch.float32)
x_val = x_val.to(torch.float32)

train_set = TensorDataset(x_train, y_train)
test_set = TensorDataset(x_test, y_test)
val_set = TensorDataset(x_val, y_val)

In [32]:
import torch.nn as nn

class mlp(nn.Module):
    def __init__(self):
        super(mlp, self).__init__()
        
        self.fc1 = nn.Linear(4096, 2000)
        self.fc2 = nn.Linear(2000, 1000)
        self.fc3 = nn.Linear(1000, 1000)
        self.fc4 = nn.Linear(1000, 500)
        self.lstm = nn.LSTM(500, 50, batch_first=True, bidirectional=True)
        self.fc5 = nn.Linear(100, 10)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = x.view(x.size(0), -1, 500)
        x, _ = self.lstm(x)
        x = x.mean(dim=1)
        x = self.fc5(x)
        x = self.sigmoid(x)
        
        return x

In [8]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 32
learning_rate = 0.001
num_epochs = 5

best_model = mlp()
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        optimizer.zero_grad()
        outputs = best_model(images)
        loss = criterion(outputs, one_hot_outputs)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        top_values, top_indices = outputs.topk(2, dim=1)
        output_tensor = torch.zeros_like(outputs)
        output_tensor.scatter_(1, top_indices, 1)

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        equal_to_1 = np.logical_and(output_tensor.numpy() == 1, one_hot_outputs.numpy() == 1)
        num_equal_to_1 = np.sum(equal_to_1)

        correct += num_equal_to_1
        total += len(images)

    accuracy = 100 * correct / total
    print(accuracy)
    print(loss)

31.25
tensor(0.4757, grad_fn=<BinaryCrossEntropyBackward0>)
37.5
tensor(0.4781, grad_fn=<BinaryCrossEntropyBackward0>)
37.5
tensor(0.4829, grad_fn=<BinaryCrossEntropyBackward0>)
31.25
tensor(0.4728, grad_fn=<BinaryCrossEntropyBackward0>)
37.5
tensor(0.4712, grad_fn=<BinaryCrossEntropyBackward0>)


In [9]:
best_model.eval()
correct = 0
total = 0
for images, labels in test_loader:
    outputs = best_model(images)
    top_values, top_indices = outputs.topk(2, dim=1)
    output_tensor = torch.zeros_like(outputs)
    output_tensor.scatter_(1, top_indices, 1)

    one_hot_outputs = torch.zeros(images.size(0), 10)
    for i, digit in enumerate(labels):
        digit = int(digit)
        if digit < 10:
            one_hot_outputs[i, digit] = 1
        else:
            tens_digit = digit // 10
            ones_digit = digit % 10
            one_hot_outputs[i, tens_digit] = 1
            one_hot_outputs[i, ones_digit] = 1
    one_hot_outputs = one_hot_outputs.float()

    equal_to_1 = np.logical_and(output_tensor.numpy() == 1, one_hot_outputs.numpy() == 1)
    num_equal_to_1 = np.sum(equal_to_1)

    correct += num_equal_to_1

    total += len(images)

accuracy = 100 * correct / total
print(accuracy)
print(loss)

20.0
tensor(0.4712, grad_fn=<BinaryCrossEntropyBackward0>)


In [10]:
wandb.init(project="smai_assignment_3_5_1_mlp", entity="harshitaggarwal4")

config = wandb.config
config.learning_rate = [0.01, 0.001]
config.batch_size = [64]
config.num_epochs = 1

best_accuracy = 0.0
best_model_state = None

for lr in config.learning_rate:
    for batch_size in config.batch_size:
        best_model = mlp()
        criterion = nn.BCELoss()
        optimizer = optim.Adam(best_model.parameters(), lr=lr)

        for epoch in range(num_epochs):
            best_model.train()
            for images, labels in train_loader:

                one_hot_outputs = torch.zeros(images.size(0), 10)
                for i, digit in enumerate(labels):
                    digit = int(digit)
                    if digit < 10:
                        one_hot_outputs[i, digit] = 1
                    else:
                        tens_digit = digit // 10
                        ones_digit = digit % 10
                        one_hot_outputs[i, tens_digit] = 1
                        one_hot_outputs[i, ones_digit] = 1
                one_hot_outputs = one_hot_outputs.float()

                optimizer.zero_grad()
                outputs = best_model(images)
                loss = criterion(outputs, one_hot_outputs)
                loss.backward()
                optimizer.step()

            best_model.eval()
            correct = 0
            total = 0
            for images, labels in val_loader:
                outputs = best_model(images)
                top_values, top_indices = outputs.topk(2, dim=1)
                output_tensor = torch.zeros_like(outputs)
                output_tensor.scatter_(1, top_indices, 1)

                one_hot_outputs = torch.zeros(images.size(0), 10)
                for i, digit in enumerate(labels):
                    digit = int(digit)
                    if digit < 10:
                        one_hot_outputs[i, digit] = 1
                    else:
                        tens_digit = digit // 10
                        ones_digit = digit % 10
                        one_hot_outputs[i, tens_digit] = 1
                        one_hot_outputs[i, ones_digit] = 1
                one_hot_outputs = one_hot_outputs.float()

                equal_to_1 = np.logical_and(output_tensor.numpy() == 1, one_hot_outputs.numpy() == 1)
                num_equal_to_1 = np.sum(equal_to_1)

                correct += num_equal_to_1
                total += len(images)

            accuracy = 100 * correct / total

            wandb.log({
                "Learning Rate": lr,
                "Batch Size": batch_size,
                "Epoch": epoch + 1,
                "Train Loss": loss.item(),
                "Validation Accuracy": accuracy
            })

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_state = best_model.state_dict()

torch.save(best_model_state, 'model_best.pth')

wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mharshitaggarwal4[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
Batch Size,▁▁▁▁▁▁▁▁▁▁
Epoch,▁▃▅▆█▁▃▅▆█
Learning Rate,█████▁▁▁▁▁
Train Loss,▆▁▄▇▄█▆▆▄▇
Validation Accuracy,█▃▁▃▃▁▁▁▁▆

0,1
Batch Size,64.0
Epoch,5.0
Learning Rate,0.001
Train Loss,0.47952
Validation Accuracy,37.5


In [33]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 64
learning_rate = 0.001
num_epochs = 5

best_model = mlp()
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        optimizer.zero_grad()
        outputs = best_model(images)
        loss = criterion(outputs, one_hot_outputs)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        top_values, top_indices = outputs.topk(2, dim=1)
        output_tensor = torch.zeros_like(outputs)
        output_tensor.scatter_(1, top_indices, 1)

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        equal_to_1 = np.logical_and(output_tensor.numpy() == 1, one_hot_outputs.numpy() == 1)
        num_equal_to_1 = np.sum(equal_to_1)

        correct += num_equal_to_1
        total += len(images)

    accuracy = 100 * correct / total
    print(accuracy)
    print(loss)

31.25
tensor(0.4808, grad_fn=<BinaryCrossEntropyBackward0>)
31.25
tensor(0.4617, grad_fn=<BinaryCrossEntropyBackward0>)
31.25
tensor(0.4697, grad_fn=<BinaryCrossEntropyBackward0>)
37.5
tensor(0.4707, grad_fn=<BinaryCrossEntropyBackward0>)
31.25
tensor(0.4689, grad_fn=<BinaryCrossEntropyBackward0>)


In [34]:
best_model.eval()
correct = 0
total = 0
for images, labels in test_loader:
    outputs = best_model(images)
    top_values, top_indices = outputs.topk(2, dim=1)
    output_tensor = torch.zeros_like(outputs)
    output_tensor.scatter_(1, top_indices, 1)

    one_hot_outputs = torch.zeros(images.size(0), 10)
    for i, digit in enumerate(labels):
        digit = int(digit)
        if digit < 10:
            one_hot_outputs[i, digit] = 1
        else:
            tens_digit = digit // 10
            ones_digit = digit % 10
            one_hot_outputs[i, tens_digit] = 1
            one_hot_outputs[i, ones_digit] = 1
    one_hot_outputs = one_hot_outputs.float()

    equal_to_1 = np.logical_and(output_tensor.numpy() == 1, one_hot_outputs.numpy() == 1)
    num_equal_to_1 = np.sum(equal_to_1)

    correct += num_equal_to_1

    total += len(images)

accuracy = 100 * correct / total
print(accuracy)
print(loss)

20.0
tensor(0.4689, grad_fn=<BinaryCrossEntropyBackward0>)


In [7]:
import torch.nn as nn

class CNNModel(nn.Module):
    def __init__(self, dropout_rate = 0.25):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(32 * 16 * 16, 10)
        self.sigmoidd = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.dropout1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = self.dropout2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.sigmoidd(x)
        return x

In [12]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 32
learning_rate = 0.001
dropout_rate = 0.25
num_epochs = 10

best_model = CNNModel(dropout_rate = dropout_rate)
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = create_data_loader(train_set, batch_size)
val_loader = create_data_loader(val_set, batch_size)
test_loader = create_data_loader(test_set, batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        optimizer.zero_grad()
        print(images.shape)
        outputs = best_model(images)
        loss = criterion(outputs, one_hot_outputs)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        top_values, top_indices = outputs.topk(2, dim=1)
        output_tensor = torch.zeros_like(outputs)
        output_tensor.scatter_(1, top_indices, 1)

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        matching_rows = np.all(output_tensor.numpy() == one_hot_outputs.numpy(), axis=1)
        num_matching_rows = np.sum(matching_rows)

        correct += num_matching_rows
        total += len(images)

    accuracy = 100 * correct / total
    print(accuracy)
    print(loss)

torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 6

In [13]:
best_model.eval()
correct = 0
total = 0
for images, labels in test_loader:
    outputs = best_model(images)
    top_values, top_indices = outputs.topk(2, dim=1)
    output_tensor = torch.zeros_like(outputs)
    output_tensor.scatter_(1, top_indices, 1)

    one_hot_outputs = torch.zeros(images.size(0), 10)
    for i, digit in enumerate(labels):
        digit = int(digit)
        if digit < 10:
            one_hot_outputs[i, digit] = 1
        else:
            tens_digit = digit // 10
            ones_digit = digit % 10
            one_hot_outputs[i, tens_digit] = 1
            one_hot_outputs[i, ones_digit] = 1
    one_hot_outputs = one_hot_outputs.float()

    matching_rows = np.all(output_tensor.numpy() == one_hot_outputs.numpy(), axis=1)
    num_matching_rows = np.sum(matching_rows)

    correct += num_matching_rows
    total += len(images)

accuracy = 100 * correct / total
print(accuracy)

46.82


In [14]:
wandb.init(project="smai_assignment_3_5_1_cnn", entity="harshitaggarwal4")

config = wandb.config
config.learning_rate = [0.01, 0.001]
config.batch_size = [32, 64]
config.num_epochs = 10

best_accuracy = 0.0
best_model_state = None

for lr in config.learning_rate:
    for batch_size in config.batch_size:
        best_model = CNNModel(dropout_rate = dropout_rate)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(best_model.parameters(), lr=lr)

        train_loader = create_data_loader(train_set, batch_size)
        val_loader = create_data_loader(val_set, batch_size)
        test_loader = create_data_loader(test_set, batch_size)

        for epoch in range(num_epochs):
            best_model.train()
            for images, labels in train_loader:

                one_hot_outputs = torch.zeros(images.size(0), 10)
                for i, digit in enumerate(labels):
                    digit = int(digit)
                    if digit < 10:
                        one_hot_outputs[i, digit] = 1
                    else:
                        tens_digit = digit // 10
                        ones_digit = digit % 10
                        one_hot_outputs[i, tens_digit] = 1
                        one_hot_outputs[i, ones_digit] = 1
                one_hot_outputs = one_hot_outputs.float()

                optimizer.zero_grad()
                print(images.shape)
                outputs = best_model(images)
                loss = criterion(outputs, one_hot_outputs)
                loss.backward()
                optimizer.step()

            best_model.eval()
            correct = 0
            total = 0
            for images, labels in val_loader:
                outputs = best_model(images)
                top_values, top_indices = outputs.topk(2, dim=1)
                output_tensor = torch.zeros_like(outputs)
                output_tensor.scatter_(1, top_indices, 1)

                one_hot_outputs = torch.zeros(images.size(0), 10)
                for i, digit in enumerate(labels):
                    digit = int(digit)
                    if digit < 10:
                        one_hot_outputs[i, digit] = 1
                    else:
                        tens_digit = digit // 10
                        ones_digit = digit % 10
                        one_hot_outputs[i, tens_digit] = 1
                        one_hot_outputs[i, ones_digit] = 1
                one_hot_outputs = one_hot_outputs.float()

                matching_rows = np.all(output_tensor.numpy() == one_hot_outputs.numpy(), axis=1)
                num_matching_rows = np.sum(matching_rows)

                correct += num_matching_rows
                total += len(images)

            accuracy = 100 * correct / total
            print(accuracy)
            print(loss)

            wandb.log({
                "Learning Rate": lr,
                "Batch Size": batch_size,
                "Epoch": epoch + 1,
                "Train Loss": loss.item(),
                "Validation Accuracy": accuracy
            })

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_state = best_model.state_dict()

torch.save(best_model_state, 'model_best.pth')

wandb.finish()

torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 64, 64])
torch.Size([32, 1, 6

0,1
Batch Size,▁▁▁▁▁▁▁▁▁▁██████████▁▁▁▁▁▁▁▁▁▁██████████
Epoch,▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█
Learning Rate,████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Loss,███▆██▆█▇█▆▆▆▆▆▆▆▆▆▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation Accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▅▆▇▇▇█████▅▆▇▇▇▇████

0,1
Batch Size,64.0
Epoch,10.0
Learning Rate,0.001
Train Loss,0.14414
Validation Accuracy,47.91875


In [8]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 64
learning_rate = 0.001
dropout_rate = 0.25
num_epochs = 10

best_model = CNNModel(dropout_rate = dropout_rate)
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = create_data_loader(train_set, batch_size)
val_loader = create_data_loader(val_set, batch_size)
test_loader = create_data_loader(test_set, batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        optimizer.zero_grad()
        print(images.shape)
        outputs = best_model(images)
        loss = criterion(outputs, one_hot_outputs)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        top_values, top_indices = outputs.topk(2, dim=1)
        output_tensor = torch.zeros_like(outputs)
        output_tensor.scatter_(1, top_indices, 1)

        one_hot_outputs = torch.zeros(images.size(0), 10)
        for i, digit in enumerate(labels):
            digit = int(digit)
            if digit < 10:
                one_hot_outputs[i, digit] = 1
            else:
                tens_digit = digit // 10
                ones_digit = digit % 10
                one_hot_outputs[i, tens_digit] = 1
                one_hot_outputs[i, ones_digit] = 1
        one_hot_outputs = one_hot_outputs.float()

        matching_rows = np.all(output_tensor.numpy() == one_hot_outputs.numpy(), axis=1)
        num_matching_rows = np.sum(matching_rows)

        correct += num_matching_rows
        total += len(images)

    accuracy = 100 * correct / total
    print(accuracy)
    print(loss)

torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 64, 64])
torch.Size([64, 1, 6

In [9]:
best_model.eval()
correct = 0
total = 0
for images, labels in test_loader:
    outputs = best_model(images)
    top_values, top_indices = outputs.topk(2, dim=1)
    output_tensor = torch.zeros_like(outputs)
    output_tensor.scatter_(1, top_indices, 1)

    one_hot_outputs = torch.zeros(images.size(0), 10)
    for i, digit in enumerate(labels):
        digit = int(digit)
        if digit < 10:
            one_hot_outputs[i, digit] = 1
        else:
            tens_digit = digit // 10
            ones_digit = digit % 10
            one_hot_outputs[i, tens_digit] = 1
            one_hot_outputs[i, ones_digit] = 1
    one_hot_outputs = one_hot_outputs.float()

    matching_rows = np.all(output_tensor.numpy() == one_hot_outputs.numpy(), axis=1)
    num_matching_rows = np.sum(matching_rows)

    correct += num_matching_rows
    total += len(images)

accuracy = 100 * correct / total
print(accuracy)

48.84


In [15]:
npz_file_path = "permuted_mnist.npz"
data = np.load(npz_file_path)
keys = data.files
print(keys)

['train_images', 'train_labels', 'test_images', 'test_labels']


In [16]:
train_images = np.array(data['train_images']).reshape(-1, 1, 28, 28)
train_labels = np.array(data['train_labels'])
test_images = np.array(data['test_images']).reshape(-1, 1, 28, 28)
test_labels = np.array(data['test_labels'])

In [17]:
train_images = torch.from_numpy(train_images)
train_labels = torch.from_numpy(train_labels)
test_images = torch.from_numpy(test_images)
test_labels = torch.from_numpy(test_labels)

train_images = train_images.to(torch.float32)
train_labels = train_labels.to(torch.long)
test_images = test_images.to(torch.float32)
test_labels = test_labels.to(torch.long)

train_set = TensorDataset(train_images, train_labels)
test_set = TensorDataset(test_images, test_labels)

In [18]:
dataset_len = len(test_set)
split_point = dataset_len // 2
val_set = TensorDataset(*test_set[:split_point])
test_set = TensorDataset(*test_set[split_point:])

In [19]:
import torch.nn as nn

class CNNModel(nn.Module):
    def __init__(self, dropout_rate = 0.25):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.dropout1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = self.dropout2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [20]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 32
learning_rate = 0.01
num_epochs = 10

best_model = CNNModel(dropout_rate = dropout_rate)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = best_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        _, predicted = torch.max(outputs, 1)
        label = labels
        total += label.size(0)
        correct += (predicted == label).sum().item()
    accuracy = 100 * correct / total
    print(accuracy)
    print(loss)

22.26
tensor(2.2185, grad_fn=<NllLossBackward0>)
45.86
tensor(1.5793, grad_fn=<NllLossBackward0>)
68.76
tensor(1.1907, grad_fn=<NllLossBackward0>)
71.04
tensor(0.8383, grad_fn=<NllLossBackward0>)
70.88
tensor(1.1374, grad_fn=<NllLossBackward0>)
71.04
tensor(1.1594, grad_fn=<NllLossBackward0>)
71.66
tensor(1.7722, grad_fn=<NllLossBackward0>)
72.24
tensor(1.3335, grad_fn=<NllLossBackward0>)
75.88
tensor(0.7999, grad_fn=<NllLossBackward0>)
71.02
tensor(1.0662, grad_fn=<NllLossBackward0>)


In [21]:
wandb.init(project="smai_assignment_3_5_2_cnn", entity="harshitaggarwal4")

config = wandb.config
config.learning_rate = [0.01, 0.001]
config.batch_size = [32, 64]
config.num_epochs = 10

best_accuracy = 0.0
best_model_state = None

for lr in config.learning_rate:
    for batch_size in config.batch_size:
        best_model = CNNModel(dropout_rate = dropout_rate)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(best_model.parameters(), lr=lr)

        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size)
        test_loader = DataLoader(test_set, batch_size=batch_size)

        for epoch in range(num_epochs):
            best_model.train()
            for images, labels in train_loader:
                optimizer.zero_grad()
                outputs = best_model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            best_model.eval()
            correct = 0
            total = 0
            for images, labels in val_loader:
                outputs = best_model(images)
                _, predicted = torch.max(outputs, 1)
                label = labels
                total += label.size(0)
                correct += (predicted == label).sum().item()
            accuracy = 100 * correct / total

            wandb.log({
                "Learning Rate": lr,
                "Batch Size": batch_size,
                "Epoch": epoch + 1,
                "Train Loss": loss.item(),
                "Validation Accuracy": accuracy
            })

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_state = best_model.state_dict()

torch.save(best_model_state, 'model_best.pth')

wandb.finish()

0,1
Batch Size,▁▁▁▁▁▁▁▁▁▁██████████▁▁▁▁▁▁▁▁▁▁██████████
Epoch,▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█
Learning Rate,████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Loss,█▇▃▄▄▅▄▅▆▂█▅▃▅▃▃▄▂▃▃▂▂▂▁▁▁▁▁▁▃▂▃▁▂▂▁▂▁▁▂
Validation Accuracy,▁▃▅▆▆▅▆▆▄▆▂▅▆▆▆▇▇▆▆▆▇█████████▇█████████

0,1
Batch Size,64.0
Epoch,10.0
Learning Rate,0.001
Train Loss,0.28883
Validation Accuracy,93.3


In [27]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 64
learning_rate = 0.001
num_epochs = 10

best_model = CNNModel(dropout_rate = dropout_rate)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = best_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        _, predicted = torch.max(outputs, 1)
        label = labels
        total += label.size(0)
        correct += (predicted == label).sum().item()
    accuracy = 100 * correct / total
    print(accuracy)
    print(loss)

85.58
tensor(0.4121, grad_fn=<NllLossBackward0>)
89.22
tensor(0.4798, grad_fn=<NllLossBackward0>)
91.0
tensor(0.2659, grad_fn=<NllLossBackward0>)
92.26
tensor(0.4707, grad_fn=<NllLossBackward0>)
91.62
tensor(0.3279, grad_fn=<NllLossBackward0>)
93.16
tensor(0.1113, grad_fn=<NllLossBackward0>)
93.04
tensor(0.4583, grad_fn=<NllLossBackward0>)
93.44
tensor(0.0476, grad_fn=<NllLossBackward0>)
93.42
tensor(0.0387, grad_fn=<NllLossBackward0>)
93.58
tensor(0.2037, grad_fn=<NllLossBackward0>)


In [22]:
class mlp(nn.Module):
    def __init__(self):
        super(mlp, self).__init__()
        
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc5(x)
        
        return x

In [23]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 32
learning_rate = 0.001
num_epochs = 10

best_model = mlp()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = best_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        _, predicted = torch.max(outputs, 1)
        label = labels
        total += label.size(0)
        correct += (predicted == label).sum().item()
    accuracy = 100 * correct / total
    print(accuracy)

84.06
82.76
85.06
84.56
84.54
84.92
87.02
86.24
87.8
83.04


In [24]:
wandb.init(project="smai_assignment_3_5_2_mlp", entity="harshitaggarwal4")

config = wandb.config
config.learning_rate = [0.01, 0.001]
config.batch_size = [32, 64]
config.num_epochs = 10

best_accuracy = 0.0
best_model_state = None

for lr in config.learning_rate:
    for batch_size in config.batch_size:
        best_model = mlp()
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(best_model.parameters(), lr=lr)

        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size)
        test_loader = DataLoader(test_set, batch_size=batch_size)

        for epoch in range(num_epochs):
            best_model.train()
            for images, labels in train_loader:
                optimizer.zero_grad()
                outputs = best_model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            best_model.eval()
            correct = 0
            total = 0
            for images, labels in val_loader:
                outputs = best_model(images)
                _, predicted = torch.max(outputs, 1)
                label = labels
                total += label.size(0)
                correct += (predicted == label).sum().item()
            accuracy = 100 * correct / total

            wandb.log({
                "Learning Rate": lr,
                "Batch Size": batch_size,
                "Epoch": epoch + 1,
                "Train Loss": loss.item(),
                "Validation Accuracy": accuracy
            })

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_state = best_model.state_dict()

torch.save(best_model_state, 'model_best.pth')

wandb.finish()

0,1
Batch Size,▁▁▁▁▁▁▁▁▁▁██████████▁▁▁▁▁▁▁▁▁▁██████████
Epoch,▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█▁▂▃▃▄▅▆▆▇█
Learning Rate,████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Loss,▁▂▂▂▁▁█▁▁▁▁▁▁▁▁▇▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation Accuracy,▅▇▆▅▄▁▆▆▆▆▇▇▆▂▄▆▇▇▇▇▇▆█▇████▇█▇▇█▇████▇█

0,1
Batch Size,64.0
Epoch,10.0
Learning Rate,0.001
Train Loss,0.10228
Validation Accuracy,88.76


In [25]:
def create_data_loader(dataset, batch_size):
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

batch_size = 64
learning_rate = 0.001
num_epochs = 10

best_model = mlp()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(best_model.parameters(), lr=learning_rate)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

for epoch in range(num_epochs):
    best_model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = best_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    best_model.eval()
    correct = 0
    total = 0
    for images, labels in val_loader:
        outputs = best_model(images)
        _, predicted = torch.max(outputs, 1)
        label = labels
        total += label.size(0)
        correct += (predicted == label).sum().item()
    accuracy = 100 * correct / total
    print(accuracy)

86.66
86.66
87.78
87.32
87.74
88.46
87.78
88.54
88.06
86.82


For dataset 1(Double mnist), the CNNs perform better than mlps since the input is spacially connected while in dataset 2(permuted mnist), the mlp performs better since the dataset is not spacially connected.
During training of the mlp, we will have to flatten the image due to which, the spacial information is lost.
The mlp is difficult to train on double mnist dataset as it is not performing very well and thus, I tried adding more number of trainable parameters.
Bit of overfitting is observed in the case of permuted mnist mlp and cnn since the validation accuracy increases monotonically till a point and then decrease a bit.
Even in the case of double mnist, we can observe overfitting as the accuracy on the train set increases very high(above 80%) while that on the validation set is very less. Even for cnn, there is a little bit of overfitting in the end.