# Problem 5

Import the necessary libraries and select the compute device. The dataset itself is loaded in the next cell.

In [67]:
import time
import random
import numpy as np

import torch
from torch import nn, tensor
from torch.nn import functional as F
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import transforms

# Prefer the first CUDA GPU when one is available; otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed every RNG this notebook can draw from so that Restart & Run All gives
# the same results each time: torch drives the nn.Linear weight
# initialisation, `random` picks the training sample at each SGD step.
# torch.manual_seed returns a Generator, so it is deliberately not the last
# statement of the cell (the last expression would be echoed as cell output).
SEED = 0
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)


First, we load the MNIST dataset and keep only the samples whose label is 4 or 9. The labels are then remapped to $-1$ (digit 4) and $+1$ (digit 9).

In [68]:
# The two digit classes of the binary problem: label_1 is mapped to -1 and
# label_2 to +1.
label_1, label_2 = 4, 9


def keep_two_classes(dataset, negative_label, positive_label):
    """Restrict ``dataset`` to two classes and relabel them as -1 / +1.

    Args:
        dataset: Object exposing ``data`` and ``targets`` tensors that are
            indexed along their first dimension (here a torchvision MNIST
            dataset).
        negative_label: Original class label that becomes ``-1``.
        positive_label: Original class label that becomes ``+1``.

    Returns:
        The same ``dataset`` object; its ``data`` and ``targets`` attributes
        are replaced by the filtered / relabelled tensors.
    """
    targets = dataset.targets
    # Logical OR of the two class masks.
    keep = (targets == negative_label) | (targets == positive_label)
    kept_targets = targets[keep]

    dataset.data = dataset.data[keep]
    # Build the +/-1 labels in one shot (same integer dtype as the originals)
    # instead of overwriting the target tensor twice in place.
    dataset.targets = torch.where(
        kept_targets == negative_label,
        -torch.ones_like(kept_targets),
        torch.ones_like(kept_targets),
    )
    return dataset


train_set = keep_two_classes(
    datasets.MNIST(root='./mnist_data/', train=True, transform=transforms.ToTensor(), download=True),
    label_1,
    label_2,
)

# download=True is a no-op when the files are already on disk; it only lets
# this statement succeed even if the training split was not fetched first.
test_set = keep_two_classes(
    datasets.MNIST(root='./mnist_data/', train=False, transform=transforms.ToTensor(), download=True),
    label_1,
    label_2,
)

Now we compare logistic regression with the L2 loss. \
Before starting, let us define the linear term contained in $f$. It will be used in both cases.

In [69]:
class LR(nn.Module):
    """Single affine map ``f(x) = w^T x + b`` producing one real-valued score.

    Args:
        input_dim: Number of features per sample once the input is flattened.
            Defaults to ``28*28``.
    """

    def __init__(self, input_dim=28*28):
        super().__init__()
        self.linear = nn.Linear(input_dim, 1, bias=True)

    def forward(self, x):
        """Score every sample contained in ``x``.

        ``x`` is cast to float and flattened into rows of ``input_dim``
        features, so a single image and a batch of images are both accepted.

        Returns:
            Tensor of shape ``(num_rows, 1)`` holding one score per row.
        """
        # Flatten to the width the layer was built with rather than a
        # hard-coded 28*28, so a non-default ``input_dim`` actually works.
        # ``reshape`` (unlike ``view``) also accepts non-contiguous inputs.
        flat = x.float().reshape(-1, self.linear.in_features)
        return self.linear(flat)

First, we train with logistic regression. \
We use the negative log-sigmoid as the loss and SGD as the optimizer.

In [70]:
# Model trained with the logistic loss: one linear layer over the flattened
# 28x28 image.
logistic = LR(input_dim=28 * 28)

def logistic_loss(output, target):
    """Logistic loss ``-log(sigmoid(target * output))``.

    Args:
        output: Score(s) produced by the model.
        target: Label(s) multiplied into the score; this notebook uses -1 / +1.

    Returns:
        The element-wise loss, with the broadcast shape of ``target * output``
        (no reduction is applied).
    """
    margin = target * output
    return F.logsigmoid(margin).neg()

# Loss and optimizer picked up by the logistic-regression training loop:
# the logistic loss, minimised by plain SGD with step size 1e-4.
loss_function = logistic_loss
optimizer = torch.optim.SGD(params=logistic.parameters(), lr=1e-4)

In [71]:
# Plain SGD with batch size 1: each step draws one training image uniformly
# at random (with replacement) and takes a single gradient step on it.
# The image is read from train_set.data directly; LR.forward casts it to float.
num_steps = 1000
num_train = len(train_set.data)

# perf_counter is the monotonic, high-resolution clock intended for measuring
# short intervals; wall-clock time.time() can be coarse or jump.
start = time.perf_counter()
for _ in range(num_steps):
    ind = random.randrange(num_train)
    image, label = train_set.data[ind], train_set.targets[ind]

    optimizer.zero_grad()

    train_loss = loss_function(logistic(image), label.float())
    train_loss.backward()

    optimizer.step()

end = time.perf_counter()

In [72]:
# Score the whole test set in a single forward pass instead of one Python
# iteration per image. A sample counts as correct when its score has the same
# sign as its +/-1 label, i.e. score * label > 0. The inequality is strict:
# a score of exactly 0 carries no class decision and is counted as an error
# rather than as a correct answer for either label.
with torch.no_grad():
    scores = logistic(test_set.data).reshape(-1)
    correct = int((scores * test_set.targets > 0).sum().item())

print(f"Logistic Regression summary\n \
        Time ellapsed in training is: {end-start}, Accuracy: {correct}/{len(test_set.data)}")

Logistic Regression summary
         Time ellapsed in training is: 0.24887609481811523, Accuracy: 1884/1991


The test accuracy is quite high (about 94.6%, i.e. 1884/1991). \
Next, we train with the L2 loss introduced in the hint. Everything except the loss function stays the same.

In [73]:
# Fresh, independently initialised linear model for the L2-loss experiment.
l2 = LR(input_dim=28 * 28)

def l2_loss_function(output, target):
    """Squared-error loss on the sigmoid of the signed score.

    With ``margin = target * output`` the loss is
    ``sigmoid(-margin)**2 + (1 - sigmoid(margin))**2``. Because
    ``1 - sigmoid(z) == sigmoid(-z)``, the two terms are mathematically equal.

    Args:
        output: Score(s) produced by the model.
        target: Label(s) multiplied into the score; this notebook uses -1 / +1.

    Returns:
        The element-wise loss, with the broadcast shape of ``target * output``
        (no reduction is applied).
    """
    margin = target * output
    wrong_side = torch.sigmoid(-margin)
    right_side_gap = 1 - torch.sigmoid(margin)
    return wrong_side ** 2 + right_side_gap ** 2

# Same setup as the logistic experiment except for the loss: plain SGD with
# step size 1e-4, now attached to the parameters of the `l2` model.
loss_function = l2_loss_function
optimizer = torch.optim.SGD(params=l2.parameters(), lr=1e-4)

# Plain SGD with batch size 1: each step draws one training image uniformly
# at random (with replacement) and takes a single gradient step on it.
# The image is read from train_set.data directly; LR.forward casts it to float.
num_steps = 1000
num_train = len(train_set.data)

# perf_counter is the monotonic, high-resolution clock intended for measuring
# short intervals; wall-clock time.time() can be coarse or jump.
start = time.perf_counter()
for _ in range(num_steps):
    ind = random.randrange(num_train)
    image, label = train_set.data[ind], train_set.targets[ind]

    optimizer.zero_grad()

    train_loss = loss_function(l2(image), label.float())
    train_loss.backward()

    optimizer.step()

end = time.perf_counter()

# Score the whole test set in a single forward pass instead of one Python
# iteration per image. A sample counts as correct when its score has the same
# sign as its +/-1 label, i.e. score * label > 0. The inequality is strict:
# a score of exactly 0 carries no class decision and is counted as an error
# rather than as a correct answer for either label.
with torch.no_grad():
    scores = l2(test_set.data).reshape(-1)
    correct = int((scores * test_set.targets > 0).sum().item())

print(f"L2-loss Regression summary\n \
Time ellapsed in training is: {end-start}, Accuracy: {correct}/{len(test_set.data)}")

L2-loss Regression summary
 Time ellapsed in training is: 0.3280670642852783, Accuracy: 1242/1991


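Unlike logistic regression, the L2 loss performs poorly here (1242/1991, about 62.4% accuracy). A likely reason is that the gradient of the squared-sigmoid loss vanishes wherever the sigmoid saturates, so SGD makes little progress on samples that are confidently misclassified, whereas the logistic loss keeps a non-vanishing gradient there.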