In [None]:
"""
TODO: Finish and submit your code for logistic regression, neural network, and hyperparameter search.

"""
import random
import itertools
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from collections import namedtuple
from tqdm import tqdm


class LogisticRegressionModel(nn.Module):
    """Multinomial logistic regression: one linear layer over flattened inputs.

    The layer returns raw class scores (logits); no softmax is applied here.

    Args:
        input_dim: Number of features per sample after flattening. Defaults
            to ``28 * 28`` (one 28x28 single-channel image).
        num_classes: Number of output classes. Defaults to 10.
    """

    def __init__(self, input_dim=28 * 28, num_classes=10):
        super().__init__()
        # Remembered so forward() flattens to the width the layer expects.
        self.input_dim = input_dim
        self.linear = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_dim)`` and return the class logits."""
        x = x.view(-1, self.input_dim)
        return self.linear(x)


""" - Part 1 - """


def logistic_regression(device):
    """Train ``LogisticRegressionModel`` on MNIST with SGD and return it.

    MNIST's training split is downloaded to ``./MNIST_dataset`` if needed and
    randomly divided into 48000 / 12000 samples; only the 48000-sample part
    is used for training. The model is trained for 15 epochs with
    cross-entropy loss and the mean batch loss is printed after each epoch.

    Args:
        device: torch device the model and every batch are moved to.

    Returns:
        dict with a single key ``"model"`` holding the trained model.
    """
    n_train, n_unused = 48000, 12000
    num_epochs = 15

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    mnist_train = datasets.MNIST(
        root='./MNIST_dataset', train=True, download=True,
        transform=preprocess)

    # The second subset returned by the split is never used in this function.
    train_subset = random_split(mnist_train, [n_train, n_unused])[0]
    loader = DataLoader(train_subset, batch_size=64, shuffle=True)

    net = LogisticRegressionModel().to(device)
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(net.parameters(), lr=0.011, weight_decay=0.001)

    for epoch_no in range(1, num_epochs + 1):
        net.train()
        epoch_loss = 0.0
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            sgd.zero_grad()
            batch_loss = loss_fn(net(batch_images), batch_labels)
            batch_loss.backward()
            sgd.step()

            epoch_loss += batch_loss.item()

        mean_loss = epoch_loss / len(loader)
        print(f"Epoch [{epoch_no}/{num_epochs}], Loss: {mean_loss:.4f}")

    return {"model": net}


""" - Part 2 - """


class FNN(nn.Module):
    """Three-layer fully connected classifier for 3x32x32 inputs.

    Layout: flatten -> Linear(3072, 64) -> tanh -> Linear(64, 32) -> ReLU
    -> Linear(32, num_classes).

    Args:
        loss_type: Name of the loss. ``'cross_entropy'`` and ``'ce'`` are the
            supported spellings; any other value makes ``get_loss`` raise.
        num_classes: Number of output classes.
    """

    # Spellings of the cross-entropy loss accepted by both forward() and
    # get_loss(); keeping one list stops the two methods drifting apart.
    _CROSS_ENTROPY_NAMES = ('cross_entropy', 'ce')

    def __init__(self, loss_type, num_classes):
        super().__init__()

        self.loss_type = loss_type
        self.num_classes = num_classes

        self.fc1 = nn.Linear(32 * 32 * 3, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return class scores for a batch ``x``.

        For the cross-entropy loss the raw logits are returned, because
        ``nn.CrossEntropyLoss`` applies log-softmax itself; feeding it
        softmax output would normalise twice and flatten the gradients.
        For any other ``loss_type`` softmax probabilities are returned.
        """
        x = self.flatten(x)
        x = torch.tanh(self.fc1(x))
        x = F.relu(self.fc2(x))
        output = self.fc3(x)
        if self.loss_type not in self._CROSS_ENTROPY_NAMES:
            output = F.softmax(output, dim=1)

        return output

    def get_loss(self, output, target):
        """Compute the configured loss between ``output`` and ``target``.

        Raises:
            ValueError: if ``loss_type`` is not a cross-entropy spelling.
        """
        if self.loss_type in self._CROSS_ENTROPY_NAMES:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(output, target)
        else:
            raise ValueError(f"Unknown loss type: {self.loss_type}")

        return loss


""" - Part 3 - """


def tune_hyper_parameter(target_metric, device):
    """Grid-search Adam hyperparameters for both models and report the best.

    ``LogisticRegressionModel`` is tuned on MNIST and ``FNN`` on CIFAR-10.
    Every combination of learning rate, weight decay and batch size is
    trained for up to 8 epochs (stopping early when validation accuracy has
    not improved for ``patience`` epochs) and scored by validation accuracy.

    Each dataset is loaded and split into train/validation subsets exactly
    once, so all configurations of a model are compared on the same
    validation samples.

    Args:
        target_metric: Accepted for interface compatibility but not consulted;
            selection is always by validation accuracy.
        device: torch device models and batches are moved to.

    Returns:
        ``(best_params, best_metric)``. Both are two-element lists of dicts,
        the first keyed ``"logistic_regression"`` and the second ``"FNN"``.
        ``best_params`` entries hold ``learning_rate``, ``batch_size`` and
        ``weight_decay``; ``best_metric`` entries hold ``accuracy`` in percent.

    Raises:
        ValueError: if an unknown dataset name is requested internally.
    """
    # Search space (kept small so tuning finishes quickly).
    learning_rates = [0.001, 0.00075, 0.00001]
    weight_decays = [0.0001, 0.00001]
    batch_sizes = [64, 128]

    HyperParams = namedtuple(
        'HyperParams', ['learning_rate', 'batch_size', 'weight_decay'])

    # Per-dataset settings: constructor, storage root, normalisation
    # statistics and the [train, validation] split sizes.
    dataset_specs = {
        "MNIST": {
            "factory": datasets.MNIST,
            "root": './MNIST_dataset',
            "mean": (0.1307,),
            "std": (0.3081,),
            "split": [50000, 10000],
        },
        "CIFAR10": {
            "factory": datasets.CIFAR10,
            "root": './CIFAR10_dataset',
            "mean": (0.5, 0.5, 0.5),
            "std": (0.5, 0.5, 0.5),
            "split": [45000, 5000],
        },
    }
    split_cache = {}

    def load_splits(dataset_name):
        """Return (train_set, val_set, test_set), building them only once."""
        if dataset_name not in split_cache:
            spec = dataset_specs.get(dataset_name)
            if spec is None:
                raise ValueError(f"Unknown dataset: {dataset_name}")
            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(spec["mean"], spec["std"]),
            ])
            full_train = spec["factory"](
                root=spec["root"], train=True, download=True,
                transform=transform)
            test_set = spec["factory"](
                root=spec["root"], train=False, download=True,
                transform=transform)
            train_set, val_set = random_split(full_train, spec["split"])
            split_cache[dataset_name] = (train_set, val_set, test_set)
        return split_cache[dataset_name]

    def create_dataloaders(batch_size, dataset_name="MNIST"):
        """Wrap the cached splits of ``dataset_name`` in DataLoaders."""
        train_set, val_set, test_set = load_splits(dataset_name)
        train_loader = DataLoader(
            train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(
            val_set, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(
            test_set, batch_size=batch_size, shuffle=False)
        return train_loader, val_loader, test_loader

    def validation(model, validation_loader, device):
        """Return the accuracy (in percent) of ``model`` on the loader."""
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in validation_loader:
                data, target = data.to(device), target.to(device)
                predicted = model(data).argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()
        return 100 * correct / total

    def train(model, optimizer, train_loader, val_loader, device,
              epochs=5, patience=2):
        """Train ``model``; stop once validation accuracy stalls.

        ``val_loader`` is an explicit argument so the early-stopping check
        never depends on a variable of the enclosing scope.
        """
        criterion = nn.CrossEntropyLoss()
        best_val_acc = float('-inf')
        no_improve_epochs = 0

        for epoch in range(epochs):
            running_loss = 0.0
            # validation() switches to eval mode, so re-enable training mode
            # at the start of every epoch.
            model.train()
            pbar = tqdm(train_loader, ncols=100, position=0, leave=True)
            for data, target in pbar:
                optimizer.zero_grad()
                data, target = data.to(device), target.to(device)
                output = model(data)

                # Models that define their own loss (FNN) use it; others fall
                # back to plain cross-entropy.
                if hasattr(model, 'get_loss'):
                    loss = model.get_loss(output, target)
                else:
                    loss = criterion(output, target)

                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            print(
                f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}")

            val_accuracy = validation(model, val_loader, device)
            if val_accuracy > best_val_acc:
                best_val_acc = val_accuracy
                no_improve_epochs = 0
            else:
                no_improve_epochs += 1
                if no_improve_epochs >= patience:
                    print(f"Early stopping triggered after {epoch+1} epochs")
                    break

    def grid_search(label, dataset_name, make_model):
        """Try every configuration for one model; return (params, accuracy)."""
        best_params = None
        best_accuracy = float('-inf')
        for lr, wd, batch_size in itertools.product(
                learning_rates, weight_decays, batch_sizes):
            print(f"Testing {label} with lr: {lr}, weight_decay: {wd}, batch_size: {batch_size}")
            train_loader, val_loader, _ = create_dataloaders(
                batch_size=batch_size, dataset_name=dataset_name)
            model = make_model().to(device)
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            train(model, optimizer, train_loader, val_loader,
                  device, epochs=8)
            val_accuracy = validation(model, val_loader, device)
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
                best_params = HyperParams(lr, batch_size, wd)
        return best_params, best_accuracy

    best_logistic_params, best_logistic_metric = grid_search(
        "Logistic Regression", "MNIST", LogisticRegressionModel)

    # 'ce' is the loss_type spelling for which FNN.forward returns raw logits,
    # which is what nn.CrossEntropyLoss expects as input.
    best_fnn_params, best_fnn_metric = grid_search(
        "FNN", "CIFAR10", lambda: FNN(loss_type='ce', num_classes=10))

    best_params = [
        {
            "logistic_regression": {
                "learning_rate": best_logistic_params.learning_rate,
                "batch_size": best_logistic_params.batch_size,
                "weight_decay": best_logistic_params.weight_decay
            }
        },
        {
            "FNN": {
                "learning_rate": best_fnn_params.learning_rate,
                "batch_size": best_fnn_params.batch_size,
                "weight_decay": best_fnn_params.weight_decay
            }
        }
    ]

    best_metric = [
        {
            "logistic_regression": {
                "accuracy": best_logistic_metric
            }
        },
        {
            "FNN": {
                "accuracy": best_fnn_metric
            }
        }
    ]

    return best_params, best_metric



In [None]:
from tqdm import tqdm


import timeit



class Params:
    """Run configuration consumed by ``main()``.

    The instance is handed to ``paramparse.process`` when that package is
    installed; otherwise the defaults below are used as-is.
    """

    class BatchSize:
        # Batch sizes for the three loaders built by get_dataloaders().
        train = 128
        val = 128
        test = 1000

    def __init__(self):
        # main() dispatches on this value: 'fnn' trains the FNN,
        # 'tune' runs the hyperparameter search; anything else raises.
        self.mode = 'tune'
        # self.model = 'tune'
        # Forwarded to tune_hyper_parameter() and echoed in main()'s report.
        self.target_metric = 'accuracy'
        # self.target_metric = 'loss'

        # Any value other than 'cpu' selects CUDA when it is available.
        self.device = 'gpu'
        # Passed to FNN as its loss_type.
        self.loss_type = "ce"
        self.batch_size = Params.BatchSize()
        # Number of training epochs in 'fnn' mode.
        self.n_epochs = 10
        # Learning rate and momentum of the SGD optimizer in 'fnn' mode.
        self.learning_rate = 1e-1
        self.momentum = 0.5


def get_dataloaders(batch_size):
    """
    Build the CIFAR-10 train, validation and test loaders.

    The training split (downloaded to the current directory if missing) is
    randomly divided into 40000 training and 10000 validation samples.
    Only the training loader shuffles.

    :param Params.BatchSize batch_size: object exposing ``train``, ``val``
        and ``test`` batch sizes
    :return: ``(train_loader, val_loader, test_loader)``
    """
    import torch
    from torch.utils.data import random_split
    import torchvision

    tv_transforms = torchvision.transforms

    def load_cifar(is_train):
        # Each split gets its own ToTensor + Normalize pipeline.
        pipeline = tv_transforms.Compose([
            tv_transforms.ToTensor(),
            tv_transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        return torchvision.datasets.CIFAR10(
            '.', train=is_train, download=True, transform=pipeline)

    full_training = load_cifar(True)
    held_out_test = load_cifar(False)

    # Carve the validation subset out of the official training split.
    train_part, val_part = random_split(full_training, [40000, 10000])

    make_loader = torch.utils.data.DataLoader
    return (
        make_loader(train_part, batch_size=batch_size.train, shuffle=True),
        make_loader(val_part, batch_size=batch_size.val, shuffle=False),
        make_loader(held_out_test, batch_size=batch_size.test, shuffle=False),
    )


def train(net, optimizer, train_loader, device):
    """Run one optimisation pass over ``train_loader``.

    For every batch the gradients are reset, the loss from ``net.get_loss``
    is back-propagated and the optimizer takes a step. The progress bar shows
    the current batch loss together with the running mean of all batch
    losses seen so far.
    """
    net.train()
    mean_so_far = 0
    progress = tqdm(train_loader, ncols=100, position=0, leave=True)
    for step, (inputs, labels) in enumerate(progress, start=1):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)

        batch_loss = net.get_loss(net(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        current = batch_loss.item()
        # Incremental mean: avoids keeping the full list of losses.
        mean_so_far += (current - mean_so_far) / step

        progress.set_description(
            f'train loss: {current:.6f} avg loss: {mean_so_far:.6f}')


def validation(net, validation_loader, device):
    """Evaluate ``net`` on ``validation_loader`` and print loss and accuracy.

    The reported loss is the average per sample: each batch loss is weighted
    by its batch size before dividing by the number of samples seen, so a
    smaller final batch does not skew the result. ``net.get_loss`` is
    expected to return the mean loss of the batch (the default reduction of
    ``nn.CrossEntropyLoss``).

    Args:
        net: model exposing ``get_loss(output, target)``.
        validation_loader: iterable of ``(data, target)`` tensor batches.
        device: torch device batches are moved to.
    """
    import torch  # local import, as in the other helpers of this script

    net.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    # Gradients are never needed for evaluation.
    with torch.no_grad():
        for data, target in validation_loader:
            data = data.to(device)
            target = target.to(device)
            output = net(data)
            batch_n = target.size(0)
            loss_sum += net.get_loss(output, target).item() * batch_n
            # Plain ints keep the printed count free of tensor(...) wrappers.
            correct += (output.argmax(dim=1) == target).sum().item()
            seen += batch_n

    # An empty loader reports zeros instead of dividing by zero.
    avg_loss = loss_sum / seen if seen else 0.0
    accuracy = 100. * correct / seen if seen else 0.0
    print('\nValidation set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        avg_loss, correct, seen, accuracy))


def test(net, test_loader, device):
    """Evaluate ``net`` on ``test_loader`` and print loss and accuracy.

    The reported loss is the average per sample: each batch loss is weighted
    by its batch size before dividing by the number of samples seen.
    ``net.get_loss`` is expected to return the mean loss of the batch (the
    default reduction of ``nn.CrossEntropyLoss``).

    Args:
        net: model exposing ``get_loss(output, target)``.
        test_loader: iterable of ``(data, target)`` tensor batches.
        device: torch device batches are moved to.
    """
    import torch  # local import, as in the other helpers of this script

    net.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    # Gradients are never needed for evaluation.
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)

            output = net(data)
            batch_n = target.size(0)
            loss_sum += net.get_loss(output, target).item() * batch_n
            # Plain ints keep the printed count free of tensor(...) wrappers.
            correct += (output.argmax(dim=1) == target).sum().item()
            seen += batch_n

    # An empty loader reports zeros instead of dividing by zero.
    avg_loss = loss_sum / seen if seen else 0.0
    accuracy = 100. * correct / seen if seen else 0.0
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        avg_loss, correct, seen, accuracy))


def main():
    """Script entry point: configure the run, pick a device, dispatch on mode.

    ``'fnn'`` trains an ``FNN`` on CIFAR-10 with SGD, validating before
    training and after every epoch and testing once at the end. ``'tune'``
    runs ``tune_hyper_parameter`` and prints its result and runtime. Any
    other mode raises ``AssertionError``.
    """
    params = Params()

    try:
        import paramparse
    except ImportError:
        print("paramparse is unavailable so commandline arguments will not work")
    else:
        paramparse.process(params)

    import torch
    import torch.optim as optim

    import torch.nn.functional as F
    import torchvision

    # Fixed seed so repeated runs draw the same random numbers from torch.
    torch.manual_seed(1)

    use_cuda = params.device != 'cpu' and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        print('Running on GPU: {}'.format(torch.cuda.get_device_name(0)))
    else:
        print('Running on CPU')

    def run_fnn():
        """Train, validate and test the FNN, reporting wall-clock times."""
        train_loader, val_loader, test_loader = get_dataloaders(
            params.batch_size)

        net = FNN(params.loss_type, 10).to(device)
        sgd = optim.SGD(net.parameters(), lr=params.learning_rate,
                        momentum=params.momentum)

        run_begin = timeit.default_timer()

        # Baseline accuracy of the untrained network.
        with torch.no_grad():
            validation(net, val_loader, device)

        for epoch_no in range(1, params.n_epochs + 1):
            print(f'\nepoch {epoch_no} / {params.n_epochs}\n')

            epoch_begin = timeit.default_timer()
            train(net, sgd, train_loader, device)
            epoch_secs = timeit.default_timer() - epoch_begin
            print(f'\ntrain runtime: {epoch_secs:.2f} secs')

            with torch.no_grad():
                validation(net, val_loader, device)

        with torch.no_grad():
            test(net, test_loader, device)

        total_secs = timeit.default_timer() - run_begin
        print(f'total runtime: {total_secs:.2f} secs')

    def run_tune():
        """Run the hyperparameter search and print what it found."""
        search_begin = timeit.default_timer()
        best_params, best_metric = tune_hyper_parameter(
            params.target_metric, device)
        elapsed = timeit.default_timer() - search_begin
        print()
        print(f"Best {params.target_metric}: {best_metric}")
        print(f"Best params:\n{best_params}")
        print(f"runtime of tune_hyper_parameter: {elapsed}")

    if params.mode == 'fnn':
        run_fnn()
    elif params.mode == 'tune':
        run_tune()
    else:
        raise AssertionError(f'invalid mode: {params.mode}')


# Run main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


paramparse is unavailable so commandline arguments will not work
Running on GPU: Tesla T4
Testing Logistic Regression with lr: 0.001, weight_decay: 0.0001, batch_size: 64
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST_dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 16103076.22it/s]


Extracting ./MNIST_dataset/MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST_dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST_dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 488669.10it/s]


Extracting ./MNIST_dataset/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST_dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST_dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 4524664.85it/s]


Extracting ./MNIST_dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST_dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST_dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 12211877.42it/s]


Extracting ./MNIST_dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST_dataset/MNIST/raw



100%|█████████████████████████████████████████████████████████████| 782/782 [00:17<00:00, 45.68it/s]


Epoch [1/8], Loss: 0.4090


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 65.71it/s]


Epoch [2/8], Loss: 0.2983


100%|█████████████████████████████████████████████████████████████| 782/782 [00:12<00:00, 62.44it/s]


Epoch [3/8], Loss: 0.2832


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 65.22it/s]


Epoch [4/8], Loss: 0.2774


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.30it/s]


Epoch [5/8], Loss: 0.2715


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.28it/s]


Epoch [6/8], Loss: 0.2686


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.78it/s]


Epoch [7/8], Loss: 0.2652
Early stopping triggered after 7 epochs
Testing Logistic Regression with lr: 0.001, weight_decay: 0.0001, batch_size: 128


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.21it/s]


Epoch [1/8], Loss: 0.4655


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.20it/s]


Epoch [2/8], Loss: 0.3061


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.35it/s]


Epoch [3/8], Loss: 0.2876


100%|█████████████████████████████████████████████████████████████| 391/391 [00:10<00:00, 36.80it/s]


Epoch [4/8], Loss: 0.2781


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.19it/s]


Epoch [5/8], Loss: 0.2722


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.01it/s]


Epoch [6/8], Loss: 0.2678


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.31it/s]


Epoch [7/8], Loss: 0.2640
Early stopping triggered after 7 epochs
Testing Logistic Regression with lr: 0.001, weight_decay: 1e-05, batch_size: 64


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.23it/s]


Epoch [1/8], Loss: 0.4084


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.56it/s]


Epoch [2/8], Loss: 0.2952


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.75it/s]


Epoch [3/8], Loss: 0.2809


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.58it/s]


Epoch [4/8], Loss: 0.2741


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 68.08it/s]


Epoch [5/8], Loss: 0.2690


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.73it/s]


Epoch [6/8], Loss: 0.2653
Early stopping triggered after 6 epochs
Testing Logistic Regression with lr: 0.001, weight_decay: 1e-05, batch_size: 128


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.64it/s]


Epoch [1/8], Loss: 0.4630


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.98it/s]


Epoch [2/8], Loss: 0.3064


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.94it/s]


Epoch [3/8], Loss: 0.2868


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.09it/s]


Epoch [4/8], Loss: 0.2778


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.79it/s]


Epoch [5/8], Loss: 0.2728


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.02it/s]


Epoch [6/8], Loss: 0.2669


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.51it/s]


Epoch [7/8], Loss: 0.2639


100%|█████████████████████████████████████████████████████████████| 391/391 [00:10<00:00, 35.64it/s]


Epoch [8/8], Loss: 0.2609
Early stopping triggered after 8 epochs
Testing Logistic Regression with lr: 0.00075, weight_decay: 0.0001, batch_size: 64


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.02it/s]


Epoch [1/8], Loss: 0.4356


100%|█████████████████████████████████████████████████████████████| 782/782 [00:12<00:00, 62.45it/s]


Epoch [2/8], Loss: 0.2970


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.71it/s]


Epoch [3/8], Loss: 0.2820


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.57it/s]


Epoch [4/8], Loss: 0.2727


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.33it/s]


Epoch [5/8], Loss: 0.2668


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.29it/s]


Epoch [6/8], Loss: 0.2629
Early stopping triggered after 6 epochs
Testing Logistic Regression with lr: 0.00075, weight_decay: 0.0001, batch_size: 128


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.03it/s]


Epoch [1/8], Loss: 0.5017


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.42it/s]


Epoch [2/8], Loss: 0.3150


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.88it/s]


Epoch [3/8], Loss: 0.2907


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.84it/s]


Epoch [4/8], Loss: 0.2803


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.66it/s]


Epoch [5/8], Loss: 0.2739


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.96it/s]


Epoch [6/8], Loss: 0.2679


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.82it/s]


Epoch [7/8], Loss: 0.2639
Early stopping triggered after 7 epochs
Testing Logistic Regression with lr: 0.00075, weight_decay: 1e-05, batch_size: 64


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.03it/s]


Epoch [1/8], Loss: 0.4293


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.87it/s]


Epoch [2/8], Loss: 0.2983


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.26it/s]


Epoch [3/8], Loss: 0.2837


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.49it/s]


Epoch [4/8], Loss: 0.2753


100%|█████████████████████████████████████████████████████████████| 782/782 [00:12<00:00, 62.98it/s]


Epoch [5/8], Loss: 0.2706
Early stopping triggered after 5 epochs
Testing Logistic Regression with lr: 0.00075, weight_decay: 1e-05, batch_size: 128


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.12it/s]


Epoch [1/8], Loss: 0.5117


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.62it/s]


Epoch [2/8], Loss: 0.3148


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.91it/s]


Epoch [3/8], Loss: 0.2908


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.78it/s]


Epoch [4/8], Loss: 0.2800


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.39it/s]


Epoch [5/8], Loss: 0.2735


100%|█████████████████████████████████████████████████████████████| 391/391 [00:10<00:00, 36.16it/s]


Epoch [6/8], Loss: 0.2694


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.15it/s]


Epoch [7/8], Loss: 0.2653
Early stopping triggered after 7 epochs
Testing Logistic Regression with lr: 1e-05, weight_decay: 0.0001, batch_size: 64


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.74it/s]


Epoch [1/8], Loss: 1.9650


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.88it/s]


Epoch [2/8], Loss: 1.3715


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.00it/s]


Epoch [3/8], Loss: 1.0516


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.75it/s]


Epoch [4/8], Loss: 0.8642


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.73it/s]


Epoch [5/8], Loss: 0.7429


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.31it/s]


Epoch [6/8], Loss: 0.6591


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.94it/s]


Epoch [7/8], Loss: 0.5979


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.16it/s]


Epoch [8/8], Loss: 0.5520
Testing Logistic Regression with lr: 1e-05, weight_decay: 0.0001, batch_size: 128


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.49it/s]


Epoch [1/8], Loss: 2.2277


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.93it/s]


Epoch [2/8], Loss: 1.7824


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.40it/s]


Epoch [3/8], Loss: 1.4609


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.87it/s]


Epoch [4/8], Loss: 1.2320


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.98it/s]


Epoch [5/8], Loss: 1.0676


100%|█████████████████████████████████████████████████████████████| 391/391 [00:10<00:00, 35.73it/s]


Epoch [6/8], Loss: 0.9463


100%|█████████████████████████████████████████████████████████████| 391/391 [00:10<00:00, 36.28it/s]


Epoch [7/8], Loss: 0.8538


100%|█████████████████████████████████████████████████████████████| 391/391 [00:10<00:00, 36.06it/s]


Epoch [8/8], Loss: 0.7813
Testing Logistic Regression with lr: 1e-05, weight_decay: 1e-05, batch_size: 64


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.54it/s]


Epoch [1/8], Loss: 2.0092


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.21it/s]


Epoch [2/8], Loss: 1.4111


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.65it/s]


Epoch [3/8], Loss: 1.0795


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.54it/s]


Epoch [4/8], Loss: 0.8828


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 67.01it/s]


Epoch [5/8], Loss: 0.7560


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.74it/s]


Epoch [6/8], Loss: 0.6684


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 68.96it/s]


Epoch [7/8], Loss: 0.6051


100%|█████████████████████████████████████████████████████████████| 782/782 [00:11<00:00, 66.56it/s]


Epoch [8/8], Loss: 0.5571
Testing Logistic Regression with lr: 1e-05, weight_decay: 1e-05, batch_size: 128


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.79it/s]


Epoch [1/8], Loss: 2.1501


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.53it/s]


Epoch [2/8], Loss: 1.7286


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.09it/s]


Epoch [3/8], Loss: 1.4340


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.95it/s]


Epoch [4/8], Loss: 1.2223


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.28it/s]


Epoch [5/8], Loss: 1.0676


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.07it/s]


Epoch [6/8], Loss: 0.9508


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 34.88it/s]


Epoch [7/8], Loss: 0.8605


100%|█████████████████████████████████████████████████████████████| 391/391 [00:11<00:00, 35.18it/s]


Epoch [8/8], Loss: 0.7889
Testing FNN with lr: 0.001, weight_decay: 0.0001, batch_size: 64
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./CIFAR10_dataset/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43332921.96it/s]


Extracting ./CIFAR10_dataset/cifar-10-python.tar.gz to ./CIFAR10_dataset
Files already downloaded and verified


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 54.72it/s]


Epoch [1/8], Loss: 2.1221


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.37it/s]


Epoch [2/8], Loss: 2.0707


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.75it/s]


Epoch [3/8], Loss: 2.0524


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.53it/s]


Epoch [4/8], Loss: 2.0404


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.76it/s]


Epoch [5/8], Loss: 2.0342


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.57it/s]


Epoch [6/8], Loss: 2.0263


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.75it/s]


Epoch [7/8], Loss: 2.0202


100%|█████████████████████████████████████████████████████████████| 704/704 [00:12<00:00, 55.95it/s]


Epoch [8/8], Loss: 2.0173
Testing FNN with lr: 0.001, weight_decay: 0.0001, batch_size: 128
Files already downloaded and verified
Files already downloaded and verified


100%|█████████████████████████████████████████████████████████████| 352/352 [00:11<00:00, 29.70it/s]


Epoch [1/8], Loss: 2.1451


 43%|██████████████████████████▎                                  | 152/352 [00:04<00:06, 31.23it/s]

In [None]:
import timeit
from collections import OrderedDict

import torch
from torchvision import transforms, datasets

#from A1_submission import logistic_regression

# Select the 'file_system' strategy for sharing tensors between processes.
# NOTE(review): presumably chosen to sidestep open-file-descriptor limits
# when DataLoader worker processes are used -- confirm against the target
# environment.
torch.multiprocessing.set_sharing_strategy('file_system')


def compute_score(acc, acc_thresh):
    """Map an accuracy onto a 0-100 score by linear interpolation.

    ``acc_thresh`` is a ``(lower, upper)`` pair. An accuracy at or below
    ``lower`` scores 0.0, one at or above ``upper`` scores 100.0, and
    anything strictly in between is scaled linearly between the two.
    """
    lower, upper = acc_thresh

    # Clamp at both ends first so the interpolation below only ever sees
    # lower < acc < upper (which also guarantees a non-zero denominator).
    if acc <= lower:
        return 0.0
    if acc >= upper:
        return 100.0

    return float(acc - lower) / (upper - lower) * 100


def test(model, device):
    """Return the fraction of MNIST test images that ``model`` classifies correctly.

    The MNIST test split is downloaded to ``./data`` if necessary and
    normalised with the same mean/std constants used by the training code
    in ``logistic_regression``. The model is switched to eval mode and fed
    one sample at a time, in dataset order, on ``device``. The predicted
    class is the argmax over dimension 1 of the model output.
    """
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
    mnist_test = datasets.MNIST(
        root='./data',
        train=False,
        download=True,
        transform=preprocess)
    loader = torch.utils.data.DataLoader(
        mnist_test, batch_size=1, shuffle=False)

    model.eval()
    hits = 0
    seen = 0
    # Inference only: no autograd bookkeeping is needed for the whole pass.
    with torch.no_grad():
        for samples, labels in loader:
            samples = samples.to(device)
            labels = labels.to(device)

            logits = model(samples)
            guesses = torch.argmax(logits, dim=1)

            seen += labels.size(0)
            hits += (guesses == labels).sum().item()

    return float(hits) / seen


class Args:
    """
    Default settings for this script, exposed as command-line arguments
    when the optional ``paramparse`` package is available.
    """

    # Which part to run:
    #   'logistic': run logistic regression on the specified dataset (part 1)
    #   'tune': run hyper parameter tuning (part 3)
    mode = 'logistic'

    # Metric with respect to which hyper parameters are to be tuned:
    #   'acc': validation classification accuracy
    #   'loss': validation loss (alternative setting: target_metric = 'loss')
    target_metric = 'acc'

    # Set to 0 to run on cpu.
    gpu = 1


def main():
    """Train the logistic-regression model, evaluate it and print a report.

    Settings come from ``Args`` and are passed through ``paramparse`` when
    that package can be imported. Only ``mode == 'logistic'`` does any work;
    any other mode returns without output. The reported ``run_time`` covers
    the ``logistic_regression`` call only, not the evaluation in ``test``.
    """
    args = Args()
    try:
        import paramparse
        paramparse.process(args)
    except ImportError:
        # paramparse is optional; fall back to the defaults in Args.
        pass

    use_cuda = args.gpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # (lower, upper) accuracy bounds that compute_score maps onto 0..100.
    acc_thresh = {'logistic': (0.83, 0.93)}

    if args.mode != 'logistic':
        return

    t_begin = timeit.default_timer()
    model = logistic_regression(device)['model']
    if model is None:
        print('model is None')
        return
    run_time = timeit.default_timer() - t_begin

    accuracy = test(model, device)
    score = compute_score(accuracy, acc_thresh[args.mode])

    result = OrderedDict([
        ('accuracy', accuracy),
        ('score', score),
        ('run_time', run_time),
    ])
    print(f"result on {args.mode}:")
    for key, value in result.items():
        print(f"\t{key}: {value}")



# Run the evaluation only when this file is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Epoch [1/15], Loss: 0.4864
Epoch [2/15], Loss: 0.3381
Epoch [3/15], Loss: 0.3149
Epoch [4/15], Loss: 0.3024
Epoch [5/15], Loss: 0.2941
Epoch [6/15], Loss: 0.2887
Epoch [7/15], Loss: 0.2843
Epoch [8/15], Loss: 0.2807
Epoch [9/15], Loss: 0.2778
Epoch [10/15], Loss: 0.2750
Epoch [11/15], Loss: 0.2728
Epoch [12/15], Loss: 0.2711
Epoch [13/15], Loss: 0.2691
Epoch [14/15], Loss: 0.2677
Epoch [15/15], Loss: 0.2662
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 47851260.51it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1970102.52it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 14309165.16it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3677708.26it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






result on logistic:
	accuracy: 0.9201
	score: 90.1
	run_time: 206.18473782100045
