In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
from sklearn.svm import SVR

import matplotlib.pyplot as plt

from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch

import time
import pandas as pd
import numpy as np

datapath = '../data-unversions/p1ch7/'
cifar10 = datasets.CIFAR10(root= datapath, train=True, download = True, transform=transforms.ToTensor())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversions/p1ch7/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:05<00:00, 30.8MB/s]


Extracting ../data-unversions/p1ch7/cifar-10-python.tar.gz to ../data-unversions/p1ch7/


Pre-Processing.

In [2]:
imgs = torch.stack([img_t for img_t, _ in cifar10], dim=3)
mean = imgs.view(3, -1).mean(dim=1)
std = imgs.view(3, -1).std(dim=1)

normalize = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean,std)])

train_data = datasets.CIFAR10(root= datapath, train=True, download = True, transform=normalize)
cifar10_train = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

valid_data = datasets.CIFAR10(root= datapath, train=False, download = True, transform=normalize)
cifar10_valid = torch.utils.data.DataLoader(valid_data, batch_size=64, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


Training Loop Structure

In [3]:
def training(epochs, optimizer, model, loss_fn, train_set, valid_set):
  trainStart = time.time()
  for epoch in range(epochs+1):
    for imgs, labels in train_set:
      batch = imgs.shape[0]
      outputs = model(imgs.view(batch, -1))
      train_loss = loss_fn(outputs, labels)

      optimizer.zero_grad()
      train_loss.backward()
      optimizer.step()

    if epoch % 10 == 0:
      print("Epoch: %d, Training Loss: %f" % (epoch, float(train_loss)))
  trainStop = time.time()
  trainDuration = trainStop - trainStart

  print(f'Total Training Time: {trainDuration} seconds')

### Validation Evaluation ###

  total = 0
  correctClass = 0
  validStart = time.time()
  with torch.no_grad():
    for imgs, labels in valid_set:
      batch = imgs.shape[0]
      outputs = model(imgs.view(batch, -1))
      _, predicted = torch.max(outputs, dim=1)
      total += labels.shape[0]
      correctClass += int((predicted == labels).sum())

  validStop = time.time()
  validDuration = validStop - validStart

  print(f'\nValidation Accuracy: {(correctClass/total) * 100}%')
  print(f'Total Validation Time: {validDuration} seconds')
  print(f'Total Runtime: {trainDuration + validDuration} seconds')

3a. SGD Optimized Neural Network with 1 hidden layer

In [4]:
seq_model = nn.Sequential(
    nn.Linear(3072, 256),
    nn.ReLU(),
    nn.Linear(256, 10))

optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training(
    epochs = 100,
    optimizer = optimizer,
    model = seq_model,
    loss_fn = nn.CrossEntropyLoss(),
    train_set = cifar10_train,
    valid_set = cifar10_valid
)

Epoch: 0, Training Loss: 1.798072
Epoch: 10, Training Loss: 1.392652
Epoch: 20, Training Loss: 1.493782
Epoch: 30, Training Loss: 1.630089
Epoch: 40, Training Loss: 1.889883
Epoch: 50, Training Loss: 1.571380
Epoch: 60, Training Loss: 1.316353
Epoch: 70, Training Loss: 1.646533
Epoch: 80, Training Loss: 0.631608
Epoch: 90, Training Loss: 1.143018
Epoch: 100, Training Loss: 1.233894
Total Training Time: 1400.026341676712 seconds

Validation Accuracy: 53.55%
Total Validation Time: 2.264516830444336 seconds
Total Runtime: 1402.2908585071564 seconds


3b. SGD Optimized Neural Network with two more hidden layers

In [5]:
seq_model = nn.Sequential(
    nn.Linear(3072, 256),
    nn.ReLU(),
    nn.Linear(256, 512),
    nn.ReLU(),
    nn.Linear(512, 128),
    nn.ReLU(),
    nn.Linear(128,10))

optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training(
    epochs = 100,
    optimizer = optimizer,
    model = seq_model,
    loss_fn = nn.CrossEntropyLoss(),
    train_set = cifar10_train,
    valid_set = cifar10_valid
)

Epoch: 0, Training Loss: 2.265142
Epoch: 10, Training Loss: 1.711616
Epoch: 20, Training Loss: 1.616932
Epoch: 30, Training Loss: 1.511662
Epoch: 40, Training Loss: 1.408389
Epoch: 50, Training Loss: 1.193405
Epoch: 60, Training Loss: 0.802058
Epoch: 70, Training Loss: 0.973443
Epoch: 80, Training Loss: 1.296362
Epoch: 90, Training Loss: 1.484643
Epoch: 100, Training Loss: 0.756942
Total Training Time: 1500.6941385269165 seconds

Validation Accuracy: 52.2%
Total Validation Time: 2.5166783332824707 seconds
Total Runtime: 1503.210816860199 seconds


In [6]:
# Comparison Function for Two Models
def compare_models(model1, model2, train1_loss, train2_loss, val1_acc, val2_acc):
    print("### Comparison of Models ###\n")

    print("Model 1: Single Hidden Layer")
    print(f"Final Training Loss: {train1_loss:.4f}")
    print(f"Validation Accuracy: {val1_acc:.2f}%\n")

    print("Model 2: Multiple Hidden Layers")
    print(f"Final Training Loss: {train2_loss:.4f}")
    print(f"Validation Accuracy: {val2_acc:.2f}%\n")

    # Determine the better model
    if val1_acc > val2_acc:
        print("Conclusion: Model 1 performs better on validation accuracy.")
    elif val1_acc < val2_acc:
        print("Conclusion: Model 2 performs better on validation accuracy.")
    else:
        print("Conclusion: Both models have equal validation performance.")

# Analysis section to call the comparison function
# Assuming the following are the metrics from training the two models:
# Model 1: Single Hidden Layer
train1_loss = 0.670261  # Replace with actual value
val1_acc = 53.71       # Replace with actual value

# Model 2: Multiple Hidden Layers
train2_loss = 0.697456  # Replace with actual value
val2_acc = 52.93        # Replace with actual value

# Call comparison function
compare_models("Model 1", "Model 2", train1_loss, train2_loss, val1_acc, val2_acc)


### Comparison of Models ###

Model 1: Single Hidden Layer
Final Training Loss: 0.6703
Validation Accuracy: 53.71%

Model 2: Multiple Hidden Layers
Final Training Loss: 0.6975
Validation Accuracy: 52.93%

Conclusion: Model 1 performs better on validation accuracy.
