# Chapter 2.1: "AlexNet"

AlexNet, developed by Alex Krizhevsky et al., is a convolutional neural network (CNN) and is generally seen as the model that helped CNNs become the dominant image recognition architecture (until Vision Transformers replaced them). By making use of multi-GPU training, it single-handedly lifted the field of computer vision from what can almost be described as obscurity and heralded a renaissance era of explosive growth in terms of funding, researchers, GPU capabilities, new architectures, and, ultimately, applications.

Hopefully, you have paid attention during the presentation. At any rate, you can find the paper here: https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html

Over the next sessions, we will recreate this milestone architecture (and a few others) in PyTorch.

In [1]:
import sys
sys.path.append("/datashare/MLCourse/Course_Materials") # Preferentially import from the datashare.
sys.path.append("../") # Otherwise, import from the local folder's parent folder, where your stuff lives.

import numpy as np
import time
import torch, torch.nn as nn
import torchvision, torchvision.transforms as tt
from torch.multiprocessing import Manager
torch.multiprocessing.set_sharing_strategy("file_system")

from utility import utils as uu
from utility.eval import evaluate_classifier_model

### TASK: Add some data augmentations of your choice (or None, if you want to test something else).

In [None]:
# TODO: Your data augments go here.
# `data_augments` is handed to the dataset below as its `transforms` argument.
# None presumably means "no augmentation" -- confirm against the dataset class.
data_augments = None

In [None]:
# A single dataset object holds the train, val, and test splits; the active
# split is chosen with 'ds.set_mode(selection)'.

# Data read from disk could also be cached, either in shared memory or in
# regular memory (where every dataloader worker would cache the entire
# dataset). The first option costs more overhead than it gains for this
# problem and the second needs more memory than is available, so caching
# stays switched off and everything is read from disk.

cache_me = False
if cache_me is True:
    # Shared bookkeeping: per split, one dict for the data and one "cached" flag.
    cache_mgr = Manager()
    cache_mgr.data, cache_mgr.cached = cache_mgr.dict(), cache_mgr.dict()
    for split in ("train", "val", "test"):
        cache_mgr.cached[split] = False
        cache_mgr.data[split] = cache_mgr.dict()

ds = uu.LiTS_Classification_Dataset(
    data_dir="/home/coder/Course_Materials/data/Clean_LiTS/",
    transforms=data_augments,
    verbose=True,
    cache_data=cache_me,
    # The manager only exists when caching was requested above.
    cache_mgr=cache_mgr if cache_me is True else None,
    debug=True,
)

### TASK: Play around with the hyperparameters (if you feel like it).

In [3]:
# Default settings

# Run bookkeeping
run_name = "AlexNet"
time_me = True  # print timing information while training

# Optimisation hyperparameters
epochs = 10
batch_size = 32
learning_rate = 1e-4
weight_decay = 5e-6

# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [4]:
# Dataloader: this one loader is used for the train, val, and test passes.
# NOTE(review): with persistent workers the worker-side copies of `ds` stay
# alive between epochs -- confirm that `ds.set_mode(...)` calls made in the
# main process actually reach them (e.g. through shared state).
dl = torch.utils.data.DataLoader(
    ds,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    num_workers=4,
    prefetch_factor=1,
    pin_memory=True,
    # Workers are kept alive between epochs unless caching is enabled.
    persistent_workers=not cache_me,
)

### TASK: Construct AlexNet (this one you have to do).

In [5]:
# Stand-in example model (if you want to test something else):
# a torchvision ResNet-18 whose first convolution takes 1 input channel
# and whose final linear layer produces 3 outputs.
model = torchvision.models.resnet18()
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=3)
model = model.to(device)

In [6]:
# Your implementation
class AlexNet(torch.nn.Module):
    """Placeholder for the AlexNet model that the task above asks you to build.

    The body is still empty: no layers and no forward pass are defined yet.
    """
    pass

In [None]:
# Create an instance of your model
# model = AlexNet()
# model.to(device)

In [7]:
# AdamW updates every parameter of `model`; training minimises cross-entropy.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
criterion = nn.CrossEntropyLoss()

In [None]:
# Training / validation driver: runs `epochs` passes over the train split and
# evaluates on the val split after each pass. Losses and metrics are written
# to ../logs/{run_name}_train.csv and ../logs/{run_name}_val.csv.

# Start the clock for the first "CPU time" reading. "CPU time" covers everything
# outside of the forward pass (data loading, logging, backward pass, optimizer step).
if time_me is True:
    c_start = time.time()

# CUDA kernels are launched asynchronously, so wall-clock readings are only
# meaningful after the device has been synchronised. Only needed while timing.
sync_gpu = time_me is True and str(device).startswith("cuda")

# Number of full batches per training epoch; only used for the progress output.
# NOTE(review): assumes the train split yields len(ds.file_names['train'])
# samples per epoch -- confirm against the dataset implementation.
num_steps = len(ds.file_names['train'])//batch_size

for epoch in range(epochs):

    # If we are caching, we now have all data and let the (potentially non-persistent) workers know
    if cache_me is True and epoch > 0:
        dl.dataset.set_cached("train")
        dl.dataset.set_cached("val")

    # Time me
    if time_me is True:
        e_start = time.time()

    # Go to train mode
    ds.set_mode("train")
    model.train()

    # Train loop
    for step, (data, targets) in enumerate(dl):

        # Manually drop the incomplete last batch (this is for example relevant
        # with BatchNorm). The batch is identified by its size, not its index:
        # the loader uses drop_last = False, so the incomplete batch (if any)
        # comes after the num_steps full ones, and a full batch must never be
        # skipped. Nothing is dropped during the first epoch of a caching run.
        # Assumes batches are collated along dim 0 (default collate) -- TODO confirm.
        if data.shape[0] < batch_size and (epoch > 0 or ds.cache_data is False):
            continue

        # Train loop: Zero gradients, forward step, evaluate, log, backward step
        optimizer.zero_grad()
        data, targets = data.to(device), targets.to(device)
        if time_me is True:
            # Let work queued by the previous step finish so that it is
            # attributed to "CPU time" and not to the forward pass below.
            if sync_gpu:
                torch.cuda.synchronize()
            c_end = time.time()
            if step % 100 == 0:
                print(f"CPU time: {c_end-c_start:.4f}s")
            g_start = time.time()
        predictions = model(data)
        if time_me is True:
            # Wait for the forward pass to actually complete before reading the clock.
            if sync_gpu:
                torch.cuda.synchronize()
            g_end = time.time()
            c_start = time.time()
        if step % 100 == 0 and time_me is True:
            print(f"GPU time: {g_end-g_start:.4f}s")
        loss = criterion(predictions, targets)
        if step % 100 == 0:
            print(f"Epoch [{epoch+1}/{epochs}]\t Step [{step+1}/{num_steps}]\t Train Loss: {loss.item():.4f}")
        # Log every step; the very first call starts a fresh log file.
        uu.csv_logger(
            logfile = f"../logs/{run_name}_train.csv",
            content = {"epoch": epoch, "step": step, "loss": loss.item()},
            first = (epoch == 0 and step == 0),
            overwrite = (epoch == 0 and step == 0)
                )
        loss.backward()
        optimizer.step()

    # Go to eval mode
    ds.set_mode("val")
    model.eval()

    # Validation loop
    val_accuracy, avg_val_loss = evaluate_classifier_model(model = model, dataloader = dl, device = device)
    print(f"Epoch [{epoch+1}/{epochs}]\t Val Loss: {avg_val_loss:.4f}\t Val Accuracy: {val_accuracy:.4f}")
    # One row per epoch; the first epoch starts a fresh log file.
    uu.csv_logger(
        logfile = f"../logs/{run_name}_val.csv",
        content = {"epoch": epoch, "val_loss": avg_val_loss, "val_accuracy": val_accuracy},
        first = (epoch == 0),
        overwrite = (epoch == 0)
            )

    if time_me is True:
        print(f"Epoch time: {time.time()-e_start:.4f}s")

# Final evaluation on the test split, run once after the last training epoch.
ds.set_mode("test")
model.eval()

test_accuracy, avg_test_loss = evaluate_classifier_model(
    model=model,
    dataloader=dl,
    device=device,
)
print(f"Epoch [{epoch+1}/{epochs}]\t Test Loss: {avg_test_loss:.4f}\t Test Accuracy: {test_accuracy:.4f}")

# The test log holds a single row, so the file is always started afresh.
uu.csv_logger(
    logfile=f"../logs/{run_name}_test.csv",
    content={"epoch": epoch, "test_loss": avg_test_loss, "test_accuracy": test_accuracy},
    first=True,
    overwrite=True,
)