# CNN Model 🔍

### Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import os
from pathlib import Path
import json
import transformers
import numpy as np
from utils import *
import matplotlib as mpl
from models import CNN9, CNN100
# Figure defaults for every plot in this notebook, applied in a single call.
mpl.rcParams.update({
    'figure.dpi': 500,
    'figure.figsize': [7, 5],
    'savefig.pad_inches': 0,
})

## 9-way Composer

In [2]:
# Hyperparameters for the 9-way run.
seed = 42
batch_size = 64
epochs = 48
learning_rate = 5e-3

# Directory that receives checkpoints, the training log and saved predictions.
output_model9_path = Path("./cnn9/")
output_model9_path.mkdir(exist_ok=True)

# Seed both RNGs that are used from this notebook.
np.random.seed(seed)
torch.manual_seed(seed)

### Load data and define model

$60/15/15$ split between train, val, and test data


$X, y$ are numpy arrays of shape $(n, 64, 62)$ and $(n,)$, respectively

In [3]:
# Load the pickled 9-way dataset; create_dataset returns the three splits
# together with the label2id / id2label lookups.
train_dataset, val_dataset, test_dataset, label2id, id2label = create_dataset("/mnt/data0/BSCRC/data/9_way_dataset.pkl", add_pad=False)

# Only the training split is shuffled. Validation and test batches keep the
# dataset order so that the prediction arrays written to disk further down
# line up row-for-row with the samples of each split.
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Prefer the first CUDA device; fall back to the CPU when none is available.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Build the 9-way classifier, wrapping it for data parallelism when more than
# one GPU is visible.
model9 = CNN9()
if torch.cuda.device_count() > 1:
    model9 = nn.DataParallel(model9)
model9 = model9.to(device)

# Cross-entropy objective optimised with Adam at the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model9.parameters(), lr=learning_rate)

### Train classifier

In [None]:
# Total number of optimizer steps in the run; it sizes both the progress bar
# and the linear learning-rate decay.
num_training_steps = len(trainloader) * epochs
progress_bar = tqdm(range(num_training_steps))
log_history = []
lr_scheduler = transformers.get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

for epoch in range(epochs):

    model9.train()
    train_loss = 0.0

    # training loop
    for data in trainloader:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()  # zero the parameter gradients
        outputs = model9(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # the schedule advances once per batch, not per epoch
        progress_bar.update(1)
        train_loss += loss.item()

    # Mean of the per-batch losses.
    train_loss /= len(trainloader)

    model9.eval()
    val_loss = 0.0
    correct1, correct5, total = 0, 0, 0

    # validation loop
    with torch.no_grad():
        for data in valloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model9(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, top1 = torch.topk(outputs, k=1, dim=1)
            _, top5 = torch.topk(outputs, k=5, dim=1)

            total += labels.size(0)
            # A sample counts as a hit when its label is among the top-k indices.
            correct1 += (top1 == labels[:, None]).sum().item()
            correct5 += (top5 == labels[:, None]).sum().item()

    val_loss /= len(valloader)
    val_acc1 = correct1 / total
    val_acc5 = correct5 / total

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}, val_acc1: {val_acc1:.3f}, val_acc5: {val_acc5:.3f}")
    log_history.append({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss, "val_acc1": val_acc1, "val_acc5": val_acc5})

    # Save the weights of the underlying network. An nn.DataParallel wrapper
    # prefixes every state-dict key with "module.", and such a checkpoint
    # cannot be loaded into a freshly constructed CNN9().
    bare_model = model9.module if isinstance(model9, nn.DataParallel) else model9
    torch.save(bare_model.state_dict(), f"{output_model9_path}/epoch_{epoch}.pth")

progress_bar.close()

# Persist the per-epoch metrics; plot_train reads this file back.
with open(output_model9_path/"log_history.json", "w") as f:
    json.dump(log_history, f)

### Plot training curves

In [None]:
def plot_train(path):
    """Plot the logged train/validation loss curves and print the best epochs.

    Reads ``log_history.json`` from ``path`` (a path object supporting ``/``),
    draws both loss curves against the epoch number, and prints the lowest
    validation loss with the train loss of that epoch, followed by the best
    validation top-1 accuracy with the top-5 accuracy of that epoch.
    """
    plt.style.use("ggplot")

    with open(path/"log_history.json", "r") as f:
        records = json.load(f)

    def column(key):
        # One value per logged epoch, in log order.
        return [record[key] for record in records]

    epoch_axis = column('epoch')
    train_losses = column('train_loss')
    val_losses = column('val_loss')
    top1_accs = column('val_acc1')
    top5_accs = column('val_acc5')

    # Epoch with the lowest validation loss / highest validation top-1 accuracy.
    lowest_loss_at = val_losses.index(min(val_losses))
    best_acc_at = top1_accs.index(max(top1_accs))

    curves = (
        (train_losses, "royalblue", f'Train (low {train_losses[lowest_loss_at]:.3f})'),
        (val_losses, "lightseagreen", f'Validation (low {val_losses[lowest_loss_at]:.3f})'),
    )
    # Lines first, then the per-epoch markers on top of the same series.
    for series, colour, legend_text in curves:
        plt.plot(epoch_axis, series, color=colour, label=legend_text, linewidth=2.5)
    for series, colour, _ in curves:
        plt.scatter(epoch_axis, series, color=colour)

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.tick_params(left=False, right=False)
    plt.legend(
        fontsize=13,
        fancybox=True,
        borderpad=0.8,
        labelspacing=0.8,
        borderaxespad=1.8,
        framealpha=0.7,
        facecolor='white',
    )

    print(f"Lowest Validation Loss: {val_losses[lowest_loss_at]:.5f}")
    print(f"Corresponding Train Loss: {train_losses[lowest_loss_at]:.5f}")

    print(f"Best Validation Acc@1: {top1_accs[best_acc_at]:.5f}")
    print(f"Corresponding Validation Acc@5: {top5_accs[best_acc_at]:.5f}")
    plt.show()

In [None]:
# Plot the loss curves logged for the 9-way model and print its best epochs.
plot_train(output_model9_path)

### Save last epoch validation/test predictions to disk

In [None]:
# Rebuild the network and restore the weights written after the final epoch.
model9 = CNN9()
state_dict = torch.load(output_model9_path/f"epoch_{epochs - 1}.pth", map_location=device)
# Checkpoints saved from an nn.DataParallel wrapper carry a "module." prefix on
# every key; strip it so the bare CNN9 accepts the weights either way.
state_dict = {
    (key[len("module."):] if key.startswith("module.") else key): value
    for key, value in state_dict.items()
}
model9.load_state_dict(state_dict)
if torch.cuda.device_count() > 1:
    model9 = nn.DataParallel(model9)
model9.to(device)

model9.eval()

# validation evaluation loop
val_preds, val_correct1, val_correct5, val_total = [], 0, 0, 0
with torch.no_grad():
    for data in valloader:
        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model9(inputs)

        _, top1 = torch.topk(outputs, k=1, dim=1)
        _, top5 = torch.topk(outputs, k=5, dim=1)

        val_total += labels.size(0)
        # A sample counts as a hit when its label is among the top-k indices.
        val_correct1 += (top1 == labels[:, None]).sum().item()
        val_correct5 += (top5 == labels[:, None]).sum().item()

        # The model returns the score tensor itself (it is passed straight to
        # topk above), so there is no `.logits` attribute to unwrap.
        val_preds.append(outputs.cpu().numpy())

# Use the counters accumulated in this cell, not the ones left over from the
# training loop.
val_acc1 = val_correct1 / val_total
val_acc5 = val_correct5 / val_total

print(f"Validation Accuracy Top 1: {val_acc1}")
print(f"Validation Accuracy Top 5: {val_acc5}")

# Rows are stored in the order in which valloader yielded the batches.
val_preds = np.concatenate(val_preds, axis=0)
with open(f"{output_model9_path}/val_preds.npy", "wb") as f:
    np.save(f, val_preds)

In [None]:
# test evaluation loop (reuses the model9 restored for the validation pass)
test_preds, test_correct1, test_correct5, test_total = [], 0, 0, 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model9(inputs)

        _, top1 = torch.topk(outputs, k=1, dim=1)
        _, top5 = torch.topk(outputs, k=5, dim=1)

        test_total += labels.size(0)
        # A sample counts as a hit when its label is among the top-k indices.
        test_correct1 += (top1 == labels[:, None]).sum().item()
        test_correct5 += (top5 == labels[:, None]).sum().item()

        # The model returns the score tensor itself (it is passed straight to
        # topk above), so there is no `.logits` attribute to unwrap.
        test_preds.append(outputs.cpu().numpy())

# Use the test counters accumulated above; `correct1` / `total` would still
# hold validation numbers from an earlier cell.
test_acc1 = test_correct1 / test_total
test_acc5 = test_correct5 / test_total

print(f"Test Accuracy Top 1: {test_acc1}")
print(f"Test Accuracy Top 5: {test_acc5}")

# Rows are stored in the order in which testloader yielded the batches.
test_preds = np.concatenate(test_preds, axis=0)
with open(f"{output_model9_path}/test_preds.npy", "wb") as f:
    np.save(f, test_preds)

## 100-way Composer

In [5]:
# Hyperparameters for the 100-way run.
seed = 42
batch_size = 64
epochs = 48
learning_rate = 5e-3

# Directory that receives checkpoints, the training log and saved predictions.
output_model100_path = Path("./cnn100/")
output_model100_path.mkdir(parents=True, exist_ok=True)

# Seed both RNGs that are used from this notebook.
np.random.seed(seed)
torch.manual_seed(seed)

### Load data and define model

In [6]:
# Load the pickled 100-way dataset; create_dataset returns the three splits
# together with the label2id / id2label lookups.
train_dataset, val_dataset, test_dataset, label2id, id2label = create_dataset("/mnt/data0/BSCRC/data/100_way_dataset.pkl", add_pad=False)

# Only the training split is shuffled. Validation and test batches keep the
# dataset order so that the prediction arrays written to disk further down
# line up row-for-row with the samples of each split.
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Prefer the first CUDA device; fall back to the CPU when none is available.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Build the 100-way classifier. When several GPUs are visible, wrap *this*
# network (not the 9-way one) for data-parallel execution.
model100 = CNN100()
if torch.cuda.device_count() > 1:
    model100 = nn.DataParallel(model100)
model100.to(device)

# Cross-entropy objective optimised with Adam at the configured learning rate.
optimizer = torch.optim.Adam(model100.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

### Train classifier

In [None]:
# Total number of optimizer steps in the run; it sizes both the progress bar
# and the linear learning-rate decay.
num_training_steps = len(trainloader) * epochs
progress_bar = tqdm(range(num_training_steps))
log_history = []
lr_scheduler = transformers.get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

for epoch in range(epochs):

    model100.train()
    train_loss = 0.0

    # training loop
    for data in trainloader:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()  # zero the parameter gradients
        outputs = model100(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # the schedule advances once per batch, not per epoch
        progress_bar.update(1)
        train_loss += loss.item()

    # Mean of the per-batch losses.
    train_loss /= len(trainloader)

    model100.eval()
    val_loss = 0.0
    correct1, correct5, total = 0, 0, 0

    # validation loop
    with torch.no_grad():
        for data in valloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model100(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, top1 = torch.topk(outputs, k=1, dim=1)
            _, top5 = torch.topk(outputs, k=5, dim=1)

            total += labels.size(0)
            # A sample counts as a hit when its label is among the top-k indices.
            correct1 += (top1 == labels[:, None]).sum().item()
            correct5 += (top5 == labels[:, None]).sum().item()

    val_loss /= len(valloader)
    val_acc1 = correct1 / total
    val_acc5 = correct5 / total

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f}, val_acc1: {val_acc1:.3f}, val_acc5: {val_acc5:.3f}")
    log_history.append({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss, "val_acc1": val_acc1, "val_acc5": val_acc5})

    # Save the weights of the underlying network. An nn.DataParallel wrapper
    # prefixes every state-dict key with "module.", and such a checkpoint
    # cannot be loaded into a freshly constructed CNN100().
    bare_model = model100.module if isinstance(model100, nn.DataParallel) else model100
    torch.save(bare_model.state_dict(), f"{output_model100_path}/epoch_{epoch}.pth")

progress_bar.close()

# Persist the per-epoch metrics; plot_train reads this file back.
with open(output_model100_path/"log_history.json", "w") as f:
    json.dump(log_history, f)

### Plot training curves

# Plot the loss curves logged for the 100-way model and print its best epochs.
plot_train(output_model100_path)

### Save last epoch validation/test predictions to disk

In [None]:
# Rebuild the network and restore the weights written after the final epoch.
model100 = CNN100()
state_dict = torch.load(output_model100_path/f"epoch_{epochs - 1}.pth", map_location=device)
# Checkpoints saved from an nn.DataParallel wrapper carry a "module." prefix on
# every key; strip it so the bare CNN100 accepts the weights either way.
state_dict = {
    (key[len("module."):] if key.startswith("module.") else key): value
    for key, value in state_dict.items()
}
model100.load_state_dict(state_dict)
if torch.cuda.device_count() > 1:
    model100 = nn.DataParallel(model100)
model100.to(device)

model100.eval()

# validation evaluation loop
val_preds, val_correct1, val_correct5, val_total = [], 0, 0, 0
with torch.no_grad():
    for data in valloader:
        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model100(inputs)

        _, top1 = torch.topk(outputs, k=1, dim=1)
        _, top5 = torch.topk(outputs, k=5, dim=1)

        val_total += labels.size(0)
        # A sample counts as a hit when its label is among the top-k indices.
        val_correct1 += (top1 == labels[:, None]).sum().item()
        val_correct5 += (top5 == labels[:, None]).sum().item()

        # The model returns the score tensor itself (it is passed straight to
        # topk above), so there is no `.logits` attribute to unwrap.
        val_preds.append(outputs.cpu().numpy())

# Use the counters accumulated in this cell, not the ones left over from the
# training loop.
val_acc1 = val_correct1 / val_total
val_acc5 = val_correct5 / val_total

print(f"Validation Accuracy Top 1: {val_acc1}")
print(f"Validation Accuracy Top 5: {val_acc5}")

# Rows are stored in the order in which valloader yielded the batches.
val_preds = np.concatenate(val_preds, axis=0)
with open(f"{output_model100_path}/val_preds.npy", "wb") as f:
    np.save(f, val_preds)

In [None]:
# test evaluation loop (reuses the model100 restored for the validation pass)
test_preds, test_correct1, test_correct5, test_total = [], 0, 0, 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model100(inputs)

        _, top1 = torch.topk(outputs, k=1, dim=1)
        _, top5 = torch.topk(outputs, k=5, dim=1)

        test_total += labels.size(0)
        # A sample counts as a hit when its label is among the top-k indices.
        test_correct1 += (top1 == labels[:, None]).sum().item()
        test_correct5 += (top5 == labels[:, None]).sum().item()

        # The model returns the score tensor itself (it is passed straight to
        # topk above), so there is no `.logits` attribute to unwrap.
        test_preds.append(outputs.cpu().numpy())

# Use the test counters accumulated above; `correct1` / `total` would still
# hold validation numbers from an earlier cell.
test_acc1 = test_correct1 / test_total
test_acc5 = test_correct5 / test_total

print(f"Test Accuracy Top 1: {test_acc1}")
print(f"Test Accuracy Top 5: {test_acc5}")

# Rows are stored in the order in which testloader yielded the batches.
test_preds = np.concatenate(test_preds, axis=0)
with open(f"{output_model100_path}/test_preds.npy", "wb") as f:
    np.save(f, test_preds)