In [1]:
import torch
from torch.utils.data import DataLoader

from torchvision.models import alexnet, vgg16, googlenet, inception_v3, resnet18, densenet161
from torchvision.datasets import PCAM
import torchvision.transforms as transforms

from torcheval.metrics import MulticlassAUROC, MulticlassAccuracy

import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm as _tqdm
import ipywidgets as widgets
from IPython.core.display import display


[33m(Deprecated) Installing extensions with the jupyter labextension install command is now deprecated and will be removed in a future major version of JupyterLab.

Users should manage prebuilt extensions with package managers like pip and conda, and extension authors are encouraged to distribute their extensions as prebuilt packages [0m
usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

optional arguments:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as m

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


cuda


In [3]:
## Dataset and data loaders

transform = transforms.Compose([
    transforms.PILToTensor()
])

train_dataset = PCAM(root='data', split='train', download=True, transform=transform)
val_dataset = PCAM(root='data', split='val', download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)


In [10]:
## Model Selection

model_widget = widgets.Select(
    options=[('AlexNet', alexnet), ('VGG-16', vgg16), ('GoogleNet', googlenet), ('Inception-v3', inception_v3), ('ResNet-18', resnet18), ('DenseNet-161', densenet161)],
    value=alexnet,
    description='Model:',
    disabled=False,
)
display(model_widget)

Select(description='Model:', options=(('AlexNet', <function alexnet at 0x7fafaefcaa60>), ('VGG-16', <function …

In [13]:
## Model Initialization
 
print(f'Selected Model: {model_widget.value.__name__}')
model = model_widget.value(pretrained=True)
model.to(device)

# Freeze all layers except last
for param in model.parameters():
    param.requires_grad = False

# Create classification layer    
num_classes = 2
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

## Optimizer
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.01, momentum=0.9)

## Loss Function
loss_fun = torch.nn.CrossEntropyLoss()


Selected Model: resnet18


RuntimeError: CUDA error: CUDA-capable device(s) is/are busy or unavailable
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [5]:
def uniquify(path):
    """
    Creates unique path name by appending number if given path already exists
    """
    
    filename, extension = os.path.splitext(path)
    counter = 1

    while os.path.exists(path):
        path = filename + "_" + str(counter) + extension
        counter += 1

    return path


def tqdm(*args, **kwargs):
    """
    Wrapper for loop progress bar
    """
    
    return _tqdm(*args, **kwargs, mininterval=1)  # Safety, do not overflow buffer


def train(model, train_loader, val_loader, loss_fun, optimizer, num_epochs, num_classes, device, save_ckpt_path=None):
    """
    Trains model
    """
    
    model.to(device)
    
    # Create metric monitors
    auc = MulticlassAUROC(num_classes=num_classes)
    accuracy = MulticlassAccuracy()
    
    for epoch in range(num_epochs):
        
        # Set the model to train mode
        model.train()

        # Initialize the running loss and metrics
        curr_loss = 0.0
        auc.reset()
        accuracy.reset()
        
        ## Train
        for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}, Training'):
            
            # Move the inputs and labels to the device
            inputs = inputs.float().to(device)
            labels = labels.to(device)

            # Zero the optimizer gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = loss_fun(outputs, labels)

            # Backward pass and optimizer step
            loss.backward()
            optimizer.step()

            # Update the running loss and metrics
            curr_loss += loss.item() * inputs.size(0)
            auc.update(outputs, labels)
            accuracy.update(outputs, labels)

        # Calculate the train loss and metrics
        train_loss = curr_loss / len(train_dataset)
        train_acc = accuracy.compute()
        train_auc = auc.compute()

        # Set the model to evaluation mode
        model.eval()

        # Initialize the running loss
        curr_loss = 0.0
        
        # Initialize the metrics
        auc.reset()
        accuracy.reset()

        ## Validate
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs}, Validation'):
                
                # Move the inputs and labels to the device
                inputs = inputs.float().to(device)
                labels = labels.to(device)

                # Forward pass
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = loss_fun(outputs, labels)

                # Update the running loss and metrics
                curr_loss += loss.item() * inputs.size(0)
                auc.update(outputs, labels)
                accuracy.update(outputs, labels)

        # Calculate the validation loss, accuracy and AUC
        val_loss = curr_loss / len(val_dataset)
        val_acc = accuracy.compute()
        val_auc = auc.compute()

        # Print the epoch results
        print('Train Loss: {:.4f}, Train Acc: {:.4f}, Train AUC: {:.4f}, \n Val Loss: {:.4f}, Val Acc: {:.4f}, Val AUC: {:.4f}\n'
              .format(train_loss, train_acc, train_auc, val_loss, val_acc, val_auc))
        
        ## Save model checkpoint
        if save_ckpt_path is None:
            save_ckpt_path = os.path.join('models',f'{model.__class__.__name__}.pt')
            if not os.path.exists('models'):  # If folder 'models' doesn't exist, create it
                os.makedirs('models')
        save_ckpt_path = uniquify(save_ckpt_path)  # Create unique path name by appending number if given path already exists
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'train_acc': train_acc,
            'train_auc': train_auc,
            'val_loss': val_loss,
            'val_acc':val_acc,
            'val_auc': val_auc,
            }, save_ckpt_path)
        print(f'Saved checkpoint at: {save_ckpt_path}')
        

        

In [6]:
train(model, train_loader, val_loader, loss_fun, optimizer, num_epochs=5, num_classes = 2, device=device)

Epoch 1/5, Training: 100%|██████████| 8192/8192 [04:18<00:00, 31.64it/s]
Epoch 1/5, Validation: 100%|██████████| 1024/1024 [00:29<00:00, 34.23it/s]


Train Loss: 0.9527, Train Acc: 0.7503, Train AUC: 0.8198, 
 Val Loss: 1.0491, Val Acc: 0.7138, Val AUC: 0.8218


Epoch 2/5, Training: 100%|██████████| 8192/8192 [04:00<00:00, 34.12it/s]
Epoch 2/5, Validation: 100%|██████████| 1024/1024 [00:28<00:00, 35.50it/s]


Train Loss: 0.9426, Train Acc: 0.7517, Train AUC: 0.8207, 
 Val Loss: 1.5198, Val Acc: 0.6844, Val AUC: 0.8244


Epoch 3/5, Training: 100%|██████████| 8192/8192 [03:57<00:00, 34.42it/s]
Epoch 3/5, Validation: 100%|██████████| 1024/1024 [00:31<00:00, 32.42it/s]


Train Loss: 0.9563, Train Acc: 0.7496, Train AUC: 0.8196, 
 Val Loss: 1.2499, Val Acc: 0.7279, Val AUC: 0.8508


Epoch 4/5, Training: 100%|██████████| 8192/8192 [04:19<00:00, 31.59it/s]
Epoch 4/5, Validation: 100%|██████████| 1024/1024 [00:32<00:00, 31.86it/s]


Train Loss: 0.9953, Train Acc: 0.7487, Train AUC: 0.8180, 
 Val Loss: 0.7975, Val Acc: 0.7909, Val AUC: 0.8697


Epoch 5/5, Training: 100%|██████████| 8192/8192 [04:24<00:00, 30.97it/s]
Epoch 5/5, Validation: 100%|██████████| 1024/1024 [00:36<00:00, 28.40it/s]


Train Loss: 0.9534, Train Acc: 0.7523, Train AUC: 0.8222, 
 Val Loss: 0.8085, Val Acc: 0.7602, Val AUC: 0.8360
