# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# %reload_ext autoreload

In [3]:
import time
from dataclasses import dataclass
from itertools import islice
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm.notebook import tqdm

# pd.set_option('display.max_columns', 500)
# pd.set_option('display.max_rows', 500)
# import warnings
# warnings.filterwarnings('ignore')
# C:/Users/sshar/AppData/Roaming/jupyter/nbextensions/snippets /snippets.json (jupyter --data-dir)

In [4]:
from dataset import CheXpertDataset
import utils
from utils import vprint
from utils import to_gpu

# Configs 

In [5]:
@dataclass
class TrainingConfigs:
    """Settings for one training run.

    The notebook reads (and, for the loader sizes, writes) these values as
    class attributes, e.g. ``TrainingConfigs.BATCH_SIZE``. Every attribute is
    annotated so that ``@dataclass`` registers it as a real field: without
    annotations the decorator sees no fields at all, so per-run overrides such
    as ``TrainingConfigs(BATCH_SIZE=8)`` would raise ``TypeError``. Class-level
    access keeps working exactly as before because every field has a default.
    """
    # Root folder of the CheXpert data, passed to CheXpertDataset as data_dir.
    DATA_DIR: str = r"..\data\CheXpert"
    # Checkpoint folder name (consumed by the utils helpers - confirm exact use there).
    CHECKPOINT_DIR: str = r"checkpoints"
    # Batch size used for both the training and validation DataLoaders.
    BATCH_SIZE: int = 4
    # Upper bound of the epoch range in the training loop.
    EPOCHS: int = 3
    # Learning rate handed to the Adam optimizer.
    LEARNING_RATE: float = 0.0001
    # Minimum wall-clock time between two checkpoints.
    CHECKPOINT_TIME_INTERVAL: int = 3 # seconds
    # Model identifier (presumably used by utils to name checkpoints - confirm).
    MODEL_VERSION: str = "densenet121"
    # Not set in this notebook (presumably an explicit checkpoint to load - confirm in utils).
    TRAINED_MODEL_PATH: Optional[str] = None
    # Number of batches per epoch; filled in once the DataLoaders exist.
    TRAIN_LOADER_SIZE: Optional[int] = None
    VALID_LOADER_SIZE: Optional[int] = None

In [6]:
# Seed the run before any dataset, loader or model is created, using the
# defaults of utils.set_seed (which RNGs it covers is defined in utils - confirm there).
utils.set_seed()

# Training

## Training Setup

In [7]:
# Both splits use the same deterministic preprocessing: resize to 320x320,
# convert to a tensor, then normalise the three channels with the mean / std
# values below (these appear to be the usual ImageNet statistics - confirm).
_resize_hw = (320, 320)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.Resize(_resize_hw),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])
# Kept as a separate pipeline object so the two splits can diverge later.
valid_transform = transforms.Compose([
    transforms.Resize(_resize_hw),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
])

In [8]:
# Create data loaders.
train_dataset = CheXpertDataset(mode='train', data_dir=TrainingConfigs.DATA_DIR, transform=train_transform)
train_dataloader = DataLoader(train_dataset, batch_size=TrainingConfigs.BATCH_SIZE, shuffle=True)
TrainingConfigs.TRAIN_LOADER_SIZE = len(train_dataloader)
len(train_dataset)

223414

In [9]:
# Validation split. While debugging, only the first VALID_DEBUG_SUBSET label
# rows are kept so that each checkpoint's validation pass is fast; metrics
# computed on such a tiny subset are not meaningful. Set it to None to
# validate on the full split.
VALID_DEBUG_SUBSET = 12

valid_dataset = CheXpertDataset(
    mode='valid',
    data_dir=TrainingConfigs.DATA_DIR,
    transform=valid_transform,
)
if VALID_DEBUG_SUBSET is not None:
    # Truncating `labels` shrinks the dataset itself: len(valid_dataset) follows it.
    valid_dataset.labels = valid_dataset.labels[:VALID_DEBUG_SUBSET]
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=TrainingConfigs.BATCH_SIZE,
    shuffle=False,
)
TrainingConfigs.VALID_LOADER_SIZE = len(valid_dataloader)
len(valid_dataset)  # cell output: number of validation samples actually used

12

In [10]:
def _accept_any_repo(a, b, c):
    """Replacement for torch.hub's fork check: takes three arguments, always returns True."""
    return True


# workaround for torch.hub: swap out its private fork-validation hook before loading
torch.hub._validate_not_a_forked_repo = _accept_any_repo
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'densenet121',
    pretrained=True,
)

Using cache found in C:\Users\sshar/.cache\torch\hub\pytorch_vision_v0.10.0


In [14]:
# Replace the model's single-layer classifier with a small two-layer head that
# emits one logit per class (utils.Configs.NUM_CLASSES outputs).
#
# The guard makes the cell safe to re-run: after the first run the classifier
# is already an nn.Sequential, which has no `in_features`, so an unguarded
# re-run would raise AttributeError. A re-run therefore leaves the existing
# head untouched; reload the model to get a freshly initialised head.
if isinstance(model.classifier, nn.Linear):
    num_features = model.classifier.in_features
    model.classifier = nn.Sequential(
        nn.Linear(num_features, num_features, bias=True),
        nn.ReLU(),
        nn.Dropout(p=0.1),
        nn.Linear(in_features=num_features, out_features=utils.Configs.NUM_CLASSES, bias=True)
    )

In [11]:
# Adam over every model parameter, with a small weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=TrainingConfigs.LEARNING_RATE,
    weight_decay=1e-5,
)
# Multiply the learning rate by 0.1 once the monitored value (minimised) has
# failed to improve for more than `patience` consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=5,
)
# Multi-label setup: one independent sigmoid per class, so the per-class
# probabilities do not have to sum to 1. BCEWithLogitsLoss applies the sigmoid
# itself, so the model must output raw logits.
# Hard predictions: torch.round(torch.sigmoid(pred))
criterion = nn.BCEWithLogitsLoss(reduction='mean')

## Training Loop 

In [16]:
# Resume (or start) training from the position reported by utils:
# `last_epoch` is the epoch to continue and `last_iter` the last batch index
# already processed in it; `results` accumulates the loss history.
# NOTE(review): the optimizer was built from `model` before this call - this
# assumes the helper returns the same module object; confirm in utils.
model, results, last_epoch, last_iter = utils.get_previos_training_place(model, TrainingConfigs)
model.train()
model = to_gpu(model)
batches_per_epoch = len(train_dataloader)
start_time = time.time()
for epoch in range(last_epoch, TrainingConfigs.EPOCHS):
    # Fast-forward past already-processed batches ONLY in the epoch being
    # resumed. Applying the offset to every epoch would silently drop the
    # first `last_iter + 1` batches of each later epoch.
    first_batch = last_iter + 1 if epoch == last_epoch else 0
    # islice still pulls the skipped batches from the loader (they are loaded
    # and discarded), so resuming deep into an epoch takes a while.
    train_dataloader_iter = islice(
        tqdm(enumerate(train_dataloader), total=batches_per_epoch),
        first_batch, batches_per_epoch)
    for i, (images, labels) in train_dataloader_iter:
        images = to_gpu(images)
        labels = to_gpu(labels)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        results['train_loss'].append(loss.item())
        # Checkpoint on wall-clock time rather than on iteration count.
        if time.time() - start_time > TrainingConfigs.CHECKPOINT_TIME_INTERVAL:
            utils.create_checkpoint(model, epoch, i, valid_dataloader, criterion, results, TrainingConfigs)
            # The checkpoint evaluates on the validation loader; re-assert
            # training mode in case the helper left the model in eval mode
            # (a no-op if it already restores it).
            model.train()
            start_time = time.time()
    # Drive the LR scheduler with the mean of the most recent validation
    # losses. Skip the step when none exist yet: np.mean([]) is nan, which the
    # scheduler would count as a non-improving epoch.
    recent_valid_losses = results["valid_loss"][-batches_per_epoch:]
    if len(recent_valid_losses) > 0:
        scheduler.step(np.mean(recent_valid_losses))

Loaded model - epoch:0, iter:18


  0%|          | 0/55854 [00:00<?, ?it/s]

2022_07_05-21_10: Checkpoint Created.
Epoch [1/3],   Iter [19/55853],   Train Loss: 0.5585,   Valid Loss: 0.5115,   Valid AUC: 0.0097

2022_07_05-21_10: Checkpoint Created.
Epoch [1/3],   Iter [20/55853],   Train Loss: 0.5663,   Valid Loss: 0.5121,   Valid AUC: 0.0113

2022_07_05-21_10: Checkpoint Created.
Epoch [1/3],   Iter [21/55853],   Train Loss: 0.5658,   Valid Loss: 0.5117,   Valid AUC: 0.0137

2022_07_05-21_10: Checkpoint Created.
Epoch [1/3],   Iter [22/55853],   Train Loss: 0.5615,   Valid Loss: 0.5097,   Valid AUC: 0.0133

2022_07_05-21_11: Checkpoint Created.
Epoch [1/3],   Iter [23/55853],   Train Loss: 0.5573,   Valid Loss: 0.5102,   Valid AUC: 0.0125

2022_07_05-21_11: Checkpoint Created.
Epoch [1/3],   Iter [24/55853],   Train Loss: 0.5540,   Valid Loss: 0.5107,   Valid AUC: 0.0123

2022_07_05-21_11: Checkpoint Created.
Epoch [1/3],   Iter [25/55853],   Train Loss: 0.5510,   Valid Loss: 0.5105,   Valid AUC: 0.0143

2022_07_05-21_11: Checkpoint Created.
Epoch [1/3],   It

KeyboardInterrupt: 