# Imports

In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [17]:
# NOTE(review): this cell's execution count (17) is out of sequence with its
# neighbours, and the extension is already loaded by the cell above — this
# looks like a leftover from interactive work; consider removing it.
%reload_ext autoreload

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
import time
from itertools import islice
from dataclasses import dataclass
import torchvision
from torchvision.models import densenet161, DenseNet161_Weights, vit_b_16, ViT_B_16_Weights, densenet121, DenseNet121_Weights
import os
import sys
from pathlib import Path

In [9]:
# Extend the import path with the directory two levels above the current
# working directory; this must run before the project-local imports below.
# NOTE(review): the result depends on the directory the kernel was started
# from — confirm the notebook is always launched from its own folder.
sys.path.append(str(Path.cwd().parent.parent))
from dataset import CheXpertDiseaseDataset
from CheXpert.shared_utils import vprint, to_gpu
from CheXpert import shared_utils
from utils import Configs

# Configs 

In [7]:
@dataclass
class TrainingConfigs(Configs):
    """Settings for this DenseNet-161 training run.

    Every value below is a plain, un-annotated class attribute, so
    ``@dataclass`` does not turn any of them into a dataclass field; the
    notebook reads and writes them directly on the class (for example
    ``TrainingConfigs.BATCH_SIZE``).

    The notebook also reads ``SEED`` and ``NUM_CLASSES`` from this class;
    they are not defined here — presumably inherited from ``Configs``
    (TODO confirm).
    """

    # --- Data locations -------------------------------------------------
    DATA_DIR = os.path.join("..", "..", "data", "CheXpert", "CheXpert-v1.0-small")
    TRAIN_LABELS_FILENAME = "train.csv"
    VALID_LABELS_FILENAME = "valid_small.csv"

    # --- Model and checkpointing ----------------------------------------
    MODEL_VERSION = "densenet161"
    CHECKPOINT_DIR = "checkpoints"
    # Path of the checkpoint file inside CHECKPOINT_DIR.
    # NOTE(review): presumably the checkpoint training resumes from — verify
    # against shared_utils.get_previous_training_place.
    TRAINED_MODEL_PATH = os.path.join(
        CHECKPOINT_DIR,
        "2022_07_09-10_06__densenet161__epoch-4__iter-17793__trainLastLoss-0.3651__validAUC-0.8895.dict",
    )
    # Minimum wall-clock time between two checkpoints, in seconds.
    # NOTE(review): 5 s triggers a checkpoint very frequently — confirm this
    # is not a leftover debugging value.
    CHECKPOINT_TIME_INTERVAL = 5

    # --- Optimisation ---------------------------------------------------
    BATCH_SIZE = 4
    EPOCHS = 10
    LEARNING_RATE = 1e-4

    # --- Filled in at runtime -------------------------------------------
    # Set to len(train_dataloader) / len(valid_dataloader) once the loaders
    # have been created further down in the notebook.
    TRAIN_LOADER_SIZE = None
    VALID_LOADER_SIZE = None

In [None]:
# Hand the configured seed to the project's seeding helper (presumably seeds
# the RNGs used during training — verify in shared_utils).
# NOTE(review): this cell has no execution count, so it was not run in the
# saved session; SEED is not defined in TrainingConfigs itself and is
# presumably inherited from Configs.
shared_utils.set_seed(TrainingConfigs.SEED)

In [32]:
# When a CUDA device is present, release PyTorch's cached GPU memory and log
# the full (non-abbreviated) allocator summary for the current device.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    _gpu_memory_report = torch.cuda.memory_summary(device=None, abbreviated=False)
    # vprint takes the config class as its second argument
    # (presumably to decide whether/how to print — confirm in shared_utils).
    vprint(_gpu_memory_report, TrainingConfigs)

# Training

## Training Setup

In [11]:
# Preprocessing pipelines. Training and validation currently apply the same
# three steps: resize to 320x320, convert to a tensor, normalise per channel.
# NOTE(review): the mean/std values look like the standard ImageNet channel
# statistics that go with the pretrained backbone — confirm.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(320, 320)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Kept as a separate pipeline so training-only augmentation can be added to
# train_transform without touching validation.
valid_transform = transforms.Compose(
    [
        transforms.Resize(size=(320, 320)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [12]:
# Training split: dataset built from the training labels file, then wrapped
# in a batched loader.
train_dataset = CheXpertDiseaseDataset(
    data_dir=TrainingConfigs.DATA_DIR,
    labels_filename=TrainingConfigs.TRAIN_LABELS_FILENAME,
    transform=train_transform,
)
# shuffle=False: batches come out in dataset order on every pass.
# NOTE(review): the training loop resumes by skipping a fixed number of
# batches, which only lines up if the order is stable — keep that in mind
# before enabling shuffling.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=TrainingConfigs.BATCH_SIZE,
    shuffle=False,
)
# Record the number of batches per epoch on the config class.
TrainingConfigs.TRAIN_LOADER_SIZE = len(train_dataloader)
# Last expression: display the number of training samples.
len(train_dataset)

223414

In [13]:
# Validation split: same construction as the training split, but reading the
# validation labels file and using the validation transform.
valid_dataset = CheXpertDiseaseDataset(
    data_dir=TrainingConfigs.DATA_DIR,
    labels_filename=TrainingConfigs.VALID_LABELS_FILENAME,
    transform=valid_transform,
)
valid_dataloader = DataLoader(
    dataset=valid_dataset,
    batch_size=TrainingConfigs.BATCH_SIZE,
    shuffle=False,
)
# Record the number of validation batches on the config class.
TrainingConfigs.VALID_LOADER_SIZE = len(valid_dataloader)
# Last expression: display the number of validation samples.
len(valid_dataset)

10

In [14]:
# DenseNet-161 initialised with torchvision's default pretrained weights.
model = densenet161(weights=DenseNet161_Weights.DEFAULT)

# Replace the stock classifier with a small two-layer head that keeps the
# backbone's feature width and ends in one output per class.
num_features = model.classifier.in_features
_head_layers = [
    nn.Linear(in_features=num_features, out_features=num_features, bias=True),
    nn.ReLU(),
    nn.Dropout(p=0.1),
    # One raw logit per class; no activation here (the loss applies sigmoid).
    nn.Linear(in_features=num_features, out_features=TrainingConfigs.NUM_CLASSES, bias=True),
]
# Positional construction keeps the sub-module names "0".."3", so parameter
# names in saved state dicts are unchanged.
model.classifier = nn.Sequential(*_head_layers)

In [15]:
# Adam over all model parameters, with a small L2 penalty (weight decay).
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=TrainingConfigs.LEARNING_RATE,
    weight_decay=1e-5,
)
# Multiply the learning rate by 0.1 once the monitored value (to be
# minimised) has not improved for more than 5 consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    mode='min',
    factor=0.1,
    patience=5,
)
# BCEWithLogitsLoss fuses the sigmoid with binary cross-entropy, so the model
# must output raw logits. Each class is scored independently, which handles
# the multi-label setting (class probabilities need not sum to 1).
# To obtain hard labels from predictions: torch.round(torch.sigmoid(pred))
criterion = nn.BCEWithLogitsLoss(reduction='mean')

## Training Loop 

In [31]:
# Resume (or start) training.
# get_previous_training_place hands back the model, the running results
# history, and the epoch / iteration to continue from (presumably restored
# from TrainingConfigs.TRAINED_MODEL_PATH — confirm in shared_utils).
model, results, last_epoch, last_iter = shared_utils.get_previous_training_place(model, TrainingConfigs)
# Keys passed through to create_checkpoint; their exact use lives in shared_utils.
score_dict = {
    "auc": "valid_auc",
    "loss": "valid_loss"
}
model.train()
model = to_gpu(model)
num_batches = len(train_dataloader)
# Index of the first batch to train on. Only the resumed epoch fast-forwards;
# it is reset to 0 at the end of every epoch so later epochs see all batches.
# (Previously last_iter was reused for every epoch, silently dropping the
# first last_iter+1 batches of each subsequent epoch.)
first_batch = last_iter + 1
start_time = time.time()
vprint("Start Training", TrainingConfigs)
for epoch in range(last_epoch, TrainingConfigs.EPOCHS):
    # Number of validation losses recorded before this epoch, so the
    # scheduler can be fed only the losses produced during this epoch.
    valid_losses_before = len(results.get("valid_loss", []))
    # Fast-forward the loader to first_batch. Skipped batches are still drawn
    # from the DataLoader (and counted by tqdm); they are just not trained on.
    train_dataloader_iter = islice(tqdm(enumerate(train_dataloader), total=num_batches),
                                   first_batch, num_batches)
    for i, (images, labels) in train_dataloader_iter:
        images = to_gpu(images)
        labels = to_gpu(labels)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        results['train_loss'].append(loss.item())
        # Time-based checkpointing: at most one checkpoint per interval.
        if time.time() - start_time > TrainingConfigs.CHECKPOINT_TIME_INTERVAL:
            shared_utils.create_checkpoint(model, epoch, i, valid_dataloader, criterion, results, TrainingConfigs,
                                    score_dict, by_study='max', challenge_ann_only=True)
            # create_checkpoint receives the validation loader, so it may put
            # the model in eval mode; switching back is a no-op if it did not.
            model.train()
            start_time = time.time()
    # Every epoch after the resumed one starts from the first batch.
    first_batch = 0
    # Step the plateau scheduler on this epoch's mean validation loss.
    # Assumes create_checkpoint appends to results["valid_loss"] — TODO confirm.
    # If no checkpoint ran this epoch there is nothing to report, so skip the
    # step rather than feeding the scheduler the nan mean of an empty list.
    epoch_valid_losses = results.get("valid_loss", [])[valid_losses_before:]
    if len(epoch_valid_losses) > 0:
        scheduler.step(np.mean(epoch_valid_losses))

2022-07-14 11:31: Start Training


  0%|          | 0/55854 [00:00<?, ?it/s]

2022-07-14 11:33: 2022_07_14-11_33: Checkpoint Created For densenet161.
2022-07-14 11:33: Epoch [5/10],   Iter [1/55853],   Train Loss: 0.3673,   Valid Loss: 0.9361,   Valid AUC: 0.9583



KeyboardInterrupt: 