In [1]:
import os
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision

In [2]:
# Move the working directory one level up (presumably so that the project
# packages imported in the next cell can be found - confirm against the repo layout).
# The sentinel makes this cell idempotent: re-running it within the same kernel
# session no longer climbs further up the directory tree.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()   # directory the kernel started in
    os.chdir("..")

In [3]:
from torch_trainer import (
    EarlyStopping, ModelCheckpoint,
    History, Profiler,
    Trainer 
)
from utils.losses import FocalLoss
from utils.metrics import MulticlassAccuracy, MulticlassPrecision, MulticlassRecall, MulticlassF1Score

#### Note: <br>
• **utils.losses** contains custom loss definitions <br>
• **utils.metrics** contains custom metric definitions <br>

# ------------------------------------------------------------

# 1) Model Profiling

#### Note: <br>
• It is good to perform profiling of the model before training so that we will get to know the model complexity. <br>
• If performing the profiling for device "cuda", it is suggested to set **"gpu_warmup"** to **True** for accurate time measurements. <br>

In [None]:
# Model to profile: torchvision's MobileNetV3-Small, built with default arguments
model = torchvision.models.mobilenet_v3_small()
# Random dummy batch of shape (2, 3, 224, 224) used as the profiling input
sample_inputs = torch.randn((2, 3, 224, 224))

In [None]:
# Create the profiler and run it on the model with the dummy batch.
# The returned object is converted to a DataFrame / dict in the next cells.
profiler = Profiler()
results = profiler(
    model=model, 
    inputs=sample_inputs,      # Single input = A torch.tensor(); Multiple inputs = A list of torch.tensor()'s
    devices=["cpu", "cuda"],   # A list of target device(s) e.g ["cpu"], ["cuda"], ["cpu", "cuda"]
    n_iters=10,                # Number of iterations to be performed
    gpu_warmup=True            # Must be set to True if devices=["cuda"] for warming up the gpu before profiling
)

In [None]:
# To get the results as pandas dataframe
# (last expression of the cell, so the notebook displays it)
results.to_pandas()

In [None]:
# To get the results as dictionary
# (last expression of the cell, so the notebook displays it)
results.to_dict()

# ------------------------------------------------------------

# 2) Model Types

#### Note: <br>
• Trainer requires the dataloader and the model to follow some I/O structure.

### 2.1) Single Input - Single Output

In [None]:
# Toy regression data: inputs 1..50 and targets equal to twice the input
x = list(range(1, 51))
y = [2 * value for value in x]

# 2.1.a) Dataloader
class DatasetPreprocessor(Dataset):
    """
    A Single Input - Single Output Dataset.

    Each item is a pair (input, target); both elements are float32 numpy
    arrays of shape (1,) wrapping the raw values stored at the given index.
    """
    def __init__(self, inputs: list, targets: list):
        # Keep references to the raw sequences; conversion happens per item
        self.inputs, self.targets = inputs, targets

    def __len__(self):
        # The dataset size is driven by the number of inputs
        return len(self.inputs)

    def __getitem__(self, index: int):
        # Wrap the scalar input and target into one-element float32 arrays
        sample = np.array([self.inputs[index]], dtype=np.float32)
        label = np.array([self.targets[index]], dtype=np.float32)
        return sample, label

# Wrap the toy lists in the dataset and serve them in shuffled batches of 10
dataset = DatasetPreprocessor(x, y)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=10,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

# 2.1.b) Model
class SISO(nn.Module):
    """
    A Single Input - Single Output Model.

    Takes one tensor and returns one tensor. A single ``nn.Linear(1, 1)`` acts
    as a minimal stand-in layer so that the example can actually be instantiated
    and run on the toy dataset of this section (one feature in, one value out).
    Replace it with your own architecture.
    """
    def __init__(self):
        super().__init__()
        # Stand-in layer: maps a (batch, 1) input to a (batch, 1) output
        self.layer = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Return the single output tensor computed from the single input `x`."""
        out = self.layer(x)
        return out

# Build the model defined above and select the CPU as the target device
model = SISO()
device = torch.device("cpu")

# 2.1.c) Instantiate Trainer
# Only the model and the device are passed; no `num_inputs` argument is given here
trainer = Trainer(model, device=device)

### 2.2) Single Input - Multiple Outputs

In [None]:
# Toy data: inputs 1..50 and three targets (2x, 3x and 4x the input)
x = list(range(1, 51))
y1 = [2 * value for value in x]
y2 = [3 * value for value in x]
y3 = [4 * value for value in x]

# 2.2.a) Dataloader
class DatasetPreprocessor(Dataset):
    """
    A Single Input - Multiple Outputs Dataset.

    Each item is a sequence (input, target_1, target_2, target_3); every element
    is a float32 numpy array of shape (1,) wrapping the raw value at the index.
    """
    def __init__(self, inputs: list, targets1: list, targets2: list, targets3: list):
        self.inputs = inputs
        self.targets1, self.targets2, self.targets3 = targets1, targets2, targets3

    def __len__(self):
        # The dataset size is driven by the number of inputs
        return len(self.inputs)

    def __getitem__(self, index: int):
        # Order matters: the input comes first, followed by the three targets
        columns = (self.inputs, self.targets1, self.targets2, self.targets3)
        return tuple(
            np.array([column[index]], dtype=np.float32) for column in columns
        )

# Wrap the input list and the three target lists; serve shuffled batches of 10
dataset = DatasetPreprocessor(x, y1, y2, y3)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=10,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

# 2.2.b) Model
class SIMO(nn.Module):
    """
    A Single Input - Multiple Outputs Model.
    Should return a sequence of (output_1, ... output_N).

    Three independent ``nn.Linear(1, 1)`` heads act as minimal stand-in layers so
    that the example can actually be instantiated and run on the toy dataset of
    this section (one feature in, one value per target). Replace them with your
    own architecture.
    """
    def __init__(self):
        super().__init__()
        # One stand-in head per target; each maps (batch, 1) -> (batch, 1)
        self.layer1 = nn.Linear(in_features=1, out_features=1)
        self.layer2 = nn.Linear(in_features=1, out_features=1)
        self.layer3 = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply every head to the same input and return the three outputs as a tuple."""
        out1 = self.layer1(x)
        out2 = self.layer2(x)
        out3 = self.layer3(x)
        return out1, out2, out3
    
# Build the multi-output model defined above and select the CPU as the target device
model = SIMO()
device = torch.device("cpu")

# 2.2.c) Instantiate Trainer
# Same call as in the single-output case: only the model and the device are passed
trainer = Trainer(model, device=device)

### 2.3) Multiple Inputs - Multiple Outputs

In [None]:
# Toy data: two identical input lists (1..50) and three targets (2x, 3x, 4x)
x1 = list(range(1, 51))
x2 = list(range(1, 51))
y1 = [2 * value for value in x1]
y2 = [3 * value for value in x1]
y3 = [4 * value for value in x1]

# 2.3.a) Dataloader
class DatasetPreprocessor(Dataset):
    """
    A Multiple Inputs - Multiple Outputs Dataset.

    Each item is a sequence (input_1, input_2, target_1, target_2, target_3);
    every element is a float32 numpy array of shape (1,) wrapping the raw value
    stored at the given index. Inputs always come before targets.
    """
    def __init__(self, inputs1: list, inputs2: list, targets1: list, targets2: list, targets3: list):
        self.inputs1, self.inputs2 = inputs1, inputs2
        self.targets1, self.targets2, self.targets3 = targets1, targets2, targets3

    def __len__(self):
        # The dataset size is driven by the first input sequence
        return len(self.inputs1)

    def __getitem__(self, index: int):
        # Order matters: both inputs first, followed by the three targets
        columns = (
            self.inputs1, self.inputs2,
            self.targets1, self.targets2, self.targets3,
        )
        return tuple(
            np.array([column[index]], dtype=np.float32) for column in columns
        )

# Wrap the two input lists and the three target lists; serve shuffled batches of 10
dataset = DatasetPreprocessor(x1, x2, y1, y2, y3)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=10,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

# 2.3.b) Model
class MIMO(nn.Module):
    """
    A Multiple Inputs - Multiple Outputs Model.
    Should return a sequence of (output_1, ... output_N).

    Three independent ``nn.Linear(1, 1)`` heads act as minimal stand-in layers so
    that the example can actually be instantiated and run on the toy dataset of
    this section (one feature per input, one value per target). Replace them with
    your own architecture.
    """
    def __init__(self):
        super().__init__()
        # One stand-in head per target; each maps (batch, 1) -> (batch, 1)
        self.layer1 = nn.Linear(in_features=1, out_features=1)
        self.layer2 = nn.Linear(in_features=1, out_features=1)
        self.layer3 = nn.Linear(in_features=1, out_features=1)

    def forward(self, x1, x2):
        """Return three outputs: the first computed from `x1`, the other two from `x2`."""
        out1 = self.layer1(x1)
        out2 = self.layer2(x2)
        out3 = self.layer3(x2)
        return out1, out2, out3
    
# Build the multi-input model defined above and select the CPU as the target device
model = MIMO()
device = torch.device("cpu")

# 2.3.c) Instantiate Trainer
# num_inputs=2 matches the two tensors taken by MIMO.forward(x1, x2).
# NOTE(review): presumably this tells the Trainer how many leading items of each
# batch are model inputs (the rest being targets) - confirm against the Trainer docs.
trainer = Trainer(model, num_inputs=2, device=device)

# ------------------------------------------------------------

## Consider, we have the following things in place:

1) model  -> A model object that subclasses **torch.nn.Module**, e.g. torch.nn.Conv2d <br>
2) train_dataloader -> A dataloader object of type **torch.utils.data.DataLoader** <br>
3) [Optional] val_dataloader -> A dataloader object of type **torch.utils.data.DataLoader**

### The most basic usage of the Trainer would look as the following

In [None]:
# `MyModel` is a placeholder for your own nn.Module subclass; it is not defined
# in the cells shown in this notebook.
model = MyModel()
device = torch.device("cuda")
criterion = FocalLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)


# Instantiate Trainer
trainer = Trainer(model, device=device)

# Compile Trainer
# Only the loss function and the optimizer are supplied in this minimal setup
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,    
)

# Fit Trainer
# `train_dataloader` must already exist (see the prerequisites listed above this cell);
# the returned object is kept as `history`.
history = trainer.fit(
    num_epochs=100,
    train_dataloader=train_dataloader
)

# OR if we have validation dataloader,
# then;
# `val_dataloader` is the optional validation DataLoader listed in the
# prerequisites above this cell.
history = trainer.fit(
    num_epochs=100,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader
)

# + Logging epoch results
# Same fit call as before, with the three logging options switched on.
history = trainer.fit(
    num_epochs=100,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    verbose=True,                   # Default value = False. Enables logging for epoch results (epoch_num, loss, lr, metrics(s))
    verbose_epochs_frequency=10,    # Default value = 1. Frequency of epochs for logging. e.g. if set to 10, will log for every 10 epochs
    verbose_steps_frequency=50      # Default value = 0. Frequency of steps for logging (only for verbosed epochs).
)                                   # e.g. if set to 50, will log for every 50 steps (only for verbosed epochs)

# ------------------------------------------------------------

# 3) + Learning Rate Scheduler: OneCycleLR

In [None]:
# OneCycleLR needs the total number of scheduler steps up front
# (epochs * steps_per_epoch). Deriving steps_per_epoch from the dataloader keeps
# the schedule in sync with the real number of batches instead of a hard-coded value.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, 
    max_lr=1e-4,
    epochs=100,                              # keep equal to num_epochs passed to trainer.fit()
    steps_per_epoch=len(train_dataloader),   # number of batches per epoch
    div_factor=3,
    pct_start=0.3,
    anneal_strategy='cos'
)

# Compile Trainer
# Same as the basic compile call, with the scheduler created above added
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler
)

# ------------------------------------------------------------

# 4) + Metrics

In [None]:
# metrics
# Each metric is built for 10 classes and receives the `device` variable defined earlier
metric_accuracy = MulticlassAccuracy(num_classes=10, device=device)
metric_precision = MulticlassPrecision(num_classes=10, device=device)
metric_recall = MulticlassRecall(num_classes=10, device=device)
metric_f1_score = MulticlassF1Score(num_classes=10, device=device)

# Compile Trainer
# The two compile() calls below are alternatives shown side by side.
# NOTE(review): presumably a later compile() call replaces the earlier configuration - confirm.
## Single metric
# `metrics` is a dict mapping a display name to a metric object
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy
    }
)

## Multiple metrics
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    }
)

# ------------------------------------------------------------

# 5) + Callbacks

In [None]:
# Model checkpointing
# Checkpoint location is derived from `root_dir` and `name` (see the notes at the end of the notebook)
checkpoint = ModelCheckpoint(
    root_dir="experiments",
    name="my_model",
    save_best_only=True,     # saves only the best model; if set to False, saves model for every epoch
    save_model_only=True     # saves only the model; if set to False, saves model + optimizer + scheduler if provided
)

# Early stopping
# patience=7: number of epochs to wait before stopping (see the notes at the end of the notebook)
early_stopping = EarlyStopping(patience=7, verbose=True)

# Compile Trainer
# The three compile() calls below are alternatives; they differ only in the `callbacks` list.
## Only Model checkpointing
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[checkpoint]
)

## Only Early stopping
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[early_stopping]
)

## Both - Order does not matter
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[checkpoint, early_stopping]
)

# ------------------------------------------------------------

# 6) + Training Precisions

In [None]:
# Compile Trainer
# The three compile() calls below are alternatives; they differ only in the `precision` string.
## FP32 (If not specified, default Precision is FP32; Can be explicitly defined as well)
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[checkpoint, early_stopping],
    precision="32"    # Default value = "32"
)

## FP16 AMP (Automatic Mixed Precision)
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[checkpoint, early_stopping],
    precision="16-mixed"
)

## BF16 AMP (Automatic Mixed Precision with bfloat16)
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[checkpoint, early_stopping],
    precision="bf16-mixed"
)

# ------------------------------------------------------------

# 7) + Gradient Accumulation

In [None]:
# Scheduler steps per epoch under gradient accumulation: the number of batches
# divided by the accumulation steps, rounded up (a trailing partial group of
# batches still counts as one step).
gradient_acc_steps = 8
scheduler_steps_per_epoch = (len(train_dataloader) + gradient_acc_steps - 1) // gradient_acc_steps

# Recreate the scheduler so that its total step count is based on the number of
# accumulated steps per epoch computed above, not on the raw number of batches.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, 
    max_lr=1e-4,
    epochs=100,
    steps_per_epoch=scheduler_steps_per_epoch,  # num gradient accumulation steps per epoch
    div_factor=3,
    pct_start=0.3,
    anneal_strategy='cos'
)

# Compile Trainer
# The same `gradient_acc_steps` value is used for the scheduler above and for the Trainer
trainer.compile(
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    metrics={
        "accuracy" : metric_accuracy,
        "precision" : metric_precision,
        "recall" : metric_recall,
        "f1_score" : metric_f1_score
    },
    callbacks=[checkpoint, early_stopping],
    precision="32",
    gradient_acc=True,       # enables training with accumulation of gradients
    gradient_acc_steps=gradient_acc_steps     # the number of steps/ iterations that the gradients to be accumulated
)

# ------------------------------------------------------------

# 8) History

In [None]:
# Fit Trainer
# fit() returns the history object that the following cells plot, export and save.
# `val_dataloader` is the optional validation DataLoader from the prerequisites.
history = trainer.fit(
    num_epochs=100,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader
)

# see keys
print(history.keys())

### 8.1) Plot loss

In [None]:
# NOTE: "/path/to/file.png" is a placeholder; replace it with a real, writable location.
## Plot both losses (train and val)
history.plot_loss()
### + save to file
history.plot_loss(save_path="/path/to/file.png")

## Plot only train loss
history.plot_loss(_type="train")
### + save to file
history.plot_loss(_type="train", save_path="/path/to/file.png")

## Plot only val loss
history.plot_loss(_type="val")
### + save to file
history.plot_loss(_type="val", save_path="/path/to/file.png")

### 8.2) Plot Metrics

In [None]:
# NOTE: "/path/to/file.png" is a placeholder; replace it with a real, writable location.
# plot_metrics() covers all metrics at once; plot_metric(name=...) plots a single one.
## Plot all metrics (e.g accuracy, precision, recall, f1_score) - train and val
history.plot_metrics()

### + only plot train metrics
history.plot_metrics(_type="train")
### + save to file
history.plot_metrics(_type="train", save_path="/path/to/file.png")

### + only plot val metrics
history.plot_metrics(_type="val")
### + save to file
history.plot_metrics(_type="val", save_path="/path/to/file.png")

# -----------------------------------------------------------------------------------------

## Plot specific metric (e.g only accuracy)
# `name` is presumably one of the keys of the `metrics` dict given to trainer.compile() - confirm
history.plot_metric(name="accuracy")

### + only plot train metric
history.plot_metric(name="accuracy", _type="train")
### + save to file
history.plot_metric(name="accuracy", _type="train", save_path="/path/to/file.png")

### + only plot val metric
history.plot_metric(name="accuracy", _type="val")
### + save to file
history.plot_metric(name="accuracy", _type="val", save_path="/path/to/file.png")

### 8.3) View History

In [None]:
# Export the recorded history in two forms for inspection
# as dictionary
hist_dict = history.to_dict()

# as pandas
hist_df = history.to_pandas()

### 8.4) Save History

In [None]:
# NOTE: the paths below are placeholders; replace them with real, writable locations.
# Save history (dict) to pickle file
history.save_history(save_path="/path/to/file.pkl")

# Save as csv file
history.to_csv(save_path="/path/to/file.csv")

### 8.5) Load History

In [None]:
# Start from an empty History object and fill it from a previously saved file
history = History()

# Load history (dict) from pickle file
# NOTE: the path is a placeholder. Since the file is a pickle, only load files you trust.
history.load_history(load_path="/load/from/file.pkl")

# ------------------------------------------------------------

# 9) Progress Bar

In [None]:
# Fit Trainer
# `val_dataloader` is the optional validation DataLoader from the prerequisites.
history = trainer.fit(
    num_epochs=100,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    progress_bar=True        # Default value = False. Displays the progress bar for every epoch
)                            # with (epoch_num, loss, lr, metrics(s)).


#### Note: <br>
• Only **torch.optim.lr_scheduler.OneCycleLR** scheduler is supported by the Trainer as this scheduler is popular and widely used. It is based on a 2018 paper titled "Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates" (https://arxiv.org/abs/1708.07120) <br>

• Loss & Metrics must return an average value, i.e. use reduction by 'mean'. The loss function and the metric function(s) should accept the arguments (preds, targets). If a model produces multiple outputs (output_1, output_2, ..., output_N), the respective metric function should access its desired output by index, e.g. preds[0] & targets[0]. <br>  

• In checkpoint callbacks, it is suggested to set **"save_best_only"** to **True**, otherwise the Trainer will unnecessarily save the checkpoints for all epochs which results in increasing disk usage. <br>

• In checkpoint callbacks, if **"save_model_only"** is set to **True** then the Trainer will only save the model as checkpoint. If set to **False**, the Trainer will save the model, optimizer, scheduler as checkpoint. Trainer will create a checkpoint directory: **<root_dir>/<name>/runs_0/** and will save the checkpoints at this location. <br>

• Early Stopping will stop the training if the model is overfitting. It has a "patience" parameter that defines for how many upcoming epochs the training should continue (if validation loss is not improved since last epoch) before terminating the training. It only works if validation dataloader is passed to trainer.fit() method. <br>

• Gradient Accumulation should be used when you are facing OOM (out-of-memory) errors. Possible reasons: the model is huge (in terms of size, FLOPs, and number of parameters) or a large batch size is used for training. It helps to avoid the OOM error by letting us train with a smaller batch size while accumulating the gradients over `gradient_acc_steps` batches before each update, which emulates the desired larger batch size.

• For "trainer.fit()" method, **progress_bar** and **verbose** both can be set to `False` or any one of them can be set to `False` and other to `True`. But if both are set to `True`, **progress_bar** will be selected and **verbose** will be disabled.