# Train your model

In [1]:
import cv2
import os
import random
import time
import warnings

import librosa
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from catalyst.dl import SupervisedRunner, CallbackOrder, Callback, CheckpointCallback

In [2]:
from csrc.utils import TrainingDirs as TD
from csrc.configurations import DatasetConfig as DC
from csrc.configurations import ModelConfig as MC
from csrc.utils import seed_dataset, seed_all 

## Train configurations

In [3]:
# Debugging aid: CUDA_LAUNCH_BLOCKING=1 asks CUDA to launch kernels synchronously,
# so a failing kernel is reported at the call that triggered it.
os.environ.update({"CUDA_LAUNCH_BLOCKING": "1"})

In [4]:
# --- Dataset selection ---
# Name of the dataset folder to train on.
DATASET = "standard-p2-32khz"
# DATASET = "test-p2"

# --- Train / test / validation split ---
# PREBUILD_TEST is handed to TrainingDirs when the working folders are resolved.
# BUILD_TEST controls whether the test files stay out of the training pool:
# when it is False they are merged back in before the validation split.
PREBUILD_TEST = False
BUILD_TEST = False
# Split divisors, not fractions: the test set takes len(files) // TEST_RATIO files
# and the validation set takes len(files) // VALID_RATIO files (5 -> one fifth).
TEST_RATIO = 5
VALID_RATIO = 5
# Shuffle the training file list before the validation split.
SHUFFLE = True

# --- Clip length ---
# Clip length used for training, taken from the dataset configuration so that it
# defaults to the clip length of the audio in the dataset.
PERIOD = DC.dataset_clip_time
print(f"Training clip length (sencods): {PERIOD}")

# --- Training hyper-parameters ---
# Number of training epochs.
EPOCHS = 30
# Batch size for the data loaders. Rule of thumb from the original notes:
# an 8 GB GPU handles 5 s clips at batch size 32.
BS = 64

# --- Paths ---
# Pretrained weights file to start from (kept under the weights folder by default).
WEIGHTS_PATH = "./weights/Cnn14_DecisionLevelAtt_mAP0.425.pth"
# Directory where training logs and the trained model are stored.
LOG_DIR = "./train/logs/sp2-32000hz/"

Training clip length (sencods): 2


In [5]:
# Reproducibility: one seed is pushed through both project seeding helpers.
# Per the original note, changing SEED changes which files are randomly picked
# for the validation set.
SEED = 42
for apply_seed in (seed_all, seed_dataset):
    apply_seed(SEED)

## Process

In [6]:
# Resolve the dataset, training and test folders for this run.
dirs = TD(DATASET, PREBUILD_TEST)
DATASET_FOLDER, TRAIN_FOLDER, TEST_FOLDER = (
    dirs.dataset_folder,
    dirs.train_folder,
    dirs.test_folder,
)

# Training reads from the training folder. Testing uses the test folder when one
# was resolved and falls back to the training folder otherwise.
TRAIN_WORKING_FOLDER = TRAIN_FOLDER
TEST_WORKING_FOLDER = TEST_FOLDER or TRAIN_FOLDER

print(f"FOLDER_FOR_TRAINING: {TRAIN_WORKING_FOLDER}")
print(f"FOLDER_FOR_TEST: {TEST_WORKING_FOLDER}")

Working with dataset under D:\Dev\asfg\data\standard-p2-32khz.
FOLDER_FOR_TRAINING: D:\Dev\asfg\data\standard-p2-32khz
FOLDER_FOR_TEST: D:\Dev\asfg\data\standard-p2-32khz


In [7]:
# Train/test split. If no separate test folder was resolved, hold out the last 1/TEST_RATIO of the index-sorted files as the test set.

def sort_index(x):
    """Return the integer that precedes the first "-" in a file name.

    Used as the sort key for the dataset listing, e.g.
    "3081-the-kings-speech-eng-0.wav" -> 3081.

    Raises:
        ValueError: from int() when the leading part is not an integer.
    """
    prefix, _sep, _rest = x.partition("-")
    return int(prefix)

if not TEST_FOLDER:
    # No separate test folder: hold out the highest-numbered files as the test set.
    all_files = sorted(os.listdir(TRAIN_FOLDER), key=sort_index)
    test_index = len(all_files) // TEST_RATIO
    # Split on an explicit position instead of a negative slice. When test_index
    # is 0 (fewer than TEST_RATIO files), `[-0:]` would be the whole list and
    # `[:-0]` would be empty, putting every file in the test set.
    test_start = len(all_files) - test_index
    train_files = all_files[:test_start]
    test_files = all_files[test_start:]
else:
    # A separate test folder exists: use both folders as they are.
    train_files = os.listdir(TRAIN_FOLDER)
    test_files = os.listdir(TEST_FOLDER)

print(f"Files for training: {len(train_files)}")
print(f"Files for testing: {len(test_files)}")

Files for training: 45736
Files for testing: 11434


In [8]:
# Train/validation split.

if SHUFFLE:
    random.shuffle(train_files)

if not BUILD_TEST:
    # No test set is being held out, so its files rejoin the training pool.
    # NOTE(review): they are appended after the shuffle, so the tail that becomes
    # the validation set is drawn from these unshuffled files first - confirm
    # this is intended.
    train_files.extend(test_files)

valid_idx = len(train_files) // VALID_RATIO
# Split on an explicit position instead of a negative slice. When valid_idx is 0
# (fewer than VALID_RATIO files), `[-0:]` would be the whole list and `[:-0]`
# would be empty, leaving nothing to train on.
valid_start = len(train_files) - valid_idx
valid_files = train_files[valid_start:]
train_files = train_files[:valid_start]

print(f"Files for training: {len(train_files)}")
print(f"Files for validation: {len(valid_files)}")
print(f"Validation file samples: {valid_files[:5]}")

Files for training: 45736
Files for validation: 11434
Validation file samples: ['3081-the-kingdom-of-heaven-eng-0.wav', '3081-the-kings-speech-eng-0.wav', '3082-american-beauty-eng-0.wav', '3082-dallas-buyers-club-eng-1.wav', '3082-src-fkdsc-0.wav']


## Dataset

In [9]:
from csrc.dataset import PANNsDataset

## Transformer

In [10]:
from csrc.transformers import BaseAug

## Set up dataloader 

In [11]:
# Settings shared by both datasets and both loaders.
dataset_folders = dict(training_folder=TRAIN_WORKING_FOLDER, test_folder=TEST_WORKING_FOLDER)
loader_common = dict(batch_size=BS, num_workers=0, pin_memory=True)  # num_workers=0 for Windows.

loaders = {}

# Training loader: augmented waveforms, reshuffled, incomplete last batch dropped.
loaders["train"] = data.DataLoader(
    PANNsDataset(train_files, waveform_transforms=BaseAug, **dataset_folders),
    shuffle=True,
    drop_last=True,
    **loader_common,
)

# Validation loader: no augmentation, fixed order, every sample kept.
loaders["valid"] = data.DataLoader(
    PANNsDataset(valid_files, waveform_transforms=None, **dataset_folders),
    shuffle=False,
    drop_last=False,
    **loader_common,
)

## Model

In [12]:
from csrc.models import AttBlock, PANNsCNN14Att

## Loss

In [13]:
from csrc.losses import ImprovedPANNsLoss

## Callbacks

In [14]:
from csrc.callbacks import F1Callback, mAPCallback, PrecisionCallback
from catalyst import dl

## Training Configurations

In [15]:
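# Legacy setup (apparently for catalyst releases before 21), kept commented out for reference; the active setup is in the next cell.
# # device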
# device = torch.device("cuda:0")

# # model
# model = PANNsCNN14Att(**MC.sed_model_config)
# weights = torch.load(WEIGHTS_PATH)
# model.load_state_dict(weights["model"])
# model.att_block = AttBlock(2048, 2, activation="sigmoid")
# model.att_block.init_weights()
# model.to(device)

# # optimizer
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # scheduler
# scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# # loss
# loss = ImprovedPANNsLoss().to(device)

# # callbacks
# callbacks = [
#     F1Callback(),
#     mAPCallback(),
#     PrecisionCallback(),
#     CheckpointCallback(save_n_best=3),
# ]

In [16]:
### FOR catalyst 21

# device
device = torch.device("cuda:0")

# model: build the network, load the pretrained weights, then replace the
# attention block with a freshly initialised one before moving to the device.
model = PANNsCNN14Att(**MC.sed_model_config)
# Load onto the CPU first so the checkpoint does not depend on the device it was
# saved from; model.to(device) below does the actual placement.
weights = torch.load(WEIGHTS_PATH, map_location="cpu")
model.load_state_dict(weights["model"])
# NOTE(review): presumably 2048 input features and 2 output classes - confirm
# against AttBlock's signature.
model.att_block = AttBlock(2048, 2, activation="sigmoid")
model.att_block.init_weights()
model.to(device)

# optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# scheduler
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# loss
loss = ImprovedPANNsLoss().to(device)

# callbacks
callbacks = [
    F1Callback(),
    mAPCallback(),
    PrecisionCallback(),
    # Keep the 3 best checkpoints by validation precision. minimize=False because a
    # higher precision is better; minimize=True would keep the worst epochs.
    # NOTE(review): assumes PrecisionCallback reports a standard
    # higher-is-better precision - confirm.
    CheckpointCallback(
        save_n_best=3,
        logdir=os.path.join(LOG_DIR, "checkpoints"),
        loader_key="valid",
        metric_key="precision",
        minimize=False,
    ),
]

# callbacks = [
#     dl.MAPCallback(
#         input_key="logits", 
#         target_key="targets", 
#         topk_args=(1, 3, 5), 
#         prefix="mAP")
# ]

## Training

In [17]:
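# Legacy training call (apparently for catalyst releases before 21), kept commented out for reference; the active call is in the next cell.
# warnings.simplefilter("ignore")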

# runner = SupervisedRunner(
#     device=device,
#     input_key="waveform",
#     input_target_key="targets")

# runner.train(
#     model=model,
#     criterion=loss,
#     loaders=loaders,
#     optimizer=optimizer,
#     scheduler=scheduler,
#     num_epochs=EPOCHS,
#     verbose=True,
#     callbacks=callbacks,
#     logdir=LOG_DIR,
#     main_metric="epoch_precision",
#     minimize_metric=True,
#     amp=True
# )

In [18]:
### For catalyst 21
# Silence all warnings for the duration of the training run.
warnings.simplefilter("ignore")

# The runner reads the model input from the "waveform" key and the labels from
# the "targets" key.
runner = SupervisedRunner(
    input_key="waveform",
    target_key="targets"
)

runner.train(
    model=model,
    criterion=loss,
    loaders=loaders,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=EPOCHS,
    verbose=True,
    callbacks=callbacks,
    valid_loader="valid",
    valid_metric="precision",
    logdir=LOG_DIR,
    # A higher precision is better, so the validation metric must be maximised.
    # NOTE(review): assumes the "precision" metric is a standard
    # higher-is-better precision - confirm.
    minimize_valid_metric=False,
    # amp=True # Mixed precision would suit this task, but it is left off because of unresolved nan/inf problems with both pytorch amp and nvidia apex.
)

1/30 * Epoch (train):  15%|█▌        | 108/714 [04:22<24:33,  2.43s/it, loss=1.156, lr=1.000e-03, mAP=0.846, macro_f1=0.797, momentum=0.900, precision=0.897]
Keyboard Interrupt
