# Train your model

In [1]:
import cv2
import os
import random
import time
import warnings

import librosa
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from catalyst.dl import SupervisedRunner, State, CallbackOrder, Callback, CheckpointCallback

In [2]:
from csrc.utils import TrainingDirs as TD
from csrc.configurations import DatasetConfig as DC
from csrc.configurations import ModelConfig as MC
from csrc.utils import seed_dataset, seed_all 

## Train configurations

In [3]:
# For better debugging.

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [4]:
### The folder name of your dataset.
DATASET = "standard-p3"

### Whether you have split your dataset.
### If False then the test dataset will be generated as configured in TrainParams and choose the split ratio.
BUILD_TEST = False
PREBUILD_TEST = False
TEST_RATIO = 5

### The ratio to split your train/validaion dataset.
VALID_RATIO = 5
### Whether to shuffle the dataset.
SHUFFLE = True

### Clip length that will be used for training.
### Default to be the same as the audio clip length in the dataset.
PERIOD = DC.dataset_clip_time
print(f"Training clip length (sencods): {PERIOD}")

### Batch size for training. For example: 8gb GPU for 5s clips - batch size 32.
BS = 48

### Training epochs.
EPOCHS = 30

### Weights file path used for training.
### Default under weights folder.
WEIGHTS_PATH = "./weights/Cnn14_DecisionLevelAtt_mAP0.425.pth"

### Default path to store your model.
LOG_DIR = "./train/logs/sp3-3/"

Training clip length (sencods): 3


In [5]:
# Random seeding.
# Change seed will change your validation set randomly picked from the dataset.

SEED = 42
seed_all(SEED)
seed_dataset(SEED)

## Process

In [6]:
# Set up working folder for training.

dirs = TD(DATASET, PREBUILD_TEST)
DATASET_FOLDER = dirs.dataset_folder
TRAIN_FOLDER = dirs.train_folder
TEST_FOLDER = dirs.test_folder

### Currently we are training so we set up the training folder as the working folder.
TRAIN_WORKING_FOLDER = TRAIN_FOLDER
TEST_WORKING_FOLDER = TEST_FOLDER if TEST_FOLDER else TRAIN_FOLDER

print(f"FOLDER_FOR_TRAINING: {TRAIN_WORKING_FOLDER}")
print(f"FOLDER_FOR_TEST: {TEST_WORKING_FOLDER}")

Working with dataset under data\standard-p3.
FOLDER_FOR_TRAINING: data\standard-p3
FOLDER_FOR_TEST: data\standard-p3


In [7]:
# Train/Test split. If the test folder has not been manually selected, then split the test folder.

def sort_index(x):
    return int(x.split("-")[0])

if not TEST_FOLDER:
    all_files = os.listdir(TRAIN_FOLDER)
    all_files.sort(key=sort_index)
    test_index = len(all_files) // TEST_RATIO
    test_files = all_files[-test_index:]
    train_files = all_files[:-test_index]
else:
    train_files = os.listdir(TRAIN_FOLDER)
    test_files = os.listdir(TEST_FOLDER)

print(f"Files for training: {len(train_files)}")
print(f"Files for testing: {len(test_files)}")

Files for training: 15228
Files for testing: 3806


In [8]:
# Train/Validation split

if SHUFFLE:
    random.shuffle(train_files)

if not BUILD_TEST:
    train_files.extend(test_files)

valid_idx = len(train_files) // VALID_RATIO
valid_files = train_files[-valid_idx:]
train_files = train_files[:-valid_idx]

print(f"Files for training: {len(train_files)}")
print(f"Files for validation: {len(valid_files)}")
print(f"Validation file samples: {valid_files[:5]}")

Files for training: 15228
Files for validation: 3806
Validation file samples: ['2181-dallas-buyers-club-eng-1.wav', '2181-mission-impossible-iv-1.wav', '2181-the-dark-knight-rises-eng-0.wav', '2181-the-kingdom-of-heaven-eng-1.wav', '2181-the-kings-speech-eng-0.wav']


## Dataset

In [9]:
from csrc.dataset import PANNsDataset

## Transformer

In [10]:
from csrc.transformers import BaseAug

## Set up dataloader 

In [11]:
loaders = {
    "train": data.DataLoader(PANNsDataset(train_files, training_folder=TRAIN_WORKING_FOLDER, test_folder=TEST_WORKING_FOLDER, waveform_transforms=BaseAug), # Build training set
                            batch_size=BS,
                            shuffle=True,
                            num_workers=0, # 0 for windows system.
                            pin_memory=True,
                            drop_last=True),
    "valid": data.DataLoader(PANNsDataset(valid_files, training_folder=TRAIN_WORKING_FOLDER, test_folder=TEST_WORKING_FOLDER, waveform_transforms=None), # Build training set.\n",
                             batch_size=BS,
                             shuffle=False,
                             num_workers=0,
                             pin_memory=True,
                             drop_last=False)
}

## Model

In [12]:
from csrc.models import AttBlock, PANNsCNN14Att

## Loss

In [13]:
from csrc.losses import ImprovedPANNsLoss

## Callbacks

In [14]:
from csrc.callbacks import F1Callback, mAPCallback, PrecisionCallback

## Training Configurations

In [15]:
# device
device = torch.device("cuda:0")

# model
model = PANNsCNN14Att(**MC.sed_model_config)
weights = torch.load(WEIGHTS_PATH)
model.load_state_dict(weights["model"])
model.att_block = AttBlock(2048, 2, activation="sigmoid")
model.att_block.init_weights()
model.to(device)

# optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# scheduler
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# loss
loss = ImprovedPANNsLoss().to(device)

# callbacks
callbacks = [
    F1Callback(),
    mAPCallback(),
    PrecisionCallback(),
    CheckpointCallback(save_n_best=3),
]

## Training

In [16]:
warnings.simplefilter("ignore")

runner = SupervisedRunner(
    device=device,
    input_key="waveform",
    input_target_key="targets")

runner.train(
    model=model,
    criterion=loss,
    loaders=loaders,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=EPOCHS,
    verbose=True,
    logdir=LOG_DIR,
    callbacks=callbacks,
    main_metric="epoch_precision",
    minimize_metric=False,
    # fp16=True,
)

1/30 * Epoch (train): 100% 317/317 [12:17<00:00,  2.33s/it, loss=1.165, mAP=0.835, macro_f1=0.723, precision=0.913]
1/30 * Epoch (valid): 100% 80/80 [00:20<00:00,  3.94it/s, loss=1.796, mAP=0.500, macro_f1=0.364, precision=0.000e+00]
[2021-02-12 10:42:02,018] 
1/30 * Epoch 1 (_base): lr=0.0010 | momentum=0.9000
1/30 * Epoch 1 (train): epoch_mAP=0.8601 | epoch_macro_f1=0.8238 | epoch_precision=0.8784 | loss=1.1902 | mAP=0.8735 | macro_f1=0.8127 | precision=0.8863
1/30 * Epoch 1 (valid): epoch_mAP=0.9348 | epoch_macro_f1=0.8901 | epoch_precision=0.8869 | loss=0.8952 | mAP=0.8876 | macro_f1=0.8527 | precision=0.8097
2/30 * Epoch (train): 100% 317/317 [11:50<00:00,  2.24s/it, loss=1.044, mAP=0.918, macro_f1=0.838, precision=0.875]
2/30 * Epoch (valid): 100% 80/80 [00:16<00:00,  4.77it/s, loss=1.368, mAP=0.500, macro_f1=1.000, precision=0.000e+00]
[2021-02-12 10:54:13,367] 
2/30 * Epoch 2 (_base): lr=0.0009 | momentum=0.9000
2/30 * Epoch 2 (train): epoch_mAP=0.8825 | epoch_macro_f1=0.8561 |