# Birdsong Pytorch Baseline: ResNeSt50-fast (Training)

In [1]:
import os
import gc
import time
import shutil
import random
import warnings
import typing as tp
from pathlib import Path
from contextlib import contextmanager

import yaml
from joblib import delayed, Parallel

import cv2
import librosa
import audioread
import soundfile as sf

import numpy as np
import pandas as pd

from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import resnest.torch as resnest_torch

import pytorch_pfn_extras as ppe
from pytorch_pfn_extras.training import extensions as ppe_extensions

pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
#from tqdm import tqdm_notebook as tqdm
from tqdm.autonotebook import tqdm
from matplotlib import pyplot as plt

import catalyst
from catalyst.dl import SupervisedRunner
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from catalyst.dl.callbacks import MixupCallback

In [2]:
from utils.settings import settings
NUM_CORES, BS = settings()

SEED: 42
Is fp16: True
Number of cores CPU: 12
GPU: GeForce GTX 1050 Ti
Batch size: 128
Total device memory: 4042


### read data

In [3]:
ROOT = Path.cwd()#.parent
INPUT_ROOT = ROOT / "data"
RAW_DATA = INPUT_ROOT / "birdsong-recognition"
TRAIN_AUDIO_DIR = INPUT_ROOT / "train_audio"
TEST_AUDIO_DIR = RAW_DATA / "test_audio"
TRAIN_RESAMPLED_DIR = INPUT_ROOT / "train_audio_resampled"
OUTPUT_ROOT = ROOT / "output"

In [4]:
ROOT

PosixPath('/media/dmi/5F9CFB7847A8B8FE/kaggle/birdsong')

In [5]:
# train = pd.read_csv(RAW_DATA / "train.csv")
train = pd.read_csv(INPUT_ROOT / "train_mod.csv")[:100]

In [6]:
# if not TEST_AUDIO_DIR.exists():
#     TEST_AUDIO_DIR = INPUT_ROOT / "birdcall_check" / "test_audio"
#     test = pd.read_csv(INPUT_ROOT / "birdcall_check" / "test.csv")
# else:
#     test = pd.read_csv(RAW_DATA / "test.csv")

In [7]:
from utils.dataset import get_loaders
import collections


In [8]:
# im = next(iter(loaders["train"]))[0][0].permute(1, 2, 0)
# plt.imshow(im);

### Training Utility

In [9]:
def get_resnest(model='resnest50_fast_1s1x64d', pretrained=True, n_classes=264):
    model = getattr(resnest_torch, model)(pretrained=pretrained)
    del model.fc
    # # use the same head as the baseline notebook.
    model.fc = nn.Sequential(
        nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(1024, n_classes))
    
    return model

In [10]:
torch.hub.list('zhanghang1989/ResNeSt', force_reload=True)

Downloading: "https://github.com/zhanghang1989/ResNeSt/archive/master.zip" to /home/dmi/.cache/torch/hub/master.zip


['resnest101',
 'resnest200',
 'resnest269',
 'resnest50',
 'resnest50_fast_1s1x64d',
 'resnest50_fast_1s2x40d',
 'resnest50_fast_1s4x24d',
 'resnest50_fast_2s1x64d',
 'resnest50_fast_2s2x40d',
 'resnest50_fast_4s1x64d',
 'resnest50_fast_4s2x40d']

In [11]:
# model = get_resnest('resnest101') #'resnest200'
# model;

In [12]:
def get_lr(model: object, head_lr: float = 0.001, reduce: float = 0.3):

    lr = [
        {'params': model.conv1.parameters(), 'lr': head_lr * reduce * reduce * reduce}, 
        {'params': model.layer1.parameters(), 'lr': head_lr * reduce * reduce}, 
        {'params': model.layer2.parameters(), 'lr': head_lr * reduce * reduce}, 
        {'params': model.layer3.parameters(), 'lr': head_lr * reduce}, 
        {'params': model.layer4.parameters(), 'lr': head_lr * reduce}, 
        {'params': model.fc.parameters(), 'lr': head_lr}]

    return lr  

# get_lr(model)

## Training

## run training

In [13]:
from utils.albusound import train_transforms
loaders = collections.OrderedDict()
loaders["train"], loaders["valid"] = get_loaders(0, BS=2, waveform_transforms=train_transforms)

[fold 0] train: 80, val: 20


In [14]:
#set_seed(settings["globals"]["seed"])
device = catalyst.utils.get_device()
output_dir = OUTPUT_ROOT
fold = 0
# # # get model
model = get_resnest()
model = model.to(device)

lr = 0.001 # get_lr_seresnext(model, 0.01, 0.8)
optimizer = torch.optim.AdamW(model.parameters())
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75, patience=2)

logdir = f"{ROOT}/.logs{fold}"
device = catalyst.utils.get_device()

runner = SupervisedRunner(device=device, model=model)
from catalyst.dl.callbacks import AccuracyCallback, AUCCallback, F1ScoreCallback
criterion = nn.BCEWithLogitsLoss()

runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
scheduler=scheduler,
loaders=loaders,
callbacks=[
        F1ScoreCallback(prefix="f1_score"),
        #MixupCallback(alpha=0.4)
        ],
logdir=logdir,
num_epochs=5,
#main_metric="total",
minimize_metric=False,
# for FP16. It uses the variable from the very first cell
# fp16=True,
# for external monitoring tools, like Alchemy
#monitoring_params=monitoring_params,
verbose=True
)


Selected optimization level O0:  Pure FP32 training.

Defaults for this optimization level are:
enabled                : True
opt_level              : O0
cast_model_type        : torch.float32
patch_torch_functions  : False
keep_batchnorm_fp32    : None
master_weights         : False
loss_scale             : 1.0
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O0
cast_model_type        : torch.float32
patch_torch_functions  : False
keep_batchnorm_fp32    : None
master_weights         : False
loss_scale             : 1.0
1/5 * Epoch (train): 100% 40/40 [00:09<00:00,  4.25it/s, f1_score=1.000, loss=0.000e+00]
1/5 * Epoch (valid): 100% 10/10 [00:01<00:00,  8.23it/s, f1_score=1.000, loss=0.000e+00]
[2020-09-01 18:48:11,025] 
1/5 * Epoch 1 (_base): lr=0.0010 | momentum=0.9000
1/5 * Epoch 1 (train): f1_score=0.8860 | loss=0.0433
1/5 * Epoch 1 (valid): f1_score=0.9872

In [23]:
batch = next(iter(loaders["valid"]))
weight_path = '/media/dmi/5F9CFB7847A8B8FE/kaggle/birdsong/.logs0/checkpoints/best.pth'
model_ = get_resnest()
state_dict = torch.load(weight_path, map_location=device)
model_.load_state_dict(state_dict['model_state_dict'])
model_.to(device)
model_.eval();

In [24]:
%%time
model_(batch[0].cuda())


CPU times: user 20.9 ms, sys: 0 ns, total: 20.9 ms
Wall time: 19.8 ms


tensor([[ 103.6954, -111.7115, -111.3646, -113.4016, -109.4264, -119.0708,
         -113.7141, -119.9767, -105.4297, -123.9350, -115.9076, -107.7744,
         -108.8389, -114.1867, -115.0496, -112.8139, -118.1462, -113.5535,
         -114.4292, -109.8800, -113.2666, -121.7823, -112.0840, -114.3512,
         -116.8399, -108.1066, -120.2018, -111.8183, -113.0793, -112.0995,
         -109.1594, -113.2400, -120.9752, -123.5482, -126.6694, -123.8388,
         -115.5321, -123.5138, -110.7690, -120.7488, -122.2058, -111.1512,
         -113.3404, -123.0019, -118.4223, -116.0455, -108.9969, -118.7995,
         -116.4718, -111.9320, -114.0347, -112.6292, -103.0237, -111.0363,
         -106.6555, -118.9390, -114.5587, -113.9174, -113.3249, -114.9006,
         -111.3956, -114.3081, -117.2529, -121.9041, -120.6378, -119.9404,
         -104.5584, -110.7326, -120.8921, -118.0907, -114.0547, -114.9733,
         -108.8574, -110.3351, -117.1495, -112.3753, -120.9414, -119.2530,
         -111.3816, -112.

In [25]:
del model_

In [26]:
from catalyst.dl.utils import trace
runner.trace(model=model, loader=loaders['valid'], device=device, logdir=logdir);

In [27]:
model = trace.load_traced_model(
        f"{logdir}/trace/traced-forward.pth", 
        device=device
    )

In [28]:
%%time
model(batch[0].cuda())

CPU times: user 315 ms, sys: 1.98 ms, total: 316 ms
Wall time: 315 ms


tensor([[  54.6440,  -58.6450,  -58.4889,  -59.5359,  -57.6162,  -62.2326,
          -59.7460,  -62.8181,  -55.5352,  -64.8026,  -60.8743,  -56.7018,
          -57.1119,  -59.7730,  -60.4132,  -59.2375,  -62.0670,  -59.6546,
          -59.9390,  -57.7122,  -59.5339,  -63.8517,  -58.7750,  -60.0015,
          -61.3063,  -56.8712,  -63.1277,  -58.7849,  -59.4385,  -58.8474,
          -57.3169,  -59.6150,  -63.3780,  -64.7284,  -66.3006,  -65.0466,
          -60.6108,  -64.5835,  -58.0950,  -63.1412,  -64.0844,  -58.4242,
          -59.5909,  -64.4159,  -62.2140,  -60.8786,  -57.2271,  -62.2216,
          -61.0809,  -58.6868,  -59.8414,  -59.1977,  -54.2146,  -58.3470,
          -56.1229,  -62.3063,  -60.1628,  -59.7255,  -59.4520,  -60.2280,
          -58.4957,  -59.9447,  -61.4386,  -63.8333,  -63.1749,  -62.8327,
          -55.0157,  -58.2557,  -63.2988,  -62.0229,  -59.7733,  -60.3885,
          -57.2244,  -57.9879,  -61.3960,  -58.9418,  -63.4301,  -62.4207,
          -58.5092,  -59.

## save results

In [21]:
%%bash
ls /kaggle/training_output

ls: невозможно получить доступ к '/kaggle/training_output': Нет такого файла или каталога


CalledProcessError: Command 'b'ls /kaggle/training_output\n'' returned non-zero exit status 2.

In [None]:
for f_name in ["log","loss.png", "lr.png"]:
    shutil.copy(output_dir / f_name, f_name)

In [None]:
log = pd.read_json("log")
best_epoch = log["val/loss"].idxmin() + 1
log.iloc[[best_epoch - 1],]

In [None]:
shutil.copy(output_dir / "snapshot_epoch_{}.pth".format(best_epoch), "best_model.pth")

In [None]:
m = get_model({
    'name': settings["model"]["name"],
    'params': {'pretrained': False, 'n_classes': 264}})
state_dict = torch.load('best_model.pth')
m.load_state_dict(state_dict)