In [1]:
!pip install ../input/efficientnet/EfficientNet-PyTorch-master/

Processing /kaggle/input/efficientnet/EfficientNet-PyTorch-master
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-py3-none-any.whl size=20139 sha256=1896348b341fbbe9f09de46c75a802aa67b6e33b90a24c6f88682932f550e000
  Stored in directory: /root/.cache/pip/wheels/b8/19/b9/77a444bb2bd1e95e7c5e547c97f8f895c711f0a91553122d6d
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.0


In [2]:
import os
import gc
import time
import math
import shutil
import random
import warnings
import typing as tp
from pathlib import Path
from contextlib import contextmanager

import yaml
from joblib import delayed, Parallel

import cv2
import librosa
import audioread
import soundfile as sf

import numpy as np
import pandas as pd

from fastprogress import progress_bar
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU
from torch.nn.modules.utils import _pair
import torch.utils.data as data
from efficientnet_pytorch import EfficientNet


pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

In [3]:
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
#     torch.backends.cudnn.deterministic = True  # type: ignore
#     torch.backends.cudnn.benchmark = True  # type: ignore
    

@contextmanager
def timer(name: str) -> None:
    """Timer Util"""
    t0 = time.time()
    print("[{}] start".format(name))
    yield
    print("[{}] done in {:.0f} s".format(name, time.time() - t0))

In [4]:
# logger = get_logger("main.log")
set_seed(42)

In [5]:
ROOT = Path.cwd().parent
INPUT_ROOT = ROOT / "input"
RAW_DATA = INPUT_ROOT / "birdsong-recognition"
TRAIN_AUDIO_DIR = RAW_DATA / "train_audio"
# TRAIN_RESAMPLED_AUDIO_DIRS = [
#   INPUT_ROOT / "birdsong-resampled-train-audio-{:0>2}".format(i)  for i in range(5)
# ]
TEST_AUDIO_DIR = RAW_DATA / "test_audio"

In [6]:
train = pd.read_csv(RAW_DATA / "train.csv")

In [7]:
if not TEST_AUDIO_DIR.exists():
    TEST_AUDIO_DIR = INPUT_ROOT / "birdcall-check" / "test_audio"
    test = pd.read_csv(INPUT_ROOT / "birdcall-check" / "test.csv")
else:
    test = pd.read_csv(RAW_DATA / "test.csv")

In [8]:
sub = pd.read_csv("../input/birdsong-recognition/sample_submission.csv")
sub.to_csv("submission.csv", index=False)  

In [9]:
TARGET_SR = 32000
model_config = {
    "base_model_name": "1",
    "num_classes": 264,
    "trained_weights": "../input/eff1fold050-fold1-40-fold2-15/effnet/best-checkpoint-011epoch.bin"
}

melspectrogram_parameters = {
    "n_mels": 128,
    "fmin": 20,
    "fmax": 16000
}

In [10]:
BIRD_CODE = {
    'aldfly': 0, 'ameavo': 1, 'amebit': 2, 'amecro': 3, 'amegfi': 4,
    'amekes': 5, 'amepip': 6, 'amered': 7, 'amerob': 8, 'amewig': 9,
    'amewoo': 10, 'amtspa': 11, 'annhum': 12, 'astfly': 13, 'baisan': 14,
    'baleag': 15, 'balori': 16, 'banswa': 17, 'barswa': 18, 'bawwar': 19,
    'belkin1': 20, 'belspa2': 21, 'bewwre': 22, 'bkbcuc': 23, 'bkbmag1': 24,
    'bkbwar': 25, 'bkcchi': 26, 'bkchum': 27, 'bkhgro': 28, 'bkpwar': 29,
    'bktspa': 30, 'blkpho': 31, 'blugrb1': 32, 'blujay': 33, 'bnhcow': 34,
    'boboli': 35, 'bongul': 36, 'brdowl': 37, 'brebla': 38, 'brespa': 39,
    'brncre': 40, 'brnthr': 41, 'brthum': 42, 'brwhaw': 43, 'btbwar': 44,
    'btnwar': 45, 'btywar': 46, 'buffle': 47, 'buggna': 48, 'buhvir': 49,
    'bulori': 50, 'bushti': 51, 'buwtea': 52, 'buwwar': 53, 'cacwre': 54,
    'calgul': 55, 'calqua': 56, 'camwar': 57, 'cangoo': 58, 'canwar': 59,
    'canwre': 60, 'carwre': 61, 'casfin': 62, 'caster1': 63, 'casvir': 64,
    'cedwax': 65, 'chispa': 66, 'chiswi': 67, 'chswar': 68, 'chukar': 69,
    'clanut': 70, 'cliswa': 71, 'comgol': 72, 'comgra': 73, 'comloo': 74,
    'commer': 75, 'comnig': 76, 'comrav': 77, 'comred': 78, 'comter': 79,
    'comyel': 80, 'coohaw': 81, 'coshum': 82, 'cowscj1': 83, 'daejun': 84,
    'doccor': 85, 'dowwoo': 86, 'dusfly': 87, 'eargre': 88, 'easblu': 89,
    'easkin': 90, 'easmea': 91, 'easpho': 92, 'eastow': 93, 'eawpew': 94,
    'eucdov': 95, 'eursta': 96, 'evegro': 97, 'fiespa': 98, 'fiscro': 99,
    'foxspa': 100, 'gadwal': 101, 'gcrfin': 102, 'gnttow': 103, 'gnwtea': 104,
    'gockin': 105, 'gocspa': 106, 'goleag': 107, 'grbher3': 108, 'grcfly': 109,
    'greegr': 110, 'greroa': 111, 'greyel': 112, 'grhowl': 113, 'grnher': 114,
    'grtgra': 115, 'grycat': 116, 'gryfly': 117, 'haiwoo': 118, 'hamfly': 119,
    'hergul': 120, 'herthr': 121, 'hoomer': 122, 'hoowar': 123, 'horgre': 124,
    'horlar': 125, 'houfin': 126, 'houspa': 127, 'houwre': 128, 'indbun': 129,
    'juntit1': 130, 'killde': 131, 'labwoo': 132, 'larspa': 133, 'lazbun': 134,
    'leabit': 135, 'leafly': 136, 'leasan': 137, 'lecthr': 138, 'lesgol': 139,
    'lesnig': 140, 'lesyel': 141, 'lewwoo': 142, 'linspa': 143, 'lobcur': 144,
    'lobdow': 145, 'logshr': 146, 'lotduc': 147, 'louwat': 148, 'macwar': 149,
    'magwar': 150, 'mallar3': 151, 'marwre': 152, 'merlin': 153, 'moublu': 154,
    'mouchi': 155, 'moudov': 156, 'norcar': 157, 'norfli': 158, 'norhar2': 159,
    'normoc': 160, 'norpar': 161, 'norpin': 162, 'norsho': 163, 'norwat': 164,
    'nrwswa': 165, 'nutwoo': 166, 'olsfly': 167, 'orcwar': 168, 'osprey': 169,
    'ovenbi1': 170, 'palwar': 171, 'pasfly': 172, 'pecsan': 173, 'perfal': 174,
    'phaino': 175, 'pibgre': 176, 'pilwoo': 177, 'pingro': 178, 'pinjay': 179,
    'pinsis': 180, 'pinwar': 181, 'plsvir': 182, 'prawar': 183, 'purfin': 184,
    'pygnut': 185, 'rebmer': 186, 'rebnut': 187, 'rebsap': 188, 'rebwoo': 189,
    'redcro': 190, 'redhea': 191, 'reevir1': 192, 'renpha': 193, 'reshaw': 194,
    'rethaw': 195, 'rewbla': 196, 'ribgul': 197, 'rinduc': 198, 'robgro': 199,
    'rocpig': 200, 'rocwre': 201, 'rthhum': 202, 'ruckin': 203, 'rudduc': 204,
    'rufgro': 205, 'rufhum': 206, 'rusbla': 207, 'sagspa1': 208, 'sagthr': 209,
    'savspa': 210, 'saypho': 211, 'scatan': 212, 'scoori': 213, 'semplo': 214,
    'semsan': 215, 'sheowl': 216, 'shshaw': 217, 'snobun': 218, 'snogoo': 219,
    'solsan': 220, 'sonspa': 221, 'sora': 222, 'sposan': 223, 'spotow': 224,
    'stejay': 225, 'swahaw': 226, 'swaspa': 227, 'swathr': 228, 'treswa': 229,
    'truswa': 230, 'tuftit': 231, 'tunswa': 232, 'veery': 233, 'vesspa': 234,
    'vigswa': 235, 'warvir': 236, 'wesblu': 237, 'wesgre': 238, 'weskin': 239,
    'wesmea': 240, 'wessan': 241, 'westan': 242, 'wewpew': 243, 'whbnut': 244,
    'whcspa': 245, 'whfibi': 246, 'whtspa': 247, 'whtswi': 248, 'wilfly': 249,
    'wilsni1': 250, 'wiltur': 251, 'winwre3': 252, 'wlswar': 253, 'wooduc': 254,
    'wooscj2': 255, 'woothr': 256, 'y00475': 257, 'yebfly': 258, 'yebsap': 259,
    'yehbla': 260, 'yelwar': 261, 'yerwar': 262, 'yetvir': 263
}

INV_BIRD_CODE = {v: k for k, v in BIRD_CODE.items()}

In [11]:
def mono_to_color(X: np.ndarray,
                  mean=None,
                  std=None,
                  norm_max=None,
                  norm_min=None,
                  eps=1e-6):
    """
    Code from https://www.kaggle.com/daisukelab/creating-fat2019-preprocessed-data
    """
    # Stack X as [X,X,X]
    X = np.stack([X, X, X], axis=-1)

    # Standardize
    mean = mean or X.mean()
    X = X - mean
    std = std or X.std()
    Xstd = X / (std + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max = norm_max or _max
    norm_min = norm_min or _min
    if (_max - _min) > eps:
        # Normalize to [0, 255]
        V = Xstd
        V[V < norm_min] = norm_min
        V[V > norm_max] = norm_max
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Just zero
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V


class TestDataset(data.Dataset):
    def __init__(self, df: pd.DataFrame, clip: np.ndarray,
                 img_size=224, melspectrogram_parameters={}):
        self.df = df
        self.clip = clip
        self.img_size = img_size
        self.melspectrogram_parameters = melspectrogram_parameters
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx: int):
        SR = 32000
        sample = self.df.loc[idx, :]
        site = sample.site
        row_id = sample.row_id
        
        if site == "site_3":
            y = self.clip.astype(np.float32)
            len_y = len(y)
            start = 0
            end = SR * 5
            images = []
            while len_y > start:
                y_batch = y[start:end].astype(np.float32)
                if len(y_batch) != (SR * 5):
                    break
                start = end
                end = end + SR * 5
                
                melspec = librosa.feature.melspectrogram(y_batch,
                                                         sr=SR,
                                                         **self.melspectrogram_parameters)
                melspec = librosa.power_to_db(melspec).astype(np.float32)
                image = mono_to_color(melspec)
                height, width, _ = image.shape
                image = cv2.resize(image, (int(width * self.img_size / height), self.img_size))
                image = np.moveaxis(image, 2, 0)
                image = (image / 255.0).astype(np.float32)
                images.append(image)
            images = np.asarray(images)
            return images, row_id, site
        else:
            end_seconds = int(sample.seconds)
            start_seconds = int(end_seconds - 5)
            
            start_index = SR * start_seconds
            end_index = SR * end_seconds
            
            y = self.clip[start_index:end_index].astype(np.float32)

            melspec = librosa.feature.melspectrogram(y, sr=SR, **self.melspectrogram_parameters)
            melspec = librosa.power_to_db(melspec).astype(np.float32)

            image = mono_to_color(melspec)
            height, width, _ = image.shape
            image = cv2.resize(image, (int(width * self.img_size / height), self.img_size))
            image = np.moveaxis(image, 2, 0)
            image = (image / 255.0).astype(np.float32)

            return image, row_id, site

In [12]:
def get_model(model_num, weight_path, num_classes, dropout_rate=0.5):
    
    model = EfficientNet.from_name('efficientnet-b{}'.format(model_num))
    in_features = model._fc.in_features
    model._fc = nn.Sequential(nn.Dropout(dropout_rate), nn.Linear(in_features, num_classes))
    model.load_state_dict(torch.load(weight_path)['model_state_dict'])
    device = torch.device("cuda")
    model.to(device)
    model.eval()
    
    return model

## Prediction loop

**NEW method**

In [13]:

def prediction_for_clip(test_df: pd.DataFrame, 
                        clip: np.ndarray,  
                        mel_params: dict,
                        model: EfficientNet,
                        threshold=0.5,
                        n_max_preds = 5):

    dataset = TestDataset(df=test_df, 
                          clip=clip,
                          img_size=224,
                          melspectrogram_parameters=mel_params)
    loader = data.DataLoader(dataset, batch_size=1, shuffle=False)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    row_ids, probas = [], []
    for image, row_id, site in progress_bar(loader):
        site = site[0]
        row_id = row_id[0]
        if site in {"site_1", "site_2"}:
            image = image.to(device)

            with torch.no_grad():
                prediction = F.sigmoid(model(image))
                proba = prediction.detach().cpu().numpy().reshape(-1)

            probas.append(proba)
            row_ids.append(row_id)

        else:
            row_ids.append(row_id)
            max_proba = np.zeros(264)
            # to avoid prediction on large batch
            image = image.squeeze(0)
            batch_size = 16
            whole_size = image.size(0)
            if whole_size % batch_size == 0:
                n_iter = whole_size // batch_size
            else:
                n_iter = whole_size // batch_size + 1

            all_events = set()
            for batch_i in range(n_iter):
                batch = image[batch_i * batch_size:(batch_i + 1) * batch_size]
                if batch.ndim == 3:
                    batch = batch.unsqueeze(0)

                batch = batch.to(device)
                with torch.no_grad():
                    prediction = F.sigmoid(model(batch))
                    proba = prediction.detach().cpu().numpy()

                for i in range(len(proba)):
                    max_proba = np.maximum(max_proba, proba[i])
            probas.append(max_proba)

    max_probas = np.max(probas, axis=0)
    bird_idx = np.argsort(max_probas)[-n_max_preds:]
    print('selected birds:', list(map(lambda x: INV_BIRD_CODE[x], bird_idx)))

    prediction_dict = {}
    for row_id, proba in zip(row_ids, probas):
        events = proba[bird_idx] >= threshold
        labels = bird_idx[np.argwhere(events).reshape(-1).tolist()]

        if len(labels) == 0:
            prediction_dict[row_id] = "nocall"
        else:
            labels_str_list = list(map(lambda x: INV_BIRD_CODE[x], labels))
            label_string = " ".join(labels_str_list[:n_max_preds])
            prediction_dict[row_id] = label_string
    return prediction_dict

In [14]:
def prediction(test_df: pd.DataFrame,
               test_audio: Path,
               model_config: dict,
               mel_params: dict,
               target_sr: int,
               threshold=0.5,n_max_preds = 3):
    model = get_model(model_num=model_config["base_model_name"], 
                      weight_path=model_config["trained_weights"], 
                      num_classes=model_config["num_classes"])
    unique_audio_id = test_df.audio_id.unique()

    warnings.filterwarnings("ignore")
    prediction_dfs = []
    for audio_id in unique_audio_id:
        with timer(f"Loading {audio_id}"):
            clip, _ = librosa.load(test_audio / (audio_id + ".mp3"),
                                   sr=target_sr,
                                   mono=True,
                                   res_type="kaiser_fast")
        
        test_df_for_audio_id = test_df.query(
            f"audio_id == '{audio_id}'").reset_index(drop=True)
        with timer(f"Prediction on {audio_id}"):
            prediction_dict = prediction_for_clip(test_df_for_audio_id,
                                                  clip=clip,
                                                  model=model,
                                                  mel_params=mel_params,
                                                  threshold=threshold,n_max_preds = 3)
        row_id = list(prediction_dict.keys())
        birds = list(prediction_dict.values())
        prediction_df = pd.DataFrame({
            "row_id": row_id,
            "birds": birds
        })
        prediction_dfs.append(prediction_df)
    
    prediction_df = pd.concat(prediction_dfs, axis=0, sort=False).reset_index(drop=True)
    return prediction_df

## Prediction

In [15]:
submission = prediction(test_df=test,
                        test_audio=TEST_AUDIO_DIR,
                        model_config=model_config,
                        mel_params=melspectrogram_parameters,
                        target_sr=TARGET_SR,
                        threshold=0.5,n_max_preds = 2)
submission.to_csv("submission.csv", index=False)

[Loading 41e6fe6504a34bf6846938ba78d13df1] start
[Loading 41e6fe6504a34bf6846938ba78d13df1] done in 1 s
[Prediction on 41e6fe6504a34bf6846938ba78d13df1] start


selected birds: ['chswar', 'amered', 'aldfly']
[Prediction on 41e6fe6504a34bf6846938ba78d13df1] done in 1 s
[Loading cce64fffafed40f2b2f3d3413ec1c4c2] start
[Loading cce64fffafed40f2b2f3d3413ec1c4c2] done in 1 s
[Prediction on cce64fffafed40f2b2f3d3413ec1c4c2] start


selected birds: ['scatan', 'astfly', 'aldfly']
[Prediction on cce64fffafed40f2b2f3d3413ec1c4c2] done in 0 s
[Loading 99af324c881246949408c0b1ae54271f] start
[Loading 99af324c881246949408c0b1ae54271f] done in 1 s
[Prediction on 99af324c881246949408c0b1ae54271f] start


selected birds: ['casfin', 'easpho', 'aldfly']
[Prediction on 99af324c881246949408c0b1ae54271f] done in 0 s
[Loading 6ab74e177aa149468a39ca10beed6222] start
[Loading 6ab74e177aa149468a39ca10beed6222] done in 1 s
[Prediction on 6ab74e177aa149468a39ca10beed6222] start


selected birds: ['saypho', 'wilfly', 'aldfly']
[Prediction on 6ab74e177aa149468a39ca10beed6222] done in 0 s
[Loading b2fd3f01e9284293a1e33f9c811a2ed6] start
[Loading b2fd3f01e9284293a1e33f9c811a2ed6] done in 1 s
[Prediction on b2fd3f01e9284293a1e33f9c811a2ed6] start


selected birds: ['astfly', 'pygnut', 'aldfly']
[Prediction on b2fd3f01e9284293a1e33f9c811a2ed6] done in 0 s
[Loading de62b37ebba749d2abf29d4a493ea5d4] start
[Loading de62b37ebba749d2abf29d4a493ea5d4] done in 0 s
[Prediction on de62b37ebba749d2abf29d4a493ea5d4] start


selected birds: ['carwre', 'pygnut', 'aldfly']
[Prediction on de62b37ebba749d2abf29d4a493ea5d4] done in 0 s
[Loading 8680a8dd845d40f296246dbed0d37394] start
[Loading 8680a8dd845d40f296246dbed0d37394] done in 1 s
[Prediction on 8680a8dd845d40f296246dbed0d37394] start


selected birds: ['astfly', 'pygnut', 'aldfly']
[Prediction on 8680a8dd845d40f296246dbed0d37394] done in 0 s
[Loading 940d546e5eb745c9a74bce3f35efa1f9] start
[Loading 940d546e5eb745c9a74bce3f35efa1f9] done in 1 s
[Prediction on 940d546e5eb745c9a74bce3f35efa1f9] start


selected birds: ['fiespa', 'comyel', 'aldfly']
[Prediction on 940d546e5eb745c9a74bce3f35efa1f9] done in 0 s
[Loading 07ab324c602e4afab65ddbcc746c31b5] start
[Loading 07ab324c602e4afab65ddbcc746c31b5] done in 1 s
[Prediction on 07ab324c602e4afab65ddbcc746c31b5] start


selected birds: ['pasfly', 'amered', 'aldfly']
[Prediction on 07ab324c602e4afab65ddbcc746c31b5] done in 0 s
[Loading 899616723a32409c996f6f3441646c2a] start
[Loading 899616723a32409c996f6f3441646c2a] done in 1 s
[Prediction on 899616723a32409c996f6f3441646c2a] start


selected birds: ['btnwar', 'amerob', 'aldfly']
[Prediction on 899616723a32409c996f6f3441646c2a] done in 0 s
[Loading 9cc5d9646f344f1bbb52640a988fe902] start
[Loading 9cc5d9646f344f1bbb52640a988fe902] done in 3 s
[Prediction on 9cc5d9646f344f1bbb52640a988fe902] start


selected birds: ['belspa2', 'comyel', 'aldfly']
[Prediction on 9cc5d9646f344f1bbb52640a988fe902] done in 1 s
[Loading a56e20a518684688a9952add8a9d5213] start
[Loading a56e20a518684688a9952add8a9d5213] done in 1 s
[Prediction on a56e20a518684688a9952add8a9d5213] start


selected birds: ['buwwar', 'sonspa', 'aldfly']
[Prediction on a56e20a518684688a9952add8a9d5213] done in 0 s
[Loading 96779836288745728306903d54e264dd] start
[Loading 96779836288745728306903d54e264dd] done in 1 s
[Prediction on 96779836288745728306903d54e264dd] start


selected birds: ['cedwax', 'hamfly', 'aldfly']
[Prediction on 96779836288745728306903d54e264dd] done in 0 s
[Loading f77783ba4c6641bc918b034a18c23e53] start
[Loading f77783ba4c6641bc918b034a18c23e53] done in 0 s
[Prediction on f77783ba4c6641bc918b034a18c23e53] start


selected birds: ['yebfly', 'hamfly', 'aldfly']
[Prediction on f77783ba4c6641bc918b034a18c23e53] done in 0 s
[Loading 856b194b097441958697c2bcd1f63982] start
[Loading 856b194b097441958697c2bcd1f63982] done in 1 s
[Prediction on 856b194b097441958697c2bcd1f63982] start


selected birds: ['pygnut', 'hamfly', 'aldfly']
[Prediction on 856b194b097441958697c2bcd1f63982] done in 0 s


In [16]:
submission['birds'].value_counts()

aldfly           48
nocall           24
comyel            1
amerob            1
comyel aldfly     1
amered            1
Name: birds, dtype: int64