In [1]:
import os, random
import cv2
import math
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from collections import Counter

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import WeightedRandomSampler
from torchvision.models import efficientnet
from torchvision.transforms import transforms
# from efficientnet_pytorch import EfficientNet

import timm

from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit, KFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder

from glob import glob
from IPython.display import display, Audio

import cupy as cp
from cupyx.scipy import signal as cupy_signal
import yaml

from metric import score

import wandb

  from .autonotebook import tqdm as notebook_tqdm
  cupy._util.experimental('cupyx.jit.rawkernel')


In [2]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlhk[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [5]:
default_config = {
    "VERSION": "v0.3",
    "DATA_PATH": "inputs",
    "LOAD_SPEC_DATA": True,
    "SEED": 24,
    "SAMPLE_RATE": 32000,
    "N_FFT": 1095,
    "WIN_SIZE": 412,
    "WIN_LAP": 100,
    "MIN_FREQ": 40,
    "MAX_FREQ": 15000,
    "EPOCHS": 10,
    "BACHSIZE": 16
}

try:
    with open('config.yaml', 'r') as f:
        default_config = yaml.load(f, Loader=yaml.SafeLoader)
except:
    pass

default_config

{'VERSION': 'v0.7',
 'DATA_PATH': 'inputs',
 'LOAD_SPEC_DATA': True,
 'SEED': 24,
 'SAMPLE_RATE': 32000,
 'N_FFT': 1095,
 'WIN_SIZE': 412,
 'WIN_LAP': 100,
 'MIN_FREQ': 40,
 'MAX_FREQ': 15000,
 'EPOCHS': 10,
 'FOLD': 5,
 'BACTHSIZE': 16,
 'LABEL_SMOOTHING': 0.1}

In [None]:
# Reproducibility
torch.manual_seed(default_config["SEED"])
random.seed(default_config["SEED"])
np.random.seed(default_config["SEED"])

In [6]:
def oog2spec_via_cupy(audio_data):
    
    audio_data = cp.array(audio_data)
    
    # handles NaNs
    mean_signal = cp.nanmean(audio_data)
    audio_data = cp.nan_to_num(audio_data, nan=mean_signal) if cp.isnan(audio_data).mean() < 1 else cp.zeros_like(audio_data)
    
    # to spec.
    frequencies, times, spec_data = cupy_signal.spectrogram(
        audio_data, 
        fs=default_config["SAMPLE_RATE"], 
        nfft=default_config["N_FFT"], 
        nperseg=default_config["WIN_SIZE"], 
        noverlap=default_config["WIN_LAP"], 
        window='hann'
    )
    
    # Filter frequency range
    valid_freq = (frequencies >= default_config["MIN_FREQ"]) & (frequencies <= default_config["MAX_FREQ"])
    spec_data = spec_data[valid_freq, :]
    
    # Log
    spec_data = cp.log10(spec_data + 1e-20)
    
    # min/max normalize
    spec_data = spec_data - spec_data.min()
    spec_data = spec_data / spec_data.max()
    
    return spec_data.get()

In [7]:
meta_data = pd.read_csv(f"{default_config['DATA_PATH']}/train_metadata.csv")
meta_data.head(10)

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename
0,asbfly,[],['call'],39.2297,118.1987,Muscicapa dauurica,Asian Brown Flycatcher,Matt Slaymaker,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/134896,asbfly/XC134896.ogg
1,asbfly,[],['song'],51.403,104.6401,Muscicapa dauurica,Asian Brown Flycatcher,Magnus Hellström,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/164848,asbfly/XC164848.ogg
2,asbfly,[],['song'],36.3319,127.3555,Muscicapa dauurica,Asian Brown Flycatcher,Stuart Fisher,Creative Commons Attribution-NonCommercial-Sha...,2.5,https://www.xeno-canto.org/175797,asbfly/XC175797.ogg
3,asbfly,[],['call'],21.1697,70.6005,Muscicapa dauurica,Asian Brown Flycatcher,vir joshi,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/207738,asbfly/XC207738.ogg
4,asbfly,[],['call'],15.5442,73.7733,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin & Sergei Karpeev,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/209218,asbfly/XC209218.ogg
5,asbfly,[],['call'],15.5442,73.7733,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin & Sergei Karpeev,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/209219,asbfly/XC209219.ogg
6,asbfly,[],['clicks its bill'],42.5275,130.692,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin,Creative Commons Attribution-NonCommercial-Sha...,3.0,https://www.xeno-canto.org/267679,asbfly/XC267679.ogg
7,asbfly,[],"['call', 'male', 'song']",44.3602,132.7989,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/267680,asbfly/XC267680.ogg
8,asbfly,[],"['call', 'male', 'song']",44.3602,132.7989,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/267681,asbfly/XC267681.ogg
9,asbfly,[],['alarm call'],42.5275,130.692,Muscicapa dauurica,Asian Brown Flycatcher,Albert Lastukhin,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/267682,asbfly/XC267682.ogg


In [8]:
meta_data = meta_data[["primary_label", "filename"]]

label_list = sorted(meta_data["primary_label"].unique())
label_id_list = list(range(len(label_list)))
label2id = dict(zip(label_list, label_id_list))
id2label = dict(zip(label_id_list, label_list))

class_weights = meta_data.groupby(["primary_label"]).size().to_dict()
class_weights = dict((label2id[label], (num/len(meta_data))**(-0.5)) for (label, num) in class_weights.items())
class_weights


{0: 15.262465631176935,
 1: 14.97980291635521,
 2: 15.961868624944888,
 3: 23.313801348843423,
 4: 9.430896997731349,
 5: 69.94140404653027,
 6: 30.671334650301233,
 7: 30.671334650301233,
 8: 34.97070202326513,
 9: 6.994140404653026,
 10: 6.994140404653026,
 11: 31.923737249889776,
 12: 29.041587303442082,
 13: 9.553267785063618,
 14: 6.994140404653026,
 15: 13.988280809306053,
 16: 63.84747449977955,
 17: 10.28995222938945,
 18: 13.17072834380783,
 19: 11.890395405123137,
 20: 6.994140404653026,
 21: 59.111275211611336,
 22: 23.577242494328374,
 23: 43.375817438539855,
 24: 16.864370036979917,
 25: 37.93105200073355,
 26: 14.159223374090015,
 27: 12.76949489995591,
 28: 14.647615218148903,
 29: 34.97070202326513,
 30: 17.595669778008137,
 31: 20.71485609792872,
 32: 28.089143810376278,
 33: 10.79219294543183,
 34: 33.343256098838445,
 35: 17.93959161654526,
 36: 27.646767442144117,
 37: 6.994140404653026,
 38: 10.568109173681417,
 39: 6.994140404653026,
 40: 6.994140404653026,
 41: 1

In [9]:
list_audio = glob("inputs/train_audio/*/*.ogg")
audio_count = dict.fromkeys(label_list,0)
for audio in list_audio:
    audio_count[audio.split("\\")[-2]] +=1

audio_count = dict(sorted(audio_count.items(), key=lambda item: item[1]))
audio_count

{'asiope1': 5,
 'integr': 5,
 'niwpig1': 5,
 'blaeag1': 6,
 'wynlau1': 6,
 'bncwoo3': 7,
 'darter2': 7,
 'paisto1': 7,
 'wbbfly1': 7,
 'nilfly2': 8,
 'rutfly6': 8,
 'pomgrp2': 9,
 'malwoo1': 10,
 'indtit1': 12,
 'scamin3': 12,
 'brasta1': 13,
 'crfbar1': 13,
 'inpher1': 13,
 'kerlau2': 14,
 'junmyn1': 15,
 'isbduc1': 16,
 'redspu1': 16,
 'brfowl1': 17,
 'maltro1': 17,
 'rufbab3': 17,
 'jerbus2': 19,
 'barfly1': 20,
 'brwjac1': 20,
 'grynig2': 20,
 'lobsun2': 20,
 'malpar1': 20,
 'plaflo1': 20,
 'smamin1': 20,
 'sttwoo1': 20,
 'tilwar1': 21,
 'whbtre1': 21,
 'chbeat1': 22,
 'dafbab1': 22,
 'heswoo1': 22,
 'lesyel1': 22,
 'whbsho3': 22,
 'bkcbul1': 24,
 'crbsun2': 24,
 'eurbla2': 24,
 'sqtbul1': 24,
 'sbeowl1': 25,
 'aspfly1': 26,
 'aspswi1': 26,
 'moipig1': 27,
 'plhpar1': 28,
 'yebbab1': 28,
 'bkrfla1': 29,
 'maghor2': 30,
 'yebbul3': 30,
 'bwfshr1': 31,
 'comfla1': 32,
 'lewduc1': 33,
 'whbwag1': 34,
 'indrol2': 35,
 'spoowl1': 35,
 'insowl1': 36,
 'vehpar1': 36,
 'grehor1': 37,
 'pla

In [10]:
if default_config["LOAD_SPEC_DATA"]:
        all_bird_data = np.load(f'data/spec_center_5sec_256_256.npy', allow_pickle=True).item()
else:
        all_bird_data = dict()

        for i, row_metadata in tqdm(meta_data.iterrows()):

                # load ogg
                audio_data, _ = librosa.load(f"{default_config['DATA_PATH']}/train_audio/{row_metadata.filename}", sr=default_config["SAMPLE_RATE"])

                # crop
                n_copy = math.ceil(5 * default_config["SAMPLE_RATE"] / len(audio_data))
                if n_copy > 1: audio_data = np.concatenate([audio_data]*n_copy)

                start_idx = int(len(audio_data) / 2 - 2.5 * default_config["SAMPLE_RATE"])
                end_idx = int(start_idx + 5.0 * default_config["SAMPLE_RATE"])
                input_audio = audio_data[start_idx:end_idx]

                # ogg to spec.
                input_spec = oog2spec_via_cupy(input_audio)

                input_spec = cv2.resize(input_spec, (256, 256), interpolation=cv2.INTER_AREA)

                all_bird_data[row_metadata.filename] = input_spec.astype(np.float32)
        # save to file
        np.save(os.path.join("data", f'spec_center_5sec_256_256.npy'), all_bird_data)

In [11]:
class BirdCLEF_Dataset(torch.utils.data.Dataset):
    def __init__(self, df_data):
        super(BirdCLEF_Dataset).__init__()
        self.df_data = df_data

    def __len__(self):
        return len(self.df_data)
    
    def __getitem__(self, index):
        X = all_bird_data[self.df_data.iloc[index].filename]
        y = label2id[self.df_data.iloc[index].filename.split("/")[0]]

        return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

In [12]:
class BirdCLEF_Model_EfficientnetB0(nn.Module):
    def __init__(self, num_class):
        super(BirdCLEF_Model_EfficientnetB0, self).__init__()
        
        # self.backbone = efficientnet.efficientnet_b0(weights=efficientnet.EfficientNet_B0_Weights.DEFAULT)
        # self.backbone.classifier[1] = nn.Linear(self.backbone.classifier[1].in_features, num_class, dtype=torch.float32)
        self.backbone = timm.create_model('tf_efficientnet_b0.in1k', pretrained=True, num_classes=num_class)
    
    def forward(self, x):
        x = x.unsqueeze(-1)
        x = torch.cat([x, x, x], dim=3).permute(0,3,1,2)
        x = self.backbone(x)
        return x

In [14]:
ssk = StratifiedKFold(n_splits=default_config["FOLD"], shuffle=True, random_state=default_config["SEED"])
meta_data['fold'] = 0
for fold, (train_idx, val_idx) in enumerate(ssk.split(np.zeros(len(meta_data)), meta_data["primary_label"].to_numpy())):
    meta_data.loc[val_idx, 'fold'] = fold

In [17]:
if not os.path.exists(f"model/{default_config['VERSION']}"):
    os.makedirs(f"model/{default_config['VERSION']}")

for f in range(default_config["FOLD"]):
    run = wandb.init(project="BirdCLEF2024_LeviKaay", name=f"BaseModel_EfficientB0_Fold{f}_{default_config['VERSION']}", entity="Kaay", config=default_config)
    
    # main loop of f-fold
    print('================================================================')
    print(f"==== Running training for fold {f} ====")
    
    train_df = meta_data[meta_data['fold'] != f].copy()
    valid_df = meta_data[meta_data['fold'] == f].copy()
    print(f'Train Samples: {len(train_df)}')
    print(f'Valid Samples: {len(valid_df)}')

    train_dataset = BirdCLEF_Dataset(train_df)
    valid_dataset = BirdCLEF_Dataset(valid_df)

    #-------------Over sampling---------------
    sample_weights = [0] * len(train_dataset)
    for idx, (data, label) in enumerate(train_dataset):
        sample_weights[idx] = class_weights[label.item()]

    sampler = WeightedRandomSampler(sample_weights, num_samples=len(train_dataset), replacement=True)

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=default_config["BACTHSIZE"], sampler=sampler)
    valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=default_config["BACTHSIZE"], shuffle=True)

    model = BirdCLEF_Model_EfficientnetB0(num_class=len(label_list)).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=default_config["LABEL_SMOOTHING"])
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.00017, steps_per_epoch=len(train_dataloader), epochs=default_config["EPOCHS"], anneal_strategy='cos')

    # Training loop
    for epoch in range(default_config["EPOCHS"]):
        for idx,batch in enumerate(train_dataloader):
            inputs, targets = batch

            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            
            # Compute loss
            loss = criterion(outputs, targets)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            scheduler.step()
            print(f'Step {idx}/{len(train_dataloader)}, Loss: {loss.item():.4f}\r', end='', flush=True)
            wandb.log({"Learning Rate": scheduler.get_last_lr()[0]})
        
        model.eval()
        valid_step = []
        with torch.no_grad():
            for idx, batch in enumerate(valid_dataloader):
                inputs, targets = batch
                inputs = inputs.to(device)

                outputs = model(inputs)
                valid_step.append({"logits": outputs, "targets": targets})
            
            output_val = nn.Softmax(dim=1)(torch.cat([x['logits'] for x in valid_step], dim=0)).cpu().detach()
            target_val = torch.cat([x['targets'] for x in valid_step], dim=0).cpu().detach()

            val_loss = criterion(output_val, target_val)
            

            target_val = torch.nn.functional.one_hot(target_val, len(label_list))
            
            gt_df = pd.DataFrame(target_val.numpy().astype(np.float32), columns=label_list)
            pred_df = pd.DataFrame(output_val.numpy().astype(np.float32), columns=label_list)
            
            gt_df['id'] = [f'id_{i}' for i in range(len(gt_df))]
            pred_df['id'] = [f'id_{i}' for i in range(len(pred_df))]

            val_roc_auc = score(gt_df, pred_df, row_id_column_name='id')
        
        print(f"Epoch {epoch+1}/{default_config['EPOCHS']}, train_loss: {loss.item():.4f}, valid_loss: {val_loss:.4f} valid_roc_auc: {val_roc_auc:.4f}, lr: {scheduler.get_last_lr()}")
        wandb.log({"Training Loss": loss.item(),"Valid Loss": val_loss ,"Valid ROC_AUC": val_roc_auc})
    run.finish()
    torch.save(model.state_dict(), f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{f}.pt")


[34m[1mwandb[0m: Currently logged in as: [33mlhk[0m ([33mKaay[0m). Use [1m`wandb login --relogin`[0m to force relogin


==== Running training for fold 0 ====
Train Samples: 19567
Valid Samples: 4892
Epoch 1/10, train_loss: 4.7200, valid_loss: 5.1949 valid_roc_auc: 0.6892, lr: [4.762017696620708e-05]
Epoch 2/10, train_loss: 2.5456, valid_loss: 4.9934 valid_roc_auc: 0.9101, lr: [0.00012924034395422874]
Epoch 3/10, train_loss: 1.6466, valid_loss: 4.9195 valid_roc_auc: 0.9194, lr: [0.00016999999427681298]
Epoch 4/10, train_loss: 1.4605, valid_loss: 4.8795 valid_roc_auc: 0.9164, lr: [0.0001615688486050954]
Epoch 5/10, train_loss: 1.4952, valid_loss: 4.8715 valid_roc_auc: 0.9159, lr: [0.00013797237075300592]
Epoch 6/10, train_loss: 1.0352, valid_loss: 4.8616 valid_roc_auc: 0.9119, lr: [0.00010388413255096036]
Epoch 7/10, train_loss: 1.1337, valid_loss: 4.8734 valid_roc_auc: 0.9066, lr: [6.605572763967985e-05]
Epoch 8/10, train_loss: 0.9454, valid_loss: 4.8680 valid_roc_auc: 0.9101, lr: [3.197953554678399e-05]
Epoch 9/10, train_loss: 1.0734, valid_loss: 4.8722 valid_roc_auc: 0.9108, lr: [8.404764033341025e-06]

0,1
Learning Rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁
Training Loss,█▄▂▂▂▁▁▁▁▁
Valid Loss,█▄▂▁▁▁▁▁▁▁
Valid ROC_AUC,▁█████████

0,1
Learning Rate,0.0
Training Loss,0.96332
Valid Loss,4.87335
Valid ROC_AUC,0.91051


==== Running training for fold 1 ====
Train Samples: 19567
Valid Samples: 4892
Epoch 1/10, train_loss: 4.4492, valid_loss: 5.1912 valid_roc_auc: 0.6933, lr: [4.762017696620708e-05]
Epoch 2/10, train_loss: 3.0492, valid_loss: 5.0032 valid_roc_auc: 0.9091, lr: [0.00012924034395422874]
Epoch 3/10, train_loss: 1.8206, valid_loss: 4.9370 valid_roc_auc: 0.9153, lr: [0.00016999999427681298]
Epoch 4/10, train_loss: 1.7194, valid_loss: 4.8975 valid_roc_auc: 0.9209, lr: [0.0001615688486050954]
Epoch 5/10, train_loss: 1.2753, valid_loss: 4.8796 valid_roc_auc: 0.9096, lr: [0.00013797237075300592]
Epoch 6/10, train_loss: 1.0884, valid_loss: 4.8700 valid_roc_auc: 0.9108, lr: [0.00010388413255096036]
Epoch 7/10, train_loss: 1.1489, valid_loss: 4.8706 valid_roc_auc: 0.9115, lr: [6.605572763967985e-05]
Epoch 8/10, train_loss: 0.9249, valid_loss: 4.8790 valid_roc_auc: 0.9108, lr: [3.197953554678399e-05]
Epoch 9/10, train_loss: 0.9194, valid_loss: 4.8806 valid_roc_auc: 0.9103, lr: [8.404764033341025e-06]

0,1
Learning Rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁
Training Loss,█▅▃▃▂▁▁▁▁▁
Valid Loss,█▄▂▂▁▁▁▁▁▁
Valid ROC_AUC,▁█████████

0,1
Learning Rate,0.0
Training Loss,0.92431
Valid Loss,4.88115
Valid ROC_AUC,0.90932


==== Running training for fold 2 ====
Train Samples: 19567
Valid Samples: 4892
Epoch 1/10, train_loss: 4.3380, valid_loss: 5.1935 valid_roc_auc: 0.6787, lr: [4.762017696620708e-05]
Epoch 2/10, train_loss: 3.0960, valid_loss: 5.0184 valid_roc_auc: 0.9030, lr: [0.00012924034395422874]
Epoch 3/10, train_loss: 2.6658, valid_loss: 4.9302 valid_roc_auc: 0.9212, lr: [0.00016999999427681298]
Epoch 4/10, train_loss: 1.7718, valid_loss: 4.9046 valid_roc_auc: 0.9036, lr: [0.0001615688486050954]
Epoch 5/10, train_loss: 1.1036, valid_loss: 4.8774 valid_roc_auc: 0.9139, lr: [0.00013797237075300592]
Epoch 6/10, train_loss: 1.2460, valid_loss: 4.8721 valid_roc_auc: 0.9030, lr: [0.00010388413255096036]
Epoch 7/10, train_loss: 1.0059, valid_loss: 4.8707 valid_roc_auc: 0.9078, lr: [6.605572763967985e-05]
Epoch 8/10, train_loss: 1.0339, valid_loss: 4.8729 valid_roc_auc: 0.9071, lr: [3.197953554678399e-05]
Epoch 9/10, train_loss: 0.9092, valid_loss: 4.8794 valid_roc_auc: 0.9047, lr: [8.404764033341025e-06]

0,1
Learning Rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁
Training Loss,█▅▅▃▁▂▁▁▁▁
Valid Loss,█▄▂▂▁▁▁▁▁▁
Valid ROC_AUC,▁▇█▇█▇████

0,1
Learning Rate,0.0
Training Loss,0.91189
Valid Loss,4.88059
Valid ROC_AUC,0.90559


==== Running training for fold 3 ====
Train Samples: 19567
Valid Samples: 4892
Epoch 1/10, train_loss: 4.9047, valid_loss: 5.1936 valid_roc_auc: 0.6921, lr: [4.762017696620708e-05]
Epoch 2/10, train_loss: 3.2327, valid_loss: 5.0075 valid_roc_auc: 0.9074, lr: [0.00012924034395422874]
Epoch 3/10, train_loss: 1.5500, valid_loss: 4.9216 valid_roc_auc: 0.9132, lr: [0.00016999999427681298]
Epoch 4/10, train_loss: 1.0821, valid_loss: 4.8877 valid_roc_auc: 0.9144, lr: [0.0001615688486050954]
Epoch 5/10, train_loss: 1.3709, valid_loss: 4.8750 valid_roc_auc: 0.9150, lr: [0.00013797237075300592]
Epoch 6/10, train_loss: 1.4953, valid_loss: 4.8524 valid_roc_auc: 0.9161, lr: [0.00010388413255096036]
Epoch 7/10, train_loss: 1.0497, valid_loss: 4.8642 valid_roc_auc: 0.9145, lr: [6.605572763967985e-05]
Epoch 8/10, train_loss: 0.9510, valid_loss: 4.8595 valid_roc_auc: 0.9157, lr: [3.197953554678399e-05]
Epoch 9/10, train_loss: 0.9322, valid_loss: 4.8693 valid_roc_auc: 0.9145, lr: [8.404764033341025e-06]

0,1
Learning Rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁
Training Loss,█▅▂▁▂▂▁▁▁▁
Valid Loss,█▄▂▂▁▁▁▁▁▁
Valid ROC_AUC,▁█████████

0,1
Learning Rate,0.0
Training Loss,0.90464
Valid Loss,4.8683
Valid ROC_AUC,0.91489


==== Running training for fold 4 ====
Train Samples: 19568
Valid Samples: 4891
Epoch 1/10, train_loss: 4.9674, valid_loss: 5.1895 valid_roc_auc: 0.7060, lr: [4.762017696620708e-05]
Epoch 2/10, train_loss: 3.2269, valid_loss: 5.0214 valid_roc_auc: 0.9076, lr: [0.00012924034395422874]
Epoch 3/10, train_loss: 2.3153, valid_loss: 4.9254 valid_roc_auc: 0.9159, lr: [0.00016999999427681298]
Epoch 4/10, train_loss: 1.3566, valid_loss: 4.8956 valid_roc_auc: 0.9182, lr: [0.0001615688486050954]
Epoch 5/10, train_loss: 1.2407, valid_loss: 4.8738 valid_roc_auc: 0.9209, lr: [0.00013797237075300592]
Epoch 6/10, train_loss: 1.3115, valid_loss: 4.8787 valid_roc_auc: 0.9110, lr: [0.00010388413255096036]
Epoch 7/10, train_loss: 1.0099, valid_loss: 4.8630 valid_roc_auc: 0.9215, lr: [6.605572763967985e-05]
Epoch 8/10, train_loss: 1.1229, valid_loss: 4.8737 valid_roc_auc: 0.9200, lr: [3.197953554678399e-05]
Epoch 9/10, train_loss: 0.9501, valid_loss: 4.8747 valid_roc_auc: 0.9184, lr: [8.404764033341025e-06]

0,1
Learning Rate,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁▁▁▁
Training Loss,█▅▃▂▂▂▁▁▁▁
Valid Loss,█▄▂▂▁▁▁▁▁▁
Valid ROC_AUC,▁█████████

0,1
Learning Rate,0.0
Training Loss,1.16246
Valid Loss,4.87737
Valid ROC_AUC,0.91757


In [19]:
input_tensor = torch.randn(default_config["BACTHSIZE"], 256, 256)

### Export model to ONNX

In [20]:
  # input shape
input_names = ['x']
output_names = ['output']

for fold in range(default_config["FOLD"]):
    bird_model = BirdCLEF_Model_EfficientnetB0(num_class=len(label_list))
    weights = torch.load(f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{fold}.pt", map_location=torch.device('cpu'))
    bird_model.load_state_dict(weights)
    bird_model.eval()

    torch.onnx.export(bird_model, input_tensor, f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{fold}.onnx", input_names=input_names, output_names=output_names)

### Export model to OpenVino

In [21]:
from openvino.runtime import Core
import openvino as ov

In [22]:
for fold in range(default_config["FOLD"]):
    bird_model = BirdCLEF_Model_EfficientnetB0(num_class=len(label_list))
    weights = torch.load(f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{fold}.pt", map_location=torch.device('cpu'))
    bird_model.load_state_dict(weights)
    bird_model.eval()

    ov_model = ov.convert_model(bird_model, example_input=input_tensor)
    ov.save_model(ov_model, f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{fold}.xml")

In [None]:
ie = Core()
classification_model_xml = f"model/v0.4/BaseModel_EfficientB0_Fold0.xml"
model = ie.read_model(model=classification_model_xml)