In [1]:
import os, random
import cv2
import math
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from collections import Counter

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import WeightedRandomSampler
from torchvision.models import efficientnet
from torchvision.transforms import transforms
# from efficientnet_pytorch import EfficientNet

import timm

import scikitplot as skplt
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit, KFold
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder

from glob import glob
from IPython.display import display, Audio

import cupy as cp
from cupyx.scipy import signal as cupy_signal
import yaml

from metric import score

import wandb

import plotly.graph_objects as go
import plotly.express as px

  from .autonotebook import tqdm as notebook_tqdm
  cupy._util.experimental('cupyx.jit.rawkernel')


In [24]:
loss = nn.BCEWithLogitsLoss()

inn = np.array([0,1,1,1], dtype=np.float32)
input = torch.from_numpy(inn)

tar = np.array([0,1,0,0], dtype=np.float32)
target = torch.from_numpy(tar)
target_smooth = torch.clamp(target.float(), 0.0025, 1.0 - 0.0025)
target_smooth = target_smooth + (0.0025 / target.size(1))
output = loss(input, target)
print(input)
print(target)
print(target_smooth)
print(output)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [2]:
def oog2spec_via_cupy(audio_data):
    
    audio_data = cp.array(audio_data)
    
    # handles NaNs
    mean_signal = cp.nanmean(audio_data)
    audio_data = cp.nan_to_num(audio_data, nan=mean_signal) if cp.isnan(audio_data).mean() < 1 else cp.zeros_like(audio_data)
    
    # to spec.
    frequencies, times, spec_data = cupy_signal.spectrogram(
        audio_data, 
        fs=48000, 
        nfft=1095, 
        nperseg=412, 
        noverlap=100, 
        window='hann'
    )
    
    # Filter frequency range
    valid_freq = (frequencies >= 40) & (frequencies <= 15000)
    spec_data = spec_data[valid_freq, :]
    
    # Log
    spec_data = cp.log10(spec_data + 1e-20)
    
    # min/max normalize
    spec_data = spec_data - spec_data.min()
    spec_data = spec_data / spec_data.max()
    
    return spec_data.get()

In [5]:
audio_data, _ = librosa.load("inputs/previous_dataset/ColumbiaAndCostaRica/soundscape_data/NES_001_S01_20190914_043000.flac", sr=48000)

# ogg to spec.
input_spec = oog2spec_via_cupy(audio_data)

# input_spec = cv2.resize(input_spec, (256, 256), interpolation=cv2.INTER_AREA)

In [2]:
default_config = {
    "VERSION": "v0.3",
    "DATA_PATH": "inputs",
    "LOAD_SPEC_DATA": True,
    "SEED": 24,
    "SAMPLE_RATE": 32000,
    "N_FFT": 1095,
    "WIN_SIZE": 412,
    "WIN_LAP": 100,
    "MIN_FREQ": 40,
    "MAX_FREQ": 15000,
    "EPOCHS": 10,
    "BACHSIZE": 16
}

try:
    with open('config.yaml', 'r') as f:
        default_config = yaml.load(f, Loader=yaml.SafeLoader)
except:
    pass

default_config

{'VERSION': 'v1.3',
 'DESCRIPTION': 'Get all 5s data',
 'DATA_PATH': 'inputs',
 'LOAD_SPEC_DATA': True,
 'SEED': 24,
 'SAMPLE_RATE': 32000,
 'N_FFT': 1095,
 'WIN_SIZE': 412,
 'WIN_LAP': 100,
 'MIN_FREQ': 40,
 'MAX_FREQ': 15000,
 'EPOCHS': 10,
 'FOLD': 5,
 'BACTHSIZE': 16,
 'LABEL_SMOOTHING': 0.0}

In [4]:
class BirdCLEF_Model_EfficientnetB0(nn.Module):
    def __init__(self, num_class):
        super(BirdCLEF_Model_EfficientnetB0, self).__init__()
        self.backbone = timm.create_model('tf_efficientnet_b0.in1k', pretrained=True, in_chans=1,  num_classes=num_class)
    
    def forward(self, x):
        x = self.backbone(x)
        return x

In [3]:
input_tensor = torch.randn(default_config["BACTHSIZE"], 1, 256, 256)

In [7]:
from openvino.runtime import Core
import openvino as ov

In [8]:
for fold in range(default_config["FOLD"]):
    bird_model = BirdCLEF_Model_EfficientnetB0(num_class=182)
    weights = torch.load(f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{fold}.pt", map_location=torch.device('cpu'))
    bird_model.load_state_dict(weights)
    bird_model.eval()

    ov_model = ov.convert_model(bird_model, example_input=input_tensor)
    ov.save_model(ov_model, f"model/{default_config['VERSION']}/BaseModel_EfficientB0_Fold{fold}.xml")

FileNotFoundError: [Errno 2] No such file or directory: 'model/v1.3/BaseModel_EfficientB0_Fold3.pt'