In [90]:
%cd /home/skang/Documents/kaggle/bird_clef/notebook
%pwd

/home/skang/Documents/kaggle/bird_clef/notebook


'/home/skang/Documents/kaggle/bird_clef/notebook'

In [2]:
import os
import json
import tqdm
import random
import shutil
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import torchaudio
import torchaudio.transforms as T
import seaborn as sns
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


# Build file paths

In [51]:
# Dataset locations, relative to the notebook directory.
root_path = "../input/birdclef-2022/"
# os.path.join avoids the doubled separator that `root_path + '/train_audio/'`
# produced; the empty last component keeps the trailing slash of the old value.
input_path = os.path.join(root_path, "train_audio", "")
out_path = "./train/"

# Competition metadata table; `primary_label` holds the bird class code.
meta_df = pd.read_csv(os.path.join(root_path, "train_metadata.csv"))
# Distinct class codes, in order of first appearance in the metadata.
bird_label = list(meta_df["primary_label"].unique())


In [52]:
# Join and normalise so the stored paths carry no doubled separators
# (the plain string concatenation produced ".../train_audio//<file>").
meta_df['filepath'] = meta_df['filename'].map(
    lambda name: os.path.normpath(os.path.join(input_path, name))
)

In [26]:
# Map every file name to its location under the "test" directory.
d = {name: "test" + "/" + name for name in meta_df.filename}

# Save essentials (labels, sample rate)

In [5]:
# Distinct class codes plus the sample rate, persisted as JSON.
bird_label = list(meta_df['primary_label'].unique())
essentials = dict(birds=bird_label, sample_rate=32000)
with open("test.json", "w") as f:
    f.write(json.dumps(essentials))

In [6]:
# Read the essentials back from disk.
with open("test.json") as f:
    essentials = json.loads(f.read())

# Index -> class code lookup.
mapping = [bird for bird in essentials['birds']]

In [7]:
mapping

['afrsil1',
 'akekee',
 'akepa1',
 'akiapo',
 'akikik',
 'amewig',
 'aniani',
 'apapan',
 'arcter',
 'barpet',
 'bcnher',
 'belkin1',
 'bkbplo',
 'bknsti',
 'bkwpet',
 'blkfra',
 'blknod',
 'bongul',
 'brant',
 'brnboo',
 'brnnod',
 'brnowl',
 'brtcur',
 'bubsan',
 'buffle',
 'bulpet',
 'burpar',
 'buwtea',
 'cacgoo1',
 'calqua',
 'cangoo',
 'canvas',
 'caster1',
 'categr',
 'chbsan',
 'chemun',
 'chukar',
 'cintea',
 'comgal1',
 'commyn',
 'compea',
 'comsan',
 'comwax',
 'coopet',
 'crehon',
 'dunlin',
 'elepai',
 'ercfra',
 'eurwig',
 'fragul',
 'gadwal',
 'gamqua',
 'glwgul',
 'gnwtea',
 'golphe',
 'grbher3',
 'grefri',
 'gresca',
 'gryfra',
 'gwfgoo',
 'hawama',
 'hawcoo',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'hoomer',
 'houfin',
 'houspa',
 'hudgod',
 'iiwi',
 'incter1',
 'jabwar',
 'japqua',
 'kalphe',
 'kauama',
 'laugul',
 'layalb',
 'lcspet',
 'leasan',
 'leater1',
 'lessca',
 'lesyel',
 'lobdow',
 'lotjae',
 'madpet',
 'magpet1',
 'mallar3',
 'masboo',
 'mauala',
 '

In [8]:
# Class code -> index lookup (inverse of `mapping`).
inverse_mapping = {bird: position for position, bird in enumerate(mapping)}

In [9]:
inverse_mapping

{'afrsil1': 0,
 'akekee': 1,
 'akepa1': 2,
 'akiapo': 3,
 'akikik': 4,
 'amewig': 5,
 'aniani': 6,
 'apapan': 7,
 'arcter': 8,
 'barpet': 9,
 'bcnher': 10,
 'belkin1': 11,
 'bkbplo': 12,
 'bknsti': 13,
 'bkwpet': 14,
 'blkfra': 15,
 'blknod': 16,
 'bongul': 17,
 'brant': 18,
 'brnboo': 19,
 'brnnod': 20,
 'brnowl': 21,
 'brtcur': 22,
 'bubsan': 23,
 'buffle': 24,
 'bulpet': 25,
 'burpar': 26,
 'buwtea': 27,
 'cacgoo1': 28,
 'calqua': 29,
 'cangoo': 30,
 'canvas': 31,
 'caster1': 32,
 'categr': 33,
 'chbsan': 34,
 'chemun': 35,
 'chukar': 36,
 'cintea': 37,
 'comgal1': 38,
 'commyn': 39,
 'compea': 40,
 'comsan': 41,
 'comwax': 42,
 'coopet': 43,
 'crehon': 44,
 'dunlin': 45,
 'elepai': 46,
 'ercfra': 47,
 'eurwig': 48,
 'fragul': 49,
 'gadwal': 50,
 'gamqua': 51,
 'glwgul': 52,
 'gnwtea': 53,
 'golphe': 54,
 'grbher3': 55,
 'grefri': 56,
 'gresca': 57,
 'gryfra': 58,
 'gwfgoo': 59,
 'hawama': 60,
 'hawcoo': 61,
 'hawcre': 62,
 'hawgoo': 63,
 'hawhaw': 64,
 'hawpet1': 65,
 'hoomer': 66,

# Split audio into train and test sets

In [10]:
from importlib.util import find_spec
# The package name was misspelled as "bridclef", so the lookup never succeeded
# and '..' was appended to sys.path on every run of this cell.
if find_spec("birdclef") is None:
    import sys
    sys.path.append('..')

from birdclef.util import get_split_by_bird
# Split the metadata into a train part and a test part.
meta_train, meta_test = get_split_by_bird(meta_df)

In [55]:
# File names belonging to each split, keyed by stage name.
traintest_filename = {
    "trainval": [name for name in meta_train.filename],
    "test": [name for name in meta_test.filename],
}

with open("/home/skang/Documents/kaggle/bird_clef/input/birdclef-2022/traintest_filename.json", "w") as f:
    f.write(json.dumps(traintest_filename))

In [11]:
from birdclef.util import copy_split_audio

In [15]:
# Copy the audio files of each split into its own stage directory.
root_dir = "/home/skang/Documents/kaggle/bird_clef/input/birdclef-2022"
for meta, stage in ((meta_train, "trainval"), (meta_test, "test")):
    copy_split_audio(meta, root_dir=root_dir, stage=stage)

In [56]:
# Reload the saved split so it can be checked against the in-memory frames.
with open("/home/skang/Documents/kaggle/bird_clef/input/birdclef-2022/traintest_filename.json") as f:
    split_names = json.loads(f.read())

In [59]:
len(list(meta_train.filename))

11881

In [57]:
len(meta_train)

11881

In [47]:
len(meta_test)

2971

In [69]:
meta_df.primary_label.value_counts()

brnowl     500
skylar     500
norcar     500
mallar3    500
houspa     500
          ... 
puaioh       3
layalb       3
akikik       2
crehon       2
maupar       1
Name: primary_label, Length: 152, dtype: int64

In [75]:
meta_train.primary_label.value_counts()

skylar     401
mallar3    400
comsan     400
norcar     400
brnowl     400
          ... 
bkwpet       2
akikik       2
layalb       2
hawhaw       2
shtsan       1
Name: primary_label, Length: 152, dtype: int64

In [66]:
# Sanity check: print every training file name that is missing from the saved
# "trainval" split. The reference names are collected once into a set; the
# original rebuilt the filtered frame and a list on every loop iteration.
trainval_names = set(meta_df[meta_df.filename.isin(split_names['trainval'])].filename)
for i in meta_train.filename:
    if i not in trainval_names:
        print(i)

In [74]:
len(meta_df[meta_df.filename.isin(split_names['test'])])

2971

# CNN output size

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
def add_importpath(package_name: str):
    """Make ``package_name`` importable from the parent directory if needed.

    When the package cannot be located, the relative entry ``'..'`` is appended
    to ``sys.path``. Nothing is changed when the package is already importable.

    Args:
        package_name (str): Name of the package to look up.
    """
    import sys
    from importlib.util import find_spec

    # Skip the append when '..' is already listed, so repeated calls (e.g.
    # re-running the cell) do not pile up duplicate sys.path entries.
    if find_spec(package_name) is None and ".." not in sys.path:
        sys.path.append("..")

In [3]:
add_importpath("birdclef")

In [4]:
from birdclef.util import get_output_size_of_cnn

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Input feature-map height and width.
h_in, w_in = 256, 313
# 3x3 kernel, padding 1, stride 1.
kernel_size, padding, stride = [3, 3], [1, 1], [1, 1]
pool = 0
get_output_size_of_cnn(h_in, w_in, kernel_size, stride, padding)

(256, 313)

# Loss

In [1]:
import torch

In [9]:
# Name of the loss class to look up on torch.nn.
LOSS = "BCEWithLogitsLoss"

# Resolve the class object from its name.
test = getattr(torch.nn, LOSS)

# Instantiate it with default arguments; as the last expression of the cell,
# the resulting object is displayed.
test()

In [2]:
t = torch.rand(2,3)

In [13]:
t[0] > 0.8

tensor([False, False,  True])

In [14]:
", ".join(["aa", "bb", "cc"])

'aa, bb, cc'

In [16]:
import json

In [17]:
# Load the essentials written by the preprocessing step.
with open("/home/skang/Documents/kaggle/birdclef2022/input/processed/birdclef2022/birdclef2022.json") as f:
    essentials = json.loads(f.read())

# Index -> class code lookup.
mapping = [bird for bird in essentials['birds']]

In [20]:
a = [0] * len(mapping)

In [29]:
", ".join([mapping[i] for i in range(len(mapping)) if a[i] == True])

'afrsil1, akiapo'

In [35]:
def _audio_to_mel_label(
    filepath,
    min_sec_proc,
    sample_rate,
    mel_converter,
    stage="trainval",
    data_index=0,
    label_list=None,
    bird_label=None,
    label_file=None,
):
    """Convert one audio file to log-mel spectrogram segments and save them.

    The waveform is loaded, resampled to ``sample_rate`` when the file uses a
    different rate, passed through ``to_mono``, and handed to
    ``repeat_crop_waveform``. It is then cut into consecutive, non-overlapping
    segments of ``min_sec_proc`` samples; a trailing remainder shorter than one
    segment is dropped. Each segment is converted with ``mel_converter``,
    log10-scaled, passed through ``normalize_std`` and saved as
    ``<data_index>.pt`` under ``PROCESSED_DATA_DIRNAME / stage``.

    Args:
        filepath (str): Path of the audio file.
        min_sec_proc (int): Segment length in samples (segment duration in
            seconds * sample rate).
        sample_rate (int): Target number of samples per second.
        mel_converter: Callable applied to each 1-D waveform segment.
        stage (str, optional): Sub-directory the segments are written to.
            Defaults to "trainval".
        data_index (int, optional): Index used as the file name of the first
            saved segment. Defaults to 0.
        label_list (list, optional): Output list; the file's multi-hot label
            vector is appended once per saved segment. Defaults to None, in
            which case a throw-away list is used.
        bird_label (sequence, optional): All target class names. Defaults to
            None (no classes).
        label_file (sequence, optional): Class names present in this file.
            Defaults to None (no labels).

    Returns:
        int: ``data_index`` advanced by the number of segments saved.
    """
    # None instead of `[]` defaults: a mutable default list would be shared
    # between calls and silently accumulate labels.
    if label_list is None:
        label_list = []
    if bird_label is None:
        bird_label = []
    if label_file is None:
        label_file = []

    # Compare against an ndarray: `"name" == ["a", "b"]` on a plain list is a
    # single False, which left every label vector all-zero.
    class_names = np.asarray(bird_label)
    label_file_all = np.zeros(len(class_names))
    for label_file_temp in label_file:
        label_file_all += class_names == label_file_temp
    # Clip so a class listed more than once still yields 1.
    label_file_all = np.clip(label_file_all, 0, 1)

    waveform, sample_rate_file = torchaudio.load(filepath=filepath)

    if sample_rate_file != sample_rate:
        resample = T.Resample(sample_rate_file, sample_rate)
        waveform = resample(waveform)

    wav_len = waveform.shape[1]
    waveform = to_mono(waveform)
    waveform = waveform.reshape(1, wav_len)

    waveform, wav_len = repeat_crop_waveform(waveform, min_sec_proc, wav_len)

    # Create the output directory once instead of checking it per segment.
    stage_dir = PROCESSED_DATA_DIRNAME / stage
    os.makedirs(stage_dir, exist_ok=True)

    for index in range(wav_len // min_sec_proc):
        start = index * min_sec_proc
        segment = waveform[0, start : start + min_sec_proc]
        # The 1e-10 offset keeps log10 finite where the mel energy is zero.
        log_melspec = torch.log10(mel_converter(segment).unsqueeze(0) + 1e-10)
        log_melspec = normalize_std(log_melspec)

        torch.save(log_melspec, stage_dir / (str(data_index) + ".pt"))
        label_list.append(label_file_all)
        data_index += 1

    return data_index


def repeat_crop_waveform(waveform: torch.Tensor, min_sec_proc: int, wav_len: int) -> tuple:
    """Repeat a too-short waveform until it fills ``min_sec_proc`` samples, then crop.

    A waveform of at least ``min_sec_proc`` samples is returned unchanged.
    A shorter one is extended by appending copies of its first ``wav_len``
    samples and cut to exactly ``min_sec_proc`` samples.

    Args:
        waveform (torch.Tensor): Waveform indexed as (channel, sample).
        min_sec_proc (int): Minimum length in samples.
        wav_len (int): Number of samples in ``waveform``.

    Returns:
        tuple: ``(waveform, wav_len)`` where ``wav_len`` is the resulting
        number of samples.

    Raises:
        ValueError: If the waveform needs extending but ``wav_len`` is not
            positive (an empty waveform cannot be repeated).
    """
    if wav_len < min_sec_proc:
        if wav_len <= 0:
            # The original divided by wav_len here and failed with an
            # unexplained ZeroDivisionError.
            raise ValueError("cannot repeat an empty waveform (wav_len must be positive)")
        # Ceil division: number of wav_len-sized pieces needed to reach min_sec_proc.
        pieces_needed = -(-min_sec_proc // wav_len)
        head = waveform[:, 0:wav_len]
        # One concatenation instead of re-copying the growing tensor in a loop.
        waveform = torch.cat([waveform] + [head] * (pieces_needed - 1), 1)
        wav_len = min_sec_proc
        waveform = waveform[:, 0:wav_len]

    return waveform, wav_len

In [None]:
def _save_mel_labels_essentials(
    df: pd.DataFrame, stage, min_sec_proc, mel_converter, sample_rate=32000
):
    """Convert every audio file in ``df`` to saved mel segments plus a label file.

    Writes the class list and sample rate as JSON to ``ESSENTIALS_FILENAME``,
    runs ``_audio_to_mel_label`` on each row, and finally saves the stacked
    label vectors to ``PROCESSED_DATA_DIRNAME / stage / "label_list.pt"``.

    Args:
        df (pd.DataFrame): Audio metadata with the columns ``filepath``,
            ``primary_label`` and ``secondary_labels`` (the latter holding the
            text form of a Python list).
        stage: Sub-directory name the outputs are written under.
        min_sec_proc (int): Segment length in samples.
        mel_converter: Callable forwarded to ``_audio_to_mel_label``.
        sample_rate (int, optional): Target sample rate. Defaults to 32000.
    """
    # Local import so the notebook's import cell does not have to change.
    import ast

    os.makedirs(PROCESSED_DATA_DIRNAME, exist_ok=True)
    bird_label = list(df["primary_label"].unique())
    essentials = {"birds": bird_label, "sample_rate": sample_rate}
    with open(ESSENTIALS_FILENAME, "w") as f:
        json.dump(essentials, f)

    # Hand the class names over as an ndarray so the per-class comparison is
    # elementwise; comparing a string with a plain list is a single False.
    class_names = np.asarray(bird_label)

    data_index = 0
    label_list = []

    rows = zip(df["filepath"], df["primary_label"], df["secondary_labels"])
    for filepath, primary_label, secondary_labels in rows:
        # NOTE(security): this column was previously passed to eval();
        # literal_eval parses the list literal without executing code.
        file_labels = [primary_label] + list(ast.literal_eval(secondary_labels))
        data_index = _audio_to_mel_label(
            filepath,
            min_sec_proc,
            sample_rate,
            mel_converter,
            stage,
            data_index,
            label_list,
            class_names,
            file_labels,
        )

    torch.save(np.stack(label_list), PROCESSED_DATA_DIRNAME / stage / "label_list.pt")

In [1]:
import torchaudio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
waveform, sample_rate_file = torchaudio.load(filepath="/home/skang/Documents/kaggle/birdclef2022/input/train_audio/afrsil1/XC125458.ogg")

torch.Size([1, 355265])

In [None]:
# Pad/crop the loaded clip to 5 seconds at 32 kHz. The original line had a
# type annotation inside the call (a SyntaxError) and misspelled `waveform`.
waveform, wav_len = repeat_crop_waveform(waveform, 32000 * 5, waveform.shape[1])

In [37]:
import torch

t = torch.load("/home/skang/Documents/kaggle/birdclef2022/input/processed/birdclef2022/v1/trainval/label_list.pt")

In [32]:
import json
# Load the essentials that belong to the v1 processed data.
with open("/home/skang/Documents/kaggle/birdclef2022/input/processed/birdclef2022/v1/birdclef2022.json") as f:
    essentials = json.loads(f.read())

In [55]:
y = np.array(essentials['birds'])

In [57]:
y == 'afrsil1'

array([ True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [51]:
import numpy as np

In [None]:
np.

In [53]:
t[0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [52]:
# `at` was an undefined name; the label array loaded above is `t`.
print(np.array(t[0]) == 'afrsil1')

False


  print(np.array(t[0]) == 'afrsil1')


In [None]:
label_file_all

In [36]:
y

['afrsil1',
 'akekee',
 'akepa1',
 'akiapo',
 'akikik',
 'amewig',
 'aniani',
 'apapan',
 'arcter',
 'barpet',
 'bcnher',
 'belkin1',
 'bkbplo',
 'bknsti',
 'bkwpet',
 'blkfra',
 'blknod',
 'bongul',
 'brant',
 'brnboo',
 'brnnod',
 'brnowl',
 'brtcur',
 'bubsan',
 'buffle',
 'bulpet',
 'burpar',
 'buwtea',
 'cacgoo1',
 'calqua',
 'cangoo',
 'canvas',
 'caster1',
 'categr',
 'chbsan',
 'chemun',
 'chukar',
 'cintea',
 'comgal1',
 'commyn',
 'compea',
 'comsan',
 'comwax',
 'coopet',
 'crehon',
 'dunlin',
 'elepai',
 'ercfra',
 'eurwig',
 'fragul',
 'gadwal',
 'gamqua',
 'glwgul',
 'gnwtea',
 'golphe',
 'grbher3',
 'grefri',
 'gresca',
 'gryfra',
 'gwfgoo',
 'hawama',
 'hawcoo',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'hoomer',
 'houfin',
 'houspa',
 'hudgod',
 'iiwi',
 'incter1',
 'jabwar',
 'japqua',
 'kalphe',
 'kauama',
 'laugul',
 'layalb',
 'lcspet',
 'leasan',
 'leater1',
 'lessca',
 'lesyel',
 'lobdow',
 'lotjae',
 'madpet',
 'magpet1',
 'mallar3',
 'masboo',
 'mauala',
 '

In [35]:
import numpy as np
("akekee" == np.array(y))

array([False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [45]:
# Names of the classes flagged in label row 2. The row is iterated directly;
# the original took the length from row 0 while reading row 2.
[y[j] for j, present in enumerate(t[2]) if present == True]

['afrsil1', 'houspa', 'redava', 'zebdov']