In [90]:
%cd /home/skang/Documents/kaggle/bird_clef/notebook
%pwd

/home/skang/Documents/kaggle/bird_clef/notebook


'/home/skang/Documents/kaggle/bird_clef/notebook'

In [51]:
import os
import json
import tqdm
import random
import shutil
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import torchaudio
import torchaudio.transforms as T
import seaborn as sns
import matplotlib.pyplot as plt

# Build audio file paths

In [12]:
# Dataset locations, relative to the notebook directory.
root_path = "../input/birdclef-2022/"
# `root_path` already ends with "/" and later cells append "/" + filename, so the
# audio directory is joined without extra separators to avoid "//" in every path.
input_path = os.path.join(root_path, "train_audio")
out_path = "./train/"

# Competition metadata: one row per recording.
meta_df = pd.read_csv(os.path.join(root_path, "train_metadata.csv"))
# Class names in order of first appearance in the metadata.
bird_label = list(meta_df["primary_label"].unique())


In [53]:
meta_df['filepath'] = input_path + "/" + meta_df['filename']

In [55]:
meta_df.filename

0        afrsil1/XC125458.ogg
1        afrsil1/XC175522.ogg
2        afrsil1/XC177993.ogg
3        afrsil1/XC205893.ogg
4        afrsil1/XC207431.ogg
                 ...         
14847     zebdov/XC629769.ogg
14848     zebdov/XC642415.ogg
14849     zebdov/XC665873.ogg
14850     zebdov/XC666194.ogg
14851     zebdov/XC666195.ogg
Name: filename, Length: 14852, dtype: object

In [26]:
d = {filename: str('test') + "/" + filename for filename in meta_df.filename}

# Save essentials (bird labels, sample rate)

In [5]:
bird_label = list(meta_df['primary_label'].unique())
essentials= {"birds": bird_label, "sample_rate": 32000}
with open("test.json", "w") as f:
    json.dump(essentials, f)

In [6]:
with open("test.json") as f:
    essentials = json.load(f)

mapping = list(essentials['birds'])

In [7]:
mapping

['afrsil1',
 'akekee',
 'akepa1',
 'akiapo',
 'akikik',
 'amewig',
 'aniani',
 'apapan',
 'arcter',
 'barpet',
 'bcnher',
 'belkin1',
 'bkbplo',
 'bknsti',
 'bkwpet',
 'blkfra',
 'blknod',
 'bongul',
 'brant',
 'brnboo',
 'brnnod',
 'brnowl',
 'brtcur',
 'bubsan',
 'buffle',
 'bulpet',
 'burpar',
 'buwtea',
 'cacgoo1',
 'calqua',
 'cangoo',
 'canvas',
 'caster1',
 'categr',
 'chbsan',
 'chemun',
 'chukar',
 'cintea',
 'comgal1',
 'commyn',
 'compea',
 'comsan',
 'comwax',
 'coopet',
 'crehon',
 'dunlin',
 'elepai',
 'ercfra',
 'eurwig',
 'fragul',
 'gadwal',
 'gamqua',
 'glwgul',
 'gnwtea',
 'golphe',
 'grbher3',
 'grefri',
 'gresca',
 'gryfra',
 'gwfgoo',
 'hawama',
 'hawcoo',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'hoomer',
 'houfin',
 'houspa',
 'hudgod',
 'iiwi',
 'incter1',
 'jabwar',
 'japqua',
 'kalphe',
 'kauama',
 'laugul',
 'layalb',
 'lcspet',
 'leasan',
 'leater1',
 'lessca',
 'lesyel',
 'lobdow',
 'lotjae',
 'madpet',
 'magpet1',
 'mallar3',
 'masboo',
 'mauala',
 '

In [8]:
inverse_mapping = {v: k for k, v in enumerate(mapping)}

In [9]:
inverse_mapping

{'afrsil1': 0,
 'akekee': 1,
 'akepa1': 2,
 'akiapo': 3,
 'akikik': 4,
 'amewig': 5,
 'aniani': 6,
 'apapan': 7,
 'arcter': 8,
 'barpet': 9,
 'bcnher': 10,
 'belkin1': 11,
 'bkbplo': 12,
 'bknsti': 13,
 'bkwpet': 14,
 'blkfra': 15,
 'blknod': 16,
 'bongul': 17,
 'brant': 18,
 'brnboo': 19,
 'brnnod': 20,
 'brnowl': 21,
 'brtcur': 22,
 'bubsan': 23,
 'buffle': 24,
 'bulpet': 25,
 'burpar': 26,
 'buwtea': 27,
 'cacgoo1': 28,
 'calqua': 29,
 'cangoo': 30,
 'canvas': 31,
 'caster1': 32,
 'categr': 33,
 'chbsan': 34,
 'chemun': 35,
 'chukar': 36,
 'cintea': 37,
 'comgal1': 38,
 'commyn': 39,
 'compea': 40,
 'comsan': 41,
 'comwax': 42,
 'coopet': 43,
 'crehon': 44,
 'dunlin': 45,
 'elepai': 46,
 'ercfra': 47,
 'eurwig': 48,
 'fragul': 49,
 'gadwal': 50,
 'gamqua': 51,
 'glwgul': 52,
 'gnwtea': 53,
 'golphe': 54,
 'grbher3': 55,
 'grefri': 56,
 'gresca': 57,
 'gryfra': 58,
 'gwfgoo': 59,
 'hawama': 60,
 'hawcoo': 61,
 'hawcre': 62,
 'hawgoo': 63,
 'hawhaw': 64,
 'hawpet1': 65,
 'hoomer': 66,

# Split audio into train and test sets

In [10]:
from importlib.util import find_spec

# Probe the real package name so that '..' is added to sys.path only when the
# `birdclef` package is not already importable.
if find_spec("birdclef") is None:
    import sys
    sys.path.append('..')

from birdclef.util import get_split_by_bird
meta_train, meta_test = get_split_by_bird(meta_df)

In [55]:
traintest_filename = {"trainval": list(meta_train.filename), "test": list(meta_test.filename)}

with open("/home/skang/Documents/kaggle/bird_clef/input/birdclef-2022/traintest_filename.json", "w") as f:
    json.dump(traintest_filename, f)

In [11]:
from birdclef.util import copy_split_audio

In [15]:
root_dir = "/home/skang/Documents/kaggle/bird_clef/input/birdclef-2022"
for meta, stage in zip([meta_train, meta_test], ["trainval", "test"]):
    copy_split_audio(meta, root_dir=root_dir, stage=stage)

In [56]:
with open("/home/skang/Documents/kaggle/bird_clef/input/birdclef-2022/traintest_filename.json") as f:
    split_names = json.load(f)

In [59]:
len(list(meta_train.filename))

11881

In [57]:
len(meta_train)

11881

In [47]:
len(meta_test)

2971

In [69]:
meta_df.primary_label.value_counts()

brnowl     500
skylar     500
norcar     500
mallar3    500
houspa     500
          ... 
puaioh       3
layalb       3
akikik       2
crehon       2
maupar       1
Name: primary_label, Length: 152, dtype: int64

In [75]:
meta_train.primary_label.value_counts()

skylar     401
mallar3    400
comsan     400
norcar     400
brnowl     400
          ... 
bkwpet       2
akikik       2
layalb       2
hawhaw       2
shtsan       1
Name: primary_label, Length: 152, dtype: int64

In [66]:
# Sanity check: print every training filename that is missing from the saved
# "trainval" split. The reference names are collected once into a set so each
# membership test is O(1) instead of rebuilding and scanning a list per row.
trainval_names = set(meta_df[meta_df.filename.isin(split_names['trainval'])].filename)
for i in meta_train.filename:
    if i not in trainval_names:
        print(i)

In [74]:
len(meta_df[meta_df.filename.isin(split_names['test'])])

2971

# CNN output size

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
def add_importpath(package_name: str):
    """Make the parent directory importable when ``package_name`` cannot be found.

    Args:
        package_name (str): Top-level package to look up with ``find_spec``.

    Side effects:
        Appends ``'..'`` to ``sys.path`` when no import spec exists for the package;
        otherwise ``sys.path`` is left untouched.
    """
    import sys
    from importlib.util import find_spec

    # Nothing to do when the package already resolves.
    if find_spec(package_name) is not None:
        return
    sys.path.append('..')

In [3]:
add_importpath("birdclef")

In [4]:
from birdclef.util import get_output_size_of_cnn

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
h_in = 256
w_in = 313
kernel_size = [3, 3]
padding = [1, 1]
stride = [1, 1]
pool = 0
get_output_size_of_cnn(h_in, w_in, kernel_size, stride, padding)

(256, 313)

# Loss

In [1]:
import torch

In [9]:
LOSS = "BCEWithLogitsLoss"

test = getattr(torch.nn, LOSS)

test()

In [2]:
t = torch.rand(2,3)

In [13]:
t[0] > 0.8

tensor([False, False,  True])

In [14]:
", ".join(["aa", "bb", "cc"])

'aa, bb, cc'

In [16]:
import json

In [17]:
with open("/home/skang/Documents/kaggle/birdclef2022/input/processed/birdclef2022/birdclef2022.json") as f:
    essentials = json.load(f)

mapping = list(essentials['birds'])

In [20]:
a = [0] * len(mapping)

In [29]:
", ".join([mapping[i] for i in range(len(mapping)) if a[i] == True])

'afrsil1, akiapo'

In [35]:
def _audio_to_mel_label(
    filepath,
    min_sec_proc,
    sample_rate,
    mel_converter,
    stage="trainval",
    data_index=0,
    label_list=None,
    bird_label=None,
    label_file=None,
):
    """Convert one audio file to log-mel spectrogram chunks and save each chunk.

    The waveform is resampled to ``sample_rate`` when the file's rate differs,
    passed through ``to_mono``, extended by repetition to at least ``min_sec_proc``
    samples, and cut into consecutive windows of ``min_sec_proc`` samples (a
    trailing partial window is dropped). Each window is converted with
    ``mel_converter``, log10-scaled, passed through ``normalize_std`` and written
    to ``PROCESSED_DATA_DIRNAME / stage / "<data_index>.pt"``.

    Args:
        filepath (str): Path of the audio file.
        min_sec_proc (int): Window length in samples (chunk seconds * sample rate).
        sample_rate (int): Target number of samples per second.
        mel_converter: Callable applied to a 1-D waveform window to produce the
            mel spectrogram (e.g. a torchaudio transform).
        stage (str, optional): Sub-directory of ``PROCESSED_DATA_DIRNAME`` to write
            into. Defaults to "trainval".
        data_index (int, optional): Index used as the file name of the first
            chunk. Defaults to 0.
        label_list (list, optional): List that receives one multi-hot label
            vector per saved chunk; it is extended in place. A fresh list is used
            when omitted.
        bird_label (sequence, optional): All target class names; defines the
            position of each class in the multi-hot vector.
        label_file (sequence, optional): Class names present in this file.

    Returns:
        int: ``data_index`` advanced by the number of chunks written.
    """
    # Fresh containers per call: a mutable default would be shared between calls.
    if label_list is None:
        label_list = []
    if label_file is None:
        label_file = []
    # Compare against an ndarray so `==` is element-wise. Comparing a str with a
    # plain Python list yields a single False and would leave every label at zero.
    bird_names = np.asarray([] if bird_label is None else bird_label, dtype=object)

    # Multi-hot target: 1 at the position of every class listed for this file.
    label_file_all = np.zeros(len(bird_names))
    for label_file_temp in label_file:
        label_file_all += bird_names == label_file_temp
    label_file_all = np.clip(label_file_all, 0, 1)

    waveform, sample_rate_file = torchaudio.load(filepath=filepath)

    if sample_rate_file != sample_rate:
        resample = T.Resample(sample_rate_file, sample_rate)
        waveform = resample(waveform)

    wav_len = waveform.shape[1]
    waveform = to_mono(waveform)
    waveform = waveform.reshape(1, wav_len)

    waveform, wav_len = repeat_crop_waveform(waveform, min_sec_proc, wav_len)

    # Create the output directory once instead of re-checking it for every chunk.
    out_dir = PROCESSED_DATA_DIRNAME / stage
    os.makedirs(out_dir, exist_ok=True)

    for index in range(int(wav_len / min_sec_proc)):
        start = index * min_sec_proc
        # Small epsilon keeps log10 finite for silent bins.
        log_melspec = torch.log10(
            mel_converter(waveform[0, start : start + min_sec_proc]).unsqueeze(0)
            + 1e-10
        )
        log_melspec = normalize_std(log_melspec)

        torch.save(log_melspec, out_dir / (str(data_index) + ".pt"))
        # Every chunk of the file shares the same file-level label vector.
        label_list.append(label_file_all)
        data_index += 1

    return data_index


def repeat_crop_waveform(waveform: torch.Tensor, min_sec_proc, wav_len) -> tuple:
    """Extend a too-short waveform by repetition, then crop it to ``min_sec_proc``.

    A waveform whose ``wav_len`` is already at least ``min_sec_proc`` is returned
    unchanged together with its original length.

    Args:
        waveform (torch.Tensor): 2-D tensor indexed as [channel, sample].
        min_sec_proc (int): Minimum required length, in samples.
        wav_len (int): Current number of samples in ``waveform``.

    Returns:
        tuple: ``(waveform, wav_len)`` where ``wav_len`` is the length of the
        returned waveform along the sample axis.

    Raises:
        ValueError: If the waveform must be extended but ``wav_len`` is not
            positive (an empty signal cannot be repeated).
    """

    if wav_len < min_sec_proc:
        if wav_len <= 0:
            raise ValueError(
                "cannot repeat an empty waveform (wav_len={})".format(wav_len)
            )
        # Each pass appends one more copy of the leading `wav_len` samples; the
        # rounded ratio always yields at least `min_sec_proc` samples in total.
        for _ in range(round(min_sec_proc / wav_len)):
            waveform = torch.cat((waveform, waveform[:, 0:wav_len]), 1)
        wav_len = min_sec_proc
        waveform = waveform[:, 0:wav_len]

    return waveform, wav_len

In [None]:
def _save_mel_labels_essentials(
    df: pd.DataFrame, stage, min_sec_proc, mel_converter, sample_rate=32000
):
    """Convert every audio file in ``df`` to log-mel chunks and save labels/essentials.

    Writes the class list and sample rate to ``ESSENTIALS_FILENAME`` as JSON,
    converts each row's audio file through ``_audio_to_mel_label`` and finally
    saves the stacked per-chunk label vectors to
    ``PROCESSED_DATA_DIRNAME / stage / "label_list.pt"``.

    Args:
        df (pd.DataFrame): Audio metadata; must provide the columns ``filepath``,
            ``primary_label`` and ``secondary_labels`` (a string holding a Python
            list literal).
        stage: Sub-directory name under ``PROCESSED_DATA_DIRNAME``.
        min_sec_proc (int): Chunk length in samples.
        mel_converter: Callable that turns a waveform window into a mel spectrogram.
        sample_rate (int, optional): Target sample rate. Defaults to 32000.
    """
    import ast  # local import: only needed to parse the secondary_labels column

    os.makedirs(PROCESSED_DATA_DIRNAME, exist_ok=True)
    # NOTE(review): the class list is derived from the frame passed in, so calling
    # this with different frames may overwrite the essentials with a different
    # class order — confirm against callers.
    bird_label = list(df["primary_label"].unique())
    essentials = {"birds": bird_label, "sample_rate": sample_rate}
    with open(ESSENTIALS_FILENAME, "w") as f:
        json.dump(essentials, f)

    # ndarray copy for the helper: its label matching relies on element-wise `==`,
    # which a plain Python list does not provide.
    bird_names = np.array(bird_label)

    data_index = 0
    label_list = []

    rows = zip(df["filepath"], df["primary_label"], df["secondary_labels"])
    for filepath, primary, secondary in rows:
        data_index = _audio_to_mel_label(
            filepath,
            min_sec_proc,
            sample_rate,
            mel_converter,
            stage,
            data_index,
            label_list,
            bird_names,
            # literal_eval parses the list literal without executing arbitrary code.
            [primary, *ast.literal_eval(secondary)],
        )

    torch.save(np.stack(label_list), PROCESSED_DATA_DIRNAME / stage / "label_list.pt")

In [1]:
import torchaudio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
waveform, sample_rate_file = torchaudio.load(filepath="/home/skang/Documents/kaggle/birdclef2022/input/train_audio/afrsil1/XC125458.ogg")

torch.Size([1, 355265])

In [None]:
# Pad/crop the loaded clip to 5 seconds at 32 kHz.
waveform, wav_len = repeat_crop_waveform(waveform, 32000 * 5, waveform.shape[1])

In [37]:
import torch

t = torch.load("/home/skang/Documents/kaggle/birdclef2022/input/processed/birdclef2022/v1/trainval/label_list.pt")

'c:\\Users\\ftmlab\\Documents\\hyoon\\project_new\\kaggle\\birdclef2022\\notebook'

In [7]:
import os
from pathlib import Path
cur = Path(os.path.abspath(os.curdir))
import json
with open(cur / ".." / "input/processed/birdclef2022/v1/birdclef2022.json") as f:
    essentials = json.load(f)

In [11]:
import numpy as np
output_dim = len(essentials['birds'])
y = np.array(essentials['birds'])

In [10]:
y == 'afrsil1'

array([ True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [51]:
import numpy as np

In [None]:
np.

In [53]:
t[0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [52]:
print(np.array(at[0]) == 'afrsil1')

False


  print(np.array(t[0]) == 'afrsil1')


In [None]:
label_file_all

In [36]:
y

['afrsil1',
 'akekee',
 'akepa1',
 'akiapo',
 'akikik',
 'amewig',
 'aniani',
 'apapan',
 'arcter',
 'barpet',
 'bcnher',
 'belkin1',
 'bkbplo',
 'bknsti',
 'bkwpet',
 'blkfra',
 'blknod',
 'bongul',
 'brant',
 'brnboo',
 'brnnod',
 'brnowl',
 'brtcur',
 'bubsan',
 'buffle',
 'bulpet',
 'burpar',
 'buwtea',
 'cacgoo1',
 'calqua',
 'cangoo',
 'canvas',
 'caster1',
 'categr',
 'chbsan',
 'chemun',
 'chukar',
 'cintea',
 'comgal1',
 'commyn',
 'compea',
 'comsan',
 'comwax',
 'coopet',
 'crehon',
 'dunlin',
 'elepai',
 'ercfra',
 'eurwig',
 'fragul',
 'gadwal',
 'gamqua',
 'glwgul',
 'gnwtea',
 'golphe',
 'grbher3',
 'grefri',
 'gresca',
 'gryfra',
 'gwfgoo',
 'hawama',
 'hawcoo',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'hoomer',
 'houfin',
 'houspa',
 'hudgod',
 'iiwi',
 'incter1',
 'jabwar',
 'japqua',
 'kalphe',
 'kauama',
 'laugul',
 'layalb',
 'lcspet',
 'leasan',
 'leater1',
 'lessca',
 'lesyel',
 'lobdow',
 'lotjae',
 'madpet',
 'magpet1',
 'mallar3',
 'masboo',
 'mauala',
 '

In [35]:
import numpy as np
("akekee" == np.array(y))

array([False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [45]:
[y[j] for j in range(len(t[0])) if t[2][j] == True]

['afrsil1', 'houspa', 'redava', 'zebdov']

In [15]:
import torchvision
from torchvision.models.resnet import ResNet, BasicBlock
import torch.nn as nn

In [12]:
output_dim

152

In [18]:
resnet = torchvision.models.resnet18(pretrained=True)

In [26]:
resnet.conv1

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [104]:
torch.ones(1)

tensor([1.])

In [80]:
import torch.nn.functional as F
import torch
import torchvision

class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions with a learnable exponent.

    Args:
        p: Initial value of the exponent, stored as a one-element ``nn.Parameter``.
        eps: Lower clamp applied to the input before it is raised to ``p``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(p * torch.ones(1))

    def forward(self, x):
        """Pool ``x`` using the module's current exponent and epsilon."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp, raise to ``p``, average over the full spatial window, take the p-th root."""
        window = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, window)
        return pooled.pow(1. / p)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return "{}(p={:.4f}, eps={})".format(
            self.__class__.__name__, p_value, str(self.eps)
        )

In [99]:
# Spectrogram height/width used for the smoke-test input, and the width of the
# embedding layer placed between the backbone and the classifier.
H_SPEC = 128
W_SPEC = 313
embedding_size = 1024
class ResNetBird(nn.Module):
    """ResNet-18 backbone with GeM pooling, an embedding layer and a linear classifier.

    The network takes single-channel input of shape (batch, 1, H, W) and returns
    one logit per class, i.e. a tensor of shape (batch, output_dim). ``output_dim``
    and ``embedding_size`` are read from module-level names at construction time.
    """

    def __init__(self):
        super().__init__()
        resnet = torchvision.models.resnet18(pretrained=True)
        # Spectrograms have one channel, so the stock 3-channel stem is replaced;
        # the new layer starts from a fresh initialisation, not pretrained weights.
        resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Keep everything except the last two child modules (in torchvision's
        # resnet18 these are the built-in pooling and classification head).
        self.resnet = torch.nn.Sequential(*list(resnet.children())[:-2])
        # GeM reduces the spatial map to 1x1, so the head is independent of H and W.
        self.pooling = GeM()
        self.embedding = nn.Linear(512, embedding_size)
        self.fc = nn.Linear(embedding_size, output_dim)

    def forward(self, x: torch.Tensor):
        """Return class logits for a batch of single-channel spectrograms."""
        x = self.resnet(x)
        x = self.pooling(x).flatten(1)
        x = self.embedding(x)
        x = self.fc(x)

        return x

In [101]:
t = torch.rand((1, 1, H_SPEC, W_SPEC))
resnet_bird = ResNetBird()
output = resnet_bird(t)
output

tensor([[-4.9904e-01, -8.3791e-01, -1.2619e+00,  9.5162e-02,  9.9783e-01,
          6.2090e-01, -9.2301e-01,  1.0538e+00, -5.1902e-02,  2.0504e-01,
          8.7513e-01, -5.0454e-01,  1.0899e+00, -9.7472e-01,  5.2867e-01,
         -9.3032e-02, -1.3911e+00, -4.6960e-01, -1.3674e-01,  8.0066e-01,
         -1.6127e-01, -1.8084e+00,  9.3338e-01, -1.7764e-03, -3.8512e-01,
          1.0683e+00, -5.6962e-01, -2.4046e-01,  2.8832e-02,  5.6548e-01,
          4.0870e-01, -1.7609e+00, -4.1039e-01,  5.6884e-01,  7.6098e-02,
         -7.5229e-01,  3.9971e-01, -9.7782e-01, -1.1542e+00,  5.2280e-01,
          1.0622e+00, -3.7133e-01, -3.5100e-02,  9.7366e-01, -1.6001e-01,
         -6.1014e-01, -6.4755e-01,  5.4360e-01,  5.3232e-01, -8.8948e-01,
          9.9745e-02, -3.8787e-01,  2.2768e-02,  8.8781e-01, -1.8197e+00,
          9.9405e-02, -4.2266e-02,  2.8076e-01, -1.4109e+00,  1.0839e+00,
          7.5266e-01,  2.0765e-01, -1.2040e-01,  6.1410e-01,  1.3863e-01,
          4.3482e-01, -3.3620e-03,  7.

In [103]:
output.shape

torch.Size([1, 152])

In [94]:
W_SPEC // 32

9

In [33]:
from typing import List
def get_output_size_of_cnn(
    h_in,
    w_in,
    kernel_size: List[int],
    stride: List[int],
    padding: List[int],
    pool=0,
    dilation=[1, 1],
):
    """Return the output image size of a convolution layer.

    Args:
        h_in (int): Input image height.
        w_in (int): Input image width.
        kernel_size (List[int, int]): Kernel size as [height, width].
        stride (List[int, int]): Stride as [height, width].
        padding (List[int, int]): Padding as [height, width].
        pool (int, optional): Pooling factor; each output size is divided by it
            when non-zero. Defaults to 0.
        dilation (list, optional): Spacing between kernel elements. Defaults to [1, 1].

    Returns:
        tuple: Output image size as (h, w), both truncated to int.
    """
    sizes = []
    # Axis 0 is the height, axis 1 is the width; both follow the same formula.
    for axis, size_in in enumerate((h_in, w_in)):
        kernel_extent = dilation[axis] * (kernel_size[axis] - 1)
        size_out = np.floor(
            (size_in + 2 * padding[axis] - kernel_extent - 1) / stride[axis] + 1
        )
        if pool:
            size_out = size_out / pool
        sizes.append(int(size_out))

    return sizes[0], sizes[1]


In [43]:
resnet.layer3

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(256, eps=1

In [44]:
get_output_size_of_cnn(h_in=6, w_in=6, stride=[2, 2], padding=[3, 3], kernel_size=[7, 7])

(3, 3)

## V2

In [1]:
import torch

In [2]:
label = torch.load(r"C:\Users\ftmlab\Documents\hyoon\project_new\kaggle\birdclef2022\input\processed\birdclef2022\v2\trainval\label_list.pt")

In [9]:
label.sum(axis=0)

array([203.,   1., 298.,   0.,   0.,  44.,   0., 155.,  23.,   0.,   0.,
         0.,   5., 236.,   0.,   0., 156.,   0.,   0., 262.,   0.])

In [56]:
import numpy as np
t = np.array(["a", "b", "c", "d" "e"])

In [30]:
c = t["c" == t]

In [31]:
c

array(['c'], dtype='<U2')

In [17]:
("a" == t).sum() > 0

True

In [29]:
isinstance(t, np.ndarray)

True

In [32]:
import pandas as pd

meta = pd.DataFrame(columns=["filepath", "label"])

In [43]:
# DataFrame.append is deprecated (and removed in pandas 2.0); add the row with concat.
meta = pd.concat(
    [meta, pd.DataFrame([{"filepath": "aa", "label": [0,1,0,0,0,0]}])],
    ignore_index=True,
)

  meta = meta.append({"filepath": "aa", "label": [0,1,0,0,0,0]}, ignore_index=True)


In [50]:
t = pd.DataFrame([["aa", "bb"]], columns=["filepath", "label"])
pd.concat([meta, t], axis=0, ignore_index=True)

Unnamed: 0,filepath,label
0,aa,"[0, 1, 0, 0, 0, 0]"
1,aa,"[0, 1, 0, 0, 0, 0]"
2,aa,bb


In [71]:
label = t == "c"

In [72]:
test = list(label)

In [74]:
str(test)

'[False, False, True, False]'

In [132]:
test = np.clip(0, 1, test)

In [133]:
df = pd.DataFrame(columns=["label"])

In [135]:
df = pd.concat([df, pd.DataFrame([str(list(test))], columns=["label"])], axis=0, ignore_index=True)

In [136]:
df.label

0    [0, 0, 1, 0]
1    [0, 0, 1, 0]
Name: label, dtype: object

In [137]:
type(df.label[0])

str

In [138]:
df.label = df.label.map(lambda x: np.array(eval(x)))

In [139]:
df.label

0    [tensor(0), tensor(0), tensor(1), tensor(0)]
1    [tensor(0), tensor(0), tensor(1), tensor(0)]
Name: label, dtype: object

In [1]:
import pandas as pd
df = pd.read_csv(r"C:\Users\ftmlab\Documents\hyoon\project_new\kaggle\birdclef2022\input\processed\birdclef2022\v2_test\trainval\v2_test_meta.csv")

In [2]:
df["bird_name"] = df.filename.map(lambda x: x.split("/")[0])

In [7]:
df.bird_name.value_counts().index

Index(['others', 'warwhe1', 'iiwi', 'akiapo', 'houfin', 'apapan', 'hawama',
       'aniani'],
      dtype='object')

In [17]:
sampling_num = 100
birds = df.bird_name.unique()
df_new = df
for b in birds:
    if b == "others":
        continue
    df_new = pd.concat([df_new, df[df.bird_name == b].sample(n=sampling_num, replace=True, ignore_index=True)], ignore_index=True)

In [21]:
df_new.bird_name.value_counts()

warwhe1    362
others     296
iiwi       187
akiapo     108
houfin     105
apapan     103
hawama     102
aniani     101
Name: bird_name, dtype: int64

In [23]:
len(df[df.bird_name == "others"])

296

In [28]:
def oversampling(df, frac=None, random_state=None):
    """Oversample every species except "others" by sampling with replacement.

    The species name is taken from the directory part of ``filename`` (text
    before the first "/") and stored in a ``bird_name`` column, which is added
    to the input frame in place. For each species other than "others",
    ``int(n_others * frac)`` extra rows are drawn with replacement and appended.

    Args:
        df (pd.DataFrame): Metadata with a ``filename`` column such as
            "species/recording.ogg".
        frac (float, optional): Extra rows per species, as a fraction of the
            number of "others" rows. Defaults to ``1 / max(n_species - 1, 1)``.
        random_state (optional): Forwarded to ``DataFrame.sample`` so the
            result can be reproduced. Defaults to None (non-deterministic).

    Returns:
        pd.DataFrame: The original rows followed by the sampled rows, with a
        fresh integer index.
    """
    df["bird_name"] = df.filename.map(lambda x: x.split("/")[0])
    birds = df.bird_name.unique()

    if frac is None:
        frac = 1 / max(len(birds) - 1, 1)

    sampling_num = int((df.bird_name == "others").sum() * frac)

    # Collect the pieces and concatenate once instead of growing a frame per species.
    parts = [df]
    for b in birds:
        if b == "others":
            continue
        parts.append(
            df[df.bird_name == b].sample(
                n=sampling_num,
                replace=True,
                random_state=random_state,
                ignore_index=True,
            )
        )

    # Return the augmented frame; returning the input would discard the sampling.
    return pd.concat(parts, ignore_index=True)

In [29]:
import pandas as pd
df = pd.read_csv(r"C:\Users\ftmlab\Documents\hyoon\project_new\kaggle\birdclef2022\input\processed\birdclef2022\v2_test\trainval\v2_test_meta.csv")
df = oversampling(df)

Before oversampling
others     296
warwhe1    262
iiwi        87
akiapo       8
houfin       5
apapan       3
hawama       2
aniani       1
Name: bird_name, dtype: int64
After oversampling
warwhe1    304
others     296
iiwi       129
akiapo      50
houfin      47
apapan      45
hawama      44
aniani      43
Name: bird_name, dtype: int64


In [3]:
import pandas as pd
df = pd.read_csv(r"C:\Users\ftmlab\Documents\hyoon\project_new\kaggle\birdclef2022\input\processed\birdclef2022\v3_test\trainval\v3_test_meta.csv")

In [5]:
df["bird_name"] = df.filename.map(lambda x: x.split("/")[0])

In [7]:
max(df["bird_name"].value_counts())

193

In [13]:
df['filepath'] = input_path + "/" + df['filename']

In [15]:
from pathlib import Path

In [24]:
df.filepath[0].rsplit(".", 1)[0].rsplit("/")[-2]

'afrsil1'

In [23]:
Path(df.filepath[0].rsplit())

WindowsPath('../input/birdclef-2022/train_audio/afrsil1/fc8db52e-690d-4b0a-af87-e8a2b7702ccf')

In [31]:
import torch

In [32]:
t = torch.tensor([1, 2 , 3, 4])

In [34]:
output = torch.tensor([False, False, False, False])

In [36]:
output[t.argmax()] = True

In [37]:
output

tensor([False, False, False,  True])

In [38]:
import json
with open(r"C:\Users\ftmlab\Documents\hyoon\project_new\kaggle\birdclef2022\input\birdclef-2022\scored_birds.json") as f:
    t = json.load(f)

In [39]:
t

['akiapo',
 'aniani',
 'apapan',
 'barpet',
 'crehon',
 'elepai',
 'ercfra',
 'hawama',
 'hawcre',
 'hawgoo',
 'hawhaw',
 'hawpet1',
 'houfin',
 'iiwi',
 'jabwar',
 'maupar',
 'omao',
 'puaioh',
 'skylar',
 'warwhe1',
 'yefcan']