In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import torch
import torchvision
import torchaudio
from torchvision import transforms
import torchvision.models as models
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.datapipes.iter import FileLister
from torch.optim import lr_scheduler
import numpy as np
import pandas as pd
import os
import librosa
from sklearn.model_selection import StratifiedKFold, cross_val_score
import time
import copy
from tqdm import tqdm
from math import ceil

In [2]:
# Locations of the BirdCLEF 2022 metadata table and training audio tree,
# relative to the notebook's working directory.
meta_file_path = "../input/birdclef-2022/train_metadata.csv"
train_folder = "../input/birdclef-2022/train_audio"

In [3]:
# Load the training metadata CSV located at meta_file_path into a DataFrame.
meta_data = pd.read_csv(meta_file_path)

In [4]:
# Distinct values of the `primary_label` column; a label's position in this
# list is used later as its integer class id.
classes = list(pd.unique(meta_data['primary_label']))
print(len(classes), classes, sep="\n")

152
['afrsil1', 'akekee', 'akepa1', 'akiapo', 'akikik', 'amewig', 'aniani', 'apapan', 'arcter', 'barpet', 'bcnher', 'belkin1', 'bkbplo', 'bknsti', 'bkwpet', 'blkfra', 'blknod', 'bongul', 'brant', 'brnboo', 'brnnod', 'brnowl', 'brtcur', 'bubsan', 'buffle', 'bulpet', 'burpar', 'buwtea', 'cacgoo1', 'calqua', 'cangoo', 'canvas', 'caster1', 'categr', 'chbsan', 'chemun', 'chukar', 'cintea', 'comgal1', 'commyn', 'compea', 'comsan', 'comwax', 'coopet', 'crehon', 'dunlin', 'elepai', 'ercfra', 'eurwig', 'fragul', 'gadwal', 'gamqua', 'glwgul', 'gnwtea', 'golphe', 'grbher3', 'grefri', 'gresca', 'gryfra', 'gwfgoo', 'hawama', 'hawcoo', 'hawcre', 'hawgoo', 'hawhaw', 'hawpet1', 'hoomer', 'houfin', 'houspa', 'hudgod', 'iiwi', 'incter1', 'jabwar', 'japqua', 'kalphe', 'kauama', 'laugul', 'layalb', 'lcspet', 'leasan', 'leater1', 'lessca', 'lesyel', 'lobdow', 'lotjae', 'madpet', 'magpet1', 'mallar3', 'masboo', 'mauala', 'maupar', 'merlin', 'mitpar', 'moudov', 'norcar', 'norhar2', 'normoc', 'norpin', 'norsh

In [5]:
# --- Audio / spectrogram settings ---
SAMPLE_RATE = 44_100
NUM_SAMPLES = 5 * SAMPLE_RATE  # fixed clip length: five seconds at SAMPLE_RATE
N_MELS = 256
HOP_LENGTH = 512
N_FFT = 4096
WINDOW = 1764

# --- Training settings ---
BATCH_SIZE = 128
EPOCHS = 2
LEARNING_RATE = 0.1
WORKERS = 0
NUM_CLASSES = 152
NUM_FOLDS = 10

# --- Class-balancing settings ---
NEED_MORE = 100  # classes with fewer recordings than this are flagged for augmentation
NUM_AUGS = 4

In [6]:
# Number of metadata rows per class, aligned with the order of `classes`.
num_bird_samples = [
    (meta_data.primary_label == label).sum() for label in classes
]

# Classes whose row count is below NEED_MORE.
classes_need_augmentation = [
    label
    for label, count in zip(classes, num_bird_samples)
    if count < NEED_MORE
]

In [7]:
# Show the classes that have fewer than NEED_MORE metadata rows.
print(classes_need_augmentation)

['afrsil1', 'akekee', 'akepa1', 'akiapo', 'akikik', 'amewig', 'aniani', 'apapan', 'barpet', 'bkwpet', 'blkfra', 'blknod', 'bongul', 'brnboo', 'brnnod', 'brtcur', 'bubsan', 'buffle', 'bulpet', 'burpar', 'buwtea', 'cacgoo1', 'canvas', 'chbsan', 'chemun', 'chukar', 'cintea', 'compea', 'coopet', 'crehon', 'elepai', 'ercfra', 'fragul', 'glwgul', 'golphe', 'grefri', 'gresca', 'gryfra', 'hawama', 'hawcoo', 'hawcre', 'hawgoo', 'hawhaw', 'hawpet1', 'hoomer', 'hudgod', 'iiwi', 'incter1', 'jabwar', 'japqua', 'kalphe', 'kauama', 'laugul', 'layalb', 'lcspet', 'leasan', 'leater1', 'lessca', 'lobdow', 'lotjae', 'madpet', 'magpet1', 'masboo', 'mauala', 'maupar', 'merlin', 'mitpar', 'norhar2', 'nutman', 'oahama', 'omao', 'pagplo', 'palila', 'parjae', 'pecsan', 'peflov', 'pomjae', 'puaioh', 'reccar', 'redava', 'redjun', 'redpha1', 'refboo', 'rempar', 'rettro', 'ribgul', 'rinduc', 'ruff', 'semplo', 'shtsan', 'snogoo', 'sooshe', 'sooter1', 'sopsku1', 'wantat1', 'warwhe1', 'wessan', 'wetshe', 'whfibi', 'wh

In [8]:
# Per-class row counts, followed by how many classes fall below NEED_MORE.
print(num_bird_samples, len(classes_need_augmentation), sep="\n")

[16, 6, 10, 14, 2, 51, 12, 47, 196, 15, 462, 100, 271, 179, 3, 41, 14, 56, 135, 13, 20, 500, 24, 5, 15, 7, 20, 52, 38, 134, 318, 10, 176, 120, 16, 12, 39, 16, 177, 208, 55, 500, 194, 3, 2, 476, 14, 6, 340, 32, 239, 108, 30, 463, 11, 101, 17, 16, 48, 273, 21, 16, 20, 9, 3, 3, 20, 322, 500, 8, 37, 8, 78, 43, 17, 12, 79, 3, 13, 84, 99, 7, 149, 83, 44, 19, 16, 500, 27, 3, 1, 74, 59, 142, 500, 31, 387, 112, 106, 78, 14, 21, 228, 53, 7, 62, 75, 38, 186, 162, 7, 3, 42, 36, 75, 39, 40, 39, 32, 78, 30, 248, 156, 311, 192, 35, 151, 116, 77, 128, 3, 500, 97, 11, 22, 6, 146, 107, 123, 101, 19, 71, 343, 40, 23, 45, 28, 23, 76, 20, 67, 89]
105


In [9]:
!nvidia-smi

Tue Jul  5 17:52:50 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [13]:
class BirdDataset(Dataset):
    """Mel-spectrogram dataset over a folder-per-class audio tree.

    Every file found under ``train_folder`` is one sample. Its label is the
    name of the folder that contains it, encoded as the position of that name
    in the notebook-level ``classes`` list. Classes with too few recordings
    receive extra placeholder entries so that they are drawn more often.
    """

    def __init__(self, train_folder, transformation, sample_rate, num_samples, device):
        """Index the audio files and register the extra entries for rare classes.

        Args:
            train_folder: root directory that is scanned recursively for files.
            transformation: callable applied to the padded / cropped waveform.
            sample_rate: rate every recording is resampled to.
            num_samples: clip length in samples that every waveform is padded
                or cropped to.
            device: device the waveform is moved to before ``transformation``.
        """
        super().__init__()
        self.device = device
        self.train_files_list = list(FileLister(root=train_folder, recursive=True))
        self.transformation = transformation
        self.sample_rate = sample_rate
        self.num_samples = num_samples
        # Maps each placeholder entry to the real recording it stands for.
        self._augmented_sources = {}
        self._add_filenames_for_generation()

    @staticmethod
    def _label_of(filename):
        """Return the class label of ``filename``: the name of its parent folder."""
        return filename.split(os.sep)[-2]

    def _add_filenames_for_generation(self, min_files_per_class=None):
        """Append placeholder entries for classes with too few recordings.

        Recordings are grouped by label using the indexed file list itself, so
        the counts always match the files that are actually present. For a
        class with ``k`` recordings and ``k < min_files_per_class``, every
        recording gets ``ceil((min_files_per_class - k) / k)`` placeholders
        named ``<original path>aug<i>``.

        Args:
            min_files_per_class: threshold below which a class is topped up;
                defaults to the notebook-level ``NEED_MORE``.
        """
        if min_files_per_class is None:
            min_files_per_class = NEED_MORE

        # Group before appending anything so the loop below sees originals only.
        files_by_class = {}
        for filename in self.train_files_list:
            files_by_class.setdefault(self._label_of(filename), []).append(filename)

        for class_files in files_by_class.values():
            num_orig_files = len(class_files)
            if num_orig_files >= min_files_per_class:
                continue
            num_gen_files = min_files_per_class - num_orig_files
            num_gen_files_per_orig = ceil(num_gen_files / num_orig_files)
            for orig_filename in class_files:
                for i in range(num_gen_files_per_orig):
                    new_filename = f"{orig_filename}aug{i}"
                    self._augmented_sources[new_filename] = orig_filename
                    self.train_files_list.append(new_filename)

    def __len__(self):
        """Return the number of entries, placeholders included."""
        self.total_train_file = len(self.train_files_list)
        return self.total_train_file

    def list_all_files(self):
        """Return a copy of the entry list, in index order."""
        return list(self.train_files_list)

    def _resample(self, signal, sr):
        """Resample ``signal`` from ``sr`` to ``self.sample_rate`` when they differ."""
        if sr != self.sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.sample_rate)
            signal = resampler(signal)
        return signal

    def _mix_channels(self, signal):
        """Average over dim 0 (kept as size 1) when it holds more than one channel."""
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    def _right_padding(self, signal):
        """Zero-pad the end of dim 1 up to ``self.num_samples``."""
        if signal.shape[1] < self.num_samples:
            num_missing_samples = self.num_samples - signal.shape[1]
            last_dim_pad = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_pad)
        return signal

    def _crop(self, signal):
        """Keep only the first ``self.num_samples`` values along dim 1."""
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]
        return signal

    def __getitem__(self, index):
        """Return ``(features, label)`` for the entry at ``index``.

        ``features`` is the transformed clip with its first channel repeated
        three times along a new leading dimension; ``label`` is the integer
        position of the entry's class in ``classes``.
        """
        filename = self.train_files_list[index]
        label = classes.index(self._label_of(filename))
        # Placeholder entries have no file on disk: read the recording they
        # stand for. No signal-level augmentation is applied here, so these
        # entries currently act as plain oversampling of rare classes.
        source_filename = self._augmented_sources.get(filename, filename)
        signal, sr = torchaudio.load(source_filename)
        signal = self._resample(signal, sr)
        signal = signal.to(self.device)
        signal = self._mix_channels(signal)
        signal = self._right_padding(signal)
        signal = self._crop(signal)
        signal = self.transformation(signal)
        # Repeat the single channel three times.
        signal = torch.stack([signal[0], signal[0], signal[0]])
        return signal, label

In [43]:
# Waveform -> mel-spectrogram transform configured from the notebook constants.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=N_FFT,
    hop_length=HOP_LENGTH,
    n_mels=N_MELS,
)

In [44]:
# Single-channel normalisation statistics.
mean = np.array([0.5])
std = np.array([0.5])

# Pipeline applied to each waveform: mel-spectrogram, then normalisation,
# with both stages placed on `device`.
mel_stage = mel_spectrogram.to(device)
normalize_stage = transforms.Normalize(mean, std).to(device)
data_transforms = transforms.Compose([mel_stage, normalize_stage])

In [45]:
# Dataset over the training audio tree, producing transformed clips on `device`.
bird_cleff_ds = BirdDataset(
    train_folder=train_folder,
    transformation=data_transforms,
    sample_rate=SAMPLE_RATE,
    num_samples=NUM_SAMPLES,
    device=device,
)

In [46]:
# Shuffled loader over the whole dataset, used below to inspect one batch.
train_loader = DataLoader(
    dataset=bird_cleff_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [47]:
# Pull a single batch so its tensors can be inspected in the following cells.
dataiter = iter(train_loader)
# Use the built-in next(): it drives the iterator protocol directly, whereas a
# `.next()` method is a Python 2 convention that DataLoader iterators are not
# guaranteed to provide.
sample = next(dataiter)
s, l = sample

In [48]:
# Shape of the sampled feature batch.
s.shape

torch.Size([128, 3, 256, 431])

In [None]:
# Labels of the sampled batch.
print(l)

In [None]:
# Values of channel 0 of the batch item at index 1.
print(s[1][0])

In [None]:
def plot_spectrogram(spec, xmax=None):
    """Display ``spec`` as a dB-scaled image titled "Mel-Spectrogram".

    Args:
        spec: spectrogram passed to ``librosa.power_to_db`` before plotting.
        xmax: optional upper x-axis limit (in frames); a falsy value leaves
            the axis range untouched.
    """
    figure, axis = plt.subplots(1, 1, figsize=(10, 4))
    axis.set(title="Mel-Spectrogram", ylabel="mel-freq", xlabel="frame")

    spec_db = librosa.power_to_db(spec)
    image = axis.imshow(spec_db, origin="lower", aspect="auto")
    if xmax:
        axis.set_xlim((0, xmax))

    figure.colorbar(image, ax=axis)
    plt.show(block=False)

In [None]:
# Plot channel 0 of the batch item at index 1, copied to the CPU first.
plot_spectrogram(s[1][0].cpu())

In [None]:
def get_model():
    """Build a ResNet-34 without pretrained weights and a NUM_CLASSES-way head.

    Returns:
        The network with its final fully connected layer replaced by a new
        ``nn.Linear`` that outputs ``NUM_CLASSES`` values.
    """
    network = models.resnet34(pretrained=False)
    # Swap the stock classifier for one sized to this task.
    network.fc = nn.Linear(network.fc.in_features, NUM_CLASSES)
    return network

In [None]:
# Network placed on the selected device.
model = get_model().to(device)

# Classification loss and Adam optimizer over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Multiply the learning rate by 0.1 on every scheduler step.
lr_step_sched = lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=0.1)

In [None]:
# Stratify on the dataset's own entries (label = parent folder of each file):
# the sampler indices below address positions in bird_cleff_ds, whose order and
# length are not guaranteed to match the rows of meta_data.
dataset_labels = [path.split(os.sep)[-2] for path in bird_cleff_ds.list_all_files()]

folds = StratifiedKFold(n_splits=NUM_FOLDS)

# Best validation accuracy seen in any fold and the weights that produced it.
# Starting below zero guarantees the first validation epoch is recorded.
overall_best_acc = -1.0
best_model_wts = copy.deepcopy(model.state_dict())

#Train:
# Only the labels matter for a stratified split, so X is a placeholder array.
# NOTE(review): entries that share a source recording can land on both sides of
# a split; group-aware splitting would be needed to rule that out.
for fold, (train_idxs, val_idxs) in enumerate(folds.split(np.zeros(len(dataset_labels)), dataset_labels)):
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idxs)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_idxs)

    train_loader = DataLoader(bird_cleff_ds, batch_size=BATCH_SIZE, sampler=train_subsampler, num_workers=WORKERS)
    val_loader = DataLoader(bird_cleff_ds, batch_size=BATCH_SIZE, sampler=val_subsampler, num_workers=WORKERS)

    # Every fold starts from a fresh model, optimizer and LR schedule. Reusing
    # them would validate on data the model was trained on in an earlier fold
    # and would keep shrinking the learning rate across folds.
    model = get_model().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    lr_step_sched = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

    since = time.time()
    best_acc = 0

    for epoch in range(EPOCHS):
        for phase in ['train', 'val']:
            if phase == 'train':
                data_loader = train_loader
                num_samples = len(train_idxs)
                #Set the model into train mode
                model.train()
            else:
                data_loader = val_loader
                num_samples = len(val_idxs)
                #Set the model into eval mode
                model.eval()

            running_loss = 0.0
            running_num_corrects = 0

            #TQDM progress bar; len(data_loader) is the exact number of batches
            loop = tqdm(enumerate(data_loader), total=len(data_loader), leave=False)

            for i, (mel_specs, labels) in loop:
                mel_specs = mel_specs.to(device)
                labels = labels.to(device)

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(mel_specs)
                    _, preds = torch.max(outputs, 1)

                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                loop.set_description(f"{phase}= Epoch [{epoch}/{EPOCHS}] | Fold [{fold}/{NUM_FOLDS}] ")
                loop.set_postfix(loss = loss.item())

                # Weight the batch-mean loss by the real batch size so a short
                # final batch does not skew the epoch average.
                running_loss += loss.item() * labels.size(0)
                running_num_corrects += torch.sum(preds == labels).item()

            if phase == 'train':
                lr_step_sched.step()

            epoch_loss = running_loss / num_samples
            epoch_acc = running_num_corrects / num_samples
            print(f"epoch loss :{epoch_loss} epoch acc :{epoch_acc}")

            if phase == 'val':
                best_acc = max(best_acc, epoch_acc)
                if epoch_acc > overall_best_acc:
                    overall_best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f"Training completed in {time_elapsed}")
    print(f"Best accuracy {best_acc}")

# Leave `model` holding the best checkpoint found, so that whatever is done
# with `model` afterwards uses those weights rather than the last epoch's.
model.load_state_dict(best_model_wts)

In [None]:
# Write the weights currently held by `model` to a file in the working directory.
final_state = model.state_dict()
torch.save(final_state, "bird_cleff_fine_tuned_model.pth")