In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torchvision
import torchaudio
from torchvision import transforms
import torchvision.models as models
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.datapipes.iter import FileLister
from torch.optim import lr_scheduler
import numpy as np
import pandas as pd
import os
import librosa
from sklearn.model_selection import StratifiedKFold, cross_val_score
import time
import copy
from tqdm import tqdm
from math import ceil
import random

In [None]:
# Locations of the competition data, relative to the notebook's working directory:
# the metadata table first, then the folder holding the training audio.
meta_file_path = "../input/birdclef-2022/train_metadata.csv"
train_folder = "../input/birdclef-2022/train_audio"

In [None]:
# Read the training metadata CSV into a DataFrame; its `primary_label` column
# is used below to derive the class list and the per-class sample counts.
meta_data = pd.read_csv(meta_file_path)

In [None]:
# Distinct primary labels in order of first appearance; the position of a
# label in this list is the integer class index used for training.
classes = list(pd.unique(meta_data['primary_label']))
print(len(classes), classes, sep='\n')

In [None]:
# --- Audio / spectrogram settings -------------------------------------------
SAMPLE_RATE = 44100                 # rate (Hz) every recording is resampled to
NUM_SAMPLES = 5 * SAMPLE_RATE       # clips are padded/cropped to 5 seconds
N_MELS, HOP_LENGTH, N_FFT = 431, 512, 4096
WINDOW = 1764                       # not referenced in the visible cells

# --- Training settings ------------------------------------------------------
BATCH_SIZE, EPOCHS = 64, 5
LEARNING_RATE = 1e-3
WORKERS = 0                         # DataLoader worker processes
NUM_CLASSES = 152                   # size of the classifier's output layer
NUM_FOLDS = 10                      # not referenced in the visible cells

# --- Class balancing --------------------------------------------------------
NEED_MORE = 50                      # per-class sample count used as the balancing threshold
NUM_AUGS = 4                        # not referenced in the visible cells

# --- Augmentation ranges ----------------------------------------------------
MIN_NOISE, MAX_NOISE = 0.1, 0.4
MIN_PITCH_SCALE, MAX_PITCH_SCALE = -2, 2
MIN_GAIN, MAX_GAIN = 1, 1.5

# --- Model input ------------------------------------------------------------
IMAGE_RESIZE = 215                  # spectrogram images are resized to IMAGE_RESIZE x IMAGE_RESIZE

In [None]:
# Count the metadata rows of every class, then keep the classes that have
# fewer than NEED_MORE rows. The two result lists are index-aligned:
# aug_num_bird_samples[i] is the row count of classes_need_augmentation[i].
rows_per_class = [(name, (meta_data.primary_label == name).sum()) for name in classes]
under_represented = [(name, count) for name, count in rows_per_class if count < NEED_MORE]
classes_need_augmentation = [name for name, _count in under_represented]
aug_num_bird_samples = [count for _name, count in under_represented]

In [None]:
# Show the classes that have fewer than NEED_MORE metadata rows.
print(classes_need_augmentation)

In [None]:
# Sanity check: both lists are appended to together, so the two lengths must match.
print(len(aug_num_bird_samples))
print(len(classes_need_augmentation))

In [None]:
# Shell escape: report the GPU(s) visible to this session.
!nvidia-smi

In [None]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
class BirdDataset(Dataset):
    """Map-style dataset yielding ``(3-channel spectrogram image, class index)`` pairs.

    The list of entries is built once, at construction time:

    * every class in ``classes`` contributes at most ``NEED_MORE`` randomly
      chosen recordings;
    * every class in ``classes_need_augmentation`` additionally contributes
      placeholder entries (an original path with ``AUG_SUFFIX`` appended) that
      are turned into augmented clips when they are fetched.

    Relies on the module-level names ``classes``, ``classes_need_augmentation``,
    ``aug_num_bird_samples`` and ``NEED_MORE``.

    Args:
        train_folder: directory containing one sub-folder of audio files per class.
        transformation: callable applied to the fixed-length waveform; its
            result is indexed with ``[0]`` and stacked into three channels.
        augmentations: callable applied to the waveform of augmented entries
            before ``transformation``.
        sample_rate: rate every recording is resampled to.
        num_samples: exact clip length (in samples) after padding/cropping.
        device: device the waveform is moved to before the transforms run.
    """

    # Marker appended to a file path to request an augmented copy of that recording.
    AUG_SUFFIX = 'augment'

    def __init__(self, train_folder, transformation, augmentations, sample_rate, num_samples, device):
        super().__init__()
        self.device = device
        # Keep the folder passed by the caller instead of silently reading the global.
        self.train_folder = train_folder
        self.train_files_list = []
        self.transformation = transformation
        self.augmentations = augmentations
        self.sample_rate = sample_rate
        self.num_samples = num_samples
        self._downsample()
        self._add_filenames_for_generation()
        # Kept as an attribute for callers that read it; the list is not modified later.
        self.total_train_file = len(self.train_files_list)

    def _class_files(self, class_name):
        """Return the list of files found in the sub-folder of ``class_name``."""
        return list(FileLister(root=self.train_folder + '/' + class_name))

    def _downsample(self):
        """Add at most ``NEED_MORE`` randomly selected files per class."""
        for class_name in classes:
            class_train_files = self._class_files(class_name)
            if len(class_train_files) > NEED_MORE:
                # Shuffle the list itself (not a temporary copy) so the kept
                # subset really is random.
                random.shuffle(class_train_files)
                class_train_files = class_train_files[:NEED_MORE]
            self.train_files_list.extend(class_train_files)

    def _add_filenames_for_generation(self):
        """Add placeholder entries for the classes that need augmentation.

        Each original file of such a class receives
        ``ceil((NEED_MORE - n) / n)`` augmented entries, where ``n`` is the
        class's count from ``aug_num_bird_samples``.
        NOTE(review): because the count is rounded up per original file, a
        class can end up with more than ``NEED_MORE`` entries in total.
        """
        for class_name, num_orig_files in zip(classes_need_augmentation, aug_num_bird_samples):
            num_orig_files = int(num_orig_files)
            if num_orig_files <= 0:
                # Nothing to derive augmented clips from (and avoids a division by zero).
                continue
            num_gen_files = NEED_MORE - num_orig_files
            num_gen_files_per_orig = ceil(num_gen_files / num_orig_files)

            # Slicing (rather than indexing) tolerates a folder holding fewer
            # files than the metadata count.
            for orig_file in self._class_files(class_name)[:num_orig_files]:
                self.train_files_list.extend([orig_file + self.AUG_SUFFIX] * num_gen_files_per_orig)

    def __len__(self):
        return len(self.train_files_list)

    def list_all_files(self):
        """Return a copy of the entry list (augmented entries keep their suffix)."""
        return list(self.train_files_list)

    def _resample(self, signal, sr):
        """Resample ``signal`` from ``sr`` to ``self.sample_rate`` when they differ."""
        if sr != self.sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.sample_rate)
            signal = resampler(signal)
        return signal

    def _mix_channels(self, signal):
        """Average over dim 0 (keeping it) when it holds more than one channel."""
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    def _right_padding(self, signal):
        """Zero-pad the end of dim 1 up to ``self.num_samples``."""
        if signal.shape[1] < self.num_samples:
            num_missing_samples = self.num_samples - signal.shape[1]
            last_dim_pad = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_pad)
        return signal

    def _crop(self, signal):
        """Keep only the first ``self.num_samples`` samples along dim 1."""
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]
        return signal

    def _aug_white_noise(self, signal):
        """Add zero-mean Gaussian noise scaled by the signal's standard deviation.

        The noise level is drawn uniformly from ``[MIN_NOISE, MAX_NOISE)``.
        Works on torch tensors (noise is created on the tensor's device) and
        on NumPy arrays.
        """
        scale = np.random.uniform(MIN_NOISE, MAX_NOISE)
        if isinstance(signal, torch.Tensor):
            # `Tensor.size` is a method, so the NumPy generator cannot be used here.
            noise = torch.randn_like(signal) * signal.std()
        else:
            noise = np.random.normal(0, signal.std(), signal.shape)
        return signal + noise * scale

    def __getitem__(self, index):
        filename = self.train_files_list[index]

        # Augmented entries are the original path with AUG_SUFFIX appended.
        is_augmented = filename.endswith(self.AUG_SUFFIX)
        if is_augmented:
            filename = filename[:-len(self.AUG_SUFFIX)]

        # The class name is the name of the folder that contains the file.
        label = classes.index(os.path.basename(os.path.dirname(filename)))

        signal, sr = torchaudio.load(filename)
        signal = self._resample(signal, sr)
        signal = signal.to(self.device)
        signal = self._mix_channels(signal)
        signal = self._right_padding(signal)
        signal = self._crop(signal)
        if is_augmented:
            signal = self.augmentations(signal)
        signal = self.transformation(signal)
        # Repeat the single channel three times to form a 3-channel input.
        signal = torch.stack([signal[0], signal[0], signal[0]])
        return signal, label

In [None]:
# Build the waveform augmentations applied to the synthetic training entries.
# NOTE: the pitch step and the gain are drawn once, here, so every augmented
# clip produced in this session uses the same two fixed transforms.

# np.random.randint excludes its upper bound; add 1 so MAX_PITCH_SCALE can be
# drawn too. A step of 0 is still possible and leaves the pitch unchanged.
pitch_scale = int(np.random.randint(MIN_PITCH_SCALE, MAX_PITCH_SCALE + 1))
pitch_scaler = torchaudio.transforms.PitchShift(sample_rate=SAMPLE_RATE, n_steps=pitch_scale)

gain_scale = np.random.uniform(MIN_GAIN, MAX_GAIN)
gain_scaler = torchaudio.transforms.Vol(gain=gain_scale)

aug_transfms = [pitch_scaler.to(device), gain_scaler.to(device)]

# One of the two transforms is chosen at random each time `augmentations` is called.
augmentations = transforms.Compose(
[
    transforms.RandomChoice(aug_transfms)
])

In [None]:
# Waveform -> mel-spectrogram -> resized, normalised image tensor.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=N_FFT,
    hop_length=HOP_LENGTH,
    n_mels=N_MELS,
)

# Single-channel normalisation statistics.
mean = np.array([0.5])
std = np.array([0.5])

# NOTE(review): ToPILImage converts the float spectrogram to an image before
# resizing — confirm the spectrogram's value range suits that conversion.
spectrogram_steps = [
    mel_spectrogram.to(device),
    transforms.ToPILImage(),
    transforms.Resize([IMAGE_RESIZE, IMAGE_RESIZE]).to(device),
    transforms.ToTensor(),
    transforms.Normalize(mean, std).to(device),
]
data_transforms = transforms.Compose(spectrogram_steps)

In [None]:
# Build the dataset: this scans the class folders and fixes the list of
# original and augmented entries for the rest of the session.
bird_cleff_ds = BirdDataset(train_folder, data_transforms, augmentations, SAMPLE_RATE, NUM_SAMPLES, device)

In [None]:
# Total number of entries (original + augmented) in the dataset.
len(bird_cleff_ds)

In [None]:
# Random 80/20 split of the dataset into training and validation subsets.
num_items = len(bird_cleff_ds)
train_size = int(0.8 * num_items)
val_size = num_items - train_size
trainset, valset = torch.utils.data.random_split(bird_cleff_ds, [train_size, val_size])

In [None]:
# Number of entries in the training and validation subsets.
len(trainset), len(valset)

In [None]:
# Both loaders share batch size and worker count; only the training loader shuffles.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=WORKERS)
trainloader = DataLoader(trainset, shuffle=True, **loader_kwargs)
valloader = DataLoader(valset, **loader_kwargs)

In [None]:
# Number of batches per epoch for training and validation.
len(trainloader), len(valloader)

In [None]:
# Pull a single batch from the training loader for inspection.
dataiter = iter(trainloader)
# Use the builtin next(): DataLoader iterators no longer provide a `.next()` method
# in current PyTorch releases, while next() works on every version.
sample = next(dataiter)
s, l = sample

In [None]:
# Shape of the input batch; expected to be (batch, 3, IMAGE_RESIZE, IMAGE_RESIZE)
# given the dataset's three-channel stacking and the resize step.
s.shape

In [None]:
def plot_spectrogram(spec, xmax=None):
    """Display a spectrogram on a dB scale with a colour bar.

    Args:
        spec: 2-D array-like passed to ``librosa.power_to_db`` before plotting.
        xmax: optional upper limit for the x axis (frames); ignored when falsy.
    """
    figure, axis = plt.subplots(1, 1, figsize=(10, 4))
    axis.set_title("Mel-Spectrogram")
    axis.set_ylabel("mel-freq")
    axis.set_xlabel("frame")

    spec_db = librosa.power_to_db(spec)
    image = axis.imshow(spec_db, origin="lower", aspect="auto")
    if xmax:
        axis.set_xlim((0, xmax))

    figure.colorbar(image, ax=axis)
    plt.show(block=False)

In [None]:
# Plot the first channel of one sample from the batch.
# Index 14 requires the batch to contain at least 15 items.
plot_spectrogram(s[14][0].cpu())

In [None]:
# The inspection iterator is no longer needed; drop the reference.
del dataiter

In [None]:
# Drop the inspected batch (inputs and labels) now that it has been plotted.
del s, l

In [None]:

def get_model(num_classes=None, pretrained=True):
    """Return a ResNet-50 whose final fully connected layer is replaced.

    Args:
        num_classes: size of the new output layer; defaults to the
            module-level ``NUM_CLASSES`` when omitted.
        pretrained: forwarded to ``models.resnet50``; the default keeps the
            original behaviour of loading pretrained weights.

    Returns:
        The model with a freshly initialised ``fc`` layer.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    model = models.resnet50(pretrained=pretrained)
    # Keep the backbone's feature width; only the output size changes.
    num_ftrs_last_layer = model.fc.in_features
    model.fc = nn.Linear(num_ftrs_last_layer, num_classes)
    return model

In [None]:
#from torch.torchsummary import summary

# Build the network, move it to the selected device and print its layers.
model = get_model().to(device)
print(model)
#summary(model, input_size=(64, 3, 215, 215))

In [None]:
# Adam over all model parameters, trained against a cross-entropy loss
# (the model outputs raw logits and the labels are integer class indices).
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

#lr_step_sched = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

In [None]:
# Train and eval funcs:

def train_func(trainloader, model, optimizer, criterion):
    """Run one training epoch and return the mean loss per batch.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        trainloader: iterable of ``(inputs, labels)`` batches; must support ``len``.
        model: network to train (switched to training mode here).
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits, labels)``.
    """
    model.train()
    running_loss = 0.0

    # tqdm progress bar, removed once the epoch finishes
    progress = tqdm(trainloader, total=len(trainloader), leave=False)

    for batch_specs, batch_labels in progress:
        batch_specs = batch_specs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_specs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        progress.set_postfix(loss=loss_value)

    return running_loss / len(trainloader)

def eval_func(valloader, model, criterion):
    """Evaluate the model on the validation data and return the mean loss per batch.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        valloader: iterable of ``(inputs, labels)`` batches; must support ``len``.
        model: network to evaluate (switched to evaluation mode here).
        criterion: loss called as ``criterion(logits, labels)``.
    """
    total_loss = 0.0
    model.eval()

    # tqdm progress bar, removed once the pass finishes
    loop = tqdm(valloader, total=len(valloader), leave=False)

    # No gradients are needed for validation; disabling autograd avoids
    # building the graph and keeping activations in memory.
    with torch.no_grad():
        for mel_specs, labels in loop:
            mel_specs = mel_specs.to(device)
            labels = labels.to(device)

            logits = model(mel_specs)
            loss = criterion(logits, labels)

            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())

    return total_loss / len(valloader)

In [None]:
#Training

# `np.Inf` was removed in NumPy 2.0; the lowercase `np.inf` works on every version.
best_val_loss = np.inf

for i in range(EPOCHS):
    train_loss = train_func(trainloader, model, optimizer, criterion)
    val_loss = eval_func(valloader, model, criterion)

    # Checkpoint the weights whenever the validation loss improves.
    if val_loss < best_val_loss:
        torch.save(model.state_dict(), 'bird_cleff_fine_tuned_model.pt')
        print("Model saved!")
        best_val_loss = val_loss

    print(f"Epoch={i+1}, train_loss= {train_loss}, val_loss={val_loss}")