### Loading dependencies

In [None]:
import json
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import ConcatDataset, DataLoader
from torch.utils.data import random_split

from modules.conformer import Conformer
from modules.image_import_module import ImageDataset, get_subdirectories

# Seed torch's RNG (and every CUDA RNG when a GPU is present) so runs are
# repeatable, then pick the device all later cells compute on.
cuda = torch.cuda.is_available()
torch.manual_seed(0)
if not cuda:
    print('cuda is not available')
else:
    print('cuda is available')
    torch.cuda.manual_seed_all(0)
device = torch.device('cpu' if not cuda else 'cuda')

### Loading the training data. 
- Combining the training data from all the subdirectories into one big dataset. 
- Randomly split the combined dataset into train, dev and test sets and create a dataloader for each.

Note that the training data (black-and-white spectrograms) is not included in this repository due to its size (16GB). It can be downloaded from https://www.kaggle.com/datasets/forbo7/spectrograms-birdclef-2023.

In [16]:
# !!! Due to its size, the dataset is not included in the repository and should be
# downloaded from: https://www.kaggle.com/datasets/forbo7/spectrograms-birdclef-2023
list_of_subdirectories = get_subdirectories('/spectrograms-birdclef-2023')

# Two-way mapping between a class index and a class name. The name is the last
# path component of each subdirectory (presumably one directory per bird
# species -- confirm against get_subdirectories).
idx_to_name = {}
name_to_idx = {}
for i, path in enumerate(list_of_subdirectories):
    class_name = path.split('/')[-1]
    idx_to_name[i] = class_name
    name_to_idx[class_name] = i

# optional: load the idx_to_name and name_to_idx dictionaries from a json file
# with open('name_to_idx.json', 'r') as f:
#     name_to_idx =json.load(f)

# with open('idx_to_name.json', 'r') as f:
#     idx_to_name = json.load(f)

# One dataset per subdirectory, concatenated into a single dataset.
list_of_datasets = [ImageDataset(name_to_idx, path) for path in list_of_subdirectories]
complete_dataset = ConcatDataset(list_of_datasets)

# Splitting the dataset into training (90%), development (5%) and test sets.
# The test set takes whatever is left so the three sizes always sum to the total.
train_size = int(0.90 * len(complete_dataset))
dev_size = int(0.05 * len(complete_dataset))
test_size = len(complete_dataset) - train_size - dev_size

# A dedicated, explicitly seeded generator makes the split independent of how
# much of the global RNG stream was consumed before this cell runs, so
# re-running the cell always yields the same train/dev/test partition.
split_generator = torch.Generator().manual_seed(0)
train_dataset, dev_dataset, test_dataset = random_split(
    complete_dataset,
    [train_size, dev_size, test_size],
    generator=split_generator,
)

# Creating the dataloaders. Only the training data is shuffled; the order of
# evaluation batches does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
dev_loader = DataLoader(dev_dataset, batch_size=64, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

### Instantiating the model.
Currently the parameters are set to match the dimensions of the input spectrogram images.

In [17]:
# Build the Conformer classifier and place it on the selected device.
# The 1 x 512 x 512 input settings are meant to match the spectrogram images
# (see the note above this cell); 264 is the number of output classes.
bird_classifier = Conformer(
    num_classes=264,
    input_channels=1,
    input_height=512,
    input_length=512,
    conv_kernel_size=31,
    num_heads=2,
    num_conformer_blocks=6,
    subsampling_factor=2,
    device=device,
).to(device)

### Specifying hyperparameters, loss and optimizer.

In [8]:
# Criterion used to score the classifier's outputs against the labels.
loss = nn.CrossEntropyLoss()

# Training schedule: number of passes over the training data and Adam step size.
learning_rate = 1e-3
epochs = 10

# Adam updates every parameter of the classifier.
optimizer = optim.Adam(params=bird_classifier.parameters(), lr=learning_rate)


### Training loop
- Prints out the average loss and accuracy on the training data every epoch.
- Saves the model every 5 epochs.

In [None]:
# This cell must work no matter which cells ran before it: put the model on the
# training device and switch it to training mode, in case it was left on
# another device or in eval mode.
bird_classifier.to(device)
bird_classifier.train()

# torch.save does not create missing directories; create the checkpoint folder
# up front instead of failing after several epochs of training.
os.makedirs('results', exist_ok=True)

for t in range(1, epochs + 1):
    print(f"current epoch {t}")
    batch_losses = []  # one scalar loss per batch of this epoch
    num_correct = 0
    num_total = 0
    for data, labels in train_loader:
        optimizer.zero_grad()
        data = data.to(device)
        labels = labels.to(device)
        output = bird_classifier(data)
        loss_val = loss(output, labels)
        loss_val.backward()
        optimizer.step()

        # Bookkeeping on plain Python numbers: .item() keeps the counters off
        # the device and makes the printed accuracy a float, not a tensor repr.
        predict = torch.argmax(output, dim=-1)
        num_correct += (labels == predict).sum().item()
        num_total += len(predict)
        batch_losses.append(loss_val.item())

    # Converted once per epoch; appending to a numpy array per batch copies the
    # whole array every time.
    array_of_losses = np.array(batch_losses)

    print(f"accuracy on training data in epoch {t} is {num_correct/num_total}")
    print(f"average loss is {np.mean(array_of_losses)}")
    # Checkpoint every 5 epochs.
    if t % 5 == 0:
        torch.save(bird_classifier.state_dict(), 'results/bird_classifier_epoch_' + str(t) + '.pth')

### Load previous model if desired

In [None]:
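# Uncomment to restore a checkpoint written by the training loop (checkpoints are saved under results/).
# bird_classifier.load_state_dict(torch.load('results/bird_classifier_epoch_10.pth', map_location=torch.device('cpu')))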

### Test the model on the dev set to evaluate its performance and make changes

In [None]:
# Evaluate on the same device the model was built for, moving each batch to it,
# rather than moving the whole model to the CPU. This keeps the model where the
# training cell expects it if that cell is run again.
bird_classifier.to(device)
# eval mode: layers such as dropout/batch-norm (if present) use inference behaviour.
bird_classifier.eval()
num_total = 0
num_correct = 0
with torch.no_grad():  # no gradients needed for evaluation
    for data, labels in dev_loader:
        data = data.to(device)
        labels = labels.to(device)
        yhat = bird_classifier(data)
        pred = torch.argmax(yhat, dim=-1)
        # .item() keeps the counter a plain int so the accuracy prints as a float.
        num_correct += (pred == labels).sum().item()
        num_total += len(labels)

print(f" accuracy is {num_correct/num_total}")