In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from birdCLEFloaddata import load_audiofile,load_metadata,get_melspectrogram
from birdCLEFFunctions import Dynamic_CNN, Dynamic_CNN2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import Resize
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

plt.rcParams['figure.dpi'] = 600


In [2]:
#load data
path = r"C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCLEFproject\data\birdCLEF2023"

meta_data = load_metadata(path)
meta_data.head(10)


Unnamed: 0,latitude,longitude,common_name,rating,filename
0,4.3906,38.2788,African Bare-eyed Thrush,4.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
1,-2.9524,38.2921,African Bare-eyed Thrush,3.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
2,-2.9524,38.2921,African Bare-eyed Thrush,3.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
3,-2.9524,38.2921,African Bare-eyed Thrush,5.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
4,-2.9524,38.2921,African Bare-eyed Thrush,4.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
5,-2.9524,38.2921,African Bare-eyed Thrush,3.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
6,-2.9965,37.6244,African Bare-eyed Thrush,3.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
7,-4.0904,37.8807,African Bare-eyed Thrush,5.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
8,-4.0904,37.8807,African Bare-eyed Thrush,5.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
9,4.8403,38.6988,African Bare-eyed Thrush,4.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...


In [3]:
#Generate audio data
audio_data = []
for i in range(50):
    audio, sr = load_audiofile(meta_data['filename'][i])
    audio_data.append(audio)

In [3]:
#Generate spectrograms
spectrograms = []
for i in range(100):
    spectrograms.append([get_melspectrogram(meta_data['filename'][i]),meta_data['common_name'][i]])

spectrograms = np.asarray(spectrograms)


In [25]:
import random
#Random selection of 100 data points, each point is taken in groups of 5
spectrograms = []
selected_spots = random.sample(range(len(meta_data) - 4), 100)  

for i in selected_spots:
    for j in range(i, i + 4):
        spectrograms.append([get_melspectrogram(meta_data['filename'][j]), meta_data['common_name'][j]])

spectrograms = np.asarray(spectrograms)


  spectrograms = np.asarray(spectrograms)
  spectrograms = np.asarray(spectrograms)


In [44]:
#Split data into validation and test. (Need to look at this again, possible mistake in the validation data creation)
train_ratio = 0.8
train_size = int(train_ratio * len(spectrograms))

train_labels = spectrograms[:train_size,1]
val_labels = spectrograms[train_size:,1]

label_mapping = {label: index for index, label in enumerate(set(train_labels))}

spectrograms[:train_size,1] = [label_mapping.get(label, -1)+1 for label in train_labels]
spectrograms[train_size:,1] = [label_mapping.get(label, -1)+1 for label in val_labels]

validation_set = []
training_set = []

#Split data 
for i, (spectrogram, label) in enumerate(spectrograms):
    shape = np.shape(spectrogram)
    if shape[1] >= 400:
        validation_data = spectrogram[:, :200]
        validation_set.append([validation_data, label])
        
        remaining_data = spectrogram[:, 200:]
        num_chunks = remaining_data.shape[1] // 200
        if num_chunks > 0:
            chunks = np.split(remaining_data[:, :num_chunks*200], num_chunks, axis=1)
            for chunk in chunks:
                training_set.append([chunk, label])


In [45]:
#Change this to using the dynamic class at some point
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(51200, num_classes)
        self.fc2 = nn.Linear(num_classes, num_classes)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [46]:

#Load data into batches of 32
batch_size = 32
train_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)

# Initialize the CNN and use +1 for classes due to "no class" being labeled as -1
num_classes = len(set(train_labels))+1
cnn = CNN(num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)

# Training loop and attempt to use cuda
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn.to(device)

for epoch in range(num_epochs):
    print("epoch : ", epoch)
    cnn.train()
    train_loss = 0.0
    train_correct = 0
    i = 0 
    for images, labels in train_loader:
        #load data onto device, either gpu or cpu
        images = images.unsqueeze(1).to(device) 
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        train_correct += (predicted == labels).sum().item()
        i+=1
        
    # Validation loop
    cnn.eval()
    val_loss = 0.0
    val_correct = 0
    
    with torch.no_grad(): #Disables some calculations, used to reduce memory.
        for images, labels in val_loader:
        #load data onto device, either gpu or cpu
            images = images.unsqueeze(1).to(device)
            labels = labels.to(device)
            
            outputs = cnn(images)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == labels).sum().item()
    
    train_loss /= len(train_loader.dataset)
    train_acc = train_correct / len(train_loader.dataset)
    val_loss /= len(val_loader.dataset)
    val_acc = val_correct / len(val_loader.dataset)
    
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

139
12
epoch :  0
Epoch 1/10:
Train Loss: 2256.2163 | Train Acc: 0.0339
Val Loss: 3.9896 | Val Acc: 0.0300
epoch :  1
Epoch 2/10:
Train Loss: 3.9373 | Train Acc: 0.0498
Val Loss: 3.9489 | Val Acc: 0.0300
epoch :  2
Epoch 3/10:
Train Loss: 3.8754 | Train Acc: 0.0738
Val Loss: 3.9106 | Val Acc: 0.1172
epoch :  3
Epoch 4/10:
Train Loss: 3.8209 | Train Acc: 0.1036
Val Loss: 3.8821 | Val Acc: 0.1172
epoch :  4
Epoch 5/10:
Train Loss: 3.7747 | Train Acc: 0.1038
Val Loss: 3.8539 | Val Acc: 0.1172
epoch :  5
Epoch 6/10:
Train Loss: 3.7279 | Train Acc: 0.1048
Val Loss: 3.8435 | Val Acc: 0.1172
epoch :  6
Epoch 7/10:
Train Loss: 3.6918 | Train Acc: 0.1036
Val Loss: 3.8270 | Val Acc: 0.1144
epoch :  7
Epoch 8/10:
Train Loss: 3.6577 | Train Acc: 0.1038
Val Loss: 3.8090 | Val Acc: 0.1172
epoch :  8
Epoch 9/10:
Train Loss: 3.6277 | Train Acc: 0.1043
Val Loss: 3.7923 | Val Acc: 0.1172
epoch :  9
Epoch 10/10:
Train Loss: 3.6047 | Train Acc: 0.1043
Val Loss: 3.7845 | Val Acc: 0.1172


In [47]:
print(num_classes)

56
