In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from birdCLEFloaddata import load_audiofile,load_metadata,get_melspectrogram
from birdCLEFFunctions import Dynamic_CNN, Dynamic_CNN2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import Resize
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

plt.rcParams['figure.dpi'] = 600


In [3]:
#load data
path = r"C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCLEFproject\data\birdCLEF2023"

meta_data = load_metadata(path)
meta_data.head(10)


Unnamed: 0,latitude,longitude,common_name,rating,filename
0,4.3906,38.2788,African Bare-eyed Thrush,4.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
1,-2.9524,38.2921,African Bare-eyed Thrush,3.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
2,-2.9524,38.2921,African Bare-eyed Thrush,3.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
3,-2.9524,38.2921,African Bare-eyed Thrush,5.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
4,-2.9524,38.2921,African Bare-eyed Thrush,4.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
5,-2.9524,38.2921,African Bare-eyed Thrush,3.5,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
6,-2.9965,37.6244,African Bare-eyed Thrush,3.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
7,-4.0904,37.8807,African Bare-eyed Thrush,5.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
8,-4.0904,37.8807,African Bare-eyed Thrush,5.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...
9,4.8403,38.6988,African Bare-eyed Thrush,4.0,C:\Users\zhakk\Desktop\Uni\Kandidat\AML-BirdCL...


In [14]:
print(len(set(meta_data['common_name'])))
print()


264


In [22]:
print(len(set(meta_data['common_name'].unique())))

264


In [12]:
pd.set_option('display.max_rows', None)
print(meta_data['common_name'].value_counts())

Barn Swallow                        500
Willow Warbler                      500
Thrush Nightingale                  500
Western Yellow Wagtail              500
Common Sandpiper                    500
Wood Sandpiper                      486
Common Buzzard                      477
European Bee-eater                  437
Eurasian Hoopoe                     436
Common House-Martin                 425
Little Egret                        378
Common Bulbul                       293
Red-backed Scrub-Robin              281
Black Kite                          262
Great Egret                         252
Gray-backed Camaroptera             239
Red-rumped Swallow                  227
Sombre Greenbul                     199
Collared Sunbird                    181
Rattling Cisticola                  172
Black-backed Puffback               166
Cattle Egret                        166
Tawny-flanked Prinia                161
Cape Robin-Chat                     153
Egyptian Goose                      152


In [3]:
#Generate audio data
audio_data = []
for i in range(50):
    audio, sr = load_audiofile(meta_data['filename'][i])
    audio_data.append(audio)

In [3]:
#Generate spectrograms
spectrograms = []
for i in range(100):
    spectrograms.append([get_melspectrogram(meta_data['filename'][i]),meta_data['common_name'][i]])

spectrograms = np.asarray(spectrograms)


In [46]:
#load one of each birds data
spectrograms = []

for common_name in meta_data['common_name'].unique():
    index = meta_data.loc[meta_data['common_name'] == common_name].index[0] 
    spectrogram = get_melspectrogram(meta_data['filename'][index])  
    spectrograms.append([spectrogram, common_name])

spectrograms = np.asarray(spectrograms)

  spectrograms = np.asarray(spectrograms)
  spectrograms = np.asarray(spectrograms)


In [25]:
import random
#Random selection of 100 data points, each point is taken in groups of 5
spectrograms = []
selected_spots = random.sample(range(len(meta_data) - 4), 100)  

for i in selected_spots:
    for j in range(i, i + 4):
        spectrograms.append([get_melspectrogram(meta_data['filename'][j]), meta_data['common_name'][j]])

spectrograms = np.asarray(spectrograms)


  spectrograms = np.asarray(spectrograms)
  spectrograms = np.asarray(spectrograms)


In [47]:
#Split data into validation and test. (Need to look at this again, possible mistake in the validation data creation)
train_ratio = 0.8
train_size = int(train_ratio * len(spectrograms))

train_labels = spectrograms[:,1]

label_mapping = {label: index for index, label in enumerate(set(train_labels))}

spectrograms[:,1] = [label_mapping.get(label, -1)+1 for label in train_labels]

validation_set = []
training_set = []

#Split data 
for i, (spectrogram, label) in enumerate(spectrograms):
    shape = np.shape(spectrogram)
    if shape[1] >= 100:
        validation_data = spectrogram[:, :50]
        validation_set.append([validation_data, label])
        
        remaining_data = spectrogram[:, 50:]
        num_chunks = remaining_data.shape[1] // 50
        if num_chunks > 0:
            chunks = np.split(remaining_data[:, :num_chunks*50], num_chunks, axis=1)
            for chunk in chunks:
                training_set.append([chunk, label])
    else: print(i,label)


In [48]:
train_labels

array([28, 47, 247, 240, 229, 183, 4, 169, 18, 154, 59, 236, 143, 22, 126,
       221, 102, 90, 141, 56, 222, 241, 49, 118, 204, 99, 113, 122, 190,
       173, 243, 146, 78, 228, 117, 194, 185, 215, 32, 128, 11, 82, 119,
       246, 5, 217, 227, 256, 189, 152, 98, 48, 164, 230, 158, 81, 15, 55,
       191, 137, 238, 153, 264, 96, 125, 140, 60, 192, 111, 193, 155, 263,
       159, 253, 89, 248, 46, 67, 21, 249, 103, 26, 139, 184, 66, 260,
       244, 33, 218, 168, 101, 16, 87, 3, 91, 177, 216, 182, 175, 88, 70,
       83, 261, 187, 105, 30, 10, 195, 257, 242, 145, 106, 258, 79, 112,
       123, 41, 170, 134, 196, 63, 165, 203, 209, 14, 144, 39, 254, 136,
       208, 114, 150, 163, 181, 133, 71, 61, 179, 174, 127, 245, 188, 197,
       29, 8, 234, 239, 13, 34, 237, 6, 17, 64, 121, 7, 36, 131, 19, 206,
       231, 138, 42, 156, 109, 73, 172, 259, 69, 147, 232, 157, 110, 43,
       20, 94, 129, 186, 176, 160, 161, 9, 74, 148, 51, 250, 104, 251,
       262, 1, 62, 213, 85, 65, 68, 132, 212,

In [40]:
#Change this to using the dynamic class at some point
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(12288, num_classes)
        self.fc2 = nn.Linear(num_classes, num_classes)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [41]:

#Load data into batches of 32
batch_size = 16
train_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)

# Initialize the CNN and use +1 for classes due to "no class" being labeled as -1
num_classes = len(set(train_labels))+1
cnn = CNN(num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=0.001)

# Training loop and attempt to use cuda
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn.to(device)

for epoch in range(num_epochs):
    print("epoch : ", epoch)
    cnn.train()
    train_loss = 0.0
    train_correct = 0
    i = 0 
    for images, labels in train_loader:
        #load data onto device, either gpu or cpu
        images = images.unsqueeze(1).to(device) 
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        train_correct += (predicted == labels).sum().item()
        i+=1
        
    # Validation loop
    cnn.eval()
    val_loss = 0.0
    val_correct = 0
    
    with torch.no_grad(): #Disables some calculations, used to reduce memory.
        for images, labels in val_loader:
        #load data onto device, either gpu or cpu
            images = images.unsqueeze(1).to(device)
            labels = labels.to(device)
            
            outputs = cnn(images)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == labels).sum().item()
    
    train_loss /= len(train_loader.dataset)
    train_acc = train_correct / len(train_loader.dataset)
    val_loss /= len(val_loader.dataset)
    val_acc = val_correct / len(val_loader.dataset)
    
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

epoch :  0
Epoch 1/10:
Train Loss: 505.1963 | Train Acc: 0.1772
Val Loss: 8.1931 | Val Acc: 0.1970
epoch :  1
Epoch 2/10:
Train Loss: 3.8682 | Train Acc: 0.2056
Val Loss: 8.7248 | Val Acc: 0.1932
epoch :  2
Epoch 3/10:
Train Loss: 3.7849 | Train Acc: 0.2077
Val Loss: 10.5389 | Val Acc: 0.1629
epoch :  3
Epoch 4/10:
Train Loss: 3.6802 | Train Acc: 0.2121
Val Loss: 11.6974 | Val Acc: 0.1932
epoch :  4
Epoch 5/10:
Train Loss: 3.6158 | Train Acc: 0.2138
Val Loss: 13.7607 | Val Acc: 0.2008
epoch :  5
Epoch 6/10:
Train Loss: 3.5458 | Train Acc: 0.2193
Val Loss: 14.5244 | Val Acc: 0.1780
epoch :  6
Epoch 7/10:
Train Loss: 3.4925 | Train Acc: 0.2137
Val Loss: 12.5247 | Val Acc: 0.1970
epoch :  7
Epoch 8/10:
Train Loss: 3.4952 | Train Acc: 0.2177
Val Loss: 12.1181 | Val Acc: 0.1818
epoch :  8
Epoch 9/10:
Train Loss: 3.4613 | Train Acc: 0.2243
Val Loss: 14.2698 | Val Acc: 0.1477
epoch :  9
Epoch 10/10:
Train Loss: 3.4898 | Train Acc: 0.2236
Val Loss: 16.6115 | Val Acc: 0.1136


In [49]:
print(num_classes)

212
