In [10]:
import os
import pandas as pd
import torchaudio
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch

torch.manual_seed(42)

class BirdDataset(Dataset):
    def __init__(self, root_dir, csv_file, class_info_file='class_info.csv', transform=None):
        self.labels_frame = pd.read_csv(os.path.join(root_dir, csv_file))
        self.root_dir = os.path.join(root_dir, "train" if "train" in csv_file else "test")
        self.transform = transform or T.MelSpectrogram()
        self.classes = pd.read_csv(os.path.join(root_dir, class_info_file))['class name'].tolist()
        self.num_classes = len(self.classes)

    def __getitem__(self, idx):
        # Load, resample and apply transformations here
        audio_name = os.path.join(self.root_dir, self.labels_frame.iloc[idx, 0])
        waveform, rate = torchaudio.load(audio_name, normalize=True)
        waveform = self.transform(waveform)

        # Handle multilabel cases for training data
        if 'class' in self.labels_frame.columns:
            labels = self.labels_frame.iloc[idx].get('class').split(',')
            target = np.zeros(self.num_classes, dtype=np.float32)
            for label in labels:
                target[self.classes.index(label)] = 1.0
            return waveform, torch.tensor(target)
        return waveform
    
    def __len__(self):
        return len(self.labels_frame)

train_dataset = BirdDataset("data", "train.csv")
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [11]:
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=5)
        self.pool = nn.MaxPool1d(kernel_size=4)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=5)
        self.fc1_size = 32 * 9998
        self.fc1 = nn.Linear(self.fc1_size, 64)
        self.fc2 = nn.Linear(64, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, self.fc1_size)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.sigmoid(x)

model = SimpleCNN(len(train_dataset.classes)).cuda()


In [12]:
import torch.optim as optim

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10

for epoch in range(epochs):
    for waveforms, labels in train_loader:
        waveforms, labels = waveforms.cuda(), labels.cuda()
        optimizer.zero_grad()
        
        outputs = model(waveforms)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
        
    print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item()}")


Epoch 1/10 Loss: 0.05744423344731331
Epoch 2/10 Loss: 0.062306180596351624
Epoch 3/10 Loss: 0.060677461326122284
Epoch 4/10 Loss: 0.06008269637823105
Epoch 5/10 Loss: 0.056424882262945175
Epoch 6/10 Loss: 0.05195903033018112
Epoch 7/10 Loss: 0.059545136988162994
Epoch 8/10 Loss: 0.05917944014072418
Epoch 9/10 Loss: 0.059189558029174805
Epoch 10/10 Loss: 0.05804351717233658


In [14]:
from sklearn.metrics import f1_score

model.eval()
all_preds = []
all_labels = []

test_dataset = BirdDataset("data", "train.csv")  # Use train dataset for evaluation (change later)
test_loader = DataLoader(test_dataset, batch_size=32)

model.eval()
all_preds = []
all_labels_list = []

with torch.no_grad():
    for waveforms, labels in test_loader:
        waveforms = waveforms.cuda()
        outputs = model(waveforms)
        preds = (outputs > 0.5).float()
        all_preds.extend(preds.cpu().numpy())
        all_labels_list.extend(labels.cpu().numpy())

f1 = f1_score(all_labels_list, all_preds, average='weighted')
print(f"F1 Score: {f1}")

F1 Score: 0.004644682633297434
