In [1]:
import os
import warnings

import torch
from torch.utils.data import Dataset


class YesNoDataset(Dataset):
    """Dataset of pre-saved ``.pt`` spectrograms for the "no" / "yes" classes.

    ``base_dir`` is expected to contain ``no/`` and ``yes/`` sub-directories;
    every ``.pt`` file found directly inside them becomes one sample, unless it
    is listed in the optional test list.

    Args:
        base_dir: Directory holding the ``no/`` and ``yes/`` sub-directories.
            A missing sub-directory is skipped.
        test_list_path: Optional path to a text file with one
            ``<label>/<name>.<ext>`` entry per line. Entries whose label is
            ``yes`` or ``no`` exclude the ``.pt`` file with the same stem in
            the *same* label directory; all other lines are ignored. If the
            path is given but does not exist a warning is emitted and nothing
            is excluded.
        return_label_name: When true, ``__getitem__`` returns the label as the
            string ``"no"`` / ``"yes"`` instead of the integer index 0 / 1.

    Attributes:
        filepaths: Paths of the retained ``.pt`` files ("no" first, then
            "yes", each sorted by file name so indices are reproducible).
        labels: Integer label for each entry of ``filepaths``.
    """

    def __init__(self, base_dir, test_list_path=None, return_label_name=False):
        self.filepaths = []
        self.labels = []
        self.label_to_index = {"no": 0, "yes": 1}
        self.index_to_label = {0: "no", 1: "yes"}
        self.return_label_name = return_label_name

        excluded = self._read_excluded(test_list_path)

        # Load filepaths and labels
        for label in ("no", "yes"):
            label_path = os.path.join(base_dir, label)
            if not os.path.isdir(label_path):
                continue
            # os.listdir returns entries in arbitrary order; sort so that a
            # given index always refers to the same file across runs/machines.
            for file in sorted(os.listdir(label_path)):
                if file.endswith(".pt") and (label, file) not in excluded:
                    self.filepaths.append(os.path.join(label_path, file))
                    self.labels.append(self.label_to_index[label])

    def _read_excluded(self, test_list_path):
        """Return the set of ``(label, "<stem>.pt")`` pairs named in the test list."""
        excluded = set()
        if not test_list_path:
            return excluded

        if not os.path.exists(test_list_path):
            # Silently continuing would mix held-out samples into this dataset
            # without any hint, so make the situation visible.
            warnings.warn(
                f"Test list {test_list_path!r} not found; no files are excluded, "
                "so held-out test samples may be included in this dataset.",
                stacklevel=3,
            )
            return excluded

        with open(test_list_path, "r") as f:
            for line in f:
                # Strip first so leading whitespace / trailing newline do not
                # prevent the label prefix from being recognised.
                label, sep, rest = line.strip().partition("/")
                if sep and label in self.label_to_index:
                    stem = os.path.splitext(os.path.basename(rest))[0]
                    # Key by label as well: the same file name can exist in
                    # both class directories.
                    excluded.add((label, stem + ".pt"))
        return excluded

    def __len__(self):
        """Return the number of retained samples."""
        return len(self.filepaths)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk and return ``(spectrogram, label)``.

        The label is the class name when ``return_label_name`` is true,
        otherwise the integer class index.
        """
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a tampered .pt file cannot execute arbitrary code.
        spectrogram = torch.load(self.filepaths[idx], weights_only=True)
        label_index = self.labels[idx]
        if self.return_label_name:
            return spectrogram, self.index_to_label[label_index]
        return spectrogram, label_index

In [2]:
def pad_collate(batch):
    """Collate ``(spectrogram, label)`` pairs whose last dimension differs.

    Every spectrogram is zero-padded on the right of its last axis
    (presumably the time axis - confirm against the preprocessing) up to the
    longest one in the batch, then all are stacked along a new leading
    batch dimension.

    Args:
        batch: Non-empty sequence of ``(tensor, label)`` pairs.

    Returns:
        A tuple ``(stacked, labels)`` where ``stacked`` is the stacked padded
        tensor and ``labels`` is a plain list of the labels in batch order.
    """
    spectrograms, targets = zip(*batch)
    widest = max(item.shape[-1] for item in spectrograms)
    right_padded = [
        torch.nn.functional.pad(item, (0, widest - item.shape[-1]), value=0)
        for item in spectrograms
    ]
    return torch.stack(right_padded), list(targets)

In [3]:
from torch.utils.data import DataLoader

# Where the pre-processed spectrograms live, and the list of files held out
# for testing.
base_path = "../data/processed/train/audio"
test_list_path = "../data/raw/train/testing_list.txt"

# Build the dataset with string labels so the printed batch is human-readable.
dataset = YesNoDataset(
    base_path,
    test_list_path=test_list_path,
    return_label_name=True,
)
dataloader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=pad_collate,
)

# Sanity check: pull a single batch and show its shape and labels.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    spectrograms, labels = first_batch
    print("Spectrogram shape:", spectrograms.shape)
    print("Labels:", labels)

  spectrogram = torch.load(self.filepaths[idx])


Spectrogram shape: torch.Size([16, 128, 81])
Labels: ['no', 'yes', 'yes', 'yes', 'yes', 'no', 'no', 'no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'yes', 'no']


In [4]:
def count_pt_files(directory):
    """Return the number of ``.pt`` files directly inside ``directory``."""
    return sum(1 for f in os.listdir(directory) if f.endswith(".pt"))


yes_base_path = "../data/processed/train/audio/yes"
number_of_yes_files = count_pt_files(yes_base_path)
print(f"Number of 'yes' files: {number_of_yes_files}")
no_base_path = "../data/processed/train/audio/no"
number_of_no_files = count_pt_files(no_base_path)
print(f"Number of 'no' files: {number_of_no_files}")

# Files on disk minus samples kept by the dataset = files held out for testing.
# len(dataset) is the exact kept count; the former hard-coded 266*16
# (number of batches x batch size) over-counts whenever the last batch is
# partial and goes stale as soon as the data or batch size changes.
number_of_removed_files = number_of_yes_files + number_of_no_files - len(dataset)
print(f"we have removed {number_of_removed_files} files from the train dataset to put them in the test set")


Number of 'yes' files: 2377
Number of 'no' files: 2375
we have removed 496 files from the train dataset to put them in the test set


In [None]:
import sys
import os

# Make the parent of the current working directory importable so the local
# `models` module resolves. Guarded so that re-running this cell does not keep
# appending the same entry to sys.path.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from models import CNNRNNClassifier


# Two output classes ("no" / "yes"). input_freq_bins=128 presumably matches the
# 128-sized axis of the spectrogram batches printed earlier - confirm against
# the model definition.
model = CNNRNNClassifier(num_classes=2, input_freq_bins=128)

# Bare last expression so the notebook displays the model architecture.
model


CNNRNNClassifier(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (rnn): GRU(2048, 128, batch_first=True, bidirectional=True)
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=2, bias=True)
  )
)
