In [3]:
import os
import torch
from torch.utils.data import Dataset


class YesNoDataset(Dataset):
    def __init__(self, base_dir, test_list_path=None, return_label_name=False, train=True):
        self.filepaths = []
        self.labels = []
        self.label_to_index = {"no": 0, "yes": 1}
        self.index_to_label = {0: "no", 1: "yes"}
        self.return_label_name = return_label_name

        included_files = set()

        # Load filenames from testing_list.txt if provided
        if test_list_path and os.path.exists(test_list_path):
            with open(test_list_path, "r") as f:
                for line in f:
                    if line.startswith(("yes/", "no/")):
                        file_name = os.path.splitext(os.path.basename(line.strip()))[0] + ".pt"
                        included_files.add(file_name)

        for label in ["no", "yes"]:
            label_path = os.path.join(base_dir, label)
            if not os.path.isdir(label_path):
                continue
            for file in os.listdir(label_path):
                if file.endswith(".pt"):
                    if (train and file not in included_files) or (not train and file in included_files):
                        self.filepaths.append(os.path.join(label_path, file))
                        self.labels.append(self.label_to_index[label])

    def __len__(self):
        return len(self.filepaths)

    def __getitem__(self, idx):
        spectrogram = torch.load(self.filepaths[idx])
        label_index = self.labels[idx]
        return spectrogram, (
            self.index_to_label[label_index] if self.return_label_name else label_index
        )


In [4]:
def pad_collate(batch):
    specs, labels = zip(*batch)
    max_len = max(spec.shape[-1] for spec in specs)
    padded_specs = []

    for spec in specs:
        pad_len = max_len - spec.shape[-1]
        padded_spec = torch.nn.functional.pad(spec, (0, pad_len), value=0)
        padded_specs.append(padded_spec)

    return torch.stack(padded_specs), torch.tensor(labels)

In [5]:
from torch.utils.data import DataLoader

base_path = "../data/processed/train/audio"
test_list_path = "../data/raw/train/testing_list.txt"

train_dataset = YesNoDataset(base_path, test_list_path=test_list_path, return_label_name=False)
train_loader = DataLoader(
    train_dataset, batch_size=16, shuffle=True, collate_fn=pad_collate
)
test_loader = DataLoader(
    YesNoDataset(base_path, test_list_path=test_list_path, return_label_name=False, train=False),
    batch_size=16, shuffle=False, collate_fn=pad_collate
)

for spectrograms, labels in train_loader:
    print("Spectrogram shape:", spectrograms.shape)
    print("Labels:", labels)
    break

Spectrogram shape: torch.Size([16, 128, 81])
Labels: tensor([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1])


  spectrogram = torch.load(self.filepaths[idx])


In [6]:
len(train_loader.dataset), len(test_loader.dataset)

(4244, 508)

In [22]:
# ⚡ Check overlap between training and testing files
def check_train_test_overlap(train_dataset, test_dataset):
    # Récupère les noms de fichiers sans chemin
    train_files = set(os.path.basename(path) for path in train_dataset.filepaths)
    test_files = set(os.path.basename(path) for path in test_dataset.filepaths)

    # Intersection entre les deux sets
    overlap = train_files.intersection(test_files)

    if overlap:
        print(f"❌ Found {len(overlap)} overlapping files between train and test!")
        for f in list(overlap)[:10]:  # Affiche quelques exemples
            print(f"- {f}")
    else:
        print("✅ No overlap detected between train and test files.")


# Utilisation
check_train_test_overlap(train_loader.dataset, test_loader.dataset)

✅ No overlap detected between train and test files.


In [7]:
yes_base_path = "../data/processed/train/audio/yes"
number_of_yes_files = len([f for f in os.listdir(yes_base_path) if f.endswith(".pt")])
print(f"Number of 'yes' files: {number_of_yes_files}")
no_base_path = "../data/processed/train/audio/no"
number_of_no_files = len([f for f in os.listdir(no_base_path) if f.endswith(".pt")])
print(f"Number of 'no' files: {number_of_no_files}")
print(f"we have removed {number_of_yes_files + number_of_no_files - 266*16} files from the train dataset to put them in the test set") 


Number of 'yes' files: 2377
Number of 'no' files: 2375
we have removed 496 files from the train dataset to put them in the test set


In [26]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from models import CNNRNNClassifier


model = CNNRNNClassifier(
    num_classes=2, input_freq_bins=128
)  


In [8]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from models import CNNRNNClassifier
from mlflow_tracking.tracking_utils import (
    mlflow_run,
    log_metrics,
    log_learning_curves,
    log_confusion_matrix,
    log_checkpoint_model,
)

In [9]:
import os 
config = {
    "experiment_name": "speech-commands-yesno",
    "run_name": "cnn-rnn-v1",
    
    "registered_model_name": "YesNoClassifier",
    "num_epochs": 15,
    "learning_rate": 0.001,
    "batch_size": 32,
}


In [10]:
class EarlyStopper:
    def __init__(self, patience=2, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.best_epoch = -1

    def should_stop(self, score, epoch):
        if self.best_score is None or score > self.best_score + self.min_delta:
            self.best_score = score
            self.best_epoch = epoch
            self.counter = 0
            return False
        else:
            self.counter += 1
            return self.counter >= self.patience




In [40]:
import time 

@mlflow_run(config)
def train():
    model = CNNRNNClassifier(num_classes=2, input_freq_bins=128)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []

    early_stopper = EarlyStopper(patience=5, min_delta=0.001)
    best_model_path = None

    for epoch in range(config["num_epochs"]):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time()

        for X, y in train_loader:
            X, y = X.to(device), torch.tensor(y).to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += predicted.eq(y).sum().item()
            total += y.size(0)

        avg_loss = running_loss / len(train_loader)
        train_acc = correct / total
        elapsed_time = time.time() - start_time

        model.eval()

        with torch.no_grad():
            test_correct = 0
            test_total = 0
            test_running_loss = 0.0
            for X_test, y_test in test_loader:
                X_test, y_test = X_test.to(device), y_test.to(device)
                out = model(X_test)
                loss = criterion(out, y_test)  
                test_running_loss += loss.item()  
                pred = out.argmax(dim=1)
                test_correct += pred.eq(y_test).sum().item()
                test_total += y_test.size(0)
        test_acc = test_correct / test_total
        test_loss = test_running_loss / len(test_loader) 

        test_losses.append(test_loss)
        train_losses.append(avg_loss)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        log_metrics(
            {
                "loss": avg_loss,
                "test_loss": test_loss,
                "accuracy": train_acc,
                "test_accuracy": test_acc,
                "epoch_time_sec": elapsed_time,
            },
            step=epoch,
        )

        print(f"Epoch {epoch+1}/{config['num_epochs']}, Time: {elapsed_time:.2f}s")
        print(f"Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

        best_model_path = log_checkpoint_model(model, epoch)

        if early_stopper.should_stop(test_acc, epoch):
            print(
                f"Early stopping at epoch {epoch+1} — no improvement for {early_stopper.patience} epochs."
            )
            break

    log_learning_curves(
        train_metrics={"loss": train_losses, "accuracy": train_accuracies},
        val_metrics={"loss": test_losses, "accuracy": test_accuracies},
    )

    return {"model": model, "best_model_path": best_model_path}

In [41]:
train() ## 1st run 

2025/04/26 13:14:10 INFO mlflow.tracking.fluent: Experiment with name 'speech-commands-yesno' does not exist. Creating a new experiment.
  spectrogram = torch.load(self.filepaths[idx])
  X, y = X.to(device), torch.tensor(y).to(device)


Epoch 1/15, Time: 109.28s
Train Acc: 0.8504, Test Acc: 0.7264
Epoch 2/15, Time: 36.83s
Train Acc: 0.9489, Test Acc: 0.7087
Epoch 3/15, Time: 34.89s
Train Acc: 0.9574, Test Acc: 0.9409
Epoch 4/15, Time: 29.54s
Train Acc: 0.9616, Test Acc: 0.8307
Epoch 5/15, Time: 28.30s
Train Acc: 0.9715, Test Acc: 0.7480
Epoch 6/15, Time: 33.05s
Train Acc: 0.9713, Test Acc: 0.8425
Epoch 7/15, Time: 32.25s
Train Acc: 0.9760, Test Acc: 0.7795
Epoch 8/15, Time: 28.93s
Train Acc: 0.9750, Test Acc: 0.6732
Early stopping at epoch 8 — no improvement for 5 epochs.


Successfully registered model 'YesNoClassifier'.
Created version '1' of model 'YesNoClassifier'.


{'model': CNNRNNClassifier(
   (cnn): Sequential(
     (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): ReLU()
     (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
     (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (6): ReLU()
     (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
   )
   (rnn): GRU(2048, 128, batch_first=True, bidirectional=True)
   (classifier): Sequential(
     (0): Linear(in_features=256, out_features=128, bias=True)
     (1): ReLU()
     (2): Dropout(p=0.3, inplace=False)
     (3): Linear(in_features=128, out_features=2, bias=True)
   )
 ),
 'best_model_path': 'checkpoints\\model_epoch_7.pth'}

In [44]:
path = "../mlruns/758780538974589917/2da99e3c395442c290dfb6c0b5545c7c/artifacts/checkpoints/model_epoch_7.pth"
model = CNNRNNClassifier(num_classes=2, input_freq_bins=128)
model.load_state_dict(torch.load(path))
model.eval()

  model.load_state_dict(torch.load(path))


CNNRNNClassifier(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (rnn): GRU(2048, 128, batch_first=True, bidirectional=True)
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=2, bias=True)
  )
)

In [46]:
# ✅ Check test_loader settings
print(
    "Test Loader shuffle:",
    (
        test_loader.shuffle
        if hasattr(test_loader, "shuffle")
        else "Unknown (probably False)"
    ),
)
print(
    "Test Loader drop_last:",
    (
        test_loader.drop_last
        if hasattr(test_loader, "drop_last")
        else "Unknown (probably False)"
    ),
)

# ✅ Check one batch shapes
for X_test, y_test in test_loader:
    print(
        f"One test batch - inputs shape: {X_test.shape}, labels shape: {y_test.shape}"
    )
    break

# ✅ Check model in eval mode during test
print("Model training mode during evaluation:", model.training)

Test Loader shuffle: Unknown (probably False)
Test Loader drop_last: False
One test batch - inputs shape: torch.Size([16, 128, 81]), labels shape: torch.Size([16])
Model training mode during evaluation: False


  spectrogram = torch.load(self.filepaths[idx])


In [47]:
for X_test, y_test in test_loader:
    print(X_test)

  spectrogram = torch.load(self.filepaths[idx])


tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [2.0681e+01, 2.9830e+01, 1.3159e+00,  ..., 7.1225e+00,
          1.1014e+01, 1.4166e+00],
         [1.1135e+02, 1.6061e+02, 7.0849e+00,  ..., 3.8349e+01,
          5.9303e+01, 7.6273e+00],
         ...,
         [1.1978e+02, 1.5721e+01, 1.7837e+01,  ..., 5.6106e+01,
          1.2050e+02, 2.5192e+01],
         [2.2352e+01, 2.2527e+01, 8.1026e+01,  ..., 3.3934e+01,
          7.2726e+01, 8.3601e+01],
         [4.7812e+01, 3.4360e+01, 5.2796e+01,  ..., 4.8625e+01,
          7.9423e+01, 5.1802e+01]],

        [[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [7.2875e+00, 8.5712e+00, 5.6708e+00,  ..., 2.7118e+01,
          1.4740e+00, 2.3414e+01],
         [3.9238e+01, 4.6150e+01, 3.0533e+01,  ..., 1.4601e+02,
          7.9366e+00, 1.2607e+02],
         ...,
         [6.6197e+00, 4.9514e+01, 6.1003e+01,  ..., 8.9917e+01,
          3.263

In [1]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from models import CNNRNNClassifierDropout


model = CNNRNNClassifierDropout(num_classes=2, input_freq_bins=128)

In [None]:
import time
import torch.optim.lr_scheduler as lr_scheduler


@mlflow_run(config)
def train_v2():
    model = CNNRNNClassifierDropout(num_classes=2, input_freq_bins=128)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(), lr=config["learning_rate"], weight_decay=1e-4
    )
    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", patience=3, factor=0.5, verbose=True
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []

    early_stopper = EarlyStopper(patience=5, min_delta=0.001)
    best_model_path = None

    for epoch in range(config["num_epochs"]):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time()
        print("Epoch:", epoch + 1)
        
        for X, y in train_loader:
            
            X, y = X.to(device), torch.tensor(y).to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += predicted.eq(y).sum().item()
            total += y.size(0)

        avg_loss = running_loss / len(train_loader)
        train_acc = correct / total
        elapsed_time = time.time() - start_time

        model.eval()

        with torch.no_grad():
            test_correct = 0
            test_total = 0
            test_running_loss = 0.0
            for X_test, y_test in test_loader:
                X_test, y_test = X_test.to(device), y_test.to(device)
                out = model(X_test)
                loss = criterion(out, y_test)
                test_running_loss += loss.item()
                pred = out.argmax(dim=1)
                test_correct += pred.eq(y_test).sum().item()
                test_total += y_test.size(0)
        test_acc = test_correct / test_total
        test_loss = test_running_loss / len(test_loader)

        test_losses.append(test_loss)
        train_losses.append(avg_loss)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        log_metrics(
            {
                "loss": avg_loss,
                "test_loss": test_loss,
                "accuracy": train_acc,
                "test_accuracy": test_acc,
                "epoch_time_sec": elapsed_time,
                "current_lr": optimizer.param_groups[0]["lr"],
            },
            step=epoch,
        )

        print(f"Epoch {epoch+1}/{config['num_epochs']}, Time: {elapsed_time:.2f}s")
        print(f"Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

        best_model_path = log_checkpoint_model(model, epoch)

        # ✨ Step the scheduler on test_acc (maximize mode)
        scheduler.step(test_acc)

        # ⏹️ Early stopping
        if early_stopper.should_stop(test_acc, epoch):
            print(
                f"Early stopping at epoch {epoch+1} — no improvement for {early_stopper.patience} epochs."
            )
            break

    # 📈 Plot learning curves
    log_learning_curves(
        train_metrics={"loss": train_losses, "accuracy": train_accuracies},
        val_metrics={"loss": test_losses, "accuracy": test_accuracies},
    )

    return {"model": model, "best_model_path": best_model_path}

In [12]:
train_v2() ## 2nd run

  spectrogram = torch.load(self.filepaths[idx])
  X, y = X.to(device), torch.tensor(y).to(device)


Epoch 1/15, Time: 35.16s
Train Acc: 0.8181, Test Acc: 0.9311
Epoch 2/15, Time: 34.70s
Train Acc: 0.9204, Test Acc: 0.9035
Epoch 3/15, Time: 33.33s
Train Acc: 0.9331, Test Acc: 0.9488
Epoch 4/15, Time: 32.76s
Train Acc: 0.9434, Test Acc: 0.9252
Epoch 5/15, Time: 33.33s
Train Acc: 0.9555, Test Acc: 0.9291
Epoch 6/15, Time: 34.13s
Train Acc: 0.9519, Test Acc: 0.8819
Epoch 7/15, Time: 35.55s
Train Acc: 0.9611, Test Acc: 0.9272
Epoch 8/15, Time: 35.62s
Train Acc: 0.9658, Test Acc: 0.9370
Early stopping at epoch 8 — no improvement for 5 epochs.


Registered model 'YesNoClassifier' already exists. Creating a new version of this model...
Created version '2' of model 'YesNoClassifier'.


{'model': CNNRNNClassifierDropout(
   (cnn): Sequential(
     (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (2): ReLU()
     (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
     (4): Dropout2d(p=0.2, inplace=False)
     (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (7): ReLU()
     (8): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
     (9): Dropout2d(p=0.2, inplace=False)
   )
   (rnn): GRU(2048, 128, batch_first=True, bidirectional=True)
   (dropout_rnn_out): Dropout(p=0.3, inplace=False)
   (classifier): Sequential(
     (0): Linear(in_features=256, out_features=128, bias=True)
     (1): ReLU()
     (2): Dropout(p=0.3, inplace=False)
     (3): Linear(in_features=128, o