In [2]:
import torch
from torch.utils.data import Dataset

class SpectrogramDataset(Dataset):
    """In-memory dataset pairing mixture spectrograms with their source spectrograms.

    Both inputs are converted to ``float32`` tensors once, at construction
    time, so indexing only slices tensors that are already held in memory.
    """

    def __init__(self, X, y):
        """
        Args:
            X (np.ndarray): mixture spectrograms, shape (B, 128, 800)
            y (np.ndarray): source spectrograms, shape (B, 4, 128, 800)

        Raises:
            ValueError: if ``X`` and ``y`` do not hold the same number of samples.
        """
        # Fail fast: a length mismatch would otherwise surface as an IndexError
        # in the middle of an epoch (y shorter) or silently drop targets (y longer).
        if len(X) != len(y):
            raise ValueError(
                "X and y must contain the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of (mixture, sources) pairs."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(mixture, sources)`` tensor pair stored at ``idx``."""
        return self.X[idx], self.y[idx]


In [None]:
from torch.utils.data import DataLoader
import numpy as np

# Load the preprocessed mixture (X) and source (y) spectrogram arrays
X_train = np.load('../data/processed/X_train.npy')
y_train = np.load('../data/processed/y_train.npy')
X_test = np.load('../data/processed/X_test.npy')
y_test = np.load('../data/processed/y_test.npy')

# Wrap the arrays in datasets
train_dataset = SpectrogramDataset(X_train, y_train)
test_dataset = SpectrogramDataset(X_test, y_test)

# Build the DataLoaders. Training batches are reshuffled every epoch; the test
# loader keeps the on-disk order so evaluation runs are reproducible.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)


In [4]:
class SI_SDR_Loss(nn.Module):
    """Negative scale-invariant signal-to-distortion ratio (SI-SDR) loss.

    Each (batch, source) entry is flattened to a vector, mean-centred, and
    compared with the projection of the prediction onto its target. The
    per-entry SI-SDR values (in dB) are averaged and negated so that
    minimizing the loss maximizes SI-SDR.

    Args:
        eps: small constant added to numerators/denominators and inside the
            logarithm to avoid division by zero and ``log10(0)``.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps

    def forward(self, predictions, targets):
        """
        Args:
            predictions: tensor of shape [B, S, F, T] (batch, sources, freq_bins, time).
                Any layout [B, S, ...] with at least one trailing dimension is
                accepted; trailing dimensions are flattened.
            targets: tensor with exactly the same shape as ``predictions``.
        Returns:
            SI-SDR loss (negative SI-SDR for minimization), a scalar tensor.
        Raises:
            ValueError: if the shapes differ or the input has fewer than 3 dimensions.
        """
        # A layout mismatch with the same element count (e.g. swapped F/T axes)
        # would survive the flattening below and yield a meaningless loss.
        if predictions.shape != targets.shape:
            raise ValueError(
                "predictions and targets must have the same shape, "
                f"got {tuple(predictions.shape)} and {tuple(targets.shape)}"
            )
        if predictions.dim() < 3:
            raise ValueError(
                "expected input of shape [B, S, ...] with at least 3 dimensions, "
                f"got {predictions.dim()}"
            )

        # Reshape to [B*S, N] where N is the product of all trailing dimensions
        batch_size, num_sources = predictions.shape[:2]
        predictions = predictions.reshape(batch_size * num_sources, -1)
        targets = targets.reshape(batch_size * num_sources, -1)

        # Zero-mean normalization
        predictions = predictions - torch.mean(predictions, dim=-1, keepdim=True)
        targets = targets - torch.mean(targets, dim=-1, keepdim=True)

        # Optimal scaling of the target: projection of the prediction onto it
        alpha = (torch.sum(predictions * targets, dim=-1, keepdim=True) + self.eps) / (
            torch.sum(targets ** 2, dim=-1, keepdim=True) + self.eps)
        scaled_target = alpha * targets

        # Energy of the scaled target over energy of the residual, in dB
        si_sdr = torch.sum(scaled_target ** 2, dim=-1) / (
            torch.sum((predictions - scaled_target) ** 2, dim=-1) + self.eps)
        si_sdr = 10 * torch.log10(si_sdr + self.eps)

        # Return negative mean for loss minimization
        return -si_sdr.mean()

In [None]:
# Train on the GPU when one is available; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SpectrogramSeparator().to(device)
criterion = SI_SDR_Loss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

num_epochs = 16
best_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")

    for X_batch, y_batch in pbar:
        # Move the batch to the training device as float32 in a single call
        X_batch = X_batch.to(device, dtype=torch.float32)
        y_batch = y_batch.to(device, dtype=torch.float32)

        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        pbar.set_postfix({"SI-SDR Loss": f"{loss.item():.4f}"})

    # Mean training loss over the batches of this epoch
    avg_loss = epoch_loss / len(train_loader)

    # Save best model.
    # NOTE(review): "best" is judged on the training loss; no validation pass
    # is run in this loop.
    if avg_loss < best_loss:
        best_loss = avg_loss
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, '../results/models/model.pth')

Epoch 1/16:   0%|                                                                            | 0/100 [00:00<?, ?batch/s]