In [None]:
import os
# The `source.*` imports and the relative 'data/' path used further down are
# resolved against the working directory, which is expected to be one level
# above the kernel's starting directory.  Only step up when the `source`
# package is not already visible: an unconditional relative chdir climbs one
# more level every time this cell is re-run (or when the kernel was launched
# from the project root to begin with).
if not os.path.isdir("source"):
    os.chdir("../")

In [None]:
import torch
from torch.utils.data import DataLoader
from torch.nn import TripletMarginLoss
import torch.optim as optim
from source.DataLoader import AudioDataset, collate_fn
from source.Model import SpeakerClassifier
from source.Frontend import MFCCTransform


# Pick the compute device once: the GPU when PyTorch can see one, the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tell the reader which of the two was chosen.
print(
    "CUDA is available! Training on GPU..."
    if device.type == "cuda"
    else "CUDA is not available. Training on CPU..."
)

In [None]:
# --- Experiment configuration -------------------------------------------------
# Values a reader is likely to tune, named once instead of inlined in the calls.
SEED = 42
BATCH_SIZE = 16
LEARNING_RATE = 0.001
TRIPLET_MARGIN = 1.0

# Set root directory to your dataset folder
root_directory = 'data/'

# Seed PyTorch's global RNG before the model is built and before the shuffling
# DataLoader is iterated, so weight initialisation and batch order repeat across
# "Restart Kernel -> Run All".
# NOTE(review): if AudioDataset draws triplets with Python's `random` or NumPy,
# those generators need seeding as well -- confirm against source/DataLoader.
torch.manual_seed(SEED)

# Dataset of audio files under `root_directory`, batched with the project's collate_fn.
audio_dataset = AudioDataset(root_directory, frontend=MFCCTransform)
audio_dataloader = DataLoader(
    audio_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)

# NOTE(review): the meaning of the positional 5 is defined by SpeakerClassifier
# (not visible here); input_size=13 presumably matches the MFCC feature
# dimension produced by MFCCTransform -- confirm before changing either.
model = SpeakerClassifier(5, input_size=13, device=device)
model.to(device)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
triplet_loss = TripletMarginLoss(margin=TRIPLET_MARGIN, p=2)

def train_model(epochs, dataloader, model, loss_function, optimizer, device):
    """Train ``model`` on (anchor, positive, negative) batches.

    Every batch is moved to ``device``, the three parts are passed through the
    model separately, and ``loss_function`` is applied to the three outputs
    before one optimizer step is taken.

    Args:
        epochs: Number of full passes over ``dataloader``.
        dataloader: Iterable yielding ``(anchors, positives, negatives)``
            triples whose items support ``.to(device)``.
        model: Module to optimise; it is switched to training mode.
        loss_function: Callable taking ``(anchor_out, positive_out, negative_out)``
            and returning a scalar loss tensor.
        optimizer: Optimizer bound to the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A list with the mean batch loss of each completed epoch. The list is
        empty when ``epochs`` is 0 or the dataloader yields no batches.
    """
    model.train()
    epoch_losses = []

    for epoch in range(epochs):
        running_loss = 0.0
        batch_count = 0

        for anchors, positives, negatives in dataloader:
            anchors = anchors.to(device)
            positives = positives.to(device)
            negatives = negatives.to(device)

            optimizer.zero_grad()
            anchor_outputs = model(anchors)
            positive_outputs = model(positives)
            negative_outputs = model(negatives)

            loss = loss_function(anchor_outputs, positive_outputs, negative_outputs)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batch_count += 1

        if batch_count == 0:
            # Nothing was yielded: further epochs would be no-ops as well, and
            # averaging over zero batches would divide by zero.
            print(f'Epoch {epoch+1}: dataloader yielded no batches, stopping')
            break

        # One line per epoch (not per batch) keeps the notebook output readable
        # and makes convergence across epochs visible.
        mean_loss = running_loss / batch_count
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}, Loss: {mean_loss:.4f}')

    return epoch_losses

# Launch training: 25 epochs using the loader, model, loss and optimizer built above.
train_model(
    epochs=25,
    dataloader=audio_dataloader,
    model=model,
    loss_function=triplet_loss,
    optimizer=optimizer,
    device=device,
)

In [None]:
# Release cached GPU memory and restart peak-usage tracking from the level
# reached after the release.
# Guarded because this notebook also runs on CPU-only machines, where resetting
# the peak statistics requires a CUDA context and errors out without one.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    # reset_peak_memory_stats() resets all peak counters; the deprecated
    # reset_max_memory_allocated() merely forwards to it with a FutureWarning,
    # so a single call after the cache release is sufficient.
    torch.cuda.reset_peak_memory_stats()