In [None]:
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
import torch
from tqdm import tqdm

In [None]:
def train(dataloader, model, device, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        dataloader: Mapping with ``"train"`` and ``"val"`` entries, each an
            iterable yielding ``(audio, label)`` batches.
        model: Module called as ``model(audio)``; switched to training mode
            at the start of every epoch.
        device: Target handed to ``.to()`` for every batch.
        criterion: Loss callable invoked as ``criterion(outputs, label)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` bracket each
            backward pass.
        epochs: Number of passes over ``dataloader["train"]``.

    Returns:
        ``(train_losses, val_losses)``: two lists holding one mean per-batch
        loss per epoch. An epoch that yields no training batches records
        ``nan`` instead of raising ``ZeroDivisionError``.
    """
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # validate() leaves the model in eval mode, so training mode has to
        # be re-enabled before every epoch.
        model.train()
        running_loss = 0.0
        batches_seen = 0
        prog_bar = tqdm(dataloader["train"], desc=f"Epoch {epoch + 1}/{epochs}", unit="batch")

        for audio, label in prog_bar:
            audio, label = audio.to(device), label.to(device)
            optimizer.zero_grad()

            outputs = model(audio)
            loss = criterion(outputs, label)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batches_seen += 1
            # Average over the batches processed so far. Dividing by the total
            # number of batches made the displayed loss start near zero and
            # creep upwards during the epoch.
            prog_bar.set_postfix(train_loss=running_loss / batches_seen)

        # Counting batches (instead of calling len()) also works for loaders
        # without __len__ and avoids a division by zero on an empty loader.
        avg_train_loss = running_loss / batches_seen if batches_seen else float("nan")
        train_losses.append(avg_train_loss)

        avg_val_loss = validate(dataloader["val"], model, device, criterion)
        val_losses.append(avg_val_loss)
        print(f"Epoch {epoch + 1}: Train Loss = {avg_train_loss:.4f}, Validation Loss = {avg_val_loss:.4f}")

    return train_losses, val_losses


def validate(dataloader, model, device, criterion):
    """Return the mean per-batch loss of ``model`` over ``dataloader``.

    The model is put in eval mode (and left there) and gradients are disabled
    while the batches are evaluated.

    Args:
        dataloader: Iterable yielding ``(audio, label)`` batches. It does not
            need to implement ``__len__``.
        model: Module called as ``model(audio)``.
        device: Target handed to ``.to()`` for every batch.
        criterion: Loss callable invoked as ``criterion(outputs, label)``.

    Returns:
        Sum of the per-batch losses divided by the number of batches, or
        ``nan`` when the loader yields no batches.
    """
    model.eval()
    val_loss = 0.0
    batches_seen = 0

    with torch.no_grad():
        for audio, label in dataloader:
            audio, label = audio.to(device), label.to(device)
            outputs = model(audio)
            loss = criterion(outputs, label)
            val_loss += loss.item()
            batches_seen += 1

    # An empty split (e.g. a very small dataset) previously raised
    # ZeroDivisionError here; report "no measurement" instead.
    if batches_seen == 0:
        return float("nan")
    return val_loss / batches_seen


def test(dataloader, model, device, criterion):
    model.eval()
    test_loss = 0.0

    with torch.no_grad():
        for audio, label in dataloader:
            audio, label = audio.to(device), label.to(device)
            outputs = model(audio)
            loss = criterion(outputs, label)
            test_loss += loss.item()

    avg_test_loss = test_loss / len(dataloader)
    print(f"Test Loss: {avg_test_loss:.4f}")
    return avg_test_loss

In [None]:
# Choose the compute device in order of preference: CUDA, then MPS, then CPU.
# The MPS probe is only evaluated when CUDA is unavailable.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

In [None]:
from data_generator import GenderDataset

# Fixed seed so the train/val/test partition is the same on every run;
# without it the held-out test set changes each time the cell executes.
SPLIT_SEED = 42
BATCH_SIZE = 32

dataset = GenderDataset("data")

# 70 % / 15 % / remainder split. The test share absorbs the rounding so the
# three sizes always sum to len(dataset).
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_set, val_set, test_set = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; evaluation order stays fixed.
dataloaders = {
    "train": DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True),
    "val": DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False),
    "test": DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False),
}

In [None]:
from models.lstm_torch import LSTMClassifier
from models.TCN_torch import TCNClassifier

# LSTM alternative, kept for reference:
# model = LSTMClassifier(input_size=20, hidden_size=1024, num_layers=2, num_classes=2)

# Constructor arguments for the TCN classifier, gathered in one place.
tcn_config = {
    "input_size": 20,
    "num_classes": 2,
    "channels": [64, 128, 256],
    "kernel_size": 3,
}
model = TCNClassifier(**tcn_config)
model.to(device)

In [None]:
from models.TCN_torch_custom import TinyTCN

# Rebinds the global `device` to the CPU, overriding whatever device an
# earlier cell selected, and replaces `model` with a TinyTCN placed on it.
device = torch.device("cpu")
model = TinyTCN(in_features=20)
model.to(device)

In [None]:
# NOTE(review): this import rebinds the name `TCNClassifier`, shadowing the
# class imported from models.TCN_torch in an earlier cell.
from models.TCN_w_cudaConv1d import TCNClassifier
# Replaces any `model` built by the previous cells with a default-constructed
# instance.
# NOTE(review): unlike the other model cells, this one never calls
# model.to(device) — confirm the model ends up on the same device the training
# loop moves its batches to.
model = TCNClassifier()

In [None]:
# Loss function: pick the line that matches the model built above.
# criterion = nn.BCELoss()  # lstm
criterion = nn.CrossEntropyLoss()  # TCN

# Optimizer: Adam over every parameter of the current `model`.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Run the training loop for 10 epochs and keep the per-epoch loss histories.
train_losses, val_losses = train(
    dataloader=dataloaders,
    model=model,
    device=device,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)

In [None]:
from post_processing import save_losses, plot_losses

# Evaluate once on the "test" loader, then hand all three loss records to the
# save and plot helpers.
test_loss = test(
    dataloader=dataloaders["test"],
    model=model,
    device=device,
    criterion=criterion,
)

save_losses(train_losses, val_losses, test_loss)
plot_losses(train_losses, val_losses, test_loss)

In [None]:
from gender_inference import predict_single

# Single recording to run through the current model.
audio_path = "./data/male/arctic_a0001(4).wav"

# The third argument is the device string given to predict_single; use "cuda"
# instead of "cpu" to run on a GPU:
# predict_single(model, audio_path, "cuda", ("f", "m"))
# NOTE(review): presumably this string has to match the device `model`
# currently lives on — confirm against predict_single.
predict_single(model, audio_path, "cpu", ("f", "m"))

In [None]:
# Serialise the whole model object (not only its state_dict) to model.pth.
# Loading such a file requires the model's class to be importable at load time.
torch.save(obj=model, f="model.pth")