## Imports

In [1]:
from utils.SpokenDigitDataset import SpokenDigitDataset
from utils.DatasetSplitter import DatasetSplitter
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as functional
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset, random_split
import matplotlib.pyplot as plt
import librosa.display
import wandb
from utils.Metrics import multiclass_f1_score, multiclass_confusion_matrix
from models.lenet import LeNet5
import time



## Dataset

In [2]:
dataset = SpokenDigitDataset("data/audio")

# This will automatically perform the split upon creation
splitter = DatasetSplitter(
        dataset=dataset,
        split_ratios=(0.7, 0.15, 0.15) # Example: 70% train, 15% val, 15% test
)

# You can access the split datasets:
train_set = splitter.train_dataset
val_set = splitter.val_dataset
test_set = splitter.test_dataset

print(f"\nAccessed train_set (Subset): Size {len(train_set)}")
print(f"Accessed val_set (Subset): Size {len(val_set)}")
print(f"Accessed test_set (Subset): Size {len(test_set)}")

# You can configure the underlying dataset for each split
# For example, enable augmentation only for the training set


# Check the configuration of the underlying dataset for a split
# print(f"\nTrain dataset underlying config after configure_splits: Bilateral={train_set.dataset.bilateral}, Augment={train_set.dataset.augment}")
# print(f"Validation dataset underlying config after configure_splits: Bilateral={val_set.dataset.bilateral}, Augment={val_set.dataset.augment}")
# print(f"Test dataset underlying config after configure_splits: Bilateral={test_set.dataset.bilateral}, Augment={test_set.dataset.augment}")


# You can access the DataLoaders:
train_loader = splitter.train_dataloader
val_loader = splitter.val_dataloader
test_loader = splitter.test_dataloader


# splitter.configure_splits(bilateral=False, augment=False)
# x, y = train_set[0]
# print("Etiqueta:", y)
# plt.figure(figsize=(10, 4))
# librosa.display.specshow(x.squeeze().numpy(), sr=16000, x_axis='time', y_axis='mel')
# plt.colorbar(format='%+2.0f dB')
# plt.title('Log-Mel Spectrogram: Raw without augmentation')
# plt.tight_layout()
# plt.show()

# splitter.configure_splits(bilateral=True, augment=False)
# x, y = train_set[0]
# print("Etiqueta:", y)
# plt.figure(figsize=(10, 4))
# librosa.display.specshow(x.squeeze().numpy(), sr=16000, x_axis='time', y_axis='mel')
# plt.colorbar(format='%+2.0f dB')
# plt.title('Log-Mel Spectrogram: Bilateral without Augmentation')
# plt.tight_layout()
# plt.show()

# splitter.configure_splits(bilateral=False, augment=True)

# x, y = train_set[0]
# print("Etiqueta:", y)
# plt.figure(figsize=(10, 4))
# librosa.display.specshow(x.squeeze().numpy(), sr=16000, x_axis='time', y_axis='mel')
# plt.colorbar(format='%+2.0f dB')
# plt.title('Log-Mel Spectrogram: Raw with Augmentation')
# plt.tight_layout()
# plt.show()

# splitter.configure_splits(bilateral=True, augment=True)

# x, y = train_set[0]
# print("Etiqueta:", y)
# plt.figure(figsize=(10, 4))
# librosa.display.specshow(x.squeeze().numpy(), sr=16000, x_axis='time', y_axis='mel')
# plt.colorbar(format='%+2.0f dB')
# plt.title('Log-Mel Spectrogram: Bilateral with Augmentation')
# plt.tight_layout()
# plt.show()




Accessed train_set (Subset): Size 21000
Accessed val_set (Subset): Size 4500
Accessed test_set (Subset): Size 4500


## Modelo A: LeNet-5

#### Set Device

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Training

In [None]:
def train(training_data_loader, model, loss_function, optimizer, scheduler, num_classes=10):
    model.train()
    correct = 0
    total_f1 = 0
    losses = []
    for inputs, labels in training_data_loader:
        inputs, labels = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()
        out = model(inputs)
        loss = loss_function(out, labels)
        loss.backward()
        optimizer.step()

        losses.append(loss)
        total_f1 += multiclass_f1_score(out, labels, num_classes=num_classes)
        correct += (out.argmax(1) == labels).sum().item()
        
        

    mean_loss = sum(losses)/len(losses)
    mean_acc = correct/len(training_data_loader.dataset) * 100.
    mean_f1 = total_f1/len(training_data_loader.dataset)

    scheduler.step(mean_loss)

    return mean_loss, mean_acc, mean_f1


#### Validation

In [5]:
def validate(validation_data_loader, model, loss_function, num_classes=10):
    model.eval()
    with torch.no_grad():
        correct = 0
        total_f1 = 0
        losses = []
        for inputs, labels in validation_data_loader:
            inputs, labels = inputs.cuda(), labels.cuda()
            out = model(inputs)
            loss = loss_function(out, labels)
            losses.append(loss)
            total_f1 += multiclass_f1_score(out, labels, num_classes=num_classes)
            correct += (out.argmax(1) == labels).sum().item()
        
        mean_loss = sum(losses)/len(losses)
        mean_acc = correct/len(validation_data_loader.dataset) * 100.
        mean_f1 = total_f1/len(validation_data_loader.dataset)

        return mean_loss, mean_acc, mean_f1

#### Test

In [6]:
def test(test_data_loader, model, num_classes=10):
    model.eval()
    correct = 0
    conf_matrix = None
    total_f1 = 0
    with torch.no_grad():
        for inputs, labels in test_data_loader:
            inputs, labels = inputs.cuda(), labels.cuda()
            out = model(inputs)
            correct += (out.argmax(1) == labels).sum().item()
            total_f1 += multiclass_f1_score(out, labels, num_classes=num_classes)
            if conf_matrix!=None:
                conf_matrix = conf_matrix + multiclass_confusion_matrix(out, labels, num_classes=num_classes)
            else:
                conf_matrix = multiclass_confusion_matrix(out, labels, num_classes=num_classes)

        mean_acc = correct/len(test_data_loader.dataset) * 100.
        mean_f1 = total_f1/len(test_data_loader.dataset)
        return conf_matrix, mean_acc, mean_f1
        


#### Run Model

In [None]:
def run_lenet_model(epochs, lr, splitter, train_loader, val_loader, test_loader, bilateral=False, augmentation=False):
    wandb.init(
        project="Audio-mnist",
        name=f"lenet5-audio-{'No' if not bilateral else ''}Bi-{'No' if not augmentation else ''}Aug-run",
        config={
            "epochs": epochs,
            "batch_size": 32,
            "learning_rate": lr,
            "architecture": "LeNet5Audio",
            "input_size": "1x224x224",
            "num_classes": 10,
            "bilateral_filter": bilateral,
            "augmentation": augmentation
        }
    )
    splitter.configure_splits(wandb.config.bilateral_filter, wandb.config.augmentation)

    model = LeNet5(wandb.config.num_classes).to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=wandb.config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

    for epoch in range(wandb.config.epochs):
        start_time = time.time()
        val_loss, val_acc, val_f1 = validate(val_loader, model, loss_function)
        # val_loss, val_acc, val_f1 = 0, 0, 0
        mean_loss, mean_acc, mean_f1 = train(train_loader, model, loss_function, optimizer, scheduler)

        wandb.log({
            "epoch": epoch + 1,
            "train/loss": mean_loss,
            "train/accuracy": mean_acc,
            "train/f1": mean_f1,
            "validation/loss": val_loss,
            "validation/accuracy": val_acc,
            "validation/f1": val_f1
        })


        print(f"Epoch #{epoch+1} ({(time.time()-start_time):.1f}s):\n\tLoss={mean_loss:.4f}, Val_Loss={val_loss:.4f}, Acc={mean_acc:.2f}%, Val_Acc={val_acc:.2f}%")


    conf_matrix, test_acc, test_f1 = test(test_data_loader=test_loader, model=model)

    wandb.log({
        "confusion_matrix":  conf_matrix,
        "test/accuracy": test_acc,
        "test/f1_score": test_f1
    })



### No Augmentation and No Bilateral Filter

In [8]:
run_lenet_model(15, 0.00004, splitter, train_loader, val_loader, test_loader, bilateral=False, augmentation=False)

[34m[1mwandb[0m: Currently logged in as: [33mluiscantodd[0m ([33mluiscantodd-tec-costa-rica[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch #1 (1095.2s):
	Loss=0.5873, Val_Loss=0.0000, Acc=86.18%, Val_Acc=0.00%
Epoch #2 (1054.1s):
	Loss=0.1243, Val_Loss=0.0000, Acc=98.96%, Val_Acc=0.00%


KeyboardInterrupt: 

### Augmentation and No Bilateral Filter

In [None]:
run_lenet_model(18, 0.0000145, splitter, train_loader, val_loader, test_loader, bilateral=False, augmentation=True)

### No Augmentation and Bilateral Filter

In [None]:
run_lenet_model(15, 0.0001, splitter, train_loader, val_loader, test_loader, bilateral=True, augmentation=False)

### Augmentation and Bilateral Filter

In [None]:
run_lenet_model(15, 0.00008, splitter, train_loader, val_loader, test_loader, bilateral=True, augmentation=True)