In [None]:
import os
import torchaudio
from torch import nn
import seaborn as sns
from modules.models import RNNModel
from modules.dataset import CustomSpeechCommands, FeaturesDataset
from modules.trainer import train_model, show_curves, confusion_matrix_display, confusion_matrix


In [None]:
# ==== Paths ====
ROOT_DIR = 'data'
train_pt = os.path.join(ROOT_DIR, 'train.pt')
val_pt = os.path.join(ROOT_DIR, 'val.pt')
test_pt = os.path.join(ROOT_DIR, 'test.pt')
TRAIN_LIST = os.path.join(ROOT_DIR, "train_list.txt")
VAL_LIST = os.path.join(ROOT_DIR, "val_list.txt")
TEST_LIST = os.path.join(ROOT_DIR, "test_list.txt")

# ==== Feature cache ====
# Each split is checked on its own. Testing only for train.pt would skip
# extraction when val.pt or test.pt is missing (e.g. after an interrupted
# run), and the FeaturesDataset loads below would then fail.
missing_splits = [
    (list_path, pt_path)
    for list_path, pt_path in (
        (TRAIN_LIST, train_pt),
        (VAL_LIST, val_pt),
        (TEST_LIST, test_pt),
    )
    if not os.path.isfile(pt_path)
]

if missing_splits:
    mfcc_transform = torchaudio.transforms.MFCC(
        sample_rate=16000,
        n_mfcc=13,  # number of MFCC coefficients to extract
        # At 16 kHz: n_fft=320 samples is a 20 ms window, hop_length=160 samples
        # is a 10 ms hop; n_mels=23 is the number of mel filters.
        melkwargs={"n_fft": 320, "hop_length": 160, "n_mels": 23},
        log_mels=True,  # log-mel spectrogram instead of dB scaling
    )
    for list_path, pt_path in missing_splits:
        # NOTE(review): save_features presumably applies the transform to every
        # listed clip and writes the result to pt_path -- confirm in modules.dataset.
        CustomSpeechCommands(ROOT_DIR, list_path).save_features(mfcc_transform, pt_path)

# ==== Datasets ====
train_dataset = FeaturesDataset(train_pt)
test_dataset = FeaturesDataset(test_pt)
val_dataset = FeaturesDataset(val_pt)

print("¡Datasets cargados exitosamente!")
print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# Entrenamiento

In [None]:
# Sanity check before training: show the shape of the cached training features.
# NOTE(review): presumably (n_samples, n_mfcc, n_frames) or a permutation of it --
# confirm against FeaturesDataset.
print(train_dataset.features.shape)

In [None]:
# ==== Training configuration ====
lr = 5e-4
batch_size = 32
criterion = nn.CrossEntropyLoss()
n_trains = 5  # number of independent training runs of the same architecture
epochs = 40

# ==== Per-run outputs ====
results = {}            # run index -> curves returned by train_model
times_of_training = []  # one entry per run, as returned by train_model
models = []             # trained models in run order, used by the evaluation cell

for k in range(n_trains):
    # k is zero-based; print it one-based so the last run reads "n_trains/n_trains".
    print(f'Entrenando modelo {k + 1}/{n_trains}')
    # A fresh model is built for every run. n_input_channels=13 matches the
    # n_mfcc=13 used for feature extraction.
    # TODO: it may be worth studying the hidden size, i.e. shrinking it until
    # validation performance starts to degrade.
    model = RNNModel(rnn_type='LSTM', n_input_channels=13, hidd_size=64)
    all_curves, times = train_model(
        model,
        train_dataset,
        val_dataset,
        epochs,
        criterion,
        batch_size,
        lr,
        n_evaluations_per_epoch=3,
        use_gpu=True,
    )
    results[k] = all_curves
    times_of_training.append(times)
    models.append(model)

# Plot the curves of all runs together.
show_curves(results)


In [None]:
# Evaluate every trained model on the test split.
# `models` is the list filled by the training loop, so it has no .keys()/.items();
# tags are derived from each model's position in that list instead.
# New names are used so `models` is not overwritten and the cell can be re-run.
model_tags = [f'LSTM {k + 1}' for k in range(len(models))]
trained_models = list(models)
class_names = list(test_dataset.label_to_idx.keys())
# NOTE(review): assumes confusion_matrix_display takes parallel sequences of tags
# and models -- confirm against modules.trainer.
confusion_matrix_display(model_tags, trained_models, test_dataset, class_names)
