In [106]:
import sys
import os

# Make the project root (the parent of the notebook's working directory)
# importable so that `src.*` modules resolve.
project_root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)


In [107]:
import src.utils as utils

## Read YAML File for Model Params

In [108]:
import yaml
import json

# Load the model/training configuration from <project root>/config/model_config.yaml.
model_config_path = os.path.join(project_root_dir, 'config', 'model_config.yaml')
with open(model_config_path, "r") as f:
    # safe_load only builds plain Python objects (no arbitrary constructors).
    model_config = yaml.safe_load(f)

# Pretty-print the loaded configuration. The original referenced `config`,
# a name this notebook never defines, so it only worked through leftover
# kernel state and fails with NameError on a fresh kernel.
print(json.dumps(model_config, indent=2))

{
  "model": {
    "name": "lstm_audio_classifier",
    "type": "LSTM",
    "input_dim": 13,
    "hidden_dim": 128,
    "num_layers": 2,
    "output_dim": 10,
    "bidirectional": false,
    "dropout_rate": 0.3
  },
  "training": {
    "batch_size": 32,
    "epochs": 20,
    "learning_rate": 0.001,
    "optimizer": "adam",
    "loss_function": "cross_entropy",
    "early_stopping": true,
    "patience": 5
  },
  "data_splitting": {
    "test_size": 0.2,
    "random_state": 42
  },
  "dataset": {
    "name": "Free Spoken Digits Dataset",
    "path": "/home/pavan/Music/spectrum/free-spoken-digit-dataset/recordings",
    "preprocessing": {
      "denoise": false
    }
  },
  "output": {
    "save_dir": "outputs/models/",
    "log_dir": "outputs/logs/",
    "save_best_model": true,
    "model_filename": "lstm_audio_classifier.pt"
  },
  "hardware": {
    "device": "cuda"
  },
  "hardware_constraints": {
    "memory_size_limit": 36
  },
  "experiment": {
    "seed": 42
  }
}


## Fetch Data-Label Pairs List

In [109]:
# Dataset location, hold-out fraction and random seed, all read from the
# loaded configuration.
data_path, test_data_size, seed = (
    model_config['dataset']['path'],
    model_config['data_splitting']['test_size'],
    model_config['experiment']['seed'],
)

In [110]:
# Collect the data/label pairs for the recordings under `data_path`.
# The result is later split with train_test_split and wrapped in
# SpokenDigitDataset.
# NOTE(review): presumably a list of (audio file, digit label) pairs —
# confirm against src/utils.py.
data_label_pairs = utils.prepare_data_label_pairs(data_path)

In [111]:
from sklearn.model_selection import train_test_split
# Split the pairs into train and test subsets, using the configured test
# fraction and seed.
train_data, test_data = train_test_split(
    data_label_pairs,
    test_size=test_data_size,
    random_state=seed,
)

In [112]:
from src.data_preprocessor import SpokenDigitDataset

# Wrap each split in the project's dataset class (train first, then test).
train_dataset, test_dataset = (
    SpokenDigitDataset(split) for split in (train_data, test_data)
)

In [113]:
# Fetch one training sample (index 4) as a quick sanity check that the
# dataset can produce items. Subscripting is the idiomatic form of the
# original direct `__getitem__` call.
data = train_dataset[4]

In [114]:
# Report how many samples ended up in each split, one per line.
print(
    f"Train size: {len(train_dataset)}",
    f"Test size: {len(test_dataset)}",
    sep="\n",
)

Train size: 2400
Test size: 600


In [115]:
import torch

# Seed torch's global RNG with the configured experiment seed so that the
# shuffled batch order (and the weight initialisation in later cells) is
# reproducible under Restart & Run All.
torch.manual_seed(seed)

# Take the batch size from the config instead of hard-coding 32, so the
# notebook stays in sync with config/model_config.yaml.
batch_size = model_config['training']['batch_size']

# Shuffle only the training data; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [116]:
# Model hyper-parameters, read from the "model" section of the config.
input_dim, hidden_dim, num_layers, output_dim = (
    model_config['model'][key]
    for key in ('input_dim', 'hidden_dim', 'num_layers', 'output_dim')
)

In [117]:
# Use the device requested under "hardware.device" in the config, which the
# original ignored. If that section is missing, default to "cuda", and fall
# back to the CPU whenever CUDA is requested but unavailable. This matches
# the original behaviour for the current config.
requested_device = model_config.get('hardware', {}).get('device', 'cuda')
if requested_device.startswith('cuda') and not torch.cuda.is_available():
    requested_device = 'cpu'
device = torch.device(requested_device)

In [118]:
# Import the class directly. The original did `import src.model as model`
# and then rebound `model` to the network instance, which shadowed the
# module and made this cell fail with AttributeError when run a second time.
from src.model import LSTMClassifier
import torch.nn as nn
import torch.optim as optim

# Build the LSTM classifier from the configured dimensions and move it to
# the selected device.
model = LSTMClassifier(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    output_dim=output_dim,
).to(device)

In [119]:
# Optimisation settings, read from the "training" section of the config.
learning_rate, epochs = (
    model_config['training']['learning_rate'],
    model_config['training']['epochs'],
)

In [120]:
from src.train import ModelTrainer
# Set up the project trainer with the model, epoch count, training loader,
# device and learning rate.
# NOTE(review): the arguments are positional — confirm the order against
# ModelTrainer.__init__ in src/train.py.
trainer_instance = ModelTrainer(model, epochs, train_loader, device, learning_rate)

In [121]:
# Run the training loop on the training loader.
# NOTE(review): the recorded output below reports 5 epochs, while the config
# printed earlier in this notebook specifies "epochs": 20. The output looks
# stale — re-run with Restart & Run All before relying on these numbers.
trainer_instance.train()

Epoch [1/5], Loss: 133.6825, Accuracy: 69.04%
Epoch [2/5], Loss: 47.6737, Accuracy: 92.12%
Epoch [3/5], Loss: 17.8605, Accuracy: 96.58%
Epoch [4/5], Loss: 14.0454, Accuracy: 95.42%
Epoch [5/5], Loss: 6.7255, Accuracy: 98.96%


ImportError: cannot import name 'get_model_params_size' from 'src.utils' (/home/pavan/Music/spectrum/fsdd-speech-recognition/src/utils.py)