In [2]:
import sys
import os

# Add the project root (parent of current folder) to Python path
project_root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_root_dir)

## Load Model Configuration from YAML

To make the training pipeline configurable and modular, we store model parameters like number of LSTM layers, hidden size, and learning rate etc in a YAML file. This structure enables quick adaptation to related tasks B, and C.

This section loads the model configuration using a custom utility function.

In [3]:
import src.utils as utils

In [4]:
utils.set_seed(42)

[INFO] Random seed set to: 42


In [5]:
import yaml
import json

model_config_path = os.path.join(project_root_dir, 'config', 'model_config.yaml')
model_config = utils.read_yaml_file(model_config_path)
# print(json.dumps(model_config, indent=2))

## Load and Split Dataset for Training and Evaluation

In this section, we load the recordings data from disk, generate data-label pairs, and split them into training and test sets according to the `test_size` defined in the YAML file.

Using `test_size` and `seed` from the YAML config ensures that experiments are reproducible and easily tunable for other tasks by simply updating the configuration.


In [6]:
data_path = model_config['dataset']['path']
test_data_size = model_config['data_splitting']['test_size']
seed = model_config['experiment']['seed']

In [7]:
data_label_pairs = utils.prepare_data_label_pairs(data_path)

In [8]:
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(data_label_pairs, test_size=test_data_size, random_state=seed)

## Transform Raw Data into PyTorch Dataset Objects

The `AudioFeaturesDataset` class converts raw data-label pairs into PyTorch-compatible datasets that provide easy access to samples and labels.

AudioFeaturesDataset is a custom dataset class that:

- Loads audio recordings of spoken digits along with their labels.
- Optionally cleans the audio by filtering out noise.
- Extracts MFCC features (a common speech feature).
- Pads or trims these features to a fixed length so all inputs have the same shape.
- Works with PyTorch to provide samples one-by-one when training or testing a model.
- It helps prepare your audio data in the right format for training neural networks efficiently.


In [9]:
from src.data_preprocessor import AudioFeaturesDataset

train_dataset = AudioFeaturesDataset(train_data)
test_dataset = AudioFeaturesDataset(test_data)

In [10]:
print(f"Train size: {len(train_dataset)}")
print(f"Test size: {len(test_dataset)}")

Train size: 2400
Test size: 600


## Create DataLoaders for Batch Processing

Using PyTorch DataLoaders, we enable efficient loading, batching, and shuffling of data during training and evaluation.

In [11]:
import torch

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

## LSTM Model Definition

A simple `n`-layer LSTM followed by a fully connected output layer. Variable `n` is defined in the configuration YAML file

In [12]:
import torch

In [30]:
int8_model_load_path = os.path.join(project_root_dir, 'outputs', 'models', 'int8_model_weights.pth')

static_quantized_model = torch.jit.load(int8_model_load_path)
print(static_quantized_model)
# static_quantized_model.eval()

RecursiveScriptModule(
  original_name=StaticQuantizableModel
  (quant): RecursiveScriptModule(original_name=Quantize)
  (dequant): RecursiveScriptModule(original_name=DeQuantize)
  (model): RecursiveScriptModule(
    original_name=LSTMClassifier
    (lstm): RecursiveScriptModule(
      original_name=LSTM
      (layers): RecursiveScriptModule(
        original_name=ModuleList
        (0): RecursiveScriptModule(
          original_name=_LSTMLayer
          (layer_fw): RecursiveScriptModule(
            original_name=_LSTMSingleLayer
            (cell): RecursiveScriptModule(
              original_name=LSTMCell
              (igates): RecursiveScriptModule(
                original_name=Linear
                (_packed_params): RecursiveScriptModule(original_name=LinearPackedParams)
              )
              (hgates): RecursiveScriptModule(
                original_name=Linear
                (_packed_params): RecursiveScriptModule(original_name=LinearPackedParams)
              )
  

In [14]:
from src.evaluate import ModelEvaluator

In [15]:
device = torch.device("cpu")

test_instance = ModelEvaluator(
    static_quantized_model, 
    test_loader,
    device
)

In [16]:
test_instance.evaluate()


 Accuracy on test data: 90.50%

 Classification Report:
              precision    recall  f1-score   support

           0     0.9808    0.9808    0.9808        52
           1     0.9677    0.9231    0.9449        65
           2     0.9643    0.8571    0.9076        63
           3     0.7222    0.8667    0.7879        60
           4     1.0000    0.9273    0.9623        55
           5     0.8197    0.9434    0.8772        53
           6     0.9298    0.8154    0.8689        65
           7     0.9655    0.9492    0.9573        59
           8     0.8667    0.8525    0.8595        61
           9     0.9014    0.9552    0.9275        67

    accuracy                         0.9050       600
   macro avg     0.9118    0.9071    0.9074       600
weighted avg     0.9118    0.9050    0.9064       600


 Confusion Matrix:
[[51  0  0  0  0  0  0  1  0  0]
 [ 0 60  0  0  0  2  0  0  0  3]
 [ 1  0 54  7  0  0  0  0  0  1]
 [ 0  0  2 52  0  0  2  0  2  2]
 [ 0  0  0  0 51  4  0  0  0  0]

In [19]:
import src.power_of_two_observer as po2
from torch.ao.quantization import QuantStub, DeQuantStub, MinMaxObserver, QConfig


In [22]:
po2_qconfig = QConfig(
    activation=po2.PowerOfTwoObserver.with_args(reduce_range=False, dtype=torch.quint8),
    weight=po2.PowerOfTwoObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
)

In [23]:
quant_weight = static_quantized_model.quant.fc._packed_params._weight  # get quantized weight tensor

scale = quant_weight.q_per_channel_scales()
zero_point = quant_weight.q_per_channel_zero_points()


AttributeError: 'RecursiveScriptModule' object has no attribute 'fc'

In [24]:
for name, mod in static_quantized_model.named_modules():
    print(name, mod)

 RecursiveScriptModule(
  original_name=StaticQuantizableModel
  (quant): RecursiveScriptModule(original_name=Quantize)
  (dequant): RecursiveScriptModule(original_name=DeQuantize)
  (model): RecursiveScriptModule(
    original_name=LSTMClassifier
    (lstm): RecursiveScriptModule(
      original_name=LSTM
      (layers): RecursiveScriptModule(
        original_name=ModuleList
        (0): RecursiveScriptModule(
          original_name=_LSTMLayer
          (layer_fw): RecursiveScriptModule(
            original_name=_LSTMSingleLayer
            (cell): RecursiveScriptModule(
              original_name=LSTMCell
              (igates): RecursiveScriptModule(
                original_name=Linear
                (_packed_params): RecursiveScriptModule(original_name=LinearPackedParams)
              )
              (hgates): RecursiveScriptModule(
                original_name=Linear
                (_packed_params): RecursiveScriptModule(original_name=LinearPackedParams)
              )
 

In [28]:
for name, param in static_quantized_model.named_buffers():
    print(name, param.shape)

quant.scale torch.Size([1])
quant.zero_point torch.Size([1])


In [29]:
weight_scale = static_quantized_model.lstm.weight_ih_l0().q_scale()
print(weight_scale)

AttributeError: 'RecursiveScriptModule' object has no attribute 'lstm'