# Packages

In [1]:
import os

import lightning
import pandas as pd
import torch
import torchmetrics
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Lambda

from Project2_LSTM.load_data import AudioTrainDataset, PaddingZeros, CustomSpectogram, TargetEncoder

# Simple loading

In [2]:
DATA_PATH = os.path.join("tensorflow-speech-recognition-challenge", "train", "audio")
dataset = AudioTrainDataset(DATA_PATH)

labels_list, labels_dict = dataset.find_classes(DATA_PATH)
labels_dict = {idx: name for name, idx in labels_dict.items()}
labels_dict

{1: 'bed',
 2: 'bird',
 3: 'cat',
 4: 'dog',
 5: 'down',
 6: 'eight',
 7: 'five',
 8: 'four',
 9: 'go',
 10: 'happy',
 11: 'house',
 12: 'left',
 13: 'marvin',
 14: 'nine',
 15: 'no',
 16: 'off',
 17: 'on',
 18: 'one',
 19: 'right',
 20: 'seven',
 21: 'sheila',
 22: 'silence',
 23: 'six',
 24: 'stop',
 25: 'three',
 26: 'tree',
 27: 'two',
 28: 'up',
 29: 'wow',
 30: 'yes',
 31: 'zero'}

In [3]:
NUM_WORKERS = 4
BATCH_SIZE = 128

# Simple Model

In [4]:
transforms = Compose([
    PaddingZeros(16000),
    CustomSpectogram(n_fft=1024, power=2),
])

features_dataset = AudioTrainDataset(DATA_PATH, transform=transforms,
                                     target_transform=TargetEncoder(class_dict=labels_dict))

In [5]:
gen = torch.Generator().manual_seed(42)
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(features_dataset, [0.7, 0.1, 0.2],
                                                                           generator=gen)
len(train_dataset), len(valid_dataset), len(test_dataset)

(45587, 6512, 13024)

In [6]:
train_dataset_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True,
                                  generator=torch.random.manual_seed(123))
valid_dataset_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)
test_dataset_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)
len(train_dataset_loader), len(valid_dataset_loader), len(test_dataset_loader)

(357, 51, 102)

In [13]:
class MyLSTM(lightning.LightningModule):
    def __init__(self,
                 input_features_size, # number of frequencies of spectogram
                 input_sequence_size, # length of spectogram
                 hidden_size,
                 conv_channels_out,
                 conv_kernel_size,
                 target_size):
        super().__init__()
        self.conv = torch.nn.Conv1d(input_sequence_size, conv_channels_out, kernel_size=conv_kernel_size, groups=input_sequence_size)
        lstm_input_size = input_features_size - (conv_kernel_size - 1)
        self.lstm = torch.nn.LSTM(lstm_input_size, hidden_size, num_layers=1, batch_first=True)
        self.hidden2label = torch.nn.Linear(hidden_size, target_size)
        self.softmax = torch.nn.Softmax(dim=-1)
        self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=target_size)
        self.valid_acc = torchmetrics.Accuracy(task="multiclass", num_classes=target_size)
        self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=target_size)
        self.test_conf_mat = torchmetrics.ConfusionMatrix(task="multiclass", num_classes=target_size)

    def forward(self, x):
        x_squeeze = x.squeeze()
        x = self.conv(x_squeeze)
        lstm_out, _ = self.lstm(x)
        label_space = self.hidden2label(lstm_out[:, -1])
        return self.softmax(label_space)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.nn.functional.cross_entropy(y_hat, y)
        self.train_acc(y_hat, torch.argmax(y, dim=-1))
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log("train_acc_step", self.train_acc)
        return loss

    def on_train_epoch_end(self):
        self.log('train_acc', self.train_acc)

    def predict_step(self, batch, batch_idx, dataloader_idx = 0):
        x, y = batch
        return self(x)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.nn.functional.cross_entropy(y_hat, y)
        self.valid_acc(y_hat, torch.argmax(y, dim=-1))
        self.log('val_loss', loss, on_epoch=True)
        self.log('val_acc', self.valid_acc, on_epoch=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = torch.nn.functional.cross_entropy(y_hat, y)
        y_class = torch.argmax(y, dim=-1)
        self.test_acc(y_hat, y_class)
        self.test_conf_mat(y_hat, y_class)
        self.log('test_loss', loss, on_epoch=True)
        self.log('test_acc', self.test_acc, on_epoch=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.3, patience=3),
                "monitor": "val_loss",
            }
        }

In [7]:
for batch_x,  batch_y in train_dataset_loader:
    print(batch_x.shape)
    break

torch.Size([128, 1, 32, 513])


In [16]:
model = MyLSTM(513, 32, 128, 32, 8, 12)
for batch_x,  batch_y in train_dataset_loader:
    y_hat = model(batch_x)
    break

In [17]:
model = MyLSTM(513, 32,  128, 128, 8, 12)
trainer = lightning.Trainer(max_epochs=2, logger=True)
torch.set_float32_matmul_precision('medium')
trainer.fit(model, train_dataloaders=train_dataset_loader, val_dataloaders=valid_dataset_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type                      | Params
------------------------------------------------------------
0 | conv          | Conv1d                    | 1.2 K 
1 | lstm          | LSTM                      | 325 K 
2 | hidden2label  | Linear                    | 1.5 K 
3 | softmax       | Softmax                   | 0     
4 | train_acc     | MulticlassAccuracy        | 0     
5 | valid_acc     | MulticlassAccuracy        | 0     
6 | test_acc      | MulticlassAccuracy        | 0     
7 | test_conf_mat | MulticlassConfusionMatrix | 0     
------------------------------------------------------------
328 K     Trainable params
0         Non-trainable params
328 K     Total params
1.313     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

In [22]:
early_stopping = lightning.pytorch.callbacks.EarlyStopping('val_loss')
results = []
predictions = []
for i in range(5):
    lightning.pytorch.seed_everything(i)
    model = MyLSTM(513, 32,  128, 32, 8, 12)
    trainer = lightning.Trainer(max_epochs=2, callbacks=[early_stopping])
    trainer.fit(model, train_dataloaders=train_dataset_loader, val_dataloaders=valid_dataset_loader)
    res = trainer.test(dataloaders=test_dataset_loader)
    test_pred_tensor = torch.cat(trainer.predict(dataloaders=test_dataset_loader))
    results.append(res[0])
    predictions.append(test_pred_tensor)
torch.save(torch.stack(predictions), "spectogram_cnn_lstm_predictions.ts")
pd.DataFrame(results).to_csv("spectogram_cnn_lstm_metrics.csv")

Global seed set to 0
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type                      | Params
------------------------------------------------------------
0 | conv          | Conv1d                    | 525 K 
1 | lstm          | LSTM                      | 79.4 K
2 | hidden2label  | Linear                    | 1.5 K 
3 | softmax       | Softmax                   | 0     
4 | train_acc     | MulticlassAccuracy        | 0     
5 | valid_acc     | MulticlassAccuracy        | 0     
6 | test_acc      | MulticlassAccuracy        | 0     
7 | test_conf_mat | MulticlassConfusionMatrix | 0     
------------------------------------------------------------
606 K     Trainable params
0         Non-trainable params
606 K     Total params
2.425     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.
Restoring states from the checkpoint path at E:\Studies\DataScience-1sem\DeepLearning\Project2_LSTM\lightning_logs\version_8\checkpoints\epoch=1-step=714.ckpt
Loaded model weights from the checkpoint at E:\Studies\DataScience-1sem\DeepLearning\Project2_LSTM\lightning_logs\version_8\checkpoints\epoch=1-step=714.ckpt


Testing: 0it [00:00, ?it/s]

Global seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type                      | Params
------------------------------------------------------------
0 | conv          | Conv1d                    | 525 K 
1 | lstm          | LSTM                      | 79.4 K
2 | hidden2label  | Linear                    | 1.5 K 
3 | softmax       | Softmax                   | 0     
4 | train_acc     | MulticlassAccuracy        | 0     
5 | valid_acc     | MulticlassAccuracy        | 0     
6 | test_acc      | MulticlassAccuracy        | 0     
7 | test_conf_mat | MulticlassConfusionMatrix | 0     
------------------------------------------------------------
606 K     Trainable params
0         Non-trainable params
606 K     Total params
2.425     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x00000217D39B0E50>
Traceback (most recent call last):
  File "E:\Studies\DataScience-1sem\DeepLearning\venv\lib\site-packages\torch\utils\data\dataloader.py", line 1466, in __del__
    self._shutdown_workers()
  File "E:\Studies\DataScience-1sem\DeepLearning\venv\lib\site-packages\torch\utils\data\dataloader.py", line 1424, in _shutdown_workers
    if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


ValueError: `.test(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.