In [None]:
import os
import random
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset

# --- Experiment configuration ---------------------------------------------
# Root directory; each dataset lives in the sub-folder BASE_DIR/<name>.
BASE_DIR = r""
# Dataset sub-folder names and, at the same index, the per-time-step feature
# size (LSTM input size) used for that dataset.
DATASETS = ["2k", "4k", "8k", "16k", "22k"]
FREQ_BINS = [129, 257, 513, 1025, 1376]
assert len(DATASETS) == len(FREQ_BINS), "DATASETS and FREQ_BINS must be index-aligned"
NUM_CLASSES = 10
# Number of files drawn (after shuffling) for the train and test splits.
TRAIN_SAMPLES = 20000
TEST_SAMPLES  = 1000
BATCH_SIZE    = 128
LEARNING_RATE = 1e-4
DEVICE        = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed every RNG the notebook relies on (file shuffling, weight init, dropout,
# DataLoader shuffling) so a Restart & Run All gives a repeatable split and run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [2]:
# Show which torch device (CUDA if available, otherwise CPU) was selected.
print(DEVICE)

cuda


In [3]:
class SpectrogramDataset(Dataset):
    """Dataset of arrays stored one per ``.npy`` file.

    The class label is encoded in the file name as the integer that precedes
    the first underscore (``"<label>_<anything>.npy"``).

    Args:
        file_list: Sequence of paths to ``.npy`` files.
    """

    def __init__(self, file_list):
        self.file_list = file_list

    def __len__(self):
        return len(self.file_list)

    @staticmethod
    def _label_from_path(path):
        """Parse the integer class label from the file-name part of ``path``.

        Both "/" and "\\" are treated as directory separators so the label is
        read from the file name regardless of how the path was assembled.

        Raises:
            ValueError: If the file name does not start with ``<int>_``.
        """
        file_name = path.replace("\\", "/").rsplit("/", 1)[-1]
        label_text = file_name.split("_")[0]
        try:
            return int(label_text)
        except ValueError as exc:
            raise ValueError(
                f"Cannot parse class label from file name {file_name!r}; "
                "expected '<int>_...'"
            ) from exc

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(transposed float tensor, int label)``."""
        path = self.file_list[idx]
        class_num = self._label_from_path(path)
        spec = torch.from_numpy(np.load(path)).float()
        # Transpose the stored array; presumably (freq, time) -> (time, freq)
        # so that time becomes the sequence axis -- TODO confirm on-disk layout.
        return spec.T, class_num

In [4]:
class LSTM(nn.Module):
    """Two-layer LSTM encoder followed by a small MLP classification head.

    Args:
        freq_bins: Feature size of each time step (``input_size`` of the
            LSTM). The default ``-1`` is a "not set" sentinel and is rejected.
        num_classes: Number of output logits.
        model_scaler: Width multiplier. The LSTM hidden size is
            ``freq_bins * model_scaler * 2`` and the hidden linear layer has
            ``freq_bins * model_scaler * 4`` units.

    Raises:
        ValueError: If ``freq_bins`` or ``model_scaler`` is not positive.
    """

    def __init__(self, freq_bins = -1, num_classes=NUM_CLASSES, model_scaler = 1):
        super().__init__()
        # Fail early with a clear message instead of passing negative layer
        # sizes into nn.LSTM / nn.Linear.
        if freq_bins <= 0:
            raise ValueError(f"freq_bins must be a positive integer, got {freq_bins!r}")
        if model_scaler <= 0:
            raise ValueError(f"model_scaler must be positive, got {model_scaler!r}")

        self.first_neurons = freq_bins*model_scaler*2
        self.second_neurons = freq_bins*model_scaler*4

        # Sequence encoder; expects input of shape (batch, seq_len, freq_bins).
        self.features = nn.LSTM(freq_bins, self.first_neurons, num_layers = 2, batch_first = True, dropout = 0.2)

        # Classifier applied to the LSTM output at the last time step, which
        # has shape (batch, first_neurons). nn.Flatten leaves a 2-D input
        # unchanged; it is kept so the Sequential indices (and therefore the
        # state_dict keys) stay the same.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.first_neurons, self.second_neurons),
            nn.ReLU(inplace=True),
            nn.Linear(self.second_neurons, num_classes)
        )

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, freq_bins) to (batch, num_classes) logits."""
        output, _ = self.features(x)              # (batch, seq_len, first_neurons)
        last_step = output[:, -1, :]              # top-layer output at the final time step
        return self.classifier(last_step)         # (batch, num_classes)

In [5]:
def train_one_epoch(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader`` and return the mean loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable ``criterion(outputs, targets)``; assumed to
            return the batch-mean loss (the nn.CrossEntropyLoss default).
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        Sample-weighted average of the batch losses over the samples that were
        actually processed (0.0 if the loader yields no batches).
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for inputs, targets in tqdm(loader):
        inputs = inputs.to(DEVICE, non_blocking=True)
        targets = targets.to(DEVICE, non_blocking=True)
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks
        # run, matching how evaluate() invokes the model.
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        batch_size = inputs.size(0)
        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Divide by the samples processed, not len(loader.dataset): the two differ
    # when the loader drops batches or uses a sampler.
    return running_loss / max(samples_seen, 1)

In [6]:
import time

def evaluate(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly.

    The model is put into eval mode and gradients are disabled while the
    batches are scored; the prediction for a sample is the argmax over dim 1
    of the model output.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(DEVICE)
            batch_y = batch_y.to(DEVICE)
            logits = model(batch_x)
            predicted = torch.argmax(logits, dim=1)
            n_correct += torch.eq(predicted, batch_y).sum().item()
            n_seen += batch_y.shape[0]
    return n_correct / n_seen

def run_for_dataset(name, idx = -1, skip = False, EPOCHS = 10, model_scaler=1):
    """Train and evaluate a fresh LSTM on one dataset folder.

    Args:
        name: Sub-folder of ``BASE_DIR`` holding the dataset's ``.npy`` files.
        idx: Index into ``FREQ_BINS`` giving the model's input size. When
            negative (the default) it is looked up from ``name`` in ``DATASETS``.
        skip: Currently unused; kept so existing callers keep working.
        EPOCHS: Number of training epochs.
        model_scaler: Width multiplier forwarded to ``LSTM``.

    Returns:
        ``(epoch_acc, train_times, test_times)``: three lists with one entry
        per epoch holding the test accuracy, the training wall time in seconds
        and the evaluation wall time in seconds.

    Raises:
        FileNotFoundError: If the dataset folder does not exist.
        ValueError: If ``idx`` is negative and ``name`` is not in ``DATASETS``.
        AssertionError: If the folder holds fewer than
            ``TRAIN_SAMPLES + TEST_SAMPLES`` files.
    """
    folder = os.path.join(BASE_DIR, name)
    if not os.path.isdir(folder):
        raise FileNotFoundError(f"Dataset folder not found: {folder}")

    # Collect only the files directly inside the folder. Paths are joined with
    # "/" because the dataset class parses the label from the last
    # "/"-separated component. Sorting first makes the shuffled split depend
    # only on the RNG state, not on the OS directory-listing order.
    all_files = sorted(
        folder + "/" + entry
        for entry in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, entry))
    )
    random.shuffle(all_files)
    assert len(all_files) >= TRAIN_SAMPLES + TEST_SAMPLES, \
        f"Not enough samples in {folder}"

    train_files = all_files[:TRAIN_SAMPLES]
    test_files  = all_files[TRAIN_SAMPLES:TRAIN_SAMPLES + TEST_SAMPLES]
    print("Getting Datasets")
    train_ds = SpectrogramDataset(train_files)
    test_ds  = SpectrogramDataset(test_files)
    print("Making Dataloaders")
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    # Use the function's own argument instead of relying on a global loop
    # variable that happens to exist in the calling cell.
    if idx < 0:
        idx = DATASETS.index(name)
    model = LSTM(freq_bins = FREQ_BINS[idx], model_scaler=model_scaler).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    print(f"Training on {name}: {TRAIN_SAMPLES} samples")
    epoch_acc = []
    train_times = []
    test_times = []
    for epoch in range(1, EPOCHS+1):
        # perf_counter is monotonic, so the measured intervals cannot be
        # distorted by system clock adjustments.
        t_start = time.perf_counter()
        loss = train_one_epoch(model, train_loader, criterion, optimizer)
        t_trained = time.perf_counter()
        acc  = evaluate(model, test_loader)
        t_evaluated = time.perf_counter()
        epoch_acc.append(acc)
        print(f"Epoch {epoch:2d} | loss: {loss:.4f} | test acc: {acc*100:5.2f}%")
        train_times.append(t_trained - t_start)
        test_times.append(t_evaluated - t_trained)

    # The model is unchanged since the last epoch's evaluation, so reuse that
    # accuracy; only evaluate here when no epoch was run.
    final_acc = epoch_acc[-1] if epoch_acc else evaluate(model, test_loader)
    print(f"Final test accuracy on {name}: {final_acc*100:.2f}%")
    return epoch_acc, train_times, test_times

In [None]:
# Hyper-parameters for this sweep. They are module-level globals that
# run_for_dataset reads when it is called, so reassigning them here takes
# effect for the runs below.
LEARNING_RATE = 1e-4
BATCH_SIZE    = 128
if __name__ == "__main__":
    # Results are nested as [model_scale][dataset][epoch]:
    #   model_accs  - test accuracy after each epoch
    #   model_train - seconds spent training each epoch
    #   model_test  - seconds spent evaluating each epoch
    model_accs = []
    model_train = []
    model_test = []
    for model_scale in [2]:
        print(f"Model size {model_scale}")
        # Per-dataset results for the current model scale.
        epoch_accs = []
        epoch_trains = []
        epoch_tests = []
        # NOTE: `i` is a module-level name in this cell, so it is visible to
        # any function that reads a global `i`; renaming it is not a purely
        # local change.
        for i in range(len(DATASETS)):
            ds = DATASETS[i]
            print(f"Dataset {ds}")
            epoch_acc, epoch_train, epoch_test = run_for_dataset(ds, idx = i, EPOCHS=10, model_scaler=model_scale)
            epoch_accs.append(epoch_acc)
            epoch_trains.append(epoch_train)
            epoch_tests.append(epoch_test)
        model_accs.append(epoch_accs)
        model_train.append(epoch_trains)
        model_test.append(epoch_tests)
# Best result noted so far: lr = 1e-4, bs = 128, model_scale = 4, dropout = 0.1 -> 84% after 10 epochs
# (this cell currently sweeps model_scale = 2 only).

Model size 2
Dataset 2k
Getting Datasets
Making Dataloaders
Training on 2k: 20000 samples


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [04:40<00:00,  1.78s/it]


Epoch  1 | loss: 1.6917 | test acc: 59.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:17<00:00,  8.98it/s]


Epoch  2 | loss: 1.2265 | test acc: 62.30%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:18<00:00,  8.62it/s]


Epoch  3 | loss: 1.0298 | test acc: 61.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  8.05it/s]


Epoch  4 | loss: 0.9073 | test acc: 70.90%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:18<00:00,  8.55it/s]


Epoch  5 | loss: 0.8034 | test acc: 75.60%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:17<00:00,  9.13it/s]


Epoch  6 | loss: 0.7133 | test acc: 75.60%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  8.24it/s]


Epoch  7 | loss: 0.6574 | test acc: 77.80%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:18<00:00,  8.37it/s]


Epoch  8 | loss: 0.5865 | test acc: 78.60%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:17<00:00,  9.20it/s]


Epoch  9 | loss: 0.5500 | test acc: 81.20%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:17<00:00,  8.79it/s]


Epoch 10 | loss: 0.5107 | test acc: 83.50%
Final test accuracy on 2k: 83.50%
Dataset 4k
Getting Datasets
Making Dataloaders
Training on 4k: 20000 samples


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [05:40<00:00,  2.17s/it]


Epoch  1 | loss: 1.5590 | test acc: 56.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  8.04it/s]


Epoch  2 | loss: 1.1458 | test acc: 62.90%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  7.92it/s]


Epoch  3 | loss: 0.9532 | test acc: 69.60%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  7.98it/s]


Epoch  4 | loss: 0.8150 | test acc: 73.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:20<00:00,  7.77it/s]


Epoch  5 | loss: 0.7300 | test acc: 78.30%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  8.08it/s]


Epoch  6 | loss: 0.6621 | test acc: 75.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:19<00:00,  8.02it/s]


Epoch  7 | loss: 0.5839 | test acc: 80.00%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:21<00:00,  7.23it/s]


Epoch  8 | loss: 0.5330 | test acc: 81.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:23<00:00,  6.67it/s]


Epoch  9 | loss: 0.4763 | test acc: 82.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:18<00:00,  8.30it/s]


Epoch 10 | loss: 0.4220 | test acc: 82.30%
Final test accuracy on 4k: 82.30%
Dataset 8k
Getting Datasets
Making Dataloaders
Training on 8k: 20000 samples


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [06:46<00:00,  2.59s/it]


Epoch  1 | loss: 1.6169 | test acc: 51.90%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.82it/s]


Epoch  2 | loss: 1.2191 | test acc: 60.50%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.81it/s]


Epoch  3 | loss: 1.0784 | test acc: 69.70%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.74it/s]


Epoch  4 | loss: 0.9298 | test acc: 70.40%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.82it/s]


Epoch  5 | loss: 0.8724 | test acc: 70.10%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:40<00:00,  3.85it/s]


Epoch  6 | loss: 0.7677 | test acc: 74.90%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.81it/s]


Epoch  7 | loss: 0.6929 | test acc: 78.80%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.78it/s]


Epoch  8 | loss: 0.6413 | test acc: 79.60%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.79it/s]


Epoch  9 | loss: 0.5985 | test acc: 74.60%


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:41<00:00,  3.81it/s]


Epoch 10 | loss: 0.5427 | test acc: 81.30%
Final test accuracy on 8k: 81.30%
Dataset 16k
Getting Datasets
Making Dataloaders
Training on 16k: 20000 samples


100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [44:34<00:00, 17.03s/it]


Epoch  1 | loss: 1.8137 | test acc: 34.00%


  3%|██                                                                                | 4/157 [00:55<35:42, 14.00s/it]

In [None]:
# Report the shape of each collected result list once converted to an array.
for results in (model_train, model_test, model_accs):
    print(np.array(results).shape)

# Persist each result list to its own .npy file (accuracy, training time,
# inference time), in that order.
result_files = (
    ("LSTM_perf_data_2.npy", model_accs),
    ("LSTM_train_data_2.npy", model_train),
    ("LSTM_inference_data_2.npy", model_test),
)
for file_name, results in result_files:
    with open(file_name, 'wb') as handle:
        np.save(handle, np.array(results))