In [1]:
import numpy as np
import pickle

import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

# from name_dataset import NameDataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader

# Dataset Loader

In [2]:
BATCH_SIZE = 1
N_WORKERS = torch.cuda.device_count() if torch.cuda.device_count() > 1 else 1
N_WORKERS

1

In [3]:
label_map = {
    'air_conditioner': 0,
    'car_horn': 1,
    'children_playing': 2,
    'dog_bark': 3,
    'drilling': 4,
    'engine_idling': 5,
    'gun_shot': 6,
    'jackhammer': 7,
    'siren': 8,
    'street_music': 9
}

In [4]:
class UrbanSound8kDataset(Dataset):
    """ Diabetes dataset."""

    # Initialize your data, download, etc.
    def __init__(self, file_path):
        with open(file_path, 'rb') as f:
            self.x_data, self.y_data = pickle.load(f)
        self.len = len(self.x_data)
        self.y_data = np.array([label_map[label] for label in self.y_data])

    def __getitem__(self, idx):
        x = self.x_data[idx].astype(np.float32)   # <- fix here
        y = self.y_data[idx]
        # print(type(x[0][0]))
        # print(type(y))
        return torch.from_numpy(x), torch.LongTensor(y)

    def __len__(self):
        return self.len


In [5]:
train_dataset = UrbanSound8kDataset('/kaggle/input/urbansound8k-feature-extraction/train_data.pkl')
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=N_WORKERS)
val_dataset = UrbanSound8kDataset('/kaggle/input/urbansound8k-feature-extraction/val_data.pkl')
val_loader = DataLoader(dataset=val_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=N_WORKERS)


In [6]:
print(len(train_dataset), len(train_dataset[0]), len(train_dataset[0][0]), len(train_dataset[0][0][0]))

21740 2 54 180


# RNN Model

In [7]:
import torch
import torch.nn as nn

class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, sequence_length=180, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.n_directions = 2 if bidirectional else 1
        self.sequence_length = sequence_length

        self.projection = nn.Linear(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, n_layers, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size * self.n_directions, output_size)

    def forward(self, signal):
    # Expecting signal: [B, S, input_size]
        if signal.dim() != 3:
            raise ValueError("Input should be of shape [batch, sequence, input_size]")
    
        batch_size = signal.size(0)
    
        # Transpose to [S, B, input_size]
        signal = signal.transpose(0, 1)
    
        # Apply projection across the last dimension: input_size -> hidden_size
        projected = self.projection(signal)  # [S, B, hidden_size]
    
        h0 = self._init_hidden(batch_size)  # [num_layers * directions, B, hidden_size]
    
        self.rnn.flatten_parameters()
        output, hn = self.rnn(projected, h0)
    
        if self.bidirectional:
            last_hidden = torch.cat((hn[-2], hn[-1]), dim=1)  # [B, hidden_size * 2]
        else:
            last_hidden = hn[-1]  # [B, hidden_size]

        return self.fc(last_hidden)


    def _init_hidden(self, batch_size):
        return torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size, device=next(self.parameters()).device)


SyntaxError: 'return' outside function (1467632785.py, line 40)

In [8]:
# Train cycle
def train():
    total_loss = 0

    for i, (signal, label) in enumerate(train_loader, 1):
        output = classifier(signal)

        loss = criterion(output, label)
        total_loss += loss.item()

        classifier.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format(
                time_since(start), epoch,  i *
                len(names), len(train_loader.dataset),
                100. * i * len(names) / len(train_loader.dataset),
                total_loss / i * len(names)))

    return total_loss


# Testing cycle
def test(name=None):
    
    print("evaluating trained model ...")
    correct = 0
    train_data_size = len(val_loader.dataset)

    for signal, label in val_loader:
        output = classifier(signal, seq_lengths)
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, train_data_size, 100. * correct / train_data_size))

# Some utility functions

In [9]:
def create_variable(tensor):
    # Do cuda() before wrapping with variable
    if torch.cuda.is_available():
        return Variable(tensor.cuda())
    else:
        return Variable(tensor)

In [10]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

# Model Training & Valuation

In [11]:
N_EPOCHS = 100

N_INPUT = 54
HIDDEN_SIZE = 180
N_CLASSES = 10
N_LAYERS = 1


In [306]:
N_EPOCHS = 100

N_INPUT = 54
HIDDEN_SIZE = 180
N_CLASSES = 10
N_LAYERS = 1

classifier = RNNClassifier(
    input_size=N_INPUT,
    hidden_size=HIDDEN_SIZE,
    output_size=N_CLASSES,
    n_layers=N_LAYERS,
    bidirectional=False
)


In [307]:
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [33, xxx] -> [11, ...], [11, ...], [11, ...] on 3 GPUs
    classifier = nn.DataParallel(classifier)

In [308]:
if torch.cuda.is_available():
    classifier.cuda()

In [309]:
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

start = time.time()
print("Training for %d epochs..." % N_EPOCHS)
for epoch in range(1, N_EPOCHS + 1):
    # Train cycle
    train()

    # Testing
    test()

Training for 100 epochs...


RuntimeError: mat1 and mat2 shapes cannot be multiplied (54x180 and 54x180)