In [2]:
import numpy as np
import pickle

import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

# from name_dataset import NameDataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader

# Dataset Loader

In [130]:
# Number of samples per mini-batch handed out by the DataLoaders.
BATCH_SIZE = 32
# DataLoader worker count: the number of visible CUDA devices, but never less than one.
# NOTE(review): worker processes load data on the CPU, so tying this to the GPU count
# looks incidental — confirm this is the intended sizing.
N_WORKERS = max(torch.cuda.device_count(), 1)
N_WORKERS

1

In [131]:
# Class name -> integer class index; the index is the position in the tuple below.
label_map = {
    class_name: class_index
    for class_index, class_name in enumerate((
        'air_conditioner',
        'car_horn',
        'children_playing',
        'dog_bark',
        'drilling',
        'engine_idling',
        'gun_shot',
        'jackhammer',
        'siren',
        'street_music',
    ))
}

In [132]:
class UrbanSound8kDataset(Dataset):
    """Map-style dataset over a pickled ``(features, labels)`` pair.

    The pickle file is expected to unpack into two sequences: per-sample
    feature arrays and per-sample class names. Class names are translated to
    integer indices through the module-level ``label_map``.
    """

    def __init__(self, file_path):
        """Load the pickle at ``file_path`` and encode its labels as integers."""
        # NOTE(review): pickle.load executes arbitrary code; only open trusted files.
        with open(file_path, 'rb') as handle:
            features, class_names = pickle.load(handle)
        self.x_data = features
        self.len = len(features)
        self.y_data = np.array([label_map[class_name] for class_name in class_names])

    def __getitem__(self, idx):
        """Return ``(features, label)`` as a float32 tensor and a long tensor."""
        # Cast to float32 before handing the array to torch.
        features = self.x_data[idx].astype(np.float32)
        target = self.y_data[idx]
        return torch.from_numpy(features), torch.tensor(target, dtype=torch.long)

    def __len__(self):
        """Number of samples, as recorded when the file was loaded."""
        return self.len


In [133]:
# NOTE(review): absolute Kaggle input paths — adjust when running outside Kaggle.
train_dataset = UrbanSound8kDataset('/kaggle/input/urbansound8k-feature-extraction/train_data.pkl')
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=N_WORKERS)
val_dataset = UrbanSound8kDataset('/kaggle/input/urbansound8k-feature-extraction/val_data.pkl')
# Accuracy does not depend on sample order, so the validation loader keeps a
# fixed order instead of shuffling (reproducible batches, no wasted permutation).
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=BATCH_SIZE,
                        shuffle=False,
                        num_workers=N_WORKERS)


In [134]:
# Sanity check: dataset size, items per sample, and the two feature dimensions.
first_sample = train_dataset[0]
first_features = first_sample[0]
print(len(train_dataset), len(first_sample), len(first_features), len(first_features[0]))

21740 2 54 180


In [135]:
# Sanity check: dataset size, items per sample, and the label tensor of sample 1.
second_sample = train_dataset[1]
print(len(train_dataset), len(second_sample), second_sample[1])

21740 2 tensor(7)


# RNN Model

In [136]:
class RNNClassifier(nn.Module):
    """RNN sequence classifier that maps a feature sequence to class logits.

    ``forward`` takes a batch shaped ``(batch, input_size, seq_len)``, permutes
    it to ``(batch, seq_len, input_size)`` for the ``batch_first`` ``nn.RNN``,
    and projects the final hidden state of the last layer to ``output_size``
    logits. When the RNN is bidirectional, the final states of both directions
    are concatenated before the projection.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        output_size: number of logits produced per sample.
        n_layers: number of stacked RNN layers.
        bidirectional: whether the RNN runs in both directions.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = int(bidirectional) + 1

        # NOTE(review): this embedding is never used in forward(); it is kept
        # only so existing parameter names / state_dict keys stay unchanged.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(input_size, hidden_size, n_layers,
                          bidirectional=bidirectional, batch_first=True)
        # The classifier head consumes one final hidden state per direction.
        self.fc = nn.Linear(hidden_size * self.n_directions, output_size)

    def forward(self, signal):
        """Return logits of shape ``(batch, output_size)`` for ``signal``.

        Args:
            signal: tensor shaped ``(batch, input_size, seq_len)``.
        """
        batch_size = signal.size(0)
        # (batch, input_size, seq_len) -> (batch, seq_len, input_size)
        signal = signal.permute(0, 2, 1)

        # Allocate the initial state next to the input so CPU and GPU runs
        # (and DataParallel replicas) never mix devices.
        hidden = self._init_hidden(batch_size, device=signal.device, dtype=signal.dtype)
        _, hidden = self.rnn(signal, hidden)

        # hidden is (n_layers * n_directions, batch, hidden_size); the trailing
        # n_directions entries belong to the last layer. Taking only hidden[-1]
        # would drop the forward direction of a bidirectional RNN.
        last_layer = hidden[-self.n_directions:]
        features = last_layer.transpose(0, 1).reshape(batch_size, -1)
        return self.fc(features)

    def _init_hidden(self, batch_size, device=None, dtype=None):
        """Build a zero initial hidden state for ``batch_size`` sequences.

        Args:
            batch_size: number of sequences in the batch.
            device: target device; defaults to the device of the RNN weights.
            dtype: target dtype; defaults to torch's default dtype.

        Returns:
            Zeros shaped ``(n_layers * n_directions, batch_size, hidden_size)``.
        """
        if device is None:
            device = self.rnn.weight_ih_l0.device
        return torch.zeros(self.n_layers * self.n_directions,
                           batch_size, self.hidden_size,
                           device=device, dtype=dtype)

In [147]:
# Train cycle
def train():
    """Run one training epoch over ``train_loader``.

    Uses the notebook-level globals ``classifier``, ``criterion``,
    ``optimizer``, ``train_loader``, ``start`` and ``epoch``.

    Returns:
        The sum of the per-batch loss values over the epoch.
    """
    classifier.train()
    # Batches come out of the DataLoader on the CPU; move them to wherever the
    # model's parameters live so training also works once the model is on GPU.
    device = next(classifier.parameters()).device
    n_samples = len(train_loader.dataset)
    total_loss = 0
    seen = 0

    for i, (signal, label) in enumerate(train_loader, 1):
        signal = signal.to(device)
        output = classifier(signal)
        # The criterion needs the targets on the same device as the logits.
        label = label.to(output.device)
        loss = criterion(output, label)
        total_loss += loss.item()

        classifier.zero_grad()
        loss.backward()
        optimizer.step()

        seen += len(signal)
        if i % 100 == 0:
            # Report the running mean of the batch losses. The previous
            # expression `total_loss / i * len(signal)` multiplied that mean
            # by the batch size because of operator precedence.
            print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format(
                time_since(start), epoch, seen, n_samples,
                100. * seen / n_samples,
                total_loss / i))

    return total_loss


# Testing cycle
def test(name=None):
    """Evaluate ``classifier`` on ``val_loader`` and print the accuracy.

    Uses the notebook-level globals ``classifier`` and ``val_loader``.

    Args:
        name: unused; kept so existing calls keep working.
    """
    print("evaluating trained model ...")
    correct = 0
    val_data_size = len(val_loader.dataset)
    # Batches come out of the DataLoader on the CPU; move them to the model.
    device = next(classifier.parameters()).device

    # Evaluate in eval mode without building autograd graphs, then restore
    # whatever mode the model was in so later training is unaffected.
    was_training = classifier.training
    classifier.eval()
    try:
        with torch.no_grad():
            for signal, label in val_loader:
                output = classifier(signal.to(device))
                pred = output.argmax(dim=1)
                # .item() keeps `correct` a plain int instead of a tensor.
                correct += (pred == label.to(pred.device)).sum().item()
    finally:
        classifier.train(was_training)

    # Guard against an empty validation set.
    accuracy = 100. * correct / val_data_size if val_data_size else 0.0
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, val_data_size, accuracy))

# Some utility functions

In [138]:
def create_variable(tensor):
    """Wrap ``tensor`` in a ``Variable``, moving it to the GPU first when CUDA is available."""
    # The CUDA transfer happens before wrapping, never after.
    on_gpu = torch.cuda.is_available()
    return Variable(tensor.cuda() if on_gpu else tensor)

In [139]:
def time_since(since):
    """Format the wall-clock time elapsed since ``since`` as ``'<m>m <s>s'``.

    Args:
        since: a timestamp previously obtained from ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

# Model Training & Evaluation

In [140]:
# Model dimensions passed to RNNClassifier.
N_INPUT = 54        # RNN input_size (features per time step)
HIDDEN_SIZE = 180   # width of the recurrent hidden state
N_LAYERS = 1        # number of stacked RNN layers
N_CLASSES = 10      # number of output logits, one per entry in label_map

# Training schedule.
N_EPOCHS = 100


In [141]:
# Build the model from the hyper-parameters defined in the previous cell
# (N_INPUT, HIDDEN_SIZE, N_CLASSES, N_LAYERS); they are not repeated here so
# there is a single place to change them.
classifier = RNNClassifier(
    input_size=N_INPUT,
    hidden_size=HIDDEN_SIZE,
    output_size=N_CLASSES,
    n_layers=N_LAYERS,
    bidirectional=False  # single-direction RNN
)


In [142]:
# With more than one GPU, wrap the model so each batch is split along dim 0
# across the devices (e.g. a batch of 33 becomes three chunks of 11 on 3 GPUs).
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    classifier = nn.DataParallel(classifier)

In [143]:
# Move the model onto the GPU when CUDA is available; otherwise leave it as is.
gpu_available = torch.cuda.is_available()
if gpu_available:
    classifier.cuda()

In [144]:
# Cross-entropy loss over the class logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

# `start` and the loop variable `epoch` are read as globals by train().
start = time.time()
print("Training for %d epochs..." % N_EPOCHS)
for epoch in range(1, N_EPOCHS + 1):
    train()  # one pass over train_loader
    test()   # accuracy on val_loader after this epoch

Training for 100 epochs...
evaluating trained model ...


NameError: name 'seq_lengths' is not defined

In [148]:
# Evaluate the current classifier on val_loader and print its accuracy.
test()

evaluating trained model ...

Test set: Accuracy: 2494/6576 (38%)

