In [2]:
import numpy as np
import pickle

import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

# from name_dataset import NameDataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import Dataset, DataLoader

# Loading data

In [155]:
with open('/kaggle/input/urbansound8k-feature-extraction/train_data.pkl', 'rb') as f:
    X_train, Y_train = pickle.load(f)

with open('/kaggle/input/urbansound8k-feature-extraction/val_data.pkl', 'rb') as f:
    X_val, Y_val = pickle.load(f)


In [159]:
import pandas as pd
import numpy as np

def stack_samples(X, Y):
    """
    X: list or array of shape (N, R, C)
    Y: list of N labels
    Returns: DataFrame of shape (N×R, C + 1)
    """
    X_np = np.array(X, dtype=np.float32)  # Shape: (N, R, C)
    Y_repeated = np.repeat(Y, X_np.shape[1])  # Repeat each label R times

    X_stacked = X_np.reshape(-1, X_np.shape[2])  # (N×R, C)

    df = pd.DataFrame(X_stacked)
    df['Label'] = Y_repeated

    return df

train_df = stack_samples(X_train, Y_train)
val_df = stack_samples(X_val, Y_val)
print(train_df.shape)
print(val_df.shape)

(1173960, 181)
(355104, 181)


In [157]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,171,172,173,174,175,176,177,178,179,Label
0,0.027114,0.069684,0.084108,0.086272,0.086115,0.068998,0.075101,0.08155,0.082987,0.075951,...,0.078393,0.053319,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
1,0.430208,5.758408,51.152721,60.381077,21.515299,4.466881,13.07495,55.018288,43.365993,16.410397,...,25.460247,5.669981,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
2,0.356129,2.768868,10.043193,5.306677,2.211567,2.200262,5.633092,11.511503,4.667964,2.912486,...,5.36731,2.295357,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
3,0.119155,0.518129,1.10415,0.707675,0.361656,0.435153,0.531613,1.172129,1.274063,1.58621,...,1.393059,0.565431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
4,0.200908,0.502827,0.919906,0.679136,0.617777,0.499163,0.740346,0.879904,0.768322,0.788436,...,1.237146,0.505317,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer


In [158]:
val_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,171,172,173,174,175,176,177,178,179,Label
0,0.04418,0.056054,0.064415,0.069324,0.067334,0.067759,0.0718,0.069728,0.069248,0.065749,...,0.070372,0.06128,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
1,1.723533,5.68797,9.228014,7.410962,7.198935,11.340713,7.369151,10.589535,10.131034,3.039403,...,14.573446,10.166873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
2,2.469047,7.855614,10.78901,9.70544,11.964504,14.177218,9.775521,12.749818,11.2034,4.147494,...,12.323687,5.08407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
3,2.648386,5.412858,5.202138,6.334105,10.531077,8.599378,4.707143,6.276899,5.755446,8.934631,...,10.442652,4.127263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer
4,0.576799,1.618431,2.621299,1.711736,1.985173,2.103726,1.531987,2.888803,3.074646,3.813351,...,4.061364,3.374841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,jackhammer


# Dataset Loader

In [130]:
BATCH_SIZE = 32
N_WORKERS = torch.cuda.device_count() if torch.cuda.device_count() > 1 else 1
N_WORKERS

1

In [131]:
label_map = {
    'air_conditioner': 0,
    'car_horn': 1,
    'children_playing': 2,
    'dog_bark': 3,
    'drilling': 4,
    'engine_idling': 5,
    'gun_shot': 6,
    'jackhammer': 7,
    'siren': 8,
    'street_music': 9
}

In [132]:
class UrbanSound8kDataset(Dataset):
    """ Diabetes dataset."""

    # Initialize your data, download, etc.
    def __init__(self, file_path):
        with open(file_path, 'rb') as f:
            self.x_data, self.y_data = pickle.load(f)
        self.len = len(self.x_data)
        self.y_data = np.array([label_map[label] for label in self.y_data])

    def __getitem__(self, idx):
        x = self.x_data[idx].astype(np.float32)   # <- fix here
        y = self.y_data[idx]
        # print(type(x[0][0]))
        # print(y)
        # print(torch.tensor(y, dtype=torch.long))
        return torch.from_numpy(x), torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return self.len


In [165]:
train_dataset = UrbanSound8kDataset('/kaggle/input/urbansound8k-feature-extraction/train_data.pkl')
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=N_WORKERS)
val_dataset = UrbanSound8kDataset('/kaggle/input/urbansound8k-feature-extraction/val_data.pkl')
val_loader = DataLoader(dataset=val_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=N_WORKERS)


In [166]:
print(len(train_dataset), len(train_dataset[0]), len(train_dataset[0][0]), len(train_dataset[0][0][0]))

21740 2 54 180


In [167]:
print(len(train_dataset), len(train_dataset[1]), train_dataset[1][1])

21740 2 tensor(7)


In [168]:
print(len(val_dataset), len(val_dataset[0]), len(val_dataset[0][0]), len(val_dataset[0][0][0]))

6576 2 54 180


In [169]:
print(len(val_dataset), len(val_dataset[1]), val_dataset[1][1])

6576 2 tensor(7)


# RNN Model

In [136]:
class RNNClassifier(nn.Module):
    # Our model

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = int(bidirectional) + 1

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(input_size, hidden_size, n_layers,
                          bidirectional=bidirectional, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, signal):
        # Note: we run this all at once (over the whole input sequence)
        # input shape: B x S (input size)
        # transpose to make S(sequence) x B (batch)
        # input = input.t()
        batch_size = signal.size(0)
        signal = signal.permute(0, 2, 1)

        # Make a hidden
        hidden = self._init_hidden(batch_size)
        # print("hidden shape: ", hidden.shape)
        # print("signal shape: ", signal.shape)
        # # Embedding S x B -> S x B x I (embedding size)
        # embedded = self.embedding(input)

        # # Pack them up nicely
        # gru_input = pack_padded_sequence(
        #     embedded, seq_lengths.data.cpu().numpy())

        # To compact weights again call flatten_parameters().
        # self.gru.flatten_parameters()
        output, hidden = self.rnn(signal, hidden)
    
        # print("output shape: ", output.shape)
        # Use the last layer output as FC's input
        # No need to unpack, since we are going to use hidden
        fc_output = self.fc(hidden[-1])
        # print("fc_output shape: ", fc_output.shape)
        
        return fc_output

    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                             batch_size, self.hidden_size)
        return create_variable(hidden)

In [147]:
# Train cycle
def train():
    total_loss = 0

    for i, (signal, label) in enumerate(train_loader, 1):
        output = classifier(signal)
        # print("signal size: ", signal.shape)
        # print("label size: ", label.shape)
        # print("label: ", label)
        loss = criterion(output, label)
        total_loss += loss.item()

        classifier.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.2f}'.format(
                time_since(start), epoch,  i *
                len(signal), len(train_loader.dataset),
                100. * i * len(signal) / len(train_loader.dataset),
                total_loss / i * len(signal)))

    return total_loss


# Testing cycle
def test(name=None):
    
    print("evaluating trained model ...")
    correct = 0
    train_data_size = len(val_loader.dataset)

    for signal, label in val_loader:
        output = classifier(signal)
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(label.data.view_as(pred)).cpu().sum()

    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, train_data_size, 100. * correct / train_data_size))

# Some utility functions

In [138]:
def create_variable(tensor):
    # Do cuda() before wrapping with variable
    if torch.cuda.is_available():
        return Variable(tensor.cuda())
    else:
        return Variable(tensor)

In [139]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

# Model Training & Valuation

In [163]:
N_EPOCHS = 100

N_INPUT = 54
HIDDEN_SIZE = 180
N_CLASSES = 10
N_LAYERS = 1


In [170]:
N_EPOCHS = 100

N_INPUT = 54
HIDDEN_SIZE = 180
N_CLASSES = 10
N_LAYERS = 2

classifier = RNNClassifier(
    input_size=N_INPUT,
    hidden_size=HIDDEN_SIZE,
    output_size=N_CLASSES,
    n_layers=N_LAYERS,
    # bidirectional=False
)


In [171]:
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [33, xxx] -> [11, ...], [11, ...], [11, ...] on 3 GPUs
    classifier = nn.DataParallel(classifier)

In [172]:
if torch.cuda.is_available():
    classifier.cuda()

In [173]:
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

start = time.time()
print("Training for %d epochs..." % N_EPOCHS)
for epoch in range(1, N_EPOCHS + 1):
    # Train cycle
    train()

    # Testing
    test()

Training for 100 epochs...
evaluating trained model ...

Test set: Accuracy: 2863/6576 (44%)

evaluating trained model ...

Test set: Accuracy: 2991/6576 (45%)

evaluating trained model ...

Test set: Accuracy: 2959/6576 (45%)

evaluating trained model ...

Test set: Accuracy: 2841/6576 (43%)

evaluating trained model ...

Test set: Accuracy: 2930/6576 (45%)



KeyboardInterrupt: 