# Urban Sound Classification Project

Dataset source: https://www.kaggle.com/pavansanagapati/urban-sound-classification

The following code loads a sample sound file from the dataset:

In [1]:
import IPython.display as ipd
# Build an IPython audio object for one training clip. As the last expression
# of the cell, the notebook is expected to render it as an inline player.
ipd.Audio('../datasets/UrbanSound/Train/1008.wav')

The following code shows the format of the CSV index file:

In [13]:
import pandas as pd
# Load the training index into a DataFrame; per the preview below it has an
# unnamed row-counter column plus the `ID` and `Class` columns.
index = pd.read_csv('../datasets/UrbanSound/train.csv')
# Preview the first rows of the index.
index.head()

Unnamed: 0,ID,Class
0,0,siren
1,1,street_music
2,2,drilling
3,3,siren
4,4,dog_bark


We use the librosa package to load the original sound files and to extract features (mel spectrograms) from them.

After that, we use the h5py package to store the extracted data in an HDF5 file.

In [1]:
from tqdm import tnrange, tqdm_notebook
import librosa
import numpy as np
import pandas as pd
import h5py

# Feature extraction: turn every training clip listed in train.csv into a
# 128-column mel spectrogram and store all spectrograms plus their integer
# class labels in one HDF5 file.

# Directory holding the training .wav files; the file name is '<ID>.wav'.
SOURCE = '../raw/Train/'

# Mapping from the class name found in train.csv to its integer label.
classes = {'siren': 0,
           'street_music': 1,
           'drilling': 2,
           'dog_bark': 3,
           'children_playing': 4,
           'gun_shot': 5,
           'engine_idling': 6,
           'air_conditioner': 7,
           'jackhammer': 8,
           'car_horn': 9}

# Number of time frames (columns) every stored spectrogram must have.
N_FRAMES = 128

index = pd.read_csv('../raw/train.csv')
labels = []
data = []
# Passing `total` lets the progress bar show real progress; itertuples() is a
# plain iterator and has no length of its own.
for row in tqdm_notebook(index.itertuples(), total=len(index)):
    # Load at most the first 2.97 seconds of the clip.
    x, sample_rate = librosa.load(SOURCE + str(row.ID) + '.wav', duration=2.97)
    ps = librosa.feature.melspectrogram(y=x, sr=sample_rate)

    # Bring every spectrogram to exactly N_FRAMES columns so they can be
    # stacked into one array: right-pad with zeros when there are too few
    # frames, and defensively truncate if there are ever too many.
    missing = N_FRAMES - ps.shape[1]
    if missing > 0:
        ps = np.pad(ps, ((0, 0), (0, missing)), mode='constant')
    elif missing < 0:
        ps = ps[:, :N_FRAMES]

    labels.append(classes[row.Class])
    data.append(ps)

# Open the output file only once extraction has succeeded, so a failure in the
# loop above does not truncate a previously written file, and use a context
# manager so the file is flushed and closed.
with h5py.File('../data/extracted_data.hdf5', 'w') as h5_data:
    h5_data.create_dataset('train_label', data=labels)
    # The original float64 zero padding promoted the stacked array to float64
    # (the recorded output shows "<f8"); request float64 explicitly so the
    # stored dtype does not depend on whether any clip needed padding.
    train_data = h5_data.create_dataset(
        'train_data', data=np.asarray(data, dtype=np.float64))
    # Show the dataset summary (name, shape, dtype) while the file is open.
    print(train_data)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




<HDF5 dataset "train_data": shape (5435, 128, 128), type "<f8">

If you are using Keras, the training procedure is as follows:

In [1]:
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Sequential
from tqdm import tnrange, tqdm
import numpy as np
import random
import h5py

# Training-data preparation: load the extracted spectrograms and labels,
# shuffle them, split them into training and validation sets, and convert
# them to the array shapes used for training.

# Number of shuffled samples used for training; the rest is validation data.
N_TRAIN = 5000
# Number of distinct class labels used for the one-hot encoding.
NUM_CLASSES = 10

# NOTE(review): the extraction step writes '../data/extracted_data.hdf5' while
# this cell reads './extracted_data.hdf5' -- confirm the file is copied/moved.
# Read both datasets into memory in one go (instead of one HDF5 read per
# sample) and close the file as soon as the data has been loaded.
with h5py.File('./extracted_data.hdf5', 'r') as h5_in:
    data = h5_in['train_data'][:]
    labels = h5_in['train_label'][:]

print(data.shape, labels.shape)

# Pair each spectrogram with its label so shuffling keeps them aligned.
S = list(zip(data, labels))

random.shuffle(S)

train = S[:N_TRAIN]
val = S[N_TRAIN:]
if not train or not val:
    # Unpacking an empty split below would fail with an opaque
    # "not enough values to unpack" error; fail with a clear message instead.
    raise ValueError(
        'need more than %d samples to build both a training and a '
        'validation split, got %d' % (N_TRAIN, len(S)))

X_train, Y_train = zip(*train)
X_val, Y_val = zip(*val)
# Append a single channel axis: (n, 128, 128) -> (n, 128, 128, 1).
X_train = np.array(X_train).reshape((-1, 128, 128, 1))
X_val = np.array(X_val).reshape((-1, 128, 128, 1))

# One-hot encode the integer labels; to_categorical already returns a NumPy
# array, so no extra np.array() wrapper is needed.
Y_train = keras.utils.to_categorical(Y_train, NUM_CLASSES)
Y_val = keras.utils.to_categorical(Y_val, NUM_CLASSES)

'''
    Your model here
'''

Using TensorFlow backend.


(5435, 128, 128) (5435,)


'\n    Your model here\n'

And then, testing: