In [1]:
import tensorflow as tf
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Sequential
import librosa
import librosa.display
import numpy as np
import pandas as pd
import random

import warnings
# NOTE(review): this installs a blanket 'ignore' filter, so every Python
# warning raised after this point in the session is suppressed.
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [2]:
# Read the UrbanSound8K metadata CSV and preview its first five rows.
metadata_csv = 'Desktop/data/UrbanSound8K/metadata/UrbanSound8K.csv'
data = pd.read_csv(metadata_csv)
data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [3]:
# (rows, columns) of the metadata table
data.shape

(8732, 8)

In [4]:
# Keep only the clips that last at least 3 seconds (end - start >= 3).
clip_seconds = data['end'] - data['start']

# One .loc call selects rows and columns together, and .copy() makes
# valid_data an independent frame, so adding columns to it later is
# unambiguous (no chained-indexing / SettingWithCopy issue).
valid_data = data.loc[
    clip_seconds >= 3,
    ['slice_file_name', 'fold', 'classID', 'class'],
].copy()
valid_data.shape

(7468, 4)

In [5]:
# Example: mel spectrogram of a single siren clip.
# Only the first 2.97 s of audio are loaded.
example_wav = 'Desktop/data/UrbanSound8K/audio/fold6/135160-8-0-0.wav'
y, sr = librosa.load(example_wav, duration=2.97)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
ps.shape

(128, 128)

In [6]:
# Relative audio path of every clip, built as 'fold<fold>/<slice_file_name>'.
fold_dir = 'fold' + valid_data['fold'].astype('str')
file_name = valid_data['slice_file_name'].astype('str')
valid_data['path'] = fold_dir + '/' + file_name

In [7]:
# Preview only the first rows instead of rendering the whole frame.
valid_data.head(10)

Unnamed: 0,slice_file_name,fold,classID,class,path
1,100263-2-0-117.wav,5,2,children_playing,fold5/100263-2-0-117.wav
2,100263-2-0-121.wav,5,2,children_playing,fold5/100263-2-0-121.wav
3,100263-2-0-126.wav,5,2,children_playing,fold5/100263-2-0-126.wav
4,100263-2-0-137.wav,5,2,children_playing,fold5/100263-2-0-137.wav
5,100263-2-0-143.wav,5,2,children_playing,fold5/100263-2-0-143.wav
6,100263-2-0-161.wav,5,2,children_playing,fold5/100263-2-0-161.wav
7,100263-2-0-3.wav,5,2,children_playing,fold5/100263-2-0-3.wav
8,100263-2-0-36.wav,5,2,children_playing,fold5/100263-2-0-36.wav
14,100652-3-0-0.wav,2,3,dog_bark,fold2/100652-3-0-0.wav
15,100652-3-0-1.wav,2,3,dog_bark,fold2/100652-3-0-1.wav


In [8]:
D = []  # Dataset: list of (mel spectrogram, classID) pairs

audio_root = 'Desktop/data/UrbanSound8K/audio/'
for row in valid_data.itertuples():
    # Load at most the first 2.97 s of the clip and compute its mel spectrogram.
    y, sr = librosa.load(audio_root + row.path, duration=2.97)
    ps = librosa.feature.melspectrogram(y=y, sr=sr)
    # Keep the clip only when its spectrogram is exactly 128x128;
    # any other shape is skipped.
    if ps.shape == (128, 128):
        D.append((ps, row.classID))

In [9]:
# Shuffle a *copy* of D with a fixed seed: D keeps its original order and
# re-running this cell always produces the same train/test split.
SPLIT_SEED = 42
dataset = list(D)
random.Random(SPLIT_SEED).shuffle(dataset)

# NOTE(review): a purely random split can place slices cut from the same
# recording (same fsID) in both train and test, which presumably inflates the
# test score -- consider splitting on the metadata's `fold` column instead.
train = dataset[:7000]
test = dataset[7000:]

X_train, y_train = zip(*train)
X_test, y_test = zip(*test)

# Stack the spectrograms into (n_samples, 128, 128) and append a trailing
# channel axis so the CNN receives (n_samples, 128, 128, 1).
X_train = np.stack(X_train)[..., np.newaxis]
X_test = np.stack(X_test)[..., np.newaxis]

# One-hot encode the 10 class IDs (to_categorical already returns an ndarray).
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [10]:
input_shape = (128, 128, 1)

# CNN classifier: three 5x5 convolution stages followed by a small dense head
# that ends in a 10-way softmax.
model = Sequential([
    # Stage 1: 24 filters, 4x2 max-pooling, ReLU
    Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 2: 48 filters, 4x2 max-pooling, ReLU
    Conv2D(48, (5, 5), padding="valid"),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 3: 48 filters, ReLU (no pooling)
    Conv2D(48, (5, 5), padding="valid"),
    Activation('relu'),

    # Classifier head with dropout before and after the hidden layer
    Flatten(),
    Dropout(rate=0.5),
    Dense(64),
    Activation('relu'),
    Dropout(rate=0.5),

    # Output: one probability per class
    Dense(10),
    Activation('softmax'),
])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [11]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# Train for 30 epochs in batches of 128, reporting loss/accuracy on
# (X_test, y_test) after every epoch.
# The returned History object is kept in `history` so the per-epoch metrics
# stay available after training, rather than being shown once as an opaque
# object repr and then lost.
# NOTE(review): the data passed as validation_data here is the same held-out
# set (X_test, y_test); there is no separate validation split.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=128,
    validation_data=(X_test, y_test))

Instructions for updating:
Use tf.cast instead.
Train on 7000 samples, validate on 467 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x268dd9f6390>

In [13]:
# Evaluate on the held-out set; score holds [loss, accuracy].
score = model.evaluate(x=X_test, y=y_test)

# Print each value next to its label.
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.6246750721073763
Test accuracy: 0.8072805150673272


In [14]:
# Predicted class index per test sample: position of the largest model output.
np.argmax(model.predict(X_test), axis=1)

array([3, 5, 9, 5, 7, 8, 4, 8, 0, 2, 5, 5, 7, 0, 6, 5, 0, 7, 0, 7, 7, 9,
       3, 4, 3, 7, 2, 2, 7, 4, 9, 5, 9, 9, 5, 2, 7, 9, 9, 5, 4, 2, 0, 8,
       4, 9, 2, 9, 3, 7, 5, 7, 1, 2, 9, 2, 3, 5, 2, 4, 4, 4, 0, 8, 2, 2,
       7, 2, 4, 1, 0, 5, 2, 8, 2, 2, 3, 8, 0, 9, 3, 0, 0, 2, 7, 2, 7, 7,
       4, 8, 0, 0, 7, 4, 0, 5, 7, 5, 0, 9, 8, 0, 9, 9, 3, 5, 9, 3, 9, 1,
       7, 7, 2, 9, 2, 0, 2, 7, 0, 5, 8, 6, 4, 8, 1, 7, 4, 9, 7, 4, 2, 0,
       9, 7, 4, 5, 0, 3, 7, 3, 0, 0, 0, 7, 1, 3, 4, 7, 2, 9, 5, 7, 9, 7,
       2, 4, 0, 7, 3, 9, 2, 9, 7, 9, 3, 2, 4, 0, 7, 0, 8, 9, 8, 5, 2, 0,
       7, 7, 4, 9, 5, 4, 7, 1, 8, 7, 8, 5, 2, 5, 8, 9, 9, 8, 5, 5, 4, 4,
       2, 5, 6, 0, 7, 9, 4, 0, 5, 2, 9, 7, 4, 7, 0, 5, 0, 8, 9, 2, 0, 2,
       7, 8, 3, 9, 9, 2, 8, 2, 0, 1, 0, 2, 5, 5, 3, 5, 5, 2, 0, 7, 4, 8,
       5, 5, 7, 5, 0, 7, 8, 0, 0, 9, 7, 8, 7, 0, 7, 7, 8, 3, 2, 5, 8, 5,
       0, 2, 9, 5, 3, 7, 9, 5, 5, 2, 0, 0, 4, 3, 2, 2, 1, 0, 7, 2, 8, 8,
       2, 9, 4, 5, 7, 0, 0, 0, 9, 4, 0, 0, 0, 9, 8,

In [15]:
# True class index per test sample, recovered from the one-hot rows.
np.argmax(y_test, axis=1)

array([2, 5, 9, 5, 7, 8, 4, 8, 0, 8, 5, 5, 7, 8, 6, 5, 0, 1, 0, 7, 7, 9,
       3, 4, 3, 7, 4, 2, 7, 9, 9, 5, 9, 9, 5, 2, 7, 9, 9, 5, 4, 2, 0, 8,
       4, 9, 3, 9, 3, 7, 4, 7, 1, 3, 9, 9, 3, 5, 0, 4, 4, 4, 9, 8, 2, 8,
       7, 9, 8, 1, 0, 7, 2, 8, 2, 0, 9, 8, 0, 9, 9, 0, 0, 2, 4, 2, 7, 7,
       4, 8, 0, 5, 7, 4, 2, 5, 0, 5, 0, 9, 8, 0, 9, 9, 3, 5, 9, 2, 9, 1,
       7, 7, 2, 9, 3, 2, 2, 7, 0, 5, 8, 6, 4, 8, 1, 7, 1, 9, 7, 4, 2, 0,
       1, 7, 4, 5, 0, 3, 4, 3, 0, 0, 0, 7, 1, 3, 4, 7, 2, 9, 0, 7, 9, 7,
       2, 4, 0, 7, 3, 9, 0, 9, 7, 9, 3, 9, 4, 9, 7, 0, 8, 9, 8, 5, 2, 0,
       7, 7, 4, 9, 5, 3, 7, 1, 8, 4, 8, 5, 3, 5, 8, 9, 9, 8, 5, 5, 4, 4,
       2, 5, 6, 5, 7, 9, 4, 0, 5, 2, 9, 9, 4, 7, 0, 5, 0, 5, 9, 2, 5, 9,
       7, 8, 3, 9, 9, 2, 8, 2, 0, 5, 0, 2, 5, 2, 3, 5, 8, 3, 2, 7, 4, 8,
       5, 5, 7, 0, 0, 7, 8, 0, 8, 9, 7, 8, 7, 9, 5, 4, 8, 3, 0, 5, 8, 5,
       0, 2, 9, 5, 3, 8, 9, 5, 5, 2, 0, 5, 0, 3, 2, 9, 1, 0, 7, 2, 8, 8,
       2, 9, 4, 0, 7, 0, 0, 0, 9, 4, 0, 0, 0, 9, 8,