In [1]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from os.path import splitext
from glob import glob
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import Dropout, Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam, SGD, RMSprop
from keras import optimizers
import os

from numpy.random import seed

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

Using TensorFlow backend.


In [2]:
# new version: Load spectograms to RAM


df = pd.read_csv("/home/manuel/eihw/data_work/manuel/urban_sounds/UrbanSound8K/metadata/UrbanSound8K.csv")
overlap = 0.5
#width = 64
sampling_rate = 16000
spec_length = 2 #sec
min_length = 0.2 #sec
n_classes = 10


file_wildcard = '/home/manuel/eihw/data_work/manuel/urban_sounds/UrbanSound8K/audio/*/*.wav'
file_folder = '/home/manuel/eihw/data_work/manuel/urban_sounds/UrbanSound8K/audio/'

print(df.head(5))
df = df[["slice_file_name", "classID"]]
x_train = []
y_train = []
x_test = []
y_test = []

for row_number, row in enumerate(df.iterrows()):
    filename = row[1]["slice_file_name"]
    label = int(row[1]["classID"])
    filepath = glob(file_folder + "*/" + filename)[0]
    y, sr = librosa.load(filepath, sr=sampling_rate)
    y_length = y.shape[0]/sampling_rate
    # create spectograms with length 2s and 50% (1s) overlap
    for j in range(int(y_length/spec_length/overlap+1)):
        y_spec = y[int(overlap*j*sampling_rate*spec_length):int((overlap*j+1)*sampling_rate*spec_length)]
        # minimum length for spectogram 0.2s
        if len(y_spec) < min_length * sampling_rate:
            break
        # zero pad too short files
        if len(y_spec) < spec_length * sampling_rate:
            y_spec = np.concatenate((y_spec, np.zeros(spec_length*sampling_rate - len(y_spec), dtype = np.int16)))
        spec = librosa.feature.melspectrogram(y=y_spec)
        spec = librosa.power_to_db(spec)
        spec = np.expand_dims(spec,0)
        spec = np.expand_dims(spec,3)
        if "fold10" in filepath:
            x_test.append(spec)
            y_test.append(label)
        else:
            x_train.append(spec)
            y_train.append(label)
    if row_number % 100 == 0:
        print("Loading: {}/{}".format(row_number, df.shape[0]))

x_train = np.vstack(x_train)
x_test = np.vstack(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

y_train = keras.utils.to_categorical(y_train, n_classes)
y_test = keras.utils.to_categorical(y_test, n_classes)

print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)


      slice_file_name    fsID  start        end  salience  fold  classID  \
0    100032-3-0-0.wav  100032    0.0   0.317551         1     5        3   
1  100263-2-0-117.wav  100263   58.5  62.500000         1     5        2   
2  100263-2-0-121.wav  100263   60.5  64.500000         1     5        2   
3  100263-2-0-126.wav  100263   63.0  67.000000         1     5        2   
4  100263-2-0-137.wav  100263   68.5  72.500000         1     5        2   

              class  
0          dog_bark  
1  children_playing  
2  children_playing  
3  children_playing  
4  children_playing  
Loading: 0/8732
Loading: 100/8732
Loading: 200/8732
Loading: 300/8732
Loading: 400/8732
Loading: 500/8732
Loading: 600/8732
Loading: 700/8732
Loading: 800/8732
Loading: 900/8732
Loading: 1000/8732
Loading: 1100/8732
Loading: 1200/8732
Loading: 1300/8732
Loading: 1400/8732
Loading: 1500/8732
Loading: 1600/8732
Loading: 1700/8732
Loading: 1800/8732
Loading: 1900/8732
Loading: 2000/8732
Loading: 2100/8732
Loadi

In [3]:
seed(42)

model = Sequential()
model.add(Conv2D(16, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()









Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 128, 63, 16)       160       
_________________________________________________________________
batch_normalization_1 (Batch (None, 128, 63, 16)       64        
_________________________________________________________________
activation_1 (Activation)    (None, 128, 63, 16)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128, 63, 16)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 126, 61, 32)       4640      
_________________________________________________________________
activation_2 (Activation)    (None, 126, 61, 32)       0         
___________________

Results depend heavily on the chosen test-set

In [4]:
history = model.fit(x_train, y_train,
          batch_size=64,
          epochs=3,
          verbose=1,
          validation_split=0.2)
results = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', results[0])
print('Test accuracy:', results[1])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 23089 samples, validate on 5773 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test loss: 1.2522798397748593
Test accuracy: 0.6839883079115312
