In [1]:
import pandas as pd
import os
import librosa
import numpy as np
import tensorflow as tf


# Project-local helper; its read_file_properties() is called on every clip in the next cell.
from Documents.helpers.wavfilehelper import WavFileHelper
wavfilehelper = WavFileHelper()

In [2]:
# Collect the WAV header properties of every clip listed in the dataset metadata.
audiodata = []
fulldatasetpath = 'C:/Users/lenovo/UrbanSound8K'

metadata = pd.read_csv(fulldatasetpath + '/metadata/UrbanSound8K.csv')

# Resolve the dataset root once instead of repeating the literal path per row.
dataset_root = os.path.abspath(fulldatasetpath)

for index, row in metadata.iterrows():
    # Clips are stored as <dataset root>/fold<N>/<slice_file_name>.
    file_name = os.path.join(dataset_root, 'fold' + str(row["fold"]), str(row["slice_file_name"]))
    data = wavfilehelper.read_file_properties(file_name)
    audiodata.append(data)

# Presumably read_file_properties returns (num_channels, sample_rate, bit_depth)
# per file -- confirm against WavFileHelper.
audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])


In [3]:
def extract_features(file_name):
    """Load one audio file and summarise it as five time-averaged feature vectors.

    Each librosa feature matrix is averaged over its time axis, so every
    returned vector has one entry per feature bin regardless of clip length.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.

    Returns
    -------
    tuple or None
        ``(mfccs, chroma, mel, contrast, tonnetz)`` as 1-D numpy arrays, or
        ``None`` when the file could not be loaded or processed. Callers must
        check for ``None`` before unpacking.
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # Magnitude spectrogram shared by the chroma and spectral-contrast features.
        stft = np.abs(librosa.stft(y=audio))
        mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        # The signal must be passed as ``y=``: newer librosa releases made the
        # feature arguments keyword-only, so a positional call raises TypeError.
        mel = np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0)
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        # Tonnetz is computed on the harmonic component of the signal only.
        harmonic = librosa.effects.harmonic(y=audio)
        tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0)

    except Exception as e:
        # Best effort: report the file *and* the reason, then signal failure
        # to the caller instead of aborting the whole extraction run.
        print("Error encountered while parsing file: ", file_name, "-", repr(e))
        return None

    return mfccs, chroma, mel, contrast, tonnetz






In [4]:
# Extract the five feature vectors of every clip and pair them with the clip's class label.
fulldatasetpath = 'C:/Users/lenovo/UrbanSound8K/'

metadata = pd.read_csv(os.path.join(fulldatasetpath, 'metadata', 'UrbanSound8K.csv'))

# Resolve the dataset root once; it does not change between rows.
dataset_root = os.path.abspath(fulldatasetpath)

features = []
skipped_files = []
for index, row in metadata.iterrows():

    # Clips are stored as <dataset root>/fold<N>/<slice_file_name>.
    file_name = os.path.join(dataset_root, 'fold' + str(row["fold"]), str(row["slice_file_name"]))

    class_label = row["class"]
    extracted = extract_features(file_name)
    if extracted is None:
        # extract_features returns None on failure; unpacking it would raise
        # TypeError and abort the whole run, so skip the file and remember it.
        skipped_files.append(file_name)
        continue

    mfccs, chroma, mel, contrast, tonnetz = extracted
    features.append([mfccs, chroma, mel, contrast, tonnetz, class_label])

featuresdf = pd.DataFrame(features, columns=['mfccs','chroma','mel','contrast','tonnetz','class_label'])
if skipped_files:
    print('Skipped ', len(skipped_files), ' files that could not be parsed')
print('Finished feature extraction from ', len(featuresdf), ' files')



Finished feature extraction from  8732  files


In [5]:
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Lay the five per-clip feature vectors side by side so each clip becomes one flat row.
feature_columns = ('mfccs', 'chroma', 'mel', 'contrast', 'tonnetz')
only_features = np.concatenate(
    [featuresdf[column].tolist() for column in feature_columns],
    axis=1,
)
X = np.array(only_features)
y = np.array(featuresdf['class_label'].tolist())

# Map the string class labels to integer ids, then to one-hot rows.
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

# Hold out 20% of the clips for testing (fixed seed for a repeatable split).
# NOTE(review): this is a random split across all folds; the UrbanSound8K
# maintainers reportedly advise evaluating on the predefined folds -- confirm.
train_x, test_x, train_y, test_y = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

Using TensorFlow backend.


In [6]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout,Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.models import model_from_json

In [7]:
# neural network dimensions
n_dim = train_x.shape[1]
n_classes = train_y.shape[1]
n_hidden_units_1 = n_dim
n_hidden_units_2 = 400 # approx n_dim * 2
n_hidden_units_3 = 200 # half of layer 2

In [8]:
# Show the input width on its own, then a one-line summary with the class count.
print(n_dim)
dimension_summary = "Features: {} Classes: {} ".format(n_dim, n_classes)
print(dimension_summary)

193
Features: 193 Classes: 10 


In [9]:

def create_model(activation_function='relu',init_type='normal', kernel_initializer="uniform", optimiser='Adamax', dropout_rate=0.5):
    """Build and compile the fully connected classifier.

    The layer widths are read from the notebook-level names ``n_dim``,
    ``n_hidden_units_1`` .. ``n_hidden_units_3`` and ``n_classes``.

    Parameters
    ----------
    activation_function : str
        Activation used by the three hidden Dense layers.
    init_type : str
        Weight initializer of the first Dense layer.
    kernel_initializer : str
        Weight initializer of every Dense layer after the first. The default
        matches the value that used to be hard-coded in those layers.
    optimiser : str
        Optimizer handed to ``model.compile``.
    dropout_rate : float
        Dropout rate applied after the second and third hidden layers.

    Returns
    -------
    The compiled ``Sequential`` model (categorical cross-entropy loss,
    accuracy metric).
    """
    model = Sequential()
    # layer 1 -- ``kernel_initializer`` replaces the legacy Keras 1 ``init``
    # keyword, which current Keras releases no longer accept.
    model.add(Dense(n_hidden_units_1, input_dim=n_dim, kernel_initializer=init_type, activation=activation_function))
    # layer 2
    model.add(Dense(n_hidden_units_2, kernel_initializer=kernel_initializer, activation=activation_function))
    model.add(Dropout(dropout_rate))
    # layer 3
    model.add(Dense(n_hidden_units_3, kernel_initializer=kernel_initializer, activation=activation_function))
    model.add(Dropout(dropout_rate))
    # output layer: one softmax unit per class
    model.add(Dense(n_classes, kernel_initializer=kernel_initializer, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimiser, metrics=['accuracy'])
    return model

In [10]:
# Build the classifier with its default hyper-parameters.
model = create_model()

# Callback that watches the validation loss with a patience of 3 epochs.
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    mode='auto',
    verbose=0,
)


  


In [11]:
# Print the layer-by-layer architecture and parameter counts of the compiled model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 193)               37442     
_________________________________________________________________
dense_2 (Dense)              (None, 400)               77600     
_________________________________________________________________
dropout_1 (Dropout)          (None, 400)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 200)               80200     
_________________________________________________________________
dropout_2 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)                2010      
Total params: 197,252
Trainable params: 197,252
Non-trainable params: 0
________________________________________________

In [12]:
# Train for up to 30 epochs, holding out 10% of the training split for
# validation; the early-stopping callback may end the run sooner.
history = model.fit(
    train_x,
    train_y,
    batch_size=24,
    epochs=30,
    validation_split=0.1,
    callbacks=[earlystop],
)

Train on 6286 samples, validate on 699 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30


In [13]:
# Evaluate on the held-out test split; evaluate() yields the loss followed by
# the compiled metric (accuracy).
score, accuracy = model.evaluate(
    x=test_x,
    y=test_y,
    verbose=1,
)



In [14]:
# Note the order: test accuracy first, then test loss.
print(accuracy, score)

0.8981110477488452 0.40815886358704373


In [19]:
# Persist the model in two parts: the architecture as JSON and the trained
# weights as an HDF5 file.
model_json = model.to_json()
with open("model.json", 'w') as architecture_file:
    architecture_file.write(model_json)

model.save_weights("model.h5")
print("saved model to disk")

saved model to disk


In [21]:
# Rebuild the model from disk: architecture from the JSON file, weights from
# the HDF5 file. The context manager closes the file even if read() raises.
with open("model.json", 'r') as json_file:
    loaded_model_json = json_file.read()

loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model.h5")
print("loaded model from disk")

loaded model from disk


In [22]:
# A model rebuilt from JSON carries no training configuration, so compile it
# with the same loss, optimizer and metric as the original before evaluating.
loaded_model.compile(
    optimizer='Adamax',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [23]:
# Evaluate the *reloaded* model: scoring the in-memory `model` again would
# not verify that the save/load round trip preserved the weights.
score, accuracy = loaded_model.evaluate(x = test_x, y = test_y , verbose = 1)
# Printed order: test accuracy first, then test loss.
print(accuracy, score)

0.8981110477488452 0.40815886358704373
