In [66]:
%matplotlib notebook

import glob
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram


In [67]:
def load_sound_files(file_paths):
    raw_sounds = []
    for fp in file_paths:
        X, sr = librosa.load(fp)
        raw_sounds.append(X)
    return raw_sounds


In [68]:
def plot_waves(sound_names, raw_sounds):
    i = 1
    fig = plt.figure()
    for n,f in zip(sound_names,raw_sounds):
        plt.subplot(10,1,i)
        librosa.display.waveplot(np.array(f),sr=22050)
        plt.title(n.title())
        i += 1
    plt.suptitle("Figure 1: Waveplot",x=0.5, y=0.915,fontsize=18)
    plt.show()

In [69]:
def plot_specgram(sound_names, raw_sounds):
    i = 1
    fig = plt.figure()
    for n, f in zip(sound_names, raw_sounds):
        plt.subplot(10, 1, i)
        specgram(np.array(f), Fs=22050)
        plt.title(n.title())
        i += 1
    plt.suptitle("Figure 2: Spectrogram", x=0.5, y=0.915, fontsize=18)
    plt.show()


In [70]:
def plot_log_power_specgram(sound_names, raw_sounds):
    i = 1
    fig = plt.figure()
    for n, f in zip(sound_names, raw_sounds):
        plt.subplot(10, 1, i)
        D = librosa.core.amplitude_to_db(np.abs(librosa.stft(f)) ** 2)
        librosa.display.specshow(D, x_axis='time', y_axis='log')
        plt.title(n.title())
        i += 1
    plt.suptitle("Figure 3: Log power spectrogram", x=0.5, y=0.915, fontsize=18)
    plt.show()


In [71]:
sound_file_paths = ['sounds/car_horn/7061-6-0-0.wav']


In [72]:
sound_names = ["car horn"]

In [73]:
raw_sounds = load_sound_files(sound_file_paths)

In [74]:
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz

In [75]:
def parse_audio_files(parent_dir,sub_dirs,file_ext="*.wav"):
    features, labels = np.empty((0,193)), np.empty(0)
    for label, sub_dir in enumerate(sub_dirs):
        files = glob.glob(os.path.join(parent_dir, sub_dir, file_ext))
        for i, fn in enumerate(files):
            print("{}, {} left".format(fn, len(files) - i))
            try:
              mfccs, chroma, mel, contrast,tonnetz = extract_feature(fn)
            except Exception as e:
              print("Error encountered while parsing file: ", fn)
              continue
            ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
            features = np.vstack([features,ext_features])
            labels = np.append(labels, fn.split('/')[2].split('-')[1])
    return np.array(features), np.array(labels, dtype = np.int)

In [76]:
def one_hot_encode(labels):
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode

In [77]:
# plot_waves(sound_names, raw_sounds)
# plot_specgram(sound_names, raw_sounds)
# plot_log_power_specgram(sound_names, raw_sounds)

In [78]:
import tensorflow as tf
from sklearn.metrics import precision_recall_fscore_support

In [79]:
parent_dir = 'sounds'

sub_dirs = ['car_horn']
features, labels = parse_audio_files(parent_dir, sub_dirs)

sounds/car_horn/135776-2-0-49.wav, 873 left
sounds/car_horn/46654-6-0-0.wav, 872 left
sounds/car_horn/57320-0-0-24.wav, 871 left
sounds/car_horn/134717-0-0-26.wav, 870 left
sounds/car_horn/174276-7-5-0.wav, 869 left
sounds/car_horn/180937-7-2-6.wav, 868 left
sounds/car_horn/17913-4-0-1.wav, 867 left
sounds/car_horn/103074-7-4-6.wav, 866 left
sounds/car_horn/176787-5-0-9.wav, 865 left
sounds/car_horn/55020-4-0-11.wav, 864 left
sounds/car_horn/138015-3-0-7.wav, 863 left
sounds/car_horn/180937-7-0-4.wav, 862 left
sounds/car_horn/180937-7-1-13.wav, 861 left
sounds/car_horn/157867-8-0-10.wav, 860 left
sounds/car_horn/103074-7-2-0.wav, 859 left
sounds/car_horn/113205-5-1-4.wav, 858 left
sounds/car_horn/57584-4-0-8.wav, 857 left
sounds/car_horn/118279-8-0-8.wav, 856 left
sounds/car_horn/137156-9-0-73.wav, 855 left
sounds/car_horn/165645-4-1-0.wav, 854 left
sounds/car_horn/182800-2-2-0.wav, 853 left
sounds/car_horn/103074-7-0-2.wav, 852 left
sounds/car_horn/159738-8-0-1.wav, 851 left
sounds/ca



sounds/car_horn/118279-8-0-9.wav, 841 left
sounds/car_horn/191431-9-0-8.wav, 840 left
sounds/car_horn/139951-9-0-33.wav, 839 left
sounds/car_horn/113205-5-1-5.wav, 838 left
sounds/car_horn/180937-7-1-12.wav, 837 left
sounds/car_horn/180937-7-0-5.wav, 836 left
sounds/car_horn/157867-8-0-11.wav, 835 left
sounds/car_horn/99180-9-0-7.wav, 834 left
sounds/car_horn/176787-5-0-8.wav, 833 left
sounds/car_horn/55020-4-0-10.wav, 832 left
sounds/car_horn/192124-2-0-0.wav, 831 left
sounds/car_horn/98223-7-1-0.wav, 830 left
sounds/car_horn/17913-4-0-0.wav, 829 left
sounds/car_horn/134717-0-0-27.wav, 828 left
sounds/car_horn/72261-3-0-23.wav, 827 left
sounds/car_horn/180937-7-2-7.wav, 826 left
sounds/car_horn/143651-2-0-0.wav, 825 left
sounds/car_horn/57320-0-0-31.wav, 824 left
sounds/car_horn/54858-3-1-2.wav, 823 left
sounds/car_horn/98223-7-3-0.wav, 822 left
sounds/car_horn/17913-4-2-0.wav, 821 left
sounds/car_horn/97317-2-0-28.wav, 820 left
sounds/car_horn/134717-0-0-19.wav, 819 left
sounds/car_h

sounds/car_horn/57320-0-0-9.wav, 649 left
sounds/car_horn/194732-9-0-95.wav, 648 left
sounds/car_horn/40722-8-0-1.wav, 647 left
sounds/car_horn/146186-5-0-13.wav, 646 left
sounds/car_horn/193698-2-0-42.wav, 645 left
sounds/car_horn/160009-2-0-11.wav, 644 left
sounds/car_horn/77766-9-0-4.wav, 643 left
sounds/car_horn/176787-5-0-26.wav, 642 left
sounds/car_horn/164053-8-2-1.wav, 641 left
sounds/car_horn/108362-2-0-30.wav, 640 left
sounds/car_horn/159738-8-0-19.wav, 639 left
sounds/car_horn/139951-9-0-45.wav, 638 left
sounds/car_horn/102305-6-0-0.wav, 637 left
sounds/car_horn/180937-7-1-7.wav, 636 left
sounds/car_horn/192124-2-0-15.wav, 635 left
sounds/car_horn/98223-7-2-0.wav, 634 left
sounds/car_horn/124489-9-0-17.wav, 633 left
sounds/car_horn/46669-4-0-49.wav, 632 left
sounds/car_horn/194732-9-0-126.wav, 631 left
sounds/car_horn/46669-4-0-61.wav, 630 left
sounds/car_horn/119455-5-0-7.wav, 629 left
sounds/car_horn/151977-0-0-2.wav, 628 left
sounds/car_horn/157867-8-0-1.wav, 627 left
sou

sounds/car_horn/174276-7-2-0.wav, 457 left
sounds/car_horn/176714-2-0-77.wav, 456 left
sounds/car_horn/134717-0-0-6.wav, 455 left
sounds/car_horn/197073-3-3-0.wav, 454 left
sounds/car_horn/103258-5-0-15.wav, 453 left
sounds/car_horn/40722-8-0-4.wav, 452 left
sounds/car_horn/118101-3-0-4.wav, 451 left
sounds/car_horn/15564-2-0-0.wav, 450 left
sounds/car_horn/106905-8-0-2.wav, 449 left
sounds/car_horn/176787-5-0-23.wav, 448 left
sounds/car_horn/108362-2-0-21.wav, 447 left
sounds/car_horn/180937-7-1-2.wav, 446 left
sounds/car_horn/108041-9-0-11.wav, 445 left
sounds/car_horn/119455-5-0-2.wav, 444 left
sounds/car_horn/157867-8-0-4.wav, 443 left
sounds/car_horn/26270-9-0-35.wav, 442 left
sounds/car_horn/151977-0-0-7.wav, 441 left
sounds/car_horn/191431-9-0-66.wav, 440 left
sounds/car_horn/197318-6-12-0.wav, 439 left
sounds/car_horn/124489-9-0-12.wav, 438 left
sounds/car_horn/105415-2-0-15.wav, 437 left
sounds/car_horn/180937-7-3-0.wav, 436 left
sounds/car_horn/78360-4-0-11.wav, 435 left
soun

sounds/car_horn/147764-4-7-0.wav, 267 left
sounds/car_horn/180256-3-0-3.wav, 266 left
sounds/car_horn/24074-1-0-0.wav, 265 left
sounds/car_horn/177621-0-0-104.wav, 264 left
sounds/car_horn/24074-1-0-2.wav, 263 left
sounds/car_horn/135776-2-0-32.wav, 262 left
sounds/car_horn/180256-3-0-1.wav, 261 left
sounds/car_horn/57320-0-0-5.wav, 260 left
sounds/car_horn/176638-1-1-0.wav, 259 left
sounds/car_horn/177621-0-0-46.wav, 258 left
sounds/car_horn/46656-6-4-0.wav, 257 left
sounds/car_horn/147764-4-5-0.wav, 256 left
sounds/car_horn/177621-0-0-91.wav, 255 left
sounds/car_horn/164312-3-4-0.wav, 254 left
sounds/car_horn/176787-5-0-16.wav, 253 left
sounds/car_horn/78360-4-0-7.wav, 252 left
sounds/car_horn/159738-8-0-15.wav, 251 left
sounds/car_horn/50901-0-1-0.wav, 250 left
sounds/car_horn/73277-9-0-19.wav, 249 left
sounds/car_horn/118963-3-0-0.wav, 248 left
sounds/car_horn/114587-3-0-7.wav, 247 left
sounds/car_horn/165067-2-0-9.wav, 246 left
sounds/car_horn/46669-4-0-45.wav, 245 left
sounds/car

  if np.any(X < 0) or np.any(X_ref < 0):
  Z = np.maximum(X, X_ref).astype(dtype)
  bad_idx = (Z < np.finfo(dtype).tiny)


sounds/car_horn/177621-0-0-35.wav, 170 left
sounds/car_horn/155202-9-0-135.wav, 169 left
sounds/car_horn/57320-0-0-10.wav, 168 left
sounds/car_horn/146186-5-0-5.wav, 167 left
sounds/car_horn/134717-0-0-12.wav, 166 left
sounds/car_horn/122690-6-0-0.wav, 165 left
sounds/car_horn/57320-0-0-38.wav, 164 left
sounds/car_horn/97317-2-0-23.wav, 163 left
sounds/car_horn/87275-1-3-0.wav, 162 left
sounds/car_horn/176714-2-0-33.wav, 161 left
sounds/car_horn/146186-5-0-7.wav, 160 left
sounds/car_horn/176787-5-0-3.wav, 159 left
sounds/car_horn/160009-2-0-50.wav, 158 left
sounds/car_horn/55020-4-0-0.wav, 157 left
sounds/car_horn/157867-8-0-26.wav, 156 left
sounds/car_horn/196400-6-0-0.wav, 155 left
sounds/car_horn/125791-3-0-13.wav, 154 left
sounds/car_horn/69304-9-0-7.wav, 153 left
sounds/car_horn/57584-4-0-2.wav, 152 left
sounds/car_horn/137156-9-0-79.wav, 151 left
sounds/car_horn/46918-5-0-2.wav, 150 left
sounds/car_horn/118279-8-0-2.wav, 149 left
sounds/car_horn/108041-9-0-2.wav, 148 left
sounds/

In [None]:
labels = one_hot_encode(labels)


In [97]:
train_test_split = np.random.rand(len(features)) < 0.70
train_x = features[train_test_split]
train_y = labels[train_test_split]
test_x = features[~train_test_split]
test_y = labels[~train_test_split]

In [81]:
training_epochs = 5000
n_dim = features.shape[1]
n_classes = 10
n_hidden_units_one = 280 
n_hidden_units_two = 300
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.01

In [82]:
X = tf.placeholder(tf.float32,[None,n_dim])
Y = tf.placeholder(tf.float32,[None,n_classes])

W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_one], mean = 0, stddev=sd))
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean = 0, stddev=sd))
h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)


W_2 = tf.Variable(tf.random_normal([n_hidden_units_one,n_hidden_units_two], mean = 0, stddev=sd))
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean = 0, stddev=sd))
h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)


W = tf.Variable(tf.random_normal([n_hidden_units_two,n_classes], mean = 0, stddev=sd))
b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd))
y_ = tf.nn.softmax(tf.matmul(h_2,W) + b)

init = tf.global_variables_initializer()

In [96]:
cost_function = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(y_), reduction_indices=[1])) 
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [100]:
cost_history = np.empty(shape=[1],dtype=float)
y_true, y_pred = None, None
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        if epoch % 500 == 0:
            print("epoch number == {}".format(epoch))
        _,cost = sess.run([optimizer,cost_function],feed_dict={X:train_x,Y:train_y})
        cost_history = np.append(cost_history,cost)
        
    print("Test accuracy: ",round(sess.run(accuracy, feed_dict={X: test_x, Y: test_y}),3))
    
    y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: test_x})
    y_true = sess.run(tf.argmax(test_y,1))
    saver.save(sess, "/tmp/model.ckpt")

epoch number == 0
epoch number == 500
epoch number == 1000
epoch number == 1500
epoch number == 2000
epoch number == 2500
epoch number == 3000
epoch number == 3500
epoch number == 4000
epoch number == 4500
Test accuracy:  0.914


In [115]:
features = np.empty(0)
ext_features = np.hstack(extract_feature('sounds/car_horn/7061-6-0-0.wav'))
mytest_x = np.array(np.vstack([features,ext_features]))

with tf.Session() as sess:
    # Restore variables from disk.
    saver.restore(sess, "/tmp/model.ckpt")
    y_pred = sess.run(tf.argmax(y_,1), feed_dict={X: mytest_x})

ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [113]:
y_pred

array([6])

In [114]:
features.shape[1]

193