In [1]:
import pandas as pd
import wave
import struct
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
%matplotlib inline

def _decode_first_channel(frame, sample_width):
    """Decode the first channel's sample from one raw PCM frame.

    Args:
        frame: Bytes returned by ``Wave_read.readframes(1)``.
        sample_width: Bytes per sample, as reported by ``getsampwidth()``.

    Returns:
        The sample as an ``int``, or ``None`` when ``frame`` holds fewer than
        ``sample_width`` bytes (the file ran out of frames).
    """
    if len(frame) < sample_width:
        return None
    # WAV stores 8-bit samples unsigned; wider samples are signed little-endian.
    return int.from_bytes(frame[:sample_width], 'little',
                          signed=sample_width > 1)


def parse_wave_python(filename):
    """Print a short summary of a WAV file using the standard-library reader.

    The summary lists the channel count, sample rate, the first two samples of
    the first channel, and the clip length in seconds. For mono 16-bit files
    the output is the same as decoding each frame with ``struct`` format
    ``'<h'``; other channel counts and sample widths are decoded as well
    instead of raising ``struct.error``.

    Args:
        filename: Path of the WAV file to inspect.

    Returns:
        None. The summary is written to standard output.
    """
    with wave.open(filename, 'rb') as wave_file:
        # Collect every piece of metadata while the handle is still open, so
        # nothing below touches the reader after it has been closed.
        num_channels = wave_file.getnchannels()
        sample_rate = wave_file.getframerate()
        sample_width = wave_file.getsampwidth()
        length_in_seconds = wave_file.getnframes() / sample_rate

        first_sample = _decode_first_channel(
            wave_file.readframes(1), sample_width)
        second_sample = _decode_first_channel(
            wave_file.readframes(1), sample_width)
    print('''
Parsed {filename}
-----------------------------------------------
Channels: {num_channels}
Sample Rate: {sample_rate}
First Sample: {first_sample}
Second Sample: {second_sample}
Length in Seconds: {length_in_seconds}'''.format(
            filename=filename,
            num_channels=num_channels,
            sample_rate=sample_rate,
            first_sample=first_sample,
            second_sample=second_sample,
            length_in_seconds=length_in_seconds))
        
# Run the parser on one training clip and print its summary.
sample_wav_path = 'Train_folder/train/1.wav'
parse_wave_python(sample_wav_path)



Parsed Train_folder/train/1.wav
-----------------------------------------------
Channels: 1
Sample Rate: 48000
First Sample: -3939
Second Sample: -4940
Length in Seconds: 4.0


In [2]:
def extract_feature(file_name):
    """Extract five time-averaged feature vectors from one audio file with librosa.

    Each feature is computed per frame and then averaged over the time axis,
    so every returned array is one-dimensional.

    Args:
        file_name: Path of the audio file to load with ``librosa.load``.

    Returns:
        A tuple ``(mfccs, chroma, mel, contrast, tonnetz)`` of NumPy arrays:
        MFCCs (40 coefficients), chromagram, mel spectrogram, spectral
        contrast, and tonal centroid features.
    """
    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram, reused by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # Pass the signal by keyword: newer librosa releases make `y` keyword-only,
    # so a positional argument raises TypeError there. `y=` works on old
    # releases too.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0)
    return mfccs, chroma, mel, contrast, tonnetz

In [3]:
# Extract the five feature vectors for a single training clip.
sample_clip_features = extract_feature('Train_folder/train/4316.wav')
mfccs, chroma, mel, contrast, tonnetz = sample_clip_features

In [4]:
# Display the 40 time-averaged MFCC coefficients of the sample clip.
mfccs

array([-1.03483262e+02,  4.37438912e+01,  1.62066177e+01,  2.04347375e+01,
        2.18889983e+00,  2.40929443e+01,  8.44791575e+00,  2.73242543e+01,
        3.93113719e+00, -6.75690264e+00,  5.98093523e+00,  1.44193967e+01,
       -5.05454240e+00,  5.42332584e+00, -8.42011760e+00,  5.09449510e+00,
       -1.69607206e+00, -2.25535080e+00, -1.18245267e+00, -6.55411655e+00,
       -4.44424257e+00,  7.92568749e+00, -4.03099996e+00, -3.59425263e+00,
       -3.16242792e-02,  7.58745412e-01,  3.59599853e-01,  1.72537591e+00,
       -1.07468338e+00,  8.60886468e+00, -4.12876660e+00, -4.00765638e-01,
        6.55324621e+00, -7.58209877e+00, -4.13918052e+00, -8.27819963e-01,
       -4.91430500e+00,  5.49845310e+00, -9.06442783e+00,  5.51283659e+00])

In [5]:
# Load the clip metadata (ID and Class columns) and preview the first rows.
train_csv_path = "Train_folder/train.csv"
df = pd.read_csv(train_csv_path)
df.head()

Unnamed: 0,ID,Class
0,0,siren
1,1,street_music
2,2,drilling
3,3,siren
4,4,dog_bark


In [6]:
# Distinct class names, in order of first appearance in the CSV.
class_column = df['Class']
lista = class_column.unique().tolist()

In [7]:
# Pair every class name with its integer index: [(0, name0), (1, name1), ...].
wavcatalog = list(enumerate(lista))

In [8]:
# Display the (index, class name) pairs used to encode labels.
wavcatalog

[(0, 'siren'),
 (1, 'street_music'),
 (2, 'drilling'),
 (3, 'dog_bark'),
 (4, 'children_playing'),
 (5, 'gun_shot'),
 (6, 'engine_idling'),
 (7, 'air_conditioner'),
 (8, 'jackhammer'),
 (9, 'car_horn')]

In [9]:
# Class name only for index 2 ('drilling'); not displayed because it is not the cell's last expression.
wavcatalog[2][1]
# Full (index, name) tuple for index 2; this is the value the cell displays.
wavcatalog[2]

(2, 'drilling')

In [10]:
# Build the training matrix: one 193-column feature row and one integer class
# index per clip listed in the CSV. Clips that cannot be processed are
# reported and skipped, so `features` and `labels` always stay aligned.

# Look up class indices through a dict instead of a hard-coded 10-way elif
# chain, so any number of classes in `wavcatalog` is handled.
label_to_index = {name: index for index, name in wavcatalog}

# Collect rows in Python lists and stack once at the end; growing NumPy
# arrays inside the loop copies the whole array on every iteration.
feature_rows, label_rows = [], []
for number, label in zip(df.iloc[:, 0], df.iloc[:, 1]):
    filename = f'{number}.wav'
    if label not in label_to_index:
        # Without a known class the row would have features but no label.
        print(f"File {filename} has unknown class {label!r}; skipped")
        continue
    try:
        mfccs, chroma, mel, contrast, tonnetz = extract_feature(
            f'Train_folder/train/{filename}')
    except Exception as exc:
        # Best-effort: report unreadable clips and move on, but let
        # KeyboardInterrupt / SystemExit propagate (a bare `except:` would not).
        print(f"File {filename} didn't work: {exc!r}")
        continue
    feature_rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
    label_rows.append(label_to_index[label])

# Seeding the stack with an empty (0, 193) block keeps the result 2-D even
# when no clip succeeded, and enforces the expected row width.
features = np.vstack([np.empty((0, 193))] + feature_rows)
# Float dtype matches what np.append onto np.empty(0) used to produce.
labels = np.array(label_rows, dtype=float)

  if np.any(X < 0) or np.any(X_ref < 0):
  Z = np.maximum(X, X_ref).astype(dtype)
  bad_idx = (Z < np.finfo(dtype).tiny)


File 300.wav didn't work
File 1488.wav didn't work
File 2458.wav didn't work


In [11]:
# Display the class index collected for each successfully parsed clip (stored as floats).
labels

array([0., 1., 2., ..., 6., 6., 7.])

In [12]:
# Display the stacked feature matrix (one 193-column row per parsed clip).
features

array([[-7.50957191e+01,  1.29611846e+02, -3.26698975e+01, ...,
         1.57509179e-03, -5.37471939e-03,  5.26261495e-03],
       [-8.35056669e+00,  1.13704109e+02, -1.91475123e+01, ...,
        -5.36374958e-03,  2.90953126e-03, -1.54599678e-03],
       [-1.15225264e+02,  3.67671473e+00, -3.87245168e+01, ...,
         2.73520672e-02, -9.37955141e-03, -4.78768348e-03],
       ...,
       [-3.12261079e+02,  4.62412806e+01,  5.05164659e+00, ...,
         1.62614482e-01, -3.83779053e-02, -1.89554327e-02],
       [-2.71249094e+02,  1.33006300e+02, -2.83203666e+01, ...,
         2.02666673e-01, -1.93794808e-02, -5.00848620e-02],
       [-2.91026597e+02,  2.33391865e+02, -4.70676553e+00, ...,
         5.15215980e-02,  1.03867911e-02, -1.45272843e-02]])

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature column into the [0, 1] range. Fitting and
# transforming are written as two explicit steps on the same matrix.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(features)
scaled_train_samples = scaler.transform(features)

In [14]:
# Display the feature matrix after min-max scaling to the [0, 1] range.
scaled_train_samples

array([[0.71283036, 0.59447857, 0.56851254, ..., 0.40620691, 0.46932733,
        0.27467782],
       [0.78216945, 0.54631151, 0.62625267, ..., 0.39764163, 0.52111423,
        0.25072002],
       [0.67114125, 0.21315939, 0.54265952, ..., 0.43802591, 0.44429214,
        0.23931333],
       ...,
       [0.46644797, 0.34204071, 0.72958225, ..., 0.60499331, 0.26301626,
        0.18946056],
       [0.50905382, 0.60475664, 0.58708489, ..., 0.65443357, 0.38178011,
        0.07992387],
       [0.4885077 , 0.90871424, 0.68791416, ..., 0.46786068, 0.56785643,
        0.20504211]])

In [16]:
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.core import Dense
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

Using TensorFlow backend.


In [23]:
# Create layers: a small fully connected classifier, assembled one layer at a time.
model = Sequential()
model.add(Dense(16, input_shape=(193,), activation='relu'))  # 193 features per clip
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))  # one output per sound class

In [24]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 16)                3104      
_________________________________________________________________
dense_8 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_9 (Dense)              (None, 10)                330       
Total params: 3,978
Trainable params: 3,978
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Adam optimizer with a small learning rate; the sparse loss is used because
# the labels are integer class indices rather than one-hot vectors.
optimizer = Adam(lr=.0001)
model.compile(
    optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [26]:
# Train for 20 epochs in mini-batches of 10, holding out 10% of the samples
# for validation; verbose=2 prints one summary line per epoch.
fit_options = dict(
    validation_split=0.1,
    batch_size=10,
    epochs=20,
    shuffle=True,
    verbose=2,
)
model.fit(scaled_train_samples, labels, **fit_options)

Instructions for updating:
Use tf.cast instead.
Train on 4888 samples, validate on 544 samples
Epoch 1/20
 - 2s - loss: 2.2925 - acc: 0.1066 - val_loss: 2.2689 - val_acc: 0.1250
Epoch 2/20
 - 1s - loss: 2.2556 - acc: 0.1418 - val_loss: 2.2389 - val_acc: 0.1507
Epoch 3/20
 - 1s - loss: 2.2198 - acc: 0.2013 - val_loss: 2.2030 - val_acc: 0.2022
Epoch 4/20
 - 1s - loss: 2.1825 - acc: 0.2306 - val_loss: 2.1647 - val_acc: 0.2408
Epoch 5/20
 - 1s - loss: 2.1410 - acc: 0.2477 - val_loss: 2.1235 - val_acc: 0.2904
Epoch 6/20
 - 1s - loss: 2.0951 - acc: 0.2680 - val_loss: 2.0752 - val_acc: 0.3125
Epoch 7/20
 - 1s - loss: 2.0442 - acc: 0.2968 - val_loss: 2.0215 - val_acc: 0.3456
Epoch 8/20
 - 1s - loss: 1.9864 - acc: 0.3343 - val_loss: 1.9672 - val_acc: 0.3382
Epoch 9/20
 - 1s - loss: 1.9360 - acc: 0.3566 - val_loss: 1.9211 - val_acc: 0.3529
Epoch 10/20
 - 1s - loss: 1.8952 - acc: 0.3791 - val_loss: 1.8873 - val_acc: 0.3493
Epoch 11/20
 - 1s - loss: 1.8608 - acc: 0.3832 - val_loss: 1.8544 - val_ac

<keras.callbacks.History at 0x112714e3d68>

In [None]:
# Validation loss should go down while validation accuracy goes up (ideally approaching 1).
# The model reached an accuracy of about 0.46 (i.e. 46% of the clips are categorized correctly).
# Next step: increase the number of epochs to push the accuracy closer to 1.