# Download dataset

/bin/bash: get: command not found


In [None]:
import librosa 
import librosa.display
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd
import numba 
import sklearn 
import os
from scipy.io import wavfile
import warnings
warnings.filterwarnings('ignore')

from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, Flatten, Conv1D, Input, MaxPool1D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import backend as K

# Reset Keras' global state so the models built below start from a clean session.
K.clear_session()

In [None]:
# Root directory of the training data; the cells below read one
# sub-directory of .wav files per label from here.
train_audio_path = 'data/train'

# Load one example clip at 16 kHz and render an inline audio player for it.
# os.path.join supplies the separator that plain string concatenation left
# out (the old expression resolved to 'data/trainyes/...').
samples, sr = librosa.load(
    os.path.join(train_audio_path, 'yes', '0a7c2a8d_nohash_0.wav'),
    sr=16000,
)
ipd.Audio(samples, rate=sr)

In [None]:
# Every entry found directly under the training directory is treated as a label.
labels = os.listdir(path=train_audio_path)
labels

In [None]:
# Count the .wav recordings available for each label and plot the counts.
# Only sub-directories are kept as labels: a stray file in the training root
# would otherwise make the nested os.listdir call fail.
labels = [
    entry for entry in os.listdir(train_audio_path)
    if os.path.isdir(os.path.join(train_audio_path, entry))
]
no_of_recordings = []
for label in labels:
    # os.path.join inserts the path separator that `train_audio_path + label`
    # was missing.
    waves = [
        f for f in os.listdir(os.path.join(train_audio_path, label))
        if f.endswith('.wav')
    ]
    no_of_recordings.append(len(waves))


# Bar chart: one bar per label, labelled on the x axis.
plt.figure(figsize=(30, 5))
index = np.arange(len(labels))
plt.bar(index, no_of_recordings)
plt.xlabel('Commands', fontsize=12)
plt.ylabel('No. of recordings', fontsize=12)
plt.xticks(index, labels, fontsize=15, rotation=60)
plt.title('No. of recordings for each command')
plt.show()

# From here on, work only with these ten command labels.
labels = ['yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go']

In [None]:
# Collect the length in seconds of every .wav clip under the selected labels
# and show the distribution as a histogram.
duration_of_recordings = []
for label in labels:
    label_dir = train_audio_path + '/' + label
    wav_names = [name for name in os.listdir(label_dir) if name.endswith('.wav')]
    for wav_name in wav_names:
        rate, data = wavfile.read(label_dir + '/' + wav_name)
        # duration = number of samples / samples per second
        seconds = float(len(data) / rate)
        duration_of_recordings.append(seconds)

plt.hist(np.array(duration_of_recordings))

In [None]:
# Load every clip of the selected labels, downsample it from 16 kHz to 8 kHz,
# and keep only the clips that are exactly 8000 samples long after resampling
# so that all retained waveforms share one length.
all_wave = []
all_label = []
for label in labels:
    print(label)
    label_dir = train_audio_path + '/' + label
    waves = [f for f in os.listdir(label_dir) if f.endswith('.wav')]
    for wav in waves:
        samples, sample_rate = librosa.load(label_dir + '/' + wav, sr=16000)
        # The sample rates are passed by keyword: librosa >= 0.10 no longer
        # accepts them positionally.
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=8000)
        if len(samples) == 8000:
            all_wave.append(samples)
            all_label.append(label)

In [None]:
# Map each string label to an integer class index.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(all_label)
y = le.transform(all_label)
# Class names in the order of their integer codes.
classes = [*le.classes_]

In [None]:
# Convert the integer-encoded labels to one-hot vectors, since this is a
# multi-class classification problem.
# `keras.utils.np_utils` has been removed from current Keras releases, so the
# public `to_categorical` helper is used instead.
from tensorflow.keras.utils import to_categorical

y = to_categorical(y, num_classes=len(labels))

In [None]:
# Conv1D expects 3-D input, so append a trailing axis of size 1 to the stacked
# 8000-sample waveforms.
all_waves = np.reshape(np.array(all_wave), (-1, 8000, 1))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the clips for validation, stratified on the labels so both
# splits keep the same class proportions.
# The reshaped `all_waves` array is split rather than the raw `all_wave` list,
# so the data matches the (8000, 1) input shape of the model.
x_tr, x_val, y_tr, y_val = train_test_split(
    all_waves,
    np.array(y),
    stratify=y,
    test_size=0.2,
    # NOTE(review): the original line was cut off after `random_`; the seed
    # below is an assumed value - confirm against the source notebook.
    random_state=777,
)

In [None]:
# Import the pooling layer from the public Keras API; the private
# `tensorflow.python.keras` path is not a stable import location.
from tensorflow.keras.layers import MaxPooling1D

# Model: four Conv1D -> MaxPooling1D -> Dropout stages, followed by two dense
# layers and a softmax output with one unit per label.
inputs = Input(shape=(8000, 1))

# Convolutional stages as (filters, kernel_size): the filter count doubles and
# the kernel shrinks at each stage.
conv = inputs
for filters, kernel_size in ((8, 13), (16, 11), (32, 9), (64, 7)):
    conv = Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1)(conv)
    conv = MaxPooling1D(3)(conv)
    conv = Dropout(0.3)(conv)

# Flatten the feature maps for the dense layers.
conv = Flatten()(conv)

# Dense layer 1
conv = Dense(256, activation='relu')(conv)
# NOTE(review): the dense-layer dropout rate is 0.03 while the conv stages use
# 0.3 - confirm this is intentional and not a typo.
conv = Dropout(0.03)(conv)

# Dense layer 2
conv = Dense(128, activation='relu')(conv)
conv = Dropout(0.03)(conv)

# One softmax output unit per label.
outputs = Dense(len(labels), activation='softmax')(conv)

model = Model(inputs, outputs)
