## Import Package.

     -Required Package


In [1]:
from sklearn.model_selection import train_test_split
import numpy as np
import librosa
import os
from keras.utils import to_categorical
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
import keras
from sklearn.metrics import classification_report
from keras.utils import to_categorical


Using TensorFlow backend.


In [2]:
def get_data():
    pad2d = lambda a, i: a[:, 0: i] if a.shape[1] > i else np.hstack((a, np.zeros((a.shape[0],i - a.shape[1]))))
    labels = []
    mfccs = []

    for f in os.listdir('recordings/'):
        if f.endswith('.wav'):
            wav, sr = librosa.load('recordings/' + f)
            mfcc = librosa.feature.mfcc(wav)
            padded_mfcc = pad2d(mfcc,40)
            mfccs.append(padded_mfcc)
            label = f.split('_')[0]
            labels.append(label)
    return np.array(mfccs), to_categorical(labels)


In [3]:
def get_cnn_model(input_shape, num_classes):
    model = Sequential()

    model.add(Conv2D(64, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())

    model.add(Conv2D(48, kernel_size=(2, 2), activation='relu'))
    model.add(BatchNormalization())
    
    model.add(Conv2D(84, kernel_size=(2, 2), activation='relu'))
    model.add(BatchNormalization())

    
    model.add(Conv2D(164, kernel_size=(2, 2), activation='relu'))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    
    model.add(Dense(48, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.20))
    
    
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])

    return model


In [4]:
def get_all():
    mfccs, labels = get_data()

    dim_1 = mfccs.shape[1]
    dim_2 = mfccs.shape[2]
    channels = 1
    classes = 10

    X = mfccs
    X = X.reshape((mfccs.shape[0], dim_1, dim_2, channels))
    y = labels

    input_shape = (dim_1, dim_2, channels)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

    model = get_cnn_model(input_shape, classes)

    return X_train, X_test, y_train, y_test,model

In [5]:
X_train, X_test, y_train, y_test,cnn_model = get_all()


In [6]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1549, 20, 40, 1) (173, 20, 40, 1) (1549, 10) (173, 10)


In [7]:
cnn_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 19, 39, 64)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 19, 39, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 38, 48)        12336     
_________________________________________________________________
batch_normalization_2 (Batch (None, 18, 38, 48)        192       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 17, 37, 84)        16212     
_________________________________________________________________
batch_normalization_3 (Batch (None, 17, 37, 84)        336       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 16, 36, 164)      

In [8]:
keras_callback = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=1,
                                             write_graph=True, write_images=True)

cnn_model.fit(X_train, y_train, batch_size=64, epochs=50, verbose=1, validation_split=0.1, callbacks=[keras_callback])

cnn_model.save('trained_model_new.h5')



Train on 1394 samples, validate on 155 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [9]:
def check_preds(X, y):
    trained_model = keras.models.load_model('trained_model_new.h5')
    predictions = trained_model.predict_classes(X)

    print(classification_report(y, to_categorical(predictions)))

In [10]:
check_preds(X_test, y_test)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.86      0.95      0.90        19
           2       0.95      0.90      0.92        20
           3       1.00      1.00      1.00        19
           4       0.95      0.95      0.95        21
           5       0.88      0.93      0.90        15
           6       1.00      0.82      0.90        11
           7       1.00      0.93      0.96        14
           8       0.94      0.89      0.91        18
           9       0.80      0.92      0.86        13

   micro avg       0.94      0.94      0.94       173
   macro avg       0.94      0.93      0.93       173
weighted avg       0.94      0.94      0.94       173
 samples avg       0.94      0.94      0.94       173



In [14]:
from sklearn.metrics import accuracy_score
import numpy as np
y_pred = (trained_model.predict_classes(X_test))
accuracy_score(np.argmax(y_test,axis=1), y_pred)
# #print(np.argmax(y_test,axis=1),y_pred.shape)
# np.argmax(y_test,axis=1).shape

0.9364161849710982

In [13]:
trained_model = keras.models.load_model('trained_model_new.h5')

# Record sound for prediction

In [None]:
from tkinter import *
import sounddevice as sd
import sounddevice as sd
from scipy.io.wavfile import write
import os, sys, subprocess
import numpy as np
import json
import tkinter as tk
import librosa
import noisereduce as nr
from tkinter import messagebox
import tkinter.font as font

root = Tk()

root.geometry("720x600")
root.configure(background='white')
root.title("Testing")

myFont = font.Font(family='sans-serif')

def record():
    global data
    fs = 44100  
    seconds = .75
    myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
    sd.wait()
    data =  myrecording

def play():
    global update,fr
    write('/home/incentive/Videos/Group Project/Digit Recognition/test record/' +  'temp'+'.wav', 44100, data)
    audio_data, sampling_rate = librosa.load('/home/incentive/Videos/Group Project/Digit Recognition/test record/'+'temp.wav')
    noisy_part = audio_data[10000:15000]  
    reduced_noise = nr.reduce_noise(audio_clip=audio_data, noise_clip=noisy_part, verbose=False)
    update = reduced_noise
    fr=sampling_rate
    sd.play(reduced_noise,sampling_rate)
    sd.wait(6)
    
def popupmsg(msg):
    popup = tk.Toplevel(root)
    popup.configure(background='white')
    popup.geometry("300x300")
    popup.wm_title("Prediction")
    popup.tkraise(root) # This just tells the message to be on top of the root window.
    tk.Label(popup, text=msg,   bg='white',fg = "Red", font = "arial 50 bold italic").pack(side="top", fill="x", pady=50)
    tk.Button(popup, text="Thank You!", bg="cyan",height=2,width=10,
              font = "Helvetica 30 bold italic",command = popup.destroy).pack()
    
def check():
    pad2d = lambda a, i: a[:, 0: i] if a.shape[1] > i else np.hstack((a, np.zeros((a.shape[0],i - a.shape[1]))))
    mfcc = librosa.feature.mfcc(update)
    padded_mfcc = pad2d(mfcc,40)
    data = padded_mfcc.reshape(1,padded_mfcc.shape[0],padded_mfcc.shape[1],1) 
    predictions = trained_model.predict_classes(data)
    popupmsg(str(predictions.item()))
    


label_dict = json.load(open("label.txt"))

tk.Label(root, 
		 text="Testing Digit Recognizer",
		 fg = "Red",
         bg='white',
		 font = "arial 30 bold italic").pack(pady=20)

record_button = Button(root, text="Start record",bg="blue", command=record,height=3,width=20,font = "Helvetica 30 bold italic").pack(pady=10)

play_button = Button(root, text="Play",bg="red", command=play,height=3,width=20,font = "Helvetica 30 bold italic").pack(pady=10)
check_button = Button(root, text="Predict",bg="green", command=check,height=3,width=20,font = "Helvetica 30 bold italic").pack(pady=10)

root.mainloop() 

In [None]:
# help(cnn_model)