In [589]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from skimage.restoration import denoise_wavelet
import sounddevice as sd

# data extracting and processing

In [590]:
data, sample_rate = librosa.load(walley_sample)
print(sample_rate)

22050


In [706]:

all_data = []

data_path_dict = {
    0: ["1/" + file_path for file_path in os.listdir("1/")],
    1: ["2/" + file_path for file_path in os.listdir("2/")],
    2: ["3/" + file_path for file_path in os.listdir("3/")],
    3: ["4/" + file_path for file_path in os.listdir("4/")],
    4: ["5/" + file_path for file_path in os.listdir("5/")],
    5: ["6/" + file_path for file_path in os.listdir("6/")],
    6: ["7/" + file_path for file_path in os.listdir("7/")],
    7: ["8/" + file_path for file_path in os.listdir("8/")],
    8: ["9/" + file_path for file_path in os.listdir("9/")],
    9: ["10/" + file_path for file_path in os.listdir("10/")],
    10: ["11/" + file_path for file_path in os.listdir("11/")],
    11: ["12/" + file_path for file_path in os.listdir("12/")],
    12: ["13/" + file_path for file_path in os.listdir("13/")],
    13: ["14/" + file_path for file_path in os.listdir("14/")],
    14: ["15/" + file_path for file_path in os.listdir("15/")],
    15: ["16/" + file_path for file_path in os.listdir("16/")],
    16: ["17/" + file_path for file_path in os.listdir("17/")],
    17: ["18/" + file_path for file_path in os.listdir("18/")],
    18: ["19/" + file_path for file_path in os.listdir("19/")],
    19: ["20/" + file_path for file_path in os.listdir("20/")]
    
}


for class_label, list_of_files in data_path_dict.items():
    for single_file in list_of_files:
        audio, sample_rate = librosa.load(single_file) 
        audio = denoise_wavelet(audio, method = 'BayesShrink', mode = 'soft', wavelet_levels = 10, wavelet = 'db8', rescale_sigma='True')
        mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40) 
        mfcc_processed = np.mean(mfcc.T, axis=0) 
        all_data.append([mfcc_processed, class_label])
    print(f"Info: Succesfully Preprocessed Class Label {class_label}")

df = pd.DataFrame(all_data, columns=["feature", "class_label"])

print(audio.shape)
print(mfcc_processed.shape)

Info: Succesfully Preprocessed Class Label 0
Info: Succesfully Preprocessed Class Label 1
Info: Succesfully Preprocessed Class Label 2
Info: Succesfully Preprocessed Class Label 3
Info: Succesfully Preprocessed Class Label 4
Info: Succesfully Preprocessed Class Label 5
Info: Succesfully Preprocessed Class Label 6
Info: Succesfully Preprocessed Class Label 7
Info: Succesfully Preprocessed Class Label 8
Info: Succesfully Preprocessed Class Label 9
Info: Succesfully Preprocessed Class Label 10
Info: Succesfully Preprocessed Class Label 11
Info: Succesfully Preprocessed Class Label 12
Info: Succesfully Preprocessed Class Label 13
Info: Succesfully Preprocessed Class Label 14
Info: Succesfully Preprocessed Class Label 15
Info: Succesfully Preprocessed Class Label 16
Info: Succesfully Preprocessed Class Label 17
Info: Succesfully Preprocessed Class Label 18
Info: Succesfully Preprocessed Class Label 19
(44100,)
(40,)


In [694]:
filename = 'data1'
outfile = open(filename,'wb')
pickle.dump(df,outfile)
outfile.close()

# training model

In [695]:
infile = open('data1','rb')
df = pickle.load(infile)
infile.close()


In [707]:
X = df["feature"].values
X = np.concatenate(X, axis=0).reshape(len(X), 40)
y = np.array(df["class_label"].tolist())


In [708]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size = 0.2)

In [709]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix

In [710]:
model = SVC(C=1, kernel='linear', gamma='auto')

model.fit(xtrain, ytrain)



SVC(C=1, gamma='auto', kernel='linear')

In [711]:
y_pred = model.predict(xtest)

from sklearn import metrics
from sklearn.metrics import confusion_matrix
print("accuracy:", metrics.accuracy_score(y_true=ytest, y_pred=y_pred), "\n")

print(metrics.confusion_matrix(y_true=ytest, y_pred=y_pred))

accuracy: 0.97 

[[2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 8 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 5 0 0 1 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4]]


In [701]:

filename = 'model 1'
outfile = open(filename,'wb')
pickle.dump(model,outfile)
outfile.close()

# prediction

In [702]:
infile = open('model 1','rb')
model = pickle.load(infile)
infile.close()


In [712]:
cc = np.array(['اهلا' , 'مرحبا' , 'سلام', 'رمضان' , 'سكر' , 'برتقال' , 'تفاح' , 'عنب' , 'ليمون' , 'أسد' , 'نمر' , 'دب' , 'فهد' , 'اصفر' , 'غزال' , 'قرد' , 'احمر' , 'يد' , 'قدم' , 'راس' ])


In [713]:
def record_audio(n_times=50):
    

    input("To start recording Wake Word press Enter: ")
    for i in range(n_times):
        fs = 44100
        seconds = 2

        myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
        sd.wait()
        write('vv/' + str(1) + ".wav", fs, myrecording)
        
        
        audio, sample_rate = librosa.load("vv/1.wav")
        audio = denoise_wavelet(audio, method = 'BayesShrink', mode = 'soft', wavelet_levels = 10, wavelet = 'db8', rescale_sigma='True')
        mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40) 
        mfcc_processed = np.mean(mfcc.T, axis=0).reshape(1,40)
       
        
        result=model.predict(mfcc_processed)
      
        print(cc[result])
        input(f"Press to record next or two stop press ctrl + C ({i + 1}/{n_times}): ")
        


In [None]:
record_audio(n_times=25)

To start recording Wake Word press Enter: 
['اصفر']
Press to record next or two stop press ctrl + C (1/25): 
['اصفر']
Press to record next or two stop press ctrl + C (2/25): 
['سلام']
Press to record next or two stop press ctrl + C (3/25): 
['اصفر']
Press to record next or two stop press ctrl + C (4/25): 
['تفاح']
Press to record next or two stop press ctrl + C (5/25): 
['أسد']
Press to record next or two stop press ctrl + C (6/25): 
['سكر']
Press to record next or two stop press ctrl + C (7/25): 
['اصفر']
Press to record next or two stop press ctrl + C (8/25): 
['مرحبا']
Press to record next or two stop press ctrl + C (9/25): 
['سكر']
Press to record next or two stop press ctrl + C (10/25): 
['تفاح']
Press to record next or two stop press ctrl + C (11/25): 
['سكر']
Press to record next or two stop press ctrl + C (12/25): 
['تفاح']
Press to record next or two stop press ctrl + C (13/25): 
['أسد']
Press to record next or two stop press ctrl + C (14/25): 
['عنب']
Press to record next or 