In [342]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
import os
import pandas as pd
import librosa
import speech_recognition as sr
import numpy as np
import scipy
from sklearn import metrics
from functions import mfcc

In [343]:
filelist = os.listdir('voices//ourvoices//AbdelrhmanSaid')
filelist += os.listdir('voices//ourvoices//Esraa')
filelist += os.listdir('voices/others/Kamel')
df_our = pd.DataFrame(filelist)
df_our['label']='open'
df_our = df_our.rename(columns={0:'file'})
df_our

Unnamed: 0,file,label
0,2022-12-08T11_08_28.068Z.wav,open
1,2022-12-08T11_08_34.463Z.wav,open
2,2022-12-08T11_08_39.459Z.wav,open
3,2022-12-08T11_08_45.143Z.wav,open
4,2022-12-08T11_08_50.643Z.wav,open
...,...,...
88,kamelvoices (5).wav,open
89,kamelvoices (6).wav,open
90,kamelvoices (7).wav,open
91,kamelvoices (8).wav,open


In [344]:
filelist = os.listdir('voices/otherWords/Sama')
filelist += os.listdir('voices/otherWords/Shaban')
filelist += os.listdir('voices/otherWords/DinaHussam')
filelist += os.listdir('voices/otherWords/Romaisaa')
filelist += os.listdir('voices/otherWords/Kamel')
filelist += os.listdir('voices/otherWords/Nasser')
df_other = pd.DataFrame(filelist)
df_other['label']='other'
df_other = df_other.rename(columns={0:'file'})
df_other

Unnamed: 0,file,label
0,samaclose (10).wav,other
1,samaclose (11).wav,other
2,samaclose (12).wav,other
3,samaclose (13).wav,other
4,samaclose (14).wav,other
...,...,...
107,2022-12-09T19_33_18.995Z.wav,other
108,2022-12-09T19_33_22.111Z.wav,other
109,2022-12-09T19_33_24.856Z.wav,other
110,2022-12-09T19_33_27.556Z.wav,other


In [345]:
df = pd.concat([df_our, df_other], ignore_index=True)
df

Unnamed: 0,file,label
0,2022-12-08T11_08_28.068Z.wav,open
1,2022-12-08T11_08_34.463Z.wav,open
2,2022-12-08T11_08_39.459Z.wav,open
3,2022-12-08T11_08_45.143Z.wav,open
4,2022-12-08T11_08_50.643Z.wav,open
...,...,...
200,2022-12-09T19_33_18.995Z.wav,other
201,2022-12-09T19_33_22.111Z.wav,other
202,2022-12-09T19_33_24.856Z.wav,other
203,2022-12-09T19_33_27.556Z.wav,other


In [346]:
def extract_features(files, name="allwords"):

    # Sets the name to be the path to where the file is in my computer
    file_name = os.path.join(os.path.abspath(
        'voices/{}').format(name)+'/'+str(files['file']))

    # Loads the audio file as a floating point time series and assigns the default sample rate
    # Sample rate is set to 22050 by default
    try:
        X, sample_rate = librosa.load(file_name)
    except:
        print(files)
    # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
    mfccs = np.mean(mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    # Generates a Short-time Fourier transform (STFT) to use in the chroma_stft
    stft = np.abs(librosa.stft(X))

    # Computes a chromagram from a waveform or power spectrogram.
    chroma = np.mean(librosa.feature.chroma_stft(
        S=stft, sr=sample_rate).T, axis=0)
    zcr = np.mean(librosa.feature.zero_crossing_rate(X).T,axis=0)
    rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
    rmse = np.mean(librosa.feature.rms(y=X).T,axis=0) 
    spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0) 
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0) 
    
    label = files.label

    return mfccs, chroma, zcr, rmse, spec_bw, spec_cent,rolloff,label


def feat(features_label):
    features = []
    for i in range(0, len(features_label)):
        features.append(np.concatenate((features_label[i][0], features_label[i][1],
                                        features_label[i][2], features_label[i][3],
                                        features_label[i][4],features_label[i][5],features_label[i][6]), axis=0))
    return features

In [347]:
features_label = df.apply(extract_features, axis=1)
features=feat(features_label)

  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0)
  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an erro

In [348]:
X_train, X_test, y_train, y_test = train_test_split(features,df['label'], test_size=0.3, random_state=1)

In [349]:
clf = DecisionTreeClassifier(criterion="entropy", max_depth=5)
clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

In [350]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9193548387096774


In [413]:
filelist = os.listdir('voices//webtest//') 

#read them into pandas
df_test = pd.DataFrame(filelist)
df_test['label']=0
df_test = df_test.rename(columns={0:'file'})

In [414]:
features_label2 = df_test.apply(extract_features,name="allvoices", axis=1)
features=feat(features_label2)
y_pred = clf.predict(features)
y_pred

  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0)


array(['open'], dtype=object)

In [415]:
import pickle
model_filename = 'SpeechUp.pkl'
pickle.dump(clf, open(model_filename,'wb'))