In [183]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
import os
import pandas as pd
import librosa
import speech_recognition as sr
import numpy as np
import scipy
from sklearn import metrics
from functions import mfcc

In [184]:
filelist = os.listdir('voices//ourvoices//AbdelrhmanSaid')
#filelist += os.listdir('voices//ourvoices//Nasser')
filelist += os.listdir('voices//ourvoices/Esraa')
filelist+= os.listdir('voices//ourvoices/MaryamMegahed')
df_our = pd.DataFrame(filelist)
df_our['label']='open'
df_our = df_our.rename(columns={0:'file'})
df_our

Unnamed: 0,file,label
0,2022-12-08T11_08_28.068Z.wav,open
1,2022-12-08T11_08_34.463Z.wav,open
2,2022-12-08T11_08_39.459Z.wav,open
3,2022-12-08T11_08_45.143Z.wav,open
4,2022-12-08T11_08_50.643Z.wav,open
...,...,...
102,2022-12-11T11_13_24.552Z.wav,open
103,2022-12-11T11_13_28.153Z.wav,open
104,2022-12-11T11_13_32.044Z.wav,open
105,2022-12-11T11_13_38.358Z.wav,open


In [185]:
filelist = os.listdir('voices/otherWords/wrongWords')
#filelist += os.listdir('voices/otherWords/Nasser')
df_other = pd.DataFrame(filelist)
df_other['label']='other'
df_other = df_other.rename(columns={0:'file'})
df_other

Unnamed: 0,file,label
0,2022-12-11T10_29_15.061Z.wav,other
1,2022-12-11T10_29_25.539Z.wav,other
2,2022-12-11T10_29_28.794Z.wav,other
3,2022-12-11T10_29_31.875Z.wav,other
4,2022-12-11T10_29_34.715Z.wav,other
...,...,...
118,2022-12-11T10_49_16.645Z.wav,other
119,2022-12-11T10_49_20.057Z.wav,other
120,2022-12-11T10_49_23.220Z.wav,other
121,2022-12-11T10_49_27.359Z.wav,other


In [186]:
df = pd.concat([df_our, df_other], ignore_index=True)
df

Unnamed: 0,file,label
0,2022-12-08T11_08_28.068Z.wav,open
1,2022-12-08T11_08_34.463Z.wav,open
2,2022-12-08T11_08_39.459Z.wav,open
3,2022-12-08T11_08_45.143Z.wav,open
4,2022-12-08T11_08_50.643Z.wav,open
...,...,...
225,2022-12-11T10_49_16.645Z.wav,other
226,2022-12-11T10_49_20.057Z.wav,other
227,2022-12-11T10_49_23.220Z.wav,other
228,2022-12-11T10_49_27.359Z.wav,other


In [187]:
def extract_features(files, name="allwords"):

    # Sets the name to be the path to where the file is in my computer
    file_name = os.path.join(os.path.abspath(
        'voices/{}').format(name)+'/'+str(files['file']))

    # Loads the audio file as a floating point time series and assigns the default sample rate
    # Sample rate is set to 22050 by default
   
    X, sample_rate = librosa.load(file_name)
    #except:
     #   print(files)
    # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
    mfccs = np.mean(mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    # Generates a Short-time Fourier transform (STFT) to use in the chroma_stft
    stft = np.abs(librosa.stft(X))

    # Computes a chromagram from a waveform or power spectrogram.
    chroma = np.mean(librosa.feature.chroma_stft(
        S=stft, sr=sample_rate).T, axis=0)
    zcr = np.mean(librosa.feature.zero_crossing_rate(X).T,axis=0)
    rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
    rmse = np.mean(librosa.feature.rms(y=X).T,axis=0) 
    spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0) 
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0) 
    
    label = files.label

    return mfccs, chroma, zcr, rmse, spec_bw, spec_cent,rolloff,label


def feat(features_label):
    features = []
    for i in range(0, len(features_label)):
        features.append(np.concatenate((features_label[i][0], features_label[i][1],
                                        features_label[i][2], features_label[i][3],
                                        features_label[i][4],features_label[i][5],features_label[i][6]), axis=0))
    return features

In [188]:
features_label = df.apply(extract_features, axis=1)
features=feat(features_label)

  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0)
  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an erro

In [189]:
X_train, X_test, y_train, y_test = train_test_split(features,df['label'], test_size=0.3, random_state=1)

In [190]:
clf = DecisionTreeClassifier(criterion="entropy", max_depth=6)
clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

In [191]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7971014492753623


In [192]:
filelist = os.listdir('voices//webtest//') 

#read them into pandas
df_test = pd.DataFrame(filelist)
df_test['label']=0
df_test = df_test.rename(columns={0:'file'})

In [193]:
features_label2 = df_test.apply(extract_features,name="allvoices", axis=1)
features=feat(features_label2)
y_pred = clf.predict(features)
y_pred

  rolloff = np.mean(librosa.feature.spectral_rolloff(X, sr=sample_rate).T,axis=0)
  spec_cent = np.mean(librosa.feature.spectral_centroid(X, sr=sample_rate).T,axis=0)
  spec_bw = np.mean(librosa.feature.spectral_bandwidth(X, sr=sample_rate).T,axis=0)


array(['other'], dtype=object)

In [194]:
import pickle
model_filename = 'SpeechUp.pkl'
pickle.dump(clf, open(model_filename,'wb'))