In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import librosa
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [2]:
def extract_mfccs(file_path):
    y, sr = librosa.load(file_path)
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    mfccs = mfccs.T
    delta_mfccs = librosa.feature.delta(mfccs)
    delta_mfccs = delta_mfccs
    
    #(216) rows = frames ,  and (26) columns = features
    features = np.hstack([mfccs, delta_mfccs])
    feature_names = [f"MFCC_{i+1}" for i in range(features.shape[1])]
    return pd.DataFrame(features, columns=feature_names)


In [3]:
def extract_pitch(file_path):
    y, sr = librosa.load(file_path)
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    pitch = np.mean(pitches[magnitudes > np.max(magnitudes) * 0.85])
    return pd.DataFrame({"Pitch": [pitch]})

In [4]:
def extract_chroma(file_path):
    y, sr = librosa.load(file_path)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma = chroma.T
    feature_names = [f"Chroma_{i+1}" for i in range(chroma.shape[1])]
    return pd.DataFrame(chroma, columns=feature_names)

In [5]:
def extract_zero_crossings(file_path):
    y, sr = librosa.load(file_path)
    zero_crossings = librosa.feature.zero_crossing_rate(y)
    zero_crossings = zero_crossings.T
    feature_names = [f"ZeroCrossings_{i+1}" for i in range(zero_crossings.shape[1])]
    return pd.DataFrame(zero_crossings, columns=feature_names)

In [6]:
def extract_spectral_contrast(file_path):
    y, sr = librosa.load(file_path)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_contrast = spectral_contrast.T 
    feature_names = [f"SpectralContrast_{i+1}" for i in range(spectral_contrast.shape[1])]
    return pd.DataFrame(spectral_contrast, columns=feature_names)

In [7]:
def process_user_folder(folder_path):
    mfccs_list = []
    pitch_list = []
    chroma_list = []
    zero_crossings_list = []
    spectral_contrast_list = []
    flag = 1

    # Iterate over all .wav files in the folder
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".wav"):
            file_path = os.path.join(folder_path, file_name)
            if flag:
                mfccs_list = extract_mfccs(file_path)
                pitch = extract_pitch 
                pitch_list.append(pitch)
                chroma_list = extract_chroma(file_path)
                zero_crossings_list = extract_zero_crossings(file_path)
                spectral_contrast_list = extract_spectral_contrast(file_path)

                flag = 0
            else:
                # Extract features for each file
                mfccs = extract_mfccs(file_path)
                pitch = extract_pitch(file_path)
                chroma = extract_chroma(file_path)
                zero_crossings = extract_zero_crossings(file_path)
                spectral_contrast = extract_spectral_contrast(file_path)
                # Append features to the lists
                mfccs_list = pd.concat([mfccs,mfccs_list]) 
                chroma_list = pd.concat([chroma, chroma_list])
                zero_crossings_list = pd.concat([zero_crossings, zero_crossings_list])
                spectral_contrast_list = pd.concat([spectral_contrast, spectral_contrast_list])
                pitch_list.append(pitch)


    # Create feature matrix with separate columns for each feature type
    #Data set without the pitch
    compined_features = pd.concat([mfccs_list, chroma_list, zero_crossings_list, spectral_contrast_list], axis=1)

    return compined_features

In [8]:
# Specify the path of the main folder
main_folder_path = "voices"

# Get a list of all subfolders inside the main folder
subfolders = [f.path for f in os.scandir(main_folder_path) if f.is_dir()]
# create a list of two train datas one for voice recognition (ahmed_ali, bedro, hassan, muhannad) and the other for speech recognition (grant_me_access,unlock_the_gate,open_middle_door)
data_voice_recognition_features = {
    "ahmed_ali": [],
    "bedro": [],
    "hassan": [],
    "muhannad": []

}
data_speech_recognition_features = {
    "grant_me_access": [],
    "open_middle_door": [],
    "unlock_the_gate": []
}

# Loop over each subfolder
for subfolder_path , i in zip(subfolders , range(len(subfolders))) :
    # Extract the name of the subfolder
    subfolder_name = os.path.basename(subfolder_path)
    print(subfolder_name)
    
    # Create a variable with the features of the subfolder
    features = process_user_folder(subfolder_path)

    # Assign the variable name based on the subfolder name
    globals()[subfolder_name] = features
    if subfolder_name == "ahmed_ali" or subfolder_name == "bedro" or subfolder_name == "hassan" or subfolder_name == "muhannad" or subfolder_name == "abdulla_ahmed" or subfolder_name == "ashf" or subfolder_name == "atef" or subfolder_name == "hazem_rafaat":
        data_voice_recognition_features[subfolder_name] = features
    else:
        data_speech_recognition_features[subfolder_name] = features

abdulla_ahmed
ahmed_ali
ashf
atef
bedro
grant_me_access
hassan
hazem_rafaat
muhannad
open_middle_door
unlock_the_gate
Voices


In [9]:
# do the same for voice recognition
# Combine all the extracted features into a single DataFrame
all_features = pd.concat([data_voice_recognition_features["ahmed_ali"], data_voice_recognition_features["bedro"], data_voice_recognition_features["hassan"], data_voice_recognition_features["muhannad"]])
# Create labels for the features based on the subfolder names
labels = ["ahmed_ali"] * data_voice_recognition_features["ahmed_ali"].shape[0] + ["bedro"] * data_voice_recognition_features["bedro"].shape[0] + ["hassan"] * data_voice_recognition_features["hassan"].shape[0] + ["muhannad"] * data_voice_recognition_features["muhannad"].shape[0] 

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(all_features, labels, test_size=0.2, random_state=42)


In [10]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=300, random_state=15, min_samples_split=2, min_samples_leaf=1, max_depth=50)


# Train the model
rf_model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred_rf = rf_model.predict(X_test_scaled)

# Evaluate the Random Forest model
accuracy_rf = accuracy_score(y_test, y_pred_rf)
classification_rep_rf = classification_report(y_test, y_pred_rf)

print(f'Random Forest Accuracy: {accuracy_rf:.2f}')
print('Random Forest Classification Report:\n', classification_rep_rf)
with open('model.pkl', 'wb') as model_file:
        pickle.dump(rf_model, model_file)
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)



Random Forest Accuracy: 0.90
Random Forest Classification Report:
               precision    recall  f1-score   support

   ahmed_ali       0.92      0.93      0.92       482
       bedro       0.96      0.90      0.93       526
      hassan       0.83      0.91      0.87       571
    muhannad       0.90      0.82      0.86       379

    accuracy                           0.90      1958
   macro avg       0.90      0.89      0.89      1958
weighted avg       0.90      0.90      0.90      1958


In [11]:
import pickle

# Assuming your model is stored in 'rf_model' and scaler is stored in 'scaler'
with open('rf_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)




In [12]:
def process_file(file_path):
    # Extract features for the input file
    mfccs = extract_mfccs(file_path)
    pitch = extract_pitch(file_path)
    chroma = extract_chroma(file_path)
    zero_crossings = extract_zero_crossings(file_path)
    spectral_contrast = extract_spectral_contrast(file_path)

    # Combine the features into a single DataFrame
    features = pd.concat([mfccs, chroma, zero_crossings, spectral_contrast], axis=1)

    return features

In [19]:
def predict_person(file_path):
        # Extract features from the input file
        features = process_file(file_path)

        # Scale the features using the loaded scaler
        input_features_scaled = scaler.transform(features)

        # Make predictions using the loaded model
        prediction = rf_model.predict(input_features_scaled)
        prediction = np.unique(prediction, return_counts=True)
        print(prediction)
        return prediction[0][np.argmax(prediction[1])]

print(predict_person("honda_trial_2.wav"))

(array(['ahmed_ali', 'bedro', 'hassan', 'muhannad'], dtype='<U9'), array([54,  2, 51,  1], dtype=int64))
ahmed_ali
