In [87]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import librosa
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [88]:
def extract_mfccs(file_path):
    y, sr = librosa.load(file_path)
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    mfccs = mfccs.T
    mean=[]
    var=[]
    #(216) rows = frames ,  and (26) columns = features
    for i in range(len(mfccs)):
        mean.append(np.mean(mfccs[i]))
        var.append(np.var(mfccs[i]))

    features = np.hstack([mean,var])
    return pd.DataFrame(features, columns=["MFCCs"])

In [89]:
def extract_pitch(file_path):
    y, sr = librosa.load(file_path)
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    pitch = np.mean(pitches[magnitudes > np.max(magnitudes) * 0.85])
    return pd.DataFrame({"Pitch": [pitch]})

In [90]:
def extract_chroma(file_path):
    y, sr = librosa.load(file_path)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma = chroma.T
    feature_names = [f"Chroma_{i+1}" for i in range(chroma.shape[1])]
    return pd.DataFrame(chroma, columns=feature_names)

In [91]:
def extract_zero_crossings(file_path):
    y, sr = librosa.load(file_path)
    zero_crossings = librosa.feature.zero_crossing_rate(y)
    zero_crossings = zero_crossings.T
    feature_names = [f"ZeroCrossings_{i+1}" for i in range(zero_crossings.shape[1])]
    return pd.DataFrame(zero_crossings, columns=feature_names)

In [92]:
def extract_spectral_contrast(file_path):
    y, sr = librosa.load(file_path)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_contrast = spectral_contrast.T 
    feature_names = [f"SpectralContrast_{i+1}" for i in range(spectral_contrast.shape[1])]
    return pd.DataFrame(spectral_contrast, columns=feature_names)

In [109]:
def process_user_folder(folder_path):
    mfccs_list = []

    # Iterate over all .wav files in the folder
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".wav"):
            file_path = os.path.join(folder_path, file_name)

            # Extract features for each file
            mfccs = extract_mfccs(file_path)

            # Append the DataFrame to the list
            mfccs_list.append(mfccs)

    # Concatenate all DataFrames in the list along columns
    if mfccs_list:
        compined_features = pd.concat(mfccs_list, axis=1)
        return compined_features

    # return None


In [110]:
# Specify the path of the main folder
main_folder_path = "voices"

# Get a list of all subfolders inside the main folder
subfolders = [f.path for f in os.scandir(main_folder_path) if f.is_dir()]
# Initialize an empty dictionary for storing features
data_voice_recognition_features = {
    "ahmed_ali": None,
    "bedro": None,
    "hassan": None,
    "muhannad": None,
    "abdulla_ahmed": None,
    "ashf": None,
    "atef": None,
    "hazem_rafaat": None
}

# Loop over each subfolder
for subfolder_path in subfolders:
    # Extract the name of the subfolder
    subfolder_name = os.path.basename(subfolder_path)

    if subfolder_name in data_voice_recognition_features.keys():
        # Create a variable with the features of the subfolder
        features = process_user_folder(subfolder_path)

        # Update the dictionary with the DataFrame (or None if no features)
        data_voice_recognition_features[subfolder_name] = features

# Create a DataFrame from the dictionary
df_voice_recognition_features = pd.concat(data_voice_recognition_features.values(), axis=1)

# Transpose the DataFrame to have voices as rows and features as columns
df_voice_recognition_features = df_voice_recognition_features.T

# Reset index to make 'Subfolder' a column
df_voice_recognition_features.reset_index(inplace=True)

# Rename the 'index' column to 'Voice'
df_voice_recognition_features.rename(columns={'index': 'Voice'}, inplace=True)

In [111]:
data_voice_recognition_features

{'ahmed_ali':          MFCCs      MFCCs      MFCCs      MFCCs      MFCCs      MFCCs  \
 0   -16.382910 -10.935716 -18.388676 -16.481791 -17.809553 -15.260443   
 1   -18.052292 -12.832460 -20.071465 -18.007294 -19.191351 -17.435467   
 2   -19.192226 -19.318407 -18.185013 -19.327501 -20.943422 -17.570810   
 3   -15.380415 -15.853374 -15.162692 -16.491520 -16.852911 -15.827769   
 4   -13.209841 -13.173674 -14.204920 -15.428556 -15.411261 -14.698550   
 ..         ...        ...        ...        ...        ...        ...   
 193        NaN        NaN        NaN        NaN        NaN        NaN   
 194        NaN        NaN        NaN        NaN        NaN        NaN   
 195        NaN        NaN        NaN        NaN        NaN        NaN   
 196        NaN        NaN        NaN        NaN        NaN        NaN   
 197        NaN        NaN        NaN        NaN        NaN        NaN   
 
          MFCCs      MFCCs      MFCCs      MFCCs  ...      MFCCs      MFCCs  \
 0   -18.093000 -1

In [112]:
print(df_voice_recognition_features)


     Voice          0          1          2          3          4          5  \
0    MFCCs -16.382910 -18.052292 -19.192226 -15.380415 -13.209841 -13.308472   
1    MFCCs -10.935716 -12.832460 -19.318407 -15.853374 -13.173674 -13.224785   
2    MFCCs -18.388676 -20.071465 -18.185013 -15.162692 -14.204920 -14.464134   
3    MFCCs -16.481791 -18.007294 -19.327501 -16.491520 -15.428556 -12.703568   
4    MFCCs -17.809553 -19.191351 -20.943422 -16.852911 -15.411261 -14.127642   
..     ...        ...        ...        ...        ...        ...        ...   
224  MFCCs -30.852840 -18.847103 -14.758459 -16.505819 -14.087309 -12.932907   
225  MFCCs -31.597967 -16.616116 -12.345858 -14.036962 -14.139610 -12.477659   
226  MFCCs -32.203934 -18.460608 -13.384254 -14.174228 -13.937067 -12.396997   
227  MFCCs -31.991669 -15.626043 -11.568723 -13.248625 -15.085413 -11.856977   
228  MFCCs -33.313667 -28.127085 -22.668053 -18.066122 -14.480784 -13.353956   

             6          7          8  .

In [113]:
# Transpose the DataFrame to swap rows and columns
all_features = df_voice_recognition_features.transpose()

# Reset the index
all_features = all_features.reset_index()

# Rename the columns for better clarity
all_features.columns = ["Voice"] + [f"Feature_{i+1}" for i in range(all_features.shape[1]-1)]

# Print the modified DataFrame
print(all_features)

     Voice  Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  Feature_6  \
0    Voice      MFCCs      MFCCs      MFCCs      MFCCs      MFCCs      MFCCs   
1        0  -16.38291 -10.935716 -18.388676 -16.481791 -17.809553 -15.260443   
2        1 -18.052292  -12.83246 -20.071465 -18.007294 -19.191351 -17.435467   
3        2 -19.192226 -19.318407 -18.185013 -19.327501 -20.943422  -17.57081   
4        3 -15.380415 -15.853374 -15.162692  -16.49152 -16.852911 -15.827769   
..     ...        ...        ...        ...        ...        ...        ...   
242    241        NaN        NaN        NaN        NaN        NaN        NaN   
243    242        NaN        NaN        NaN        NaN        NaN        NaN   
244    243        NaN        NaN        NaN        NaN        NaN        NaN   
245    244        NaN        NaN        NaN        NaN        NaN        NaN   
246    245        NaN        NaN        NaN        NaN        NaN        NaN   

     Feature_7  Feature_8  Feature_9  .

In [114]:
X_train, X_test, y_train, y_test = train_test_split(all_features.iloc[:, 1:], all_features.iloc[:, 0], test_size=0.2, random_state=42)

In [115]:
# Initialize the SVM model
svm_model_speech = SVC(kernel='linear', C=10, gamma=0.1)

# Train the SVM model
svm_model_speech.fit(X_train, y_train)



ValueError: could not convert string to float: 'MFCCs'

In [106]:
# Make predictions on the test set
predictions = svm_model_speech.predict(X_test)



ValueError: Input X contains NaN.
SVC does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [107]:
# Evaluate the performance of the model
accuracy = accuracy_score(y_test, predictions)
classification_rep = classification_report(y_test, predictions, zero_division=0)
# Print the results
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_rep)

ValueError: Found input variables with inconsistent numbers of samples: [50, 2]

In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [0.01, 0.1, 1, 10]}
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_svm_model = grid_search.best_estimator_

In [None]:
from sklearn import metrics

y_pred = best_svm_model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average='weighted')
recall = metrics.recall_score(y_test, y_pred, average='weighted')
f1_score = metrics.f1_score(y_test, y_pred, average='weighted')

print(f"Best Parameters: {best_params}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")

In [None]:
# Initialize the SVM model
svm_model_voice = SVC(kernel='linear', C=10, gamma=0.01)


In [None]:
# Make predictions on the test set
predictions = svm_model_voice.predict(X_test)



In [None]:
# Evaluate the performance of the model
accuracy = accuracy_score(y_test, predictions)
classification_rep = classification_report(y_test, predictions)

# Print the results
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_rep)