In [43]:
# load the 'prononciation' files
# generate the df with mfcc, vector, row mean
# train all models from additional_features file
# collect 5 test samples
# augment to 5*7 = 35
# predict 

### Pronounciation files

In [1]:
import os

files = []
for filename in os.listdir("augmented_audio/"):
    if "pronounciation" in filename.lower():
        files.append("augmented_audio/" + filename)

files

['augmented_audio/Abhishek10Pronounciation.wavoutput_augmented.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_compressed.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_cropped.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_noisy.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_pitch_shifted.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_speed_changed.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_stretched.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_augmented.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_compressed.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_cropped.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_noisy.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_pitch_shifted.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_speed_changed.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_stretched.wav',
 'augmented_audio/Abhishek2Pronounc

### Creating df of Pronounciation files - MFCC, Vector

In [1]:
import os
import librosa
import pandas as pd
import re
import numpy as np
from scipy.io.wavfile import read
from sklearn import preprocessing
import python_speech_features as mfcc

def extract_mfcc(file_path, n_mfcc=25):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs.flatten()

# Function to extract features (MFCC and delta coefficients)
def extract_features(audio, rate):
    mfcc_feature = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined.flatten()

# Function to calculate delta coefficients
def calculate_delta(array):
    rows, cols = array.shape
    deltas = np.zeros((rows, 20))
    n = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= n:
            if i - j < 0:
                first = 0
            else:
                first = i - j
            if i + j > rows - 1:
                second = rows - 1
            else:
                second = i + j
            index.append((second, first))
            j += 1
        deltas[i] = (array[index[0][0]] - array[index[0][1]] + (2 * (array[index[1][0]] - array[index[1][1]]))) / 10
    return deltas

# Specify the folder path
folder_path = "augmented_audio/"

# List all files in the folder
files = os.listdir(folder_path)

# Create a dictionary to store data for each word
word_data = {}

# Traverse through each file
for file_name in files:
    if "pronounciation" in file_name.lower():
        if file_name.lower().endswith(".wav"):
            # Parse the file name to extract information
            pattern = r'([A-Za-z]+)\d+([A-Za-z]+)'
            # Use re.match to find the pattern in the file name
            match = re.match(pattern, file_name)
            if match:
                # Extract the name and word from the matched groups
                name = match.group(1)
                word = match.group(2)
            
            # Check if the word is already in the dictionary
            if word not in word_data:
                word_data[word] = {'Name': [], 'MFCC': [], 'Vector': []}

            # Load the original audio and extract MFCC
            input_file_path = os.path.join(folder_path, file_name)
            mfccs = extract_mfcc(input_file_path)
            
            # Load the original audio
            sr, audio = read(os.path.join(folder_path, file_name))
            
            # Extract features (MFCC and delta coefficients)
            features = extract_features(audio, sr)
            
            # Add data to the dictionary
            word_data[word]['Name'].append(name)
            word_data[word]['MFCC'].append(mfccs)
            word_data[word]['Vector'].append(features)  # Add the vector values

# Create DataFrames for each word
word_dfs = {}
for word, data in word_data.items():
    df = pd.DataFrame(data)
    word_dfs[word] = df

# Display DataFrames for each word
for word, df in word_dfs.items():
    print(f"\nWord: {word}")
    print(df)
df

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd



Word: Pronounciation
         Name                                               MFCC  \
0    Abhishek  [-399.33517, -410.98068, -446.12094, -462.5722...   
1    Abhishek  [-393.50916, -416.78497, -443.1696, -447.20905...   
2    Abhishek  [-398.04355, -407.43915, -423.16266, -427.1824...   
3    Abhishek  [-279.38562, -255.72926, -259.68298, -265.5101...   
4    Abhishek  [-398.58353, -407.81512, -439.38373, -454.3276...   
..        ...                                                ...   
205  Sunamdha  [-454.1231, -468.5504, -485.27255, -494.31076,...   
206  Sunamdha  [-304.044, -272.84253, -268.24622, -271.28018,...   
207  Sunamdha  [-455.58807, -471.42218, -504.5795, -518.75836...   
208  Sunamdha  [-445.229, -472.5361, -499.69522, -508.4312, -...   
209  Sunamdha  [-453.48724, -471.49097, -492.32755, -502.4375...   

                                                Vector  
0    [1.4150455334195575, 0.9546512831353698, 0.366...  
1    [0.747931590109873, 0.4648794312754385, 0.

Unnamed: 0,Name,MFCC,Vector
0,Abhishek,"[-399.33517, -410.98068, -446.12094, -462.5722...","[1.4150455334195575, 0.9546512831353698, 0.366..."
1,Abhishek,"[-393.50916, -416.78497, -443.1696, -447.20905...","[0.747931590109873, 0.4648794312754385, 0.2183..."
2,Abhishek,"[-398.04355, -407.43915, -423.16266, -427.1824...","[0.35104955499818213, 0.8060239526847797, 0.11..."
3,Abhishek,"[-279.38562, -255.72926, -259.68298, -265.5101...","[0.008702108523313145, 0.83070381055796, 1.778..."
4,Abhishek,"[-398.58353, -407.81512, -439.38373, -454.3276...","[0.9068272101350625, 0.8688605322756997, 0.287..."
...,...,...,...
205,Sunamdha,"[-454.1231, -468.5504, -485.27255, -494.31076,...","[-0.5167410570874565, -0.4283380087267415, 0.3..."
206,Sunamdha,"[-304.044, -272.84253, -268.24622, -271.28018,...","[-0.41733527759116845, -0.6118640605585646, -0..."
207,Sunamdha,"[-455.58807, -471.42218, -504.5795, -518.75836...","[-0.20458474852108818, -0.3249366339081963, 0...."
208,Sunamdha,"[-445.229, -472.5361, -499.69522, -508.4312, -...","[-0.47416335629183765, -0.5520375781211978, 0...."


### Training all models on MFCC

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df_vector = df.copy()

# Assuming df is your DataFrame with 'Vector' and 'Label' columns
X = df_vector['MFCC'].values
y = df_vector['Name']

# Define a custom padding function
def pad_sequences_with_mean(sequences, max_length):
    padded_sequences = np.zeros((len(sequences), max_length))
    
    for i, seq in enumerate(sequences):
        seq_len = len(seq)
        if seq_len > 0:
            mean_value = np.mean(seq)
            padded_sequences[i, :seq_len] = seq
            padded_sequences[i, seq_len:] = mean_value
    
    return padded_sequences

# Find the maximum length of sequences
max_length = max(len(seq) for seq in X)

# Pad the sequences with the mean value
X_padded = pad_sequences_with_mean(X, max_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

# Initialize various classifiers and create a dictionary to store trained models
trained_models = {}

classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'MLP Classifier': MLPClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'Bagging Classifier': BaggingClassifier(),
    'Extra Trees Classifier': ExtraTreesClassifier(),
    'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis()
}

# Train and evaluate each classifier
for name, classifier in classifiers.items():
    print(f"\nTraining and evaluating {name}...")
    
    # Fit the model on the training data
    classifier.fit(X_train, y_train)
    
    # Make predictions on the test data
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    
    # Save the trained model in the dictionary
    trained_models[name] = classifier
    
    # Print the evaluation metrics
    print(f"Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(classification_rep)



Training and evaluating Logistic Regression...
Accuracy: 1.00
Confusion Matrix:
[[12  0  0]
 [ 0 15  0]
 [ 0  0 15]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       1.00      1.00      1.00        12
        Arun       1.00      1.00      1.00        15
    Sunamdha       1.00      1.00      1.00        15

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42


Training and evaluating Decision Tree...
Accuracy: 0.90
Confusion Matrix:
[[11  0  1]
 [ 2 13  0]
 [ 0  1 14]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.85      0.92      0.88        12
        Arun       0.93      0.87      0.90        15
    Sunamdha       0.93      0.93      0.93        15

    accuracy                           0.90        42
   macro avg       0.90      0.91      0.90        42
weighted avg    



Accuracy: 0.64
Confusion Matrix:
[[ 1  0 11]
 [ 2 11  2]
 [ 0  0 15]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.33      0.08      0.13        12
        Arun       1.00      0.73      0.85        15
    Sunamdha       0.54      1.00      0.70        15

    accuracy                           0.64        42
   macro avg       0.62      0.61      0.56        42
weighted avg       0.64      0.64      0.59        42


Training and evaluating Bagging Classifier...
Accuracy: 0.86
Confusion Matrix:
[[11  1  0]
 [ 2 13  0]
 [ 2  1 12]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.73      0.92      0.81        12
        Arun       0.87      0.87      0.87        15
    Sunamdha       1.00      0.80      0.89        15

    accuracy                           0.86        42
   macro avg       0.87      0.86      0.86        42
weighted avg       0.88      0.86      0.86        42


Trai



In [None]:
max_length

4325

In [63]:
trained_models

{'Logistic Regression': LogisticRegression(),
 'Decision Tree': DecisionTreeClassifier(),
 'Random Forest': RandomForestClassifier(),
 'Gradient Boosting': GradientBoostingClassifier(),
 'Support Vector Machine': SVC(),
 'K-Nearest Neighbors': KNeighborsClassifier(),
 'Naive Bayes': GaussianNB(),
 'MLP Classifier': MLPClassifier(),
 'AdaBoost Classifier': AdaBoostClassifier(),
 'Bagging Classifier': BaggingClassifier(),
 'Extra Trees Classifier': ExtraTreesClassifier(),
 'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis()}

### Collect test sample files

In [71]:
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv

name = "Sunamdha"
freq = 44100
duration = 2

words = ['Pronounciation'] 

for i in range(10):
    file_name = "audio_files/test/test/" + name + str(i+1) + word + '.wav'
    print("Recording file " + file_name)
    print(f'{word} - {i+1}')
    recording = sd.rec(int(duration * freq), samplerate=freq, channels=1)
    sd.wait()
    write(file_name, freq, recording)
    print("Recorded file " + file_name)

Recording file audio_files/test/test/Sunamdha1Pronounciation.wav
Pronounciation - 1
Recorded file audio_files/test/test/Sunamdha1Pronounciation.wav
Recording file audio_files/test/test/Sunamdha2Pronounciation.wav
Pronounciation - 2
Recorded file audio_files/test/test/Sunamdha2Pronounciation.wav
Recording file audio_files/test/test/Sunamdha3Pronounciation.wav
Pronounciation - 3
Recorded file audio_files/test/test/Sunamdha3Pronounciation.wav
Recording file audio_files/test/test/Sunamdha4Pronounciation.wav
Pronounciation - 4
Recorded file audio_files/test/test/Sunamdha4Pronounciation.wav
Recording file audio_files/test/test/Sunamdha5Pronounciation.wav
Pronounciation - 5
Recorded file audio_files/test/test/Sunamdha5Pronounciation.wav
Recording file audio_files/test/test/Sunamdha6Pronounciation.wav
Pronounciation - 6
Recorded file audio_files/test/test/Sunamdha6Pronounciation.wav
Recording file audio_files/test/test/Sunamdha7Pronounciation.wav
Pronounciation - 7
Recorded file audio_files/te

### Predict live audio clip with all models

In [34]:
import os
import librosa
import pandas as pd
import re
import numpy as np
from scipy.io.wavfile import read
from sklearn import preprocessing
import python_speech_features as mfcc

def extract_mfcc(file_path, n_mfcc=25):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs.flatten()

# Function to extract features (MFCC and delta coefficients)
def extract_features(audio, rate):
    mfcc_feature = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined.flatten()

# Function to calculate delta coefficients
def calculate_delta(array):
    rows, cols = array.shape
    deltas = np.zeros((rows, 20))
    n = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= n:
            if i - j < 0:
                first = 0
            else:
                first = i - j
            if i + j > rows - 1:
                second = rows - 1
            else:
                second = i + j
            index.append((second, first))
            j += 1
        deltas[i] = (array[index[0][0]] - array[index[0][1]] + (2 * (array[index[1][0]] - array[index[1][1]]))) / 10
    return deltas

def create_df_test_vec(folder_path):
    files = os.listdir(folder_path)
    # vector_data = {'Vector': []}
    mfcc_data = {'MFCC': []}

    for file_name in files:
        if file_name.lower().endswith(".wav"):
            # MFCC Dataset
            input_file_path = os.path.join(folder_path, file_name)
            mfccs = extract_mfcc(input_file_path)
            mfcc_data['MFCC'].append(mfccs)

            # Vector Dataset
            # Load the original audio
            # sr, audio = read(os.path.join(folder_path, file_name))
            # # Extract features (MFCC and delta coefficients)
            # features = extract_features(audio, sr)
            # # Add vector to the dictionary
            # vector_data['Vector'].append(features)  # Add the vector values

    # Create DataFrame
    df_test_mfcc = pd.DataFrame(mfcc_data)

    return df_test_mfcc

# Specify the folder path
folder_path = "audio_files/test/"

# Create DataFrame
df_test_mfcc = create_df_test_vec(folder_path)

In [35]:
df_test_mfcc

Unnamed: 0,MFCC
0,"[-498.53348, -490.7242, -496.195, -500.975, -5..."
1,"[-267.81674, -282.32547, -305.14893, -304.0901..."
2,"[-238.04095, -300.84586, -435.73254, -435.6609..."
3,"[-485.6647, -365.62228, -308.145, -325.15506, ..."
4,"[-502.50278, -481.8657, -482.20917, -483.40936..."


In [36]:
df_test_mfcc['MFCC'][0].shape

(4325,)

In [37]:
X_test_vec = df_test_mfcc['MFCC'].values

max_length_test = max(len(seq) for seq in X_test_vec)
max_length_test

4325

In [6]:
type(X_test_vec)

numpy.ndarray

In [38]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Assuming df_test_vec is your DataFrame with the 'Vector' column
X_test_vec = df_test_mfcc['MFCC'].values

# Find the maximum length of sequences for the test data
max_length_test = max(len(seq) for seq in X_test_vec)

# Pad or truncate the test sequences to match the training data's max_length
X_test_padded = pad_sequences_with_mean(X_test_vec, max_length_test)

# Initialize a dictionary to store predictions
predictions = {}

# Make predictions using the trained models
for name, model in trained_models.items():
    # Ensure the test_vector has the same shape as the training data
    # For example, you may need to pad or reshape it
    # test_vector_padded = pad_sequences_with_mean(X_test_vec, max_length_test)
    
    # Make the prediction
    y_pred = model.predict(X_test_padded)
    
    # Store the predictions in the dictionary
    predictions[name] = y_pred

# Create a DataFrame from the predictions
df_predictions = pd.DataFrame(predictions)

# Display the predicted usernames
df_predictions

Unnamed: 0,Logistic Regression,Decision Tree,Random Forest,Gradient Boosting,Support Vector Machine,K-Nearest Neighbors,Naive Bayes,MLP Classifier,AdaBoost Classifier,Bagging Classifier,Extra Trees Classifier,Quadratic Discriminant Analysis
0,Abhishek,Sunamdha,Abhishek,Abhishek,Abhishek,Abhishek,Abhishek,Abhishek,Sunamdha,Abhishek,Abhishek,Sunamdha
1,Abhishek,Abhishek,Abhishek,Abhishek,Abhishek,Abhishek,Abhishek,Abhishek,Sunamdha,Abhishek,Abhishek,Abhishek
2,Abhishek,Arun,Arun,Arun,Arun,Arun,Arun,Arun,Arun,Abhishek,Abhishek,Abhishek
3,Abhishek,Sunamdha,Abhishek,Abhishek,Sunamdha,Sunamdha,Abhishek,Abhishek,Sunamdha,Sunamdha,Abhishek,Sunamdha
4,Abhishek,Abhishek,Abhishek,Abhishek,Sunamdha,Abhishek,Abhishek,Abhishek,Sunamdha,Abhishek,Abhishek,Sunamdha


In [39]:
# best models dictionary - 
seleted_models = {
    "Arun": ["Decision Tree", "Random Forest", "Gradient Boosting", "Naive Bayes", "Adaboost Classifier", "Bagging Classifier", "Extra Trees Classifier"],
    "Abhishek": ["Decision Tree", "Random Forest", "Gradient Boosting", "Support Vector Machine", "K-Nearest Neighbors", "Naive Bayes", "MLP Classifier", "Bagging Classifier", "Extra Trees Classifier"],
    "Sunamdha": ["Support Vector Machine", "K-Nearest Neighbors", "MLP Classifier", "Bagging Classifier"]
}

In [None]:
# now random voice comes, then predicted across all models and score taken out. >=3 scores set aside. Then check how many of these shortlist models 
# match with dictionary models. The more interection name choose as user who spoke.



In [8]:
# best models for Arun - 

import os
import joblib

# Define the folder path
folder_path = "selective_models/"

# Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)

# Assuming best_models is a list containing the names of the best models
best_model_names = ["Decision Tree", "Random Forest", "Gradient Boosting", "Naive Bayes", "Adaboost Classifier", "Bagging Classifier", "Extra Trees Classifier"]

# Save the best models with the specified prefix
for model_name in best_model_names:
    # Check if the model name exists in the trained_models dictionary
    if model_name in trained_models:
        model = trained_models[model_name]
        # Adjust the naming convention as needed
        model_filename = os.path.join(folder_path, f"Arun-{model_name}-Pronounciation.pkl")
        joblib.dump(model, model_filename)
        print(f"Model '{model_name}' saved in '{folder_path}'.")
    else:
        print(f"Model '{model_name}' not found in trained_models dictionary.")


Model 'Decision Tree' saved in 'selective_models/'.
Model 'Random Forest' saved in 'selective_models/'.
Model 'Gradient Boosting' saved in 'selective_models/'.
Model 'Naive Bayes' saved in 'selective_models/'.
Model 'Adaboost Classifier' not found in trained_models dictionary.
Model 'Bagging Classifier' saved in 'selective_models/'.
Model 'Extra Trees Classifier' saved in 'selective_models/'.


In [None]:
# load all pronounciation related models - 
# once loaded 

### Training all models on MFCC and Vector

In [5]:
def pad_sequences_with_mean(sequences, max_length):
    padded_sequences = np.zeros((len(sequences), max_length))
    
    for i, seq in enumerate(sequences):
        seq_len = len(seq)
        if seq_len > 0:
            mean_value = np.mean(seq)
            padded_sequences[i, :seq_len] = seq
            padded_sequences[i, seq_len:] = mean_value
    
    return padded_sequences

In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df_vector = df.copy()

# Assuming df is your DataFrame with 'Vector' and 'Label' columns
X_mfcc = df_vector['MFCC'].values
X_vector = df_vector['Vector'].values
y = df_vector['Name']

max_len_mfcc = max(len(x) for x in X_mfcc)
max_len_vector = max(len(x) for x in X_vector)
max_len = max(max_len_mfcc, max_len_vector)

X_mfcc_padded = pad_sequences_with_mean(X_mfcc, max_len)
X_vector_padded = pad_sequences_with_mean(X_vector, max_len)

# Concatenate 'MFCC' and 'Vector' arrays along the second axis
X_combined = np.concatenate([X_mfcc_padded, X_vector_padded], axis=1)


# Define a custom padding function
def pad_sequences_with_mean(sequences, max_length):
    padded_sequences = np.zeros((len(sequences), max_length))
    
    for i, seq in enumerate(sequences):
        seq_len = len(seq)
        if seq_len > 0:
            mean_value = np.mean(seq)
            padded_sequences[i, :seq_len] = seq
            padded_sequences[i, seq_len:] = mean_value
    
    return padded_sequences

# Find the maximum length of sequences
# max_length = max(len(seq) for seq in X)

# # Pad the sequences with the mean value
# X_padded = pad_sequences_with_mean(X, max_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y, test_size=0.2, random_state=23)

# Initialize various classifiers and create a dictionary to store trained models
trained_models = {}

classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'MLP Classifier': MLPClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'Bagging Classifier': BaggingClassifier(),
    'Extra Trees Classifier': ExtraTreesClassifier(),
    'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis()
}

# Train and evaluate each classifier
for name, classifier in classifiers.items():
    print(f"\nTraining and evaluating {name}...")
    
    # Fit the model on the training data
    classifier.fit(X_train, y_train)
    
    # Make predictions on the test data
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    
    # Save the trained model in the dictionary
    trained_models[name] = classifier
    
    # Print the evaluation metrics
    print(f"Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(classification_rep)


Training and evaluating Logistic Regression...
Accuracy: 1.00
Confusion Matrix:
[[15  0  0]
 [ 0 15  0]
 [ 0  0 12]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       1.00      1.00      1.00        15
        Arun       1.00      1.00      1.00        15
    Sunamdha       1.00      1.00      1.00        12

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42


Training and evaluating Decision Tree...
Accuracy: 0.88
Confusion Matrix:
[[14  1  0]
 [ 2 13  0]
 [ 1  1 10]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.82      0.93      0.88        15
        Arun       0.87      0.87      0.87        15
    Sunamdha       1.00      0.83      0.91        12

    accuracy                           0.88        42
   macro avg       0.90      0.88      0.88        42
weighted avg     



Accuracy: 0.62
Confusion Matrix:
[[13  2  0]
 [11  4  0]
 [ 3  0  9]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.48      0.87      0.62        15
        Arun       0.67      0.27      0.38        15
    Sunamdha       1.00      0.75      0.86        12

    accuracy                           0.62        42
   macro avg       0.72      0.63      0.62        42
weighted avg       0.70      0.62      0.60        42


Training and evaluating Bagging Classifier...
Accuracy: 0.98
Confusion Matrix:
[[14  0  1]
 [ 0 15  0]
 [ 0  0 12]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       1.00      0.93      0.97        15
        Arun       1.00      1.00      1.00        15
    Sunamdha       0.92      1.00      0.96        12

    accuracy                           0.98        42
   macro avg       0.97      0.98      0.98        42
weighted avg       0.98      0.98      0.98        42


Trai



Accuracy: 0.81
Confusion Matrix:
[[12  1  2]
 [ 1 10  4]
 [ 0  0 12]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.92      0.80      0.86        15
        Arun       0.91      0.67      0.77        15
    Sunamdha       0.67      1.00      0.80        12

    accuracy                           0.81        42
   macro avg       0.83      0.82      0.81        42
weighted avg       0.84      0.81      0.81        42



In [54]:
import os
import librosa
import pandas as pd
import re
import numpy as np
from scipy.io.wavfile import read
from sklearn import preprocessing
import python_speech_features as mfcc

def extract_mfcc(file_path, n_mfcc=25):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs.flatten()

# Function to extract features (MFCC and delta coefficients)
def extract_features(audio, rate):
    mfcc_feature = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined.flatten()

# Function to calculate delta coefficients
def calculate_delta(array):
    rows, cols = array.shape
    deltas = np.zeros((rows, 20))
    n = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= n:
            if i - j < 0:
                first = 0
            else:
                first = i - j
            if i + j > rows - 1:
                second = rows - 1
            else:
                second = i + j
            index.append((second, first))
            j += 1
        deltas[i] = (array[index[0][0]] - array[index[0][1]] + (2 * (array[index[1][0]] - array[index[1][1]]))) / 10
    return deltas

def create_df_test_vec(folder_path):
    files = os.listdir(folder_path)
    # vector_data = {'Vector': []}
    vec_mfcc_data = {'MFCC': [], 'Vector': []}

    for file_name in files:
        if file_name.lower().endswith(".wav"):
            # MFCC Dataset
            input_file_path = os.path.join(folder_path, file_name)
            mfccs = extract_mfcc(input_file_path)
            vec_mfcc_data['MFCC'].append(mfccs)

            # Vector Dataset
            # Load the original audio
            sr, audio = read(os.path.join(folder_path, file_name))
            # Extract features (MFCC and delta coefficients)
            features = extract_features(audio, sr)
            # Add vector to the dictionary
            vec_mfcc_data['Vector'].append(features)  # Add the vector values

    # Create DataFrame
    df_test_mfcc_vec = pd.DataFrame(vec_mfcc_data)

    return df_test_mfcc_vec

# Specify the folder path
folder_path = "audio_files/test/test/"

# Create DataFrame
df_test_mfcc_vec = create_df_test_vec(folder_path)

In [50]:
df_test_mfcc_vec

Unnamed: 0,MFCC,Vector
0,"[-773.37177, -773.37177, -773.37177, -773.3717...","[-1.2264953037130666, -1.5692548036474234, -1...."
1,"[-713.7566, -713.7566, -713.7566, -713.7566, -...","[-1.3419352300619702, -1.6418373323387692, -1...."
2,"[-769.26025, -769.26025, -769.26025, -769.2602...","[-1.1601130120094152, -1.4955321356818048, -1...."
3,"[-759.9057, -759.9057, -759.9057, -759.9057, -...","[-1.2181456955416914, -1.5679575195827389, -1...."
4,"[-827.6323, -827.6323, -827.6323, -827.6323, -...","[-1.0357001428889139, -1.3650344273825437, -1...."


In [61]:
X_combined[0].shape

(31920,)

In [57]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

X_mfcc = df_test_mfcc_vec['MFCC'].values
X_vector = df_test_mfcc_vec['Vector'].values

max_len_mfcc = max(len(x) for x in X_mfcc)
max_len_vector = max(len(x) for x in X_vector)
max_len = max(max_len_mfcc, max_len_vector)

X_mfcc_padded = pad_sequences_with_mean(X_mfcc, max_len)
X_vector_padded = pad_sequences_with_mean(X_vector, max_len)

# Concatenate 'MFCC' and 'Vector' arrays along the second axis
X_combined = np.concatenate([X_mfcc_padded, X_vector_padded], axis=1)


# Initialize a dictionary to store predictions
predictions = {}

# Make predictions using the trained models
for name, model in trained_models.items():
    # Ensure the test_vector has the same shape as the training data
    # For example, you may need to pad or reshape it
    # test_vector_padded = pad_sequences_with_mean(X_test_vec, max_length_test)
    
    # Make the prediction
    y_pred = model.predict(X_combined)
    
    # Store the predictions in the dictionary
    predictions[name] = y_pred

# Create a DataFrame from the predictions
df_predictions = pd.DataFrame(predictions)

# Display the predicted usernames
df_predictions

ValueError: X has 31920 features, but LogisticRegression is expecting 15920 features as input.