In [1]:
# load the 'prononciation' files
# generate the df with mfcc, vector, row mean
# train all models from additional_features file
# collect 5 test samples
# augment to 5*7 = 35
# predict 

### Pronounciation files

In [1]:
import os

files = []
for filename in os.listdir("augmented_audio/"):
    if "pronounciation" in filename.lower():
        files.append("augmented_audio/" + filename)

files

['augmented_audio/Abhishek10Pronounciation.wavoutput_augmented.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_compressed.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_cropped.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_noisy.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_pitch_shifted.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_speed_changed.wav',
 'augmented_audio/Abhishek10Pronounciation.wavoutput_stretched.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_augmented.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_compressed.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_cropped.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_noisy.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_pitch_shifted.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_speed_changed.wav',
 'augmented_audio/Abhishek1Pronounciation.wavoutput_stretched.wav',
 'augmented_audio/Abhishek2Pronounc

### Creating df of Pronounciation files - MFCC, Vector

In [6]:
import os
import librosa
import pandas as pd
import re
import numpy as np
from scipy.io.wavfile import read
from sklearn import preprocessing
import python_speech_features as mfcc

def extract_mfcc(file_path, n_mfcc=25):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs.flatten()

# Function to extract features (MFCC and delta coefficients)
def extract_features(audio, rate):
    mfcc_feature = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined.flatten()

# Function to calculate delta coefficients
def calculate_delta(array):
    rows, cols = array.shape
    deltas = np.zeros((rows, 20))
    n = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= n:
            if i - j < 0:
                first = 0
            else:
                first = i - j
            if i + j > rows - 1:
                second = rows - 1
            else:
                second = i + j
            index.append((second, first))
            j += 1
        deltas[i] = (array[index[0][0]] - array[index[0][1]] + (2 * (array[index[1][0]] - array[index[1][1]]))) / 10
    return deltas

# Specify the folder path
folder_path = "augmented_audio/"

# List all files in the folder
files = os.listdir(folder_path)

# Create a dictionary to store data for each word
word_data = {}

# Traverse through each file
for file_name in files:
    if "pronounciation" in file_name.lower():
        if file_name.lower().endswith(".wav"):
            # Parse the file name to extract information
            pattern = r'([A-Za-z]+)\d+([A-Za-z]+)'
            # Use re.match to find the pattern in the file name
            match = re.match(pattern, file_name)
            if match:
                # Extract the name and word from the matched groups
                name = match.group(1)
                word = match.group(2)
            
            # Check if the word is already in the dictionary
            if word not in word_data:
                word_data[word] = {'Name': [], 'MFCC': [], 'Vector': []}

            # Load the original audio and extract MFCC
            input_file_path = os.path.join(folder_path, file_name)
            mfccs = extract_mfcc(input_file_path)
            
            # Load the original audio
            sr, audio = read(os.path.join(folder_path, file_name))
            
            # Extract features (MFCC and delta coefficients)
            features = extract_features(audio, sr)
            
            # Add data to the dictionary
            word_data[word]['Name'].append(name)
            word_data[word]['MFCC'].append(mfccs)
            word_data[word]['Vector'].append(features)  # Add the vector values

# Create DataFrames for each word
word_dfs = {}
for word, data in word_data.items():
    df = pd.DataFrame(data)
    word_dfs[word] = df

# Display DataFrames for each word
for word, df in word_dfs.items():
    print(f"\nWord: {word}")
    print(df)
df


Word: Pronounciation
         Name                                               MFCC  \
0    Abhishek  [-399.33517, -410.98068, -446.12094, -462.5722...   
1    Abhishek  [-393.50916, -416.78497, -443.1696, -447.20905...   
2    Abhishek  [-398.04355, -407.43915, -423.16266, -427.1824...   
3    Abhishek  [-279.38562, -255.72926, -259.68298, -265.5101...   
4    Abhishek  [-398.58353, -407.81512, -439.38373, -454.3276...   
..        ...                                                ...   
205  Sunamdha  [-454.1231, -468.5504, -485.27255, -494.31076,...   
206  Sunamdha  [-304.044, -272.84253, -268.24622, -271.28018,...   
207  Sunamdha  [-455.58807, -471.42218, -504.5795, -518.75836...   
208  Sunamdha  [-445.229, -472.5361, -499.69522, -508.4312, -...   
209  Sunamdha  [-453.48724, -471.49097, -492.32755, -502.4375...   

                                                Vector  
0    [1.4150455334195575, 0.9546512831353698, 0.366...  
1    [0.747931590109873, 0.4648794312754385, 0.

Unnamed: 0,Name,MFCC,Vector
0,Abhishek,"[-399.33517, -410.98068, -446.12094, -462.5722...","[1.4150455334195575, 0.9546512831353698, 0.366..."
1,Abhishek,"[-393.50916, -416.78497, -443.1696, -447.20905...","[0.747931590109873, 0.4648794312754385, 0.2183..."
2,Abhishek,"[-398.04355, -407.43915, -423.16266, -427.1824...","[0.35104955499818213, 0.8060239526847797, 0.11..."
3,Abhishek,"[-279.38562, -255.72926, -259.68298, -265.5101...","[0.008702108523313145, 0.83070381055796, 1.778..."
4,Abhishek,"[-398.58353, -407.81512, -439.38373, -454.3276...","[0.9068272101350625, 0.8688605322756997, 0.287..."
...,...,...,...
205,Sunamdha,"[-454.1231, -468.5504, -485.27255, -494.31076,...","[-0.5167410570874565, -0.4283380087267415, 0.3..."
206,Sunamdha,"[-304.044, -272.84253, -268.24622, -271.28018,...","[-0.41733527759116845, -0.6118640605585646, -0..."
207,Sunamdha,"[-455.58807, -471.42218, -504.5795, -518.75836...","[-0.20458474852108818, -0.3249366339081963, 0...."
208,Sunamdha,"[-445.229, -472.5361, -499.69522, -508.4312, -...","[-0.47416335629183765, -0.5520375781211978, 0...."


### Training all models on Vector

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

df_vector = df.copy()

# Assuming df is your DataFrame with 'Vector' and 'Label' columns
X = df_vector['Vector'].values
y = df_vector['Name']

# Define a custom padding function
def pad_sequences_with_mean(sequences, max_length):
    padded_sequences = np.zeros((len(sequences), max_length))
    
    for i, seq in enumerate(sequences):
        seq_len = len(seq)
        if seq_len > 0:
            mean_value = np.mean(seq)
            padded_sequences[i, :seq_len] = seq
            padded_sequences[i, seq_len:] = mean_value
    
    return padded_sequences

# Find the maximum length of sequences
max_length = max(len(seq) for seq in X)

# Pad the sequences with the mean value
X_padded = pad_sequences_with_mean(X, max_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=22)

# Initialize various classifiers and create a dictionary to store trained models
trained_models = {}

classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'MLP Classifier': MLPClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'Bagging Classifier': BaggingClassifier(),
    'Extra Trees Classifier': ExtraTreesClassifier(),
    'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis()
}

# Train and evaluate each classifier
for name, classifier in classifiers.items():
    print(f"\nTraining and evaluating {name}...")
    
    # Fit the model on the training data
    classifier.fit(X_train, y_train)
    
    # Make predictions on the test data
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    
    # Save the trained model in the dictionary
    trained_models[name] = classifier
    
    # Print the evaluation metrics
    print(f"Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(classification_rep)


Training and evaluating Logistic Regression...
Accuracy: 1.00
Confusion Matrix:
[[12  0  0]
 [ 0 15  0]
 [ 0  0 15]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       1.00      1.00      1.00        12
        Arun       1.00      1.00      1.00        15
    Sunamdha       1.00      1.00      1.00        15

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42


Training and evaluating Decision Tree...
Accuracy: 0.83
Confusion Matrix:
[[10  2  0]
 [ 3 12  0]
 [ 2  0 13]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.67      0.83      0.74        12
        Arun       0.86      0.80      0.83        15
    Sunamdha       1.00      0.87      0.93        15

    accuracy                           0.83        42
   macro avg       0.84      0.83      0.83        42
weighted avg     



Accuracy: 0.83
Confusion Matrix:
[[12  0  0]
 [ 6  9  0]
 [ 1  0 14]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.63      1.00      0.77        12
        Arun       1.00      0.60      0.75        15
    Sunamdha       1.00      0.93      0.97        15

    accuracy                           0.83        42
   macro avg       0.88      0.84      0.83        42
weighted avg       0.89      0.83      0.83        42


Training and evaluating Bagging Classifier...
Accuracy: 0.93
Confusion Matrix:
[[12  0  0]
 [ 3 12  0]
 [ 0  0 15]]
Classification Report:
              precision    recall  f1-score   support

    Abhishek       0.80      1.00      0.89        12
        Arun       1.00      0.80      0.89        15
    Sunamdha       1.00      1.00      1.00        15

    accuracy                           0.93        42
   macro avg       0.93      0.93      0.93        42
weighted avg       0.94      0.93      0.93        42


Trai



In [9]:
trained_models

{'Logistic Regression': LogisticRegression(),
 'Decision Tree': DecisionTreeClassifier(),
 'Random Forest': RandomForestClassifier(),
 'Gradient Boosting': GradientBoostingClassifier(),
 'Support Vector Machine': SVC(),
 'K-Nearest Neighbors': KNeighborsClassifier(),
 'Naive Bayes': GaussianNB(),
 'MLP Classifier': MLPClassifier(),
 'AdaBoost Classifier': AdaBoostClassifier(),
 'Bagging Classifier': BaggingClassifier(),
 'Extra Trees Classifier': ExtraTreesClassifier(),
 'Quadratic Discriminant Analysis': QuadraticDiscriminantAnalysis()}

### Predict live audio clip with all models

In [11]:
import sounddevice as sd
from scipy.io.wavfile import write

name = input("Enter your name: ")
freq = 44100
duration = 2
word = "Pronounciation"
for i in range(5):
    file_name = "audio_files/test/" + name + str(i+1) + word + '.wav'
    print("Recording file " + file_name)
    print(f'{word} - {i+1}')
    recording = sd.rec(int(duration * freq), samplerate=freq, channels=2)
    sd.wait()
    write(file_name, freq, recording)
    print("Recorded file " + file_name)

import os
from scipy.io.wavfile import read
import numpy as np

# Assuming df is your DataFrame with 'Vector' and 'Label' columns
folder_path = '/path/to/your/folder'

# Function to extract features and make predictions for a given audio file
def predict_for_audio_file(file_path, trained_models, max_length):
    # Load the original audio
    sr, audio = read(file_path)

    # Extract features (MFCC and delta coefficients)
    features = extract_features(audio, sr)

    # Make predictions using the saved models
    predictions = {}
    for name, model in trained_models.items():
        # Ensure the test_vector has the same shape as the training data
        # For example, you may need to pad or reshape it
        test_vector_padded = pad_sequences_with_mean([features], max_length)

        # Make the prediction
        prediction = model.predict(test_vector_padded)
        predictions[name] = prediction

    return predictions

# Get a list of all WAV files in the folder
wav_files = [file for file in os.listdir(folder_path) if file.endswith('.wav')]

# Loop through all WAV files and make predictions
for wav_file in wav_files:
    wav_file_path = os.path.join(folder_path, wav_file)
    
    # Make predictions for the current audio file
    predictions = predict_for_audio_file(wav_file_path, trained_models, max_length)
    
    # Print predictions for each model
    print(f"\nPredictions for {wav_file}:")
    for name, prediction in predictions.items():
        print(f"{name} Prediction: {prediction}")

Recording file audio_files/test/Arun1Pronounciation.wav
Pronounciation - 1


NameError: name 'write' is not defined