# In [4]:
import random
import sounddevice as sd
import numpy as np
import librosa
import joblib
import os
from python_speech_features import mfcc
from sklearn import preprocessing

# Base path that is joined with '<word>_knn_model.joblib' further down to
# locate the KNN model for each word.
# NOTE(review): the value ends in '.h5', which reads like a file name rather
# than a directory -- confirm this is really the folder holding the .joblib
# model files.
models_folder = 'models/model.h5'

def record_audio(duration=3, sr=22050):
    """Prompt the user, record a single-channel clip, and return it flattened.

    Args:
        duration: Length of the recording in seconds.
        sr: Sampling rate in Hz used for the capture.

    Returns:
        A ``(samples, sr)`` tuple where ``samples`` is a 1-D float32 array
        and ``sr`` is the sampling rate that was passed in.
    """
    print("Please read the word.")

    frame_count = int(duration * sr)
    recording = sd.rec(frame_count, samplerate=sr, channels=1, dtype=np.float32)

    # Block until the capture has finished before touching the buffer.
    sd.wait()

    samples = recording.flatten()
    return samples, sr

# Per-word store: word -> {'Audio': recorded samples, 'Vector': extracted features}
user_recordings = {}

# Prompt for, capture and featurize one recording per selected word
for word_model in selected_words:
    # The word is whatever precedes the first underscore of the entry
    word = word_model.partition('_')[0]
    print(f"\nPlease read the word: {word.capitalize()}")

    # Capture the clip, then convert it into a feature vector
    audio_data, sr = record_audio()
    features = extract_features(audio_data, sr)

    # Keep both the raw audio and its features; a repeated word replaces
    # the entry stored for it earlier
    user_recordings[word] = dict(Audio=audio_data, Vector=features)

# Predict the speaker for each recorded word using that word's own KNN model.
for word, recording_data in user_recordings.items():
    # Load the model trained for this word.
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files that come from a trusted source.
    model_path = os.path.join(models_folder, f'{word}_knn_model.joblib')
    knn_model = joblib.load(model_path)

    # Number of features the model was fitted with. Prefer the public
    # scikit-learn attribute; fall back to the private training matrix for
    # models pickled by versions that do not expose `n_features_in_`.
    expected_num_features = getattr(knn_model, 'n_features_in_', None)
    if expected_num_features is None:
        expected_num_features = knn_model._fit_X.shape[1]

    # Bring the recording's vector to the length the model expects.
    # NOTE(review): this treats 'Vector' as a flat 1-D sequence -- confirm
    # that extract_features returns one.
    feature_vector = recording_data['Vector']
    num_features_recording = len(feature_vector)
    num_features_diff = expected_num_features - num_features_recording

    if num_features_diff < 0:
        # Too long: keep only the leading features.
        recording_data['Vector'] = feature_vector[:expected_num_features]
    elif num_features_diff > 0:
        # Too short: pad the tail with the vector's mean value. The mean is
        # only needed on this branch, so it is computed here.
        mean_value = np.mean(feature_vector)
        recording_data['Vector'] = np.pad(
            feature_vector, (0, num_features_diff), constant_values=mean_value
        )

    # Predict the speaker from the (possibly adjusted) feature vector.
    user_prediction = knn_model.predict([recording_data['Vector']])
    print(f"\nFor word '{word}', predicted speaker: {user_prediction[0]}")

4