In [3]:
import numpy as np
from tensorflow.keras.models import load_model
import librosa
from sklearn.preprocessing import LabelEncoder
import os
import sys
from tensorflow.keras.models import Model

# Set the environment variable to disable certain optimizations
# (oneDNN custom operations, per the variable name).
# NOTE(review): tensorflow.keras is already imported above this line, so the
# variable may be read too late to have any effect -- confirm, and if so move
# this assignment ahead of the TensorFlow imports.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# Function to extract MFCC features from an audio file
def extract_mfcc(file_path, n_mfcc=13):
    """Return the time-averaged MFCC vector of an audio file.

    The file is read with ``librosa.load`` at its native sampling rate
    (``sr=None``), MFCCs are computed with ``librosa.feature.mfcc`` and the
    resulting matrix is averaged over its second axis.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        Array holding the mean of each coefficient row (presumably of
        length ``n_mfcc`` for mono audio).
    """
    signal, native_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=native_rate, n_mfcc=n_mfcc)
    # Transpose first so the reduction runs over axis 0, as before.
    return coefficients.T.mean(axis=0)

# Load the trained model; compile=False skips restoring the saved
# training configuration.
model = load_model("cnn_model.h5", compile=False)

# Compile the model with metrics (to suppress the warning)
# NOTE(review): the optimizer and loss chosen here are presumably irrelevant
# for predict()-only use and need not match the original training setup --
# confirm before using this model object for training or evaluation.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load the label encoder's classes
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary Python
# objects; only load this file if it comes from a trusted source.
label_classes = np.load('label_encoder_classes.npy', allow_pickle=True)
# Rebuild the encoder by assigning the saved classes directly instead of
# calling fit(), so inverse_transform maps indices to the stored labels.
label_encoder = LabelEncoder()
label_encoder.classes_ = label_classes

# Function to predict the speaker of a given audio file
def predict_speaker(file_path, model):
    """Predict the speaker of an audio file.

    Features come from ``extract_mfcc`` with its default coefficient count;
    the predicted class index is decoded with the module-level
    ``label_encoder``.

    Args:
        file_path: Path of the audio file to classify.
        model: Object whose ``predict`` method accepts the ``(1, 13, 1)``
            feature batch and returns per-class scores.

    Returns:
        Tuple ``(speaker, score)``: the decoded label of the highest-scoring
        class and the model output for that class.
    """
    # Batch of one sample: 13 averaged MFCCs plus a trailing axis of size 1.
    features = np.reshape(extract_mfcc(file_path), (1, 13, 1))
    scores = model.predict(features)
    best_index = np.argmax(scores)
    speaker = label_encoder.inverse_transform([best_index])[0]
    return speaker, scores[0][best_index]

# Main function to handle user input
def main(audio_file):
    """Classify one audio file and print the outcome.

    If ``audio_file`` is not an existing regular file, an error message is
    printed and nothing else happens. Otherwise ``predict_speaker`` is run
    with the module-level ``model`` and the path, predicted speaker and
    confidence are printed; any exception raised along the way is caught
    and reported with ``print``.

    Args:
        audio_file: Path of the audio file to classify.
    """
    if os.path.isfile(audio_file):
        try:
            speaker, score = predict_speaker(audio_file, model)
            print(f"Audio file: {audio_file}")
            print(f"Predicted Speaker: {speaker}")
            print(f"Confidence: {score:.4f}")
        except Exception as exc:
            # Top-level boundary: report the failure instead of raising.
            print(f"An error occurred: {exc}")
    else:
        print(f"Error: {audio_file} is not a valid file.")

# Notebook-style entry point: the audio path is hard-coded here rather than
# read from the command line, and main() runs as soon as this cell/module is
# executed.
audio_file = "16000_pcm_speeches/Benjamin_Netanyau/3.wav"  # Replace this with your actual file path
main(audio_file)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402ms/step
Audio file: 16000_pcm_speeches/Benjamin_Netanyau/3.wav
Predicted Speaker: Benjamin_Netanyau
Confidence: 0.9996
