In [18]:
import os
import wave
import numpy as np
import librosa
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [2]:
def generate_silent_wav(filepath, duration_sec=1, sample_rate=44100, channels=1, noise_std=5):
    """
    Generates a WAV file that simulates realistic "silence" by adding a very low-level noise floor.
    This mimics the slight background noise from an open mic when no one is speaking.
    
    Parameters:
        filepath (str): The full path where the WAV file will be saved.
        duration_sec (float): Duration of the audio in seconds.
        sample_rate (int): Sampling rate.
        channels (int): Number of channels.
        noise_std (int): Standard deviation (in integer amplitude units) of the noise.
                         Adjust this value to simulate more or less background noise.
    """
    n_samples = int(sample_rate * duration_sec)
    
    # Generate a noise floor: Gaussian noise centered at 0 with a small standard deviation.
    # This simulates the "ambient" electronic noise of a microphone.
    silence = np.random.normal(loc=0, scale=noise_std, size=n_samples * channels)
    silence = silence.astype(np.int16)
    
    with wave.open(filepath, 'w') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(2)  # 16-bit audio: 2 bytes per sample
        wf.setframerate(sample_rate)
        wf.writeframes(silence.tobytes())

def generate_null_audios(n_files=3, duration=1.0):
    """
    Generates a number of silent audio files in Audios/Audios_Null.
    
    Parameters:
        n_files (int): Number of null audio files to create.
        duration (float): Duration in seconds for each audio.
    """
    base_folder = "Audios"
    null_folder = os.path.join(base_folder, "Audios_Null")
    os.makedirs(null_folder, exist_ok=True)
    
    for i in range(n_files):
        for n in range(5):
            file_name = f"null-0{i+1}_0{n+1}.wav"
            file_path = os.path.join(null_folder, file_name)
            generate_silent_wav(file_path, duration_sec=duration)
            print(f"Generated: {file_path}")

In [3]:
def load_audios_into_dataframe(root_folder="Audios", valid_extensions=('.wav', '.mp3', '.m4a', '.flac', '.ogg', '.wma', '.aac')):
    """
    Recursively loads all audio files from the given root folder into a DataFrame.
    
    Each row includes:
      - filepath: Full path to the file.
      - filename: File name.
      - label: Name of the immediate parent folder.
      - audio: Audio data as a numpy array.
      - sample_rate: Sampling rate of the loaded audio.
      - duration: Duration of the audio in seconds.
    """
    audio_files = []
    for dirpath, _, filenames in os.walk(root_folder):
        for filename in filenames:
            if filename.lower().endswith(valid_extensions):
                full_path = os.path.join(dirpath, filename)
                audio_files.append(full_path)
    
    data = []
    for filepath in audio_files:
        filename = os.path.basename(filepath)
        label = os.path.basename(os.path.dirname(filepath))
        try:
            audio_data, sample_rate = librosa.load(filepath, sr=None)
            duration = librosa.get_duration(y=audio_data, sr=sample_rate)
        except Exception as e:
            print(f"Error loading {filepath}: {e}")
            audio_data, sample_rate, duration = None, None, None
        
        data.append({
            'filepath': filepath,
            'filename': filename,
            'label': label,
            'audio': audio_data,
            'sample_rate': sample_rate,
            'duration': duration
        })
    
    return pd.DataFrame(data)

In [4]:
def add_noise(audio, noise_factor=0.005):
    """Adds white noise to the audio."""
    noise = np.random.randn(len(audio))
    return audio + noise_factor * noise

def apply_time_stretch(audio, rate=1.2):
    """
    Stretches (or compresses) the audio in time.
    A rate > 1.0 speeds up the audio; rate < 1.0 slows it down.
    """
    return librosa.effects.time_stretch(audio, rate=rate)

def apply_pitch_shift(audio, sr, n_steps=2):
    """
    Shifts the pitch of the audio by n_steps semitones.
    """
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)

def change_volume(audio, factor=1.5):
    """Changes the volume by multiplying the audio signal."""
    return audio * factor

def augment_audio(audio, sr):
    """
    Applies a set of augmentations to an audio sample.
    
    Returns:
        List of tuples: Each tuple contains (augmentation_method, augmented_audio).
    """
    augmentations = []
    
    # Original (no augmentation)
    augmentations.append(('original', audio))
    
    # Add Noise
    noise_audio = add_noise(audio, noise_factor=0.005)
    augmentations.append(('noise', noise_audio))
    
    # Time Stretch (speed up)
    try:
        ts_audio = apply_time_stretch(audio, rate=1.2)
        augmentations.append(('time_stretch', ts_audio))
    except Exception as e:
        print(f"Error in time stretching: {e}")
    
    # Pitch Shift (raise pitch)
    try:
        ps_audio = apply_pitch_shift(audio, sr, n_steps=2)
        augmentations.append(('pitch_shift', ps_audio))
    except Exception as e:
        print(f"Error in pitch shifting: {e}")
    
    # Volume Change (increase volume)
    vol_audio = change_volume(audio, factor=1.5)
    augmentations.append(('volume_up', vol_audio))

    # Volume Change (increase volume)
    vol_audio = change_volume(audio, factor=0.5)
    augmentations.append(('volume_down', vol_audio))
    
    return augmentations

def create_augmented_dataframe(df):
    """
    Given a DataFrame of loaded audios, applies augmentations to each sample and
    creates a new DataFrame that includes augmented versions.
    
    An additional column 'aug_method' indicates the augmentation applied.
    """
    augmented_data = []
    for idx, row in df.iterrows():
        audio = row['audio']
        sr = row['sample_rate']
        label = row['label']
        base_filepath = row['filepath']
        filename = row['filename']
        
        aug_list = augment_audio(audio, sr)
        for aug_method, aug_audio in aug_list:
            try:
                duration = librosa.get_duration(y=aug_audio, sr=sr)
            except Exception as e:
                print(f"Error calculating duration for {filename} ({aug_method}): {e}")
                duration = None
            
            augmented_data.append({
                'original_filepath': base_filepath,
                'filename': filename,
                'label': label,
                'aug_method': aug_method,
                'audio': aug_audio,
                'sample_rate': sr,
                'duration': duration
            })
    
    return pd.DataFrame(augmented_data)

In [5]:
def extract_features_from_audio(audio, sr):
    """
    Extracts a set of audio features from an already-loaded audio array.
    
    Features include:
      - duration, zero crossing rate, spectral centroid, spectral rolloff,
      - 13 MFCCs (mean values), RMS energy, and tempo.
    """
    features = {}
    features['duration'] = librosa.get_duration(y=audio, sr=sr)
    
    # Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(audio)
    features['zero_crossing_rate'] = float(np.mean(zcr))
    
    # Spectral Centroid
    spec_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
    features['spectral_centroid'] = float(np.mean(spec_centroid))
    
    # Spectral Rolloff
    spec_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
    features['spectral_rolloff'] = float(np.mean(spec_rolloff))
    
    # MFCCs (first 13 coefficients)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    for i in range(13):
        features[f'mfcc_{i+1}'] = float(np.mean(mfccs[i]))
    
    # RMS Energy
    rms = librosa.feature.rms(y=audio)
    features['rms'] = float(np.mean(rms))
    
    # Tempo (BPM)
    try:
        tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
        if isinstance(tempo, (list, np.ndarray)):
            features['tempo'] = float(tempo[0])
        elif isinstance(tempo, str):
            features['tempo'] = float(tempo.strip("[]"))
        else:
            features['tempo'] = float(tempo)
    except Exception as e:
        print(f"Error computing tempo: {e}")
        features['tempo'] = None

    return features

def create_features_from_augmented_dataframe(df_augmented):
    """
    Given a DataFrame of augmented audios, extract audio features for each record.
    
    Returns:
        DataFrame: Contains metadata along with the extracted features.
    """
    feature_data = []
    for idx, row in df_augmented.iterrows():
        audio = row['audio']
        sr = row['sample_rate']
        features = extract_features_from_audio(audio, sr)
        
        record = {
            'original_filepath': row['original_filepath'],
            'filename': row['filename'],
            'label': row['label'],
            'aug_method': row['aug_method']
        }
        record.update(features)
        feature_data.append(record)
    
    return pd.DataFrame(feature_data)

Extraccion de caracteristicas

In [6]:
def extract_features(file_path):
    """
    Loads an audio file and extracts several audio features,
    ensuring that numerical features (like tempo) are stored as floats.
    """
    try:
        audio, sr = librosa.load(file_path, sr=None)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

    features = {}
    features['duration'] = librosa.get_duration(y=audio, sr=sr)
    features['sample_rate'] = sr

    # Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(audio)
    features['zero_crossing_rate'] = float(np.mean(zcr))

    # Spectral Centroid
    spec_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
    features['spectral_centroid'] = float(np.mean(spec_centroid))

    # Spectral Rolloff
    spec_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
    features['spectral_rolloff'] = float(np.mean(spec_rolloff))

    # MFCCs (first 13 coefficients)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    for i in range(13):
        features[f'mfcc_{i+1}'] = float(np.mean(mfccs[i]))

    # RMS Energy
    rms = librosa.feature.rms(y=audio)
    features['rms'] = float(np.mean(rms))

    # Tempo (BPM)
    try:
        tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
        # Check if tempo is not a plain number
        if isinstance(tempo, (list, np.ndarray)):
            features['tempo'] = float(tempo[0])
        elif isinstance(tempo, str):
            # Remove any brackets and convert to float
            features['tempo'] = float(tempo.strip("[]"))
        else:
            features['tempo'] = float(tempo)
    except Exception as e:
        print(f"Error computing tempo for {file_path}: {e}")
        features['tempo'] = None

    return features

In [7]:
def find_audio_files(root_folder, valid_extensions=('.wav', '.mp3', '.m4a', '.flac', '.ogg', '.wma', '.aac')):
    """
    Recursively finds all audio files in the specified folder.
    
    Parameters:
        root_folder (str): The folder in which to search.
        valid_extensions (tuple): Allowed audio file extensions.
    
    Returns:
        list: Full file paths of found audio files.
    """
    audio_files = []
    for dirpath, _, filenames in os.walk(root_folder):
        for filename in filenames:
            if filename.lower().endswith(valid_extensions):
                full_path = os.path.join(dirpath, filename)
                audio_files.append(full_path)
    return audio_files

In [8]:
def create_features_dataframe(root_folder="Audios"):
    """
    Extracts audio features for all files under the given folder and returns a DataFrame.
    
    Each row includes the file metadata and the extracted features.
    
    Parameters:
        root_folder (str): Base folder containing audio files.
    
    Returns:
        DataFrame: DataFrame with audio features.
    """
    audio_files = find_audio_files(root_folder)
    data = []
    
    for file_path in audio_files:
        #print(f"Extracting features from: {file_path}")
        features = extract_features(file_path)
        if features is not None:
            label = os.path.basename(os.path.dirname(file_path))
            record = {
                "filepath": file_path,
                "filename": os.path.basename(file_path),
                "label": label
            }
            record.update(features)
            data.append(record)
    
    df = pd.DataFrame(data)
    return df

In [10]:

# Step 1: Generate Null (Silent) Audio Files
print("=== Generating Null Audio Files ===")
generate_null_audios(n_files=3, duration=1.0)

# Step 2: Load All Audio Files into a DataFrame
print("\n=== Loading Audio Files ===")
df_audios = load_audios_into_dataframe("Audios")
print("Loaded Audios DataFrame:")
print(df_audios.head())
    
# Step 3: Create Augmented Versions of Each Audio
print("\n=== Creating Augmented Audio Versions ===")
df_augmented = create_augmented_dataframe(df_audios)
print("Augmented Audios DataFrame:")
print(df_augmented.head())
    
#Step 4: Extract Features from Each Audio File
print("\n=== Extracting Audio Features ===")
df_features = create_features_from_augmented_dataframe(df_augmented)
print("Extracted Audio Features DataFrame:")
print(df_features.head())

#Step 5: Save into CSV
df_audios.to_csv("loaded_audios.csv", index=False)
df_augmented.to_csv("augmented_audios.csv", index=False)
df_features.to_csv("extracted_audio_features.csv", index=False)
print("\nDataFrames have been saved to CSV files.")


=== Generating Null Audio Files ===
Generated: Audios/Audios_Null/null-01_01.wav
Generated: Audios/Audios_Null/null-01_02.wav
Generated: Audios/Audios_Null/null-01_03.wav
Generated: Audios/Audios_Null/null-01_04.wav
Generated: Audios/Audios_Null/null-01_05.wav
Generated: Audios/Audios_Null/null-02_01.wav
Generated: Audios/Audios_Null/null-02_02.wav
Generated: Audios/Audios_Null/null-02_03.wav
Generated: Audios/Audios_Null/null-02_04.wav
Generated: Audios/Audios_Null/null-02_05.wav
Generated: Audios/Audios_Null/null-03_01.wav
Generated: Audios/Audios_Null/null-03_02.wav
Generated: Audios/Audios_Null/null-03_03.wav
Generated: Audios/Audios_Null/null-03_04.wav
Generated: Audios/Audios_Null/null-03_05.wav

=== Loading Audio Files ===


  audio_data, sample_rate = librosa.load(filepath, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Loaded Audios DataFrame:
                      filepath         filename label  \
0  Audios/Luis/LuisG-03_02.m4a  LuisG-03_02.m4a  Luis   
1  Audios/Luis/LuisG-03_03.m4a  LuisG-03_03.m4a  Luis   
2  Audios/Luis/LuisG-01_04.m4a  LuisG-01_04.m4a  Luis   
3  Audios/Luis/LuisG-03_01.m4a  LuisG-03_01.m4a  Luis   
4  Audios/Luis/LuisG-01_05.m4a  LuisG-01_05.m4a  Luis   

                                               audio  sample_rate  duration  
0  [-0.00064086914, -0.00036621094, -0.0005493164...        48000  2.281333  
1  [-0.0016479492, -0.0016784668, -0.0018615723, ...        48000  2.644000  
2  [0.0005187988, 0.00045776367, 0.00033569336, 0...        48000  4.073333  
3  [-0.00033569336, -0.00024414062, -0.0004272461...        48000  2.324000  
4  [-3.0517578e-05, -0.00064086914, -0.0006713867...        48000  3.156000  

=== Creating Augmented Audio Versions ===
Augmented Audios DataFrame:
             original_filepath         filename label    aug_method  \
0  Audios/Luis/LuisG-0

In [11]:
def parse_filename(filename):
    # Remove file extension if present
    base = os.path.splitext(filename)[0]
    # Assume the format is Name-Phrase_ID
    try:
        name_phrase, recording_id = base.split('_')
        name, phrase = name_phrase.split('-')
    except ValueError:
        raise ValueError(f"Filename {filename} does not match expected format 'Name-Phrase_ID'")
    return name, phrase, recording_id

# Example usage:
filename = "LuisG-03_02.wav"
speaker, phrase, rec_id = parse_filename(filename)
print(speaker, phrase, rec_id)  # Output: LuisG 03 02

LuisG 03 02


In [12]:
def add_metadata_from_filename(df):
    speakers, phrases = [], []
    for fn in df['filename']:
        speaker, phrase, _ = parse_filename(fn)
        speakers.append(speaker)
        phrases.append(phrase)
    df['speaker'] = speakers
    df['phrase'] = phrases
    return df

In [39]:
df_features = pd.read_csv("extracted_audio_features.csv")

# Extract speaker and phrase from the filename and add them as columns
speakers, phrases = [], []
for fn in df_features['filename']:
    speaker, phrase, _ = parse_filename(fn)
    speakers.append(speaker)
    phrases.append(phrase)
df_features['speaker'] = speakers
df_features['phrase'] = phrases

# -----------------------------------------------------------------------------
# Step 1: Define the Feature Columns
# -----------------------------------------------------------------------------
# Adjust this list as needed to match the features you have extracted.
feature_columns = ['zero_crossing_rate', 'spectral_centroid',
                   'spectral_rolloff', 'rms', 'tempo'] + [f'mfcc_{i+1}' for i in range(13)]

# -----------------------------------------------------------------------------
# Step 2: Train One Model per Phrase and Store Data for Cross Testing
# -----------------------------------------------------------------------------
phrase_models = {}  # Will hold the trained model for each phrase
phrase_data = {}    # Will hold the X, y data for each phrase (all samples, not split)

for phrase in ["01", "02", "03"]:
    # Filter the DataFrame for the current phrase
    df_phrase = df_features[df_features['phrase'] == phrase]
    X = df_phrase[feature_columns]
    y = df_phrase['speaker']
    
    # Save the data for cross testing later
    phrase_data[phrase] = (X, y)
    
    # For the in-phrase evaluation, split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train a RandomForest classifier (you can choose another classifier if desired)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)
    
    # In-phrase evaluation (model trained on phrase, tested on the same phrase)
    print(f"=== In-Phrase Evaluation for Phrase {phrase} ===")
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    
    # Save the trained model
    phrase_models[phrase] = clf

# -----------------------------------------------------------------------------
# Step 3: Cross Testing: Evaluate Each Model on Data from All Phrases
# -----------------------------------------------------------------------------
print("\n=== Cross Testing Models on Other Phrases ===")
for train_phrase, model in phrase_models.items():
    print(f"\nModel trained on phrase {train_phrase}:")
    for test_phrase, (X_test, y_test) in phrase_data.items():
        y_pred = model.predict(X_test)
        print(f"\n  Testing on phrase {test_phrase}:")
        print(classification_report(y_test, y_pred))

=== In-Phrase Evaluation for Phrase 01 ===
              precision    recall  f1-score   support

      AdriaM       1.00      1.00      1.00         4
        AleM       1.00      1.00      1.00         3
       BetoM       1.00      1.00      1.00         5
       Bruce       1.00      1.00      1.00         4
      Camila       1.00      1.00      1.00         5
       Cielo       1.00      1.00      1.00         5
      Daniel       0.92      1.00      0.96        12
       David       1.00      1.00      1.00         6
      Didier       0.86      1.00      0.92         6
         Eri       1.00      1.00      1.00        12
        Erik       1.00      1.00      1.00         9
    Fernando       1.00      1.00      1.00         7
        Irma       1.00      1.00      1.00         6
       Jorge       1.00      1.00      1.00         6
       LuisG       1.00      1.00      1.00         3
       Maria       1.00      1.00      1.00         6
     Mariana       1.00      1.00     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [40]:
# After training each model, export it:
for phrase, model in phrase_models.items():
    filename = f"model_phrase_{phrase}.pkl"
    joblib.dump(model, filename)
    print(f"Model for phrase {phrase} exported to {filename}")

Model for phrase 01 exported to model_phrase_01.pkl
Model for phrase 02 exported to model_phrase_02.pkl
Model for phrase 03 exported to model_phrase_03.pkl


In [46]:
import time
import numpy as np
import sounddevice as sd
import joblib

# -------------------------------
# Minimal Recording Function
# -------------------------------
def record_audio(duration=4, sample_rate=44100):
    """
    Records audio from the microphone for a given duration.
    
    Returns:
        audio (np.ndarray): Recorded 1D audio array.
        sample_rate (int): The sampling rate.
    """
    #print(f"Recording for {duration} seconds...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sd.wait()  # Wait until recording finishes
    return np.squeeze(audio), sample_rate

# -------------------------------
# Minimal Model Loading and Prediction Functions
# -------------------------------
def load_model(model_filename):
    """
    Loads a pre-trained model from disk.
    """
    return joblib.load(model_filename)

def predict_speaker(model, feature_vector):
    """
    Predicts the speaker from the feature vector using the loaded model.
    
    Args:
        model: Pre-trained classifier.
        feature_vector (np.ndarray): 1D array of features.
        
    Returns:
        The predicted speaker (string).
    """
    # The model expects a 2D array: (1, number_of_features)
    prediction = model.predict(feature_vector)
    return prediction[0]

# -------------------------------
# Main Real-Time Inference Loop
# -------------------------------
def main_inference_loop():
    # Load your pre-trained model (assumed to be saved already)
    model_1 = load_model("model_phrase_01.pkl")
    model_2 = load_model("model_phrase_02.pkl")
    model_3 = load_model("model_phrase_03.pkl")
    
    # FEATURE_COLUMNS is assumed to be defined in your notebook
    # For example:
    # FEATURE_COLUMNS = ['duration', 'zero_crossing_rate', 'spectral_centroid', 'spectral_rolloff',
    #                    'rms', 'tempo'] + [f'mfcc_{i+1}' for i in range(13)]
    
    print("Starting real-time inference. Speak into your microphone...")
    try:
        while True:
            # 1. Record audio from the mic.
            audio, sr = record_audio()
            
            # 2. Extract features using your already-defined function.
            # This function should return a dictionary mapping feature names to values.
            features = extract_features_from_audio(audio, sr)
            
            # 3. Create a feature vector in the same order as used during training.
            feature_vector = pd.DataFrame([features], columns=feature_columns)
            #print(feature_vector.head())
            # 4. Get the prediction from the model.
            predicted_speaker_1 = predict_speaker(model_1, feature_vector)
            predicted_speaker_2 = predict_speaker(model_2, feature_vector)
            predicted_speaker_3 = predict_speaker(model_3, feature_vector)
            print("Predicted Speaker 1:", predicted_speaker_1)
            print("Predicted Speaker 2:", predicted_speaker_1)
            print("Predicted Speaker 3:", predicted_speaker_1)
            # Optional: pause briefly before the next recording.
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("Real-time inference stopped.")

# Run the inference loop if executed as a script or in a notebook cell.
if __name__ == "__main__":
    main_inference_loop()


Starting real-time inference. Speak into your microphone...
Predicted Speaker 1: Jorge
Predicted Speaker 2: Jorge
Predicted Speaker 3: Jorge
Predicted Speaker 1: null
Predicted Speaker 2: null
Predicted Speaker 3: null
Predicted Speaker 1: Jorge
Predicted Speaker 2: Jorge
Predicted Speaker 3: Jorge
Predicted Speaker 1: Jorge
Predicted Speaker 2: Jorge
Predicted Speaker 3: Jorge
Predicted Speaker 1: Jorge
Predicted Speaker 2: Jorge
Predicted Speaker 3: Jorge
Predicted Speaker 1: Jorge
Predicted Speaker 2: Jorge
Predicted Speaker 3: Jorge
Predicted Speaker 1: Didier
Predicted Speaker 2: Didier
Predicted Speaker 3: Didier
Predicted Speaker 1: Erik
Predicted Speaker 2: Erik
Predicted Speaker 3: Erik
Predicted Speaker 1: Didier
Predicted Speaker 2: Didier
Predicted Speaker 3: Didier
Real-time inference stopped.


In [34]:
%%capture
!pip install sounddevice

In [57]:
# Load your saved audio file (ensure the path is correct)
audio_path = "Testing_Audios/TestJ2.m4a"
audio, sr = librosa.load(audio_path, sr=None)

# Use your existing function to extract features from the audio.
# This function should return a dictionary with keys matching FEATURE_COLUMNS.
features = extract_features_from_audio(audio, sr)

# Convert the feature dictionary to a DataFrame so the feature names are preserved.
feature_vector = pd.DataFrame([features], columns=feature_columns)

# Load your pre-trained model (assumed to be exported using joblib)
model_1 = load_model("model_phrase_01.pkl")
model_2 = load_model("model_phrase_02.pkl")
model_3 = load_model("model_phrase_03.pkl")

# Predict the speaker using the model.
predicted_speaker1 = model_1.predict(feature_vector)
predicted_speaker2 = model_2.predict(feature_vector)
predicted_speaker3 = model_3.predict(feature_vector)

print("Predicted Speaker1:", predicted_speaker1)
print("Predicted Speaker2:", predicted_speaker2)
print("Predicted Speaker3:", predicted_speaker3)

Predicted Speaker1: ['Jorge']
Predicted Speaker2: ['Jorge']
Predicted Speaker3: ['Jorge']


  audio, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
