In [15]:
import joblib
import sounddevice as sd
import numpy as np
import librosa
from sklearn import preprocessing
import soundfile as sf
import pandas as pd

# Load the trained classifier from disk (the original note describes it as a RandomForest model).
# NOTE(review): joblib.load unpickles the file, so only load model files from a trusted source.
model_filename = './best_model.pkl'
loaded_model = joblib.load(model_filename)

# Extract the single feature row used by the classifier from a raw waveform.
def preprocess_audio(audio, sr=16000):
    """Compute summary audio features for one waveform.

    Parameters
    ----------
    audio : np.ndarray
        Waveform samples, as passed by ``classify_microphone_audio`` (the
        squeezed single-channel recording).
    sr : int, optional
        Sample rate of ``audio`` in Hz. Defaults to 16000, the rate the
        microphone capture in this cell records at.

    Returns
    -------
    list[list]
        A one-row table. The row holds 57 values in the same order as the
        ``feature_names`` list built in ``classify_microphone_audio``:
        mean/variance pairs for chroma, RMS, spectral centroid, spectral
        bandwidth, roll-off, zero-crossing rate, harmonic and percussive
        components, then tempo, then 20 MFCC means and 20 MFCC variances.
    """
    y = audio

    def _mean_and_var(values):
        # Both statistics are taken over every element of the feature array.
        return np.mean(values), np.var(values)

    # Each librosa feature is computed once and reused for both statistics;
    # recomputing it for the variance doubles the (expensive) STFT/HPSS work.
    chroma_stft_mean, chroma_stft_var = _mean_and_var(librosa.feature.chroma_stft(y=y, sr=sr))
    rms_mean, rms_var = _mean_and_var(librosa.feature.rms(y=y))
    spectral_centroid_mean, spectral_centroid_var = _mean_and_var(
        librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_bandwidth_mean, spectral_bandwidth_var = _mean_and_var(
        librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff_mean, rolloff_var = _mean_and_var(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zero_crossing_rate_mean, zero_crossing_rate_var = _mean_and_var(
        librosa.feature.zero_crossing_rate(y))
    harmony_mean, harmony_var = _mean_and_var(librosa.effects.harmonic(y))
    perceptr_mean, perceptr_var = _mean_and_var(librosa.effects.percussive(y))

    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # beat_track may return the tempo as a one-element array in recent librosa
    # releases; normalise it to a plain float so the row stays rectangular.
    tempo = float(np.atleast_1d(tempo)[0])

    # Per-coefficient statistics over time for the first 20 MFCCs.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfcc_means = np.mean(mfccs, axis=1)
    mfcc_vars = np.var(mfccs, axis=1)

    row = [chroma_stft_mean, chroma_stft_var, rms_mean, rms_var,
           spectral_centroid_mean, spectral_centroid_var,
           spectral_bandwidth_mean, spectral_bandwidth_var,
           rolloff_mean, rolloff_var, zero_crossing_rate_mean, zero_crossing_rate_var,
           harmony_mean, harmony_var, perceptr_mean, perceptr_var,
           tempo] + list(mfcc_means) + list(mfcc_vars)
    # Wrapped in an outer list so the caller can build a one-row DataFrame.
    return [row]

# Predict the label for a feature table and pair every class with its probability.
def predict_audio_label_and_proba(features):
    """Run the loaded model on ``features``.

    Returns a tuple ``(label, label_with_proba)``:

    * ``label`` is the result of ``loaded_model.predict`` for all rows.
    * ``label_with_proba`` is a list of ``(class, probability)`` pairs built
      from the FIRST row of ``loaded_model.predict_proba`` only.
    """
    predicted = loaded_model.predict(features)
    scores = loaded_model.predict_proba(features)
    # Only the first sample's probabilities are reported.
    pairs = [(cls, score) for cls, score in zip(loaded_model.classes_, scores[0])]
    return predicted, pairs

# Record from the microphone, classify the clip, and save the recording.
def classify_microphone_audio(duration=6, output_filename="captured_audio.wav"):
    """Capture audio from the default input device and print the model's prediction.

    Parameters
    ----------
    duration : int or float, optional
        Recording length in seconds (default 6).
    output_filename : str, optional
        Path of the WAV file the recording is written to
        (default ``"captured_audio.wav"``).

    Raises
    ------
    ValueError
        If ``duration`` is not positive.

    The recording is written to ``output_filename`` even when feature
    extraction or prediction fails, so the captured clip is not lost.
    Nothing is returned; results are printed.
    """
    if duration <= 0:
        raise ValueError("La duración debe ser mayor que cero")

    # Kept fixed: preprocess_audio extracts features assuming 16000 Hz.
    sr = 16000

    # Column names for the one-row feature table, in the order produced by preprocess_audio.
    feature_names = ["chroma_stft_mean", "chroma_stft_var", "rms_mean", "rms_var",
                     "spectral_centroid_mean", "spectral_centroid_var",
                     "spectral_bandwidth_mean", "spectral_bandwidth_var",
                     "rolloff_mean", "rolloff_var", "zero_crossing_rate_mean", "zero_crossing_rate_var",
                     "harmony_mean", "harmony_var", "perceptr_mean", "perceptr_var",
                     "tempo"]
    feature_names += [f'mfcc{i}_mean' for i in range(1, 21)]
    feature_names += [f'mfcc{i}_var' for i in range(1, 21)]

    print("Capturando audio del micrófono...")
    audio = sd.rec(int(sr * duration), samplerate=sr, channels=1)
    sd.wait()  # block until the recording has finished
    # Drop the channel axis: (frames, 1) -> (frames,)
    audio = np.squeeze(audio)

    try:
        print("Preprocesando audio...")
        features = preprocess_audio(audio)
        df = pd.DataFrame(features, columns=feature_names)

        print("Realizando predicción...")
        predicted_label, probabilities = predict_audio_label_and_proba(df)

        print("Audio clasificado como:", predicted_label)
        print("Probabilidades de predicción:")
        for label, prob in probabilities:
            print(f"{label}: {prob:.4f}")
    finally:
        # Save the capture as a WAV file regardless of whether classification succeeded.
        sf.write(output_filename, audio, sr)
        print("Audio guardado como: ", output_filename)

# Run one capture-and-classify pass when this cell executes.
classify_microphone_audio()

Capturando audio del micrófono...
Preprocesando audio...
Realizando predicción...
Audio clasificado como: ['Derecha']
Probabilidades de predicción:
Adelante: 0.2050
Atrás: 0.2290
Derecha: 0.3480
Izquierda: 0.2180
Audio guardado como:  captured_audio.wav


In [26]:
import joblib
#import sounddevice as sd
import numpy as np
import librosa
from sklearn import preprocessing
import soundfile as sf
import pandas as pd

# Load the trained classifier from disk (the original note describes it as a RandomForest model).
# NOTE(review): joblib.load unpickles the file, so only load model files from a trusted source.
model_filename = './best_model.pkl'
loaded_model = joblib.load(model_filename)

# Load an audio file and extract the single feature row used by the classifier.
def preprocess_audio(audio, sr=22050):
    """Compute summary audio features for one audio file.

    Parameters
    ----------
    audio : str or path-like
        Path of the audio file, handed to ``librosa.load``.
    sr : int or None, optional
        Target sample rate passed to ``librosa.load``. The default of 22050
        matches librosa's own default, so existing calls behave as before.
        NOTE(review): the microphone cell extracts features at 16000 Hz;
        confirm which rate the model was trained with and pass it here.

    Returns
    -------
    list[list]
        A one-row table. The row holds 57 values in the same order as the
        ``feature_names`` list built in ``classify_microphone_audio``:
        mean/variance pairs for chroma, RMS, spectral centroid, spectral
        bandwidth, roll-off, zero-crossing rate, harmonic and percussive
        components, then tempo, then 20 MFCC means and 20 MFCC variances.
    """
    # librosa.load returns the waveform and the sample rate actually used.
    y, sr = librosa.load(audio, sr=sr)

    def _mean_and_var(values):
        # Both statistics are taken over every element of the feature array.
        return np.mean(values), np.var(values)

    # Each librosa feature is computed once and reused for both statistics;
    # recomputing it for the variance doubles the (expensive) STFT/HPSS work.
    chroma_stft_mean, chroma_stft_var = _mean_and_var(librosa.feature.chroma_stft(y=y, sr=sr))
    rms_mean, rms_var = _mean_and_var(librosa.feature.rms(y=y))
    spectral_centroid_mean, spectral_centroid_var = _mean_and_var(
        librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_bandwidth_mean, spectral_bandwidth_var = _mean_and_var(
        librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff_mean, rolloff_var = _mean_and_var(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zero_crossing_rate_mean, zero_crossing_rate_var = _mean_and_var(
        librosa.feature.zero_crossing_rate(y))
    harmony_mean, harmony_var = _mean_and_var(librosa.effects.harmonic(y))
    perceptr_mean, perceptr_var = _mean_and_var(librosa.effects.percussive(y))

    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # beat_track may return the tempo as a one-element array in recent librosa
    # releases; normalise it to a plain float so the row stays rectangular.
    tempo = float(np.atleast_1d(tempo)[0])

    # Per-coefficient statistics over time for the first 20 MFCCs.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfcc_means = np.mean(mfccs, axis=1)
    mfcc_vars = np.var(mfccs, axis=1)

    row = [chroma_stft_mean, chroma_stft_var, rms_mean, rms_var,
           spectral_centroid_mean, spectral_centroid_var,
           spectral_bandwidth_mean, spectral_bandwidth_var,
           rolloff_mean, rolloff_var, zero_crossing_rate_mean, zero_crossing_rate_var,
           harmony_mean, harmony_var, perceptr_mean, perceptr_var,
           tempo] + list(mfcc_means) + list(mfcc_vars)
    # Wrapped in an outer list so the caller can build a one-row DataFrame.
    return [row]

# Predict the label for a feature table and pair every class with its probability.
def predict_audio_label_and_proba(features):
    """Run the loaded model on ``features``.

    Returns a tuple ``(label, label_with_proba)``:

    * ``label`` is the result of ``loaded_model.predict`` for all rows.
    * ``label_with_proba`` is a list of ``(class, probability)`` pairs built
      from the FIRST row of ``loaded_model.predict_proba`` only.
    """
    predicted = loaded_model.predict(features)
    scores = loaded_model.predict_proba(features)
    # Only the first sample's probabilities are reported.
    pairs = [(cls, score) for cls, score in zip(loaded_model.classes_, scores[0])]
    return predicted, pairs

# Classify an audio file from disk (file-based variant of the microphone cell).
def classify_microphone_audio(audio_path='./Prueba03/recording3.wav'):
    """Extract features from an audio file and print the model's prediction.

    Parameters
    ----------
    audio_path : str, optional
        Path of the audio file to classify. Defaults to
        ``'./Prueba03/recording3.wav'``, the file this cell was run against;
        any other recording (for example
        ``'./Audios-MI/Cou/recording11.wav'``) can be passed instead.

    Nothing is recorded and nothing is returned; results are printed.
    """
    # Column names for the one-row feature table, in the order produced by preprocess_audio.
    feature_names = ["chroma_stft_mean", "chroma_stft_var", "rms_mean", "rms_var",
                     "spectral_centroid_mean", "spectral_centroid_var",
                     "spectral_bandwidth_mean", "spectral_bandwidth_var",
                     "rolloff_mean", "rolloff_var", "zero_crossing_rate_mean", "zero_crossing_rate_var",
                     "harmony_mean", "harmony_var", "perceptr_mean", "perceptr_var",
                     "tempo"]
    feature_names += [f'mfcc{i}_mean' for i in range(1, 21)]
    feature_names += [f'mfcc{i}_var' for i in range(1, 21)]

    # NOTE(review): message kept verbatim from the microphone version, although
    # this variant reads a file instead of capturing from the microphone.
    print("Capturando audio del micrófono...")

    print("Preprocesando audio...")
    features = preprocess_audio(audio_path)

    df = pd.DataFrame(features, columns=feature_names)

    print("Realizando predicción...")
    predicted_label, probabilities = predict_audio_label_and_proba(df)

    print("Audio clasificado como:", predicted_label)
    print("Probabilidades de predicción:")
    for label, prob in probabilities:
        print(f"{label}: {prob:.4f}")
        
# Run the file-based classification when this cell executes.
classify_microphone_audio()

Capturando audio del micrófono...
Preprocesando audio...
Realizando predicción...
Audio clasificado como: ['Derecha']
Probabilidades de predicción:
Adelante: 0.1470
Atrás: 0.1410
Derecha: 0.3890
Izquierda: 0.3230
