In [7]:
import numpy as np
import librosa
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.sequence import pad_sequences
import sounddevice as sd

# Load the trained model
# 'model.h5' is resolved relative to the current working directory.
model = load_model('model.h5')

# Load the label encoder classes
# NOTE(security): allow_pickle=True lets np.load unpickle object arrays, and
# unpickling can execute arbitrary code; only load this file from a trusted
# source.
label_encoder_classes = np.load('label_encoder_classes.npy', allow_pickle=True)
# Rebuild an encoder by assigning the stored classes directly instead of
# refitting; process_audio() uses it to map predicted class indices back to
# labels via inverse_transform.
label_encoder = LabelEncoder()
label_encoder.classes_ = label_encoder_classes

# Extract a fixed-size MFCC feature matrix from a mono audio signal
def extract_features_from_audio(audio_data, sr=22050, n_mfcc=13, n_fft=1024,
                                max_frames=100):
    """Compute a fixed-width MFCC matrix for a mono audio signal.

    Args:
        audio_data: 1-D array of audio samples.
        sr: Sampling rate of ``audio_data`` in Hz, forwarded to librosa.
        n_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window length forwarded to librosa.
        max_frames: Number of time frames in the result; shorter signals are
            zero-padded at the end and longer ones are cut off at the end.

    Returns:
        Array of shape ``(n_mfcc, max_frames)``.
    """
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft)
    # pad_sequences pads/truncates along the first axis of each sequence, so
    # work on the (frames, n_mfcc) transpose and flip back afterwards.
    # NOTE(review): pad_sequences defaults to dtype='int32', which truncates
    # the MFCC values to integers. Left unchanged here because the model was
    # presumably trained on identically preprocessed features -- confirm
    # against the training code before passing dtype='float32'.
    padded = pad_sequences(
        [mfccs.T],
        maxlen=max_frames,
        padding='post',
        truncating='post',
    )[0]
    return padded.T

# Function to handle microphone input
def callback(indata, frames, time, status):
    """Stream callback: buffer each non-silent block of microphone input.

    Any status flag passed by the stream is printed immediately. Blocks whose
    samples are all zero are skipped; every other block is copied and
    appended to the module-level ``live_data`` list.
    """
    if status:
        print(status, flush=True)
    if not indata.any():
        return
    # Store a copy rather than the array handed in by the stream.
    block_copy = indata.copy()
    live_data.append(block_copy)

# Function to process the recorded audio and make predictions
def process_audio():
    """Classify the audio buffered in ``live_data`` and print the label.

    Joins the recorded blocks, extracts features from the first channel with
    ``extract_features_from_audio``, runs ``model.predict`` on a batch of
    one, and prints the label decoded by ``label_encoder``. If nothing was
    recorded, a message is printed and no prediction is made.

    Returns:
        None.
    """
    print("Recording stopped. Processing audio...")

    # Snapshot the buffer so a callback that is still running cannot change
    # the list while it is being processed.
    chunks = list(live_data)
    if not chunks:
        # np.concatenate raises ValueError on an empty list; the callback
        # stores nothing when every incoming block was silent.
        print("No audio was captured; skipping prediction.")
        return

    recording = np.concatenate(chunks, axis=0)
    first_channel = recording[:, 0]
    features = extract_features_from_audio(first_channel)
    # Add a leading batch axis for model.predict.
    batch = np.expand_dims(features, axis=0)
    predictions = model.predict(batch)
    class_indices = np.argmax(predictions, axis=1)
    predicted_labels = label_encoder.inverse_transform(class_indices)
    print(f"Predicted gender: {predicted_labels[0]}")

# Start microphone input stream
# Record at the rate that extract_features_from_audio assumes (sr=22050);
# without an explicit samplerate the stream uses the device default, so the
# MFCCs would be computed for the wrong sampling rate.
SAMPLE_RATE = 22050
RECORD_MS = 5000  # Adjust the duration (in milliseconds) as needed

input("Press Enter to start recording...")
live_data = []
# Only the first channel is used downstream, so capture a single channel.
with sd.InputStream(callback=callback, samplerate=SAMPLE_RATE, channels=1):
    print('Speak now...')
    sd.sleep(RECORD_MS)
# The stream is closed once the with-block exits, so the callback can no
# longer append to live_data while the audio is being processed.
print('Recording stopped.')
process_audio()


Speak now...
Recording stopped.
Recording stopped. Processing audio...
Predicted gender: male
