In [None]:
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
import torch
import librosa

# Map the class indices produced by the classifier back to emotion names
inverse_label_map = {0: 'happy', 1: 'sad', 2: 'angry', 3: 'fearful', 4: 'disgusted', 5: 'surprised', 6: 'neutral'}

# Load the saved model and its processor from "my_model"
model = Wav2Vec2ForSequenceClassification.from_pretrained("my_model")
processor = Wav2Vec2Processor.from_pretrained("my_model")

# Prefer the GPU, but fall back to the CPU so this cell does not crash on
# machines without a usable CUDA device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

def predict_emotion(audio_path):
    """Classify the emotion expressed in an audio file.

    The file is loaded with librosa at a 16 kHz sampling rate, turned into
    model inputs by the module-level ``processor``, and scored by the
    module-level ``model``. The index of the highest logit is translated to
    a label through ``inverse_label_map``.

    Args:
        audio_path: Path to the audio file to classify.

    Returns:
        The entry of ``inverse_label_map`` for the highest-scoring class.

    Raises:
        KeyError: If the predicted class index is not a key of
            ``inverse_label_map``.
    """
    # Single source of truth for the rate used both when loading and when
    # describing the signal to the processor.
    target_sr = 16000

    # Load the audio file
    speech, _ = librosa.load(audio_path, sr=target_sr)

    # Preprocess the audio file
    inputs = processor(speech, sampling_rate=target_sr, return_tensors="pt", padding=True)

    # Follow the model to whichever device it currently lives on instead of
    # assuming CUDA; a hard-coded 'cuda' fails when the model is on the CPU.
    input_values = inputs.input_values.to(model.device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        logits = model(input_values).logits

    # Index of the highest-scoring class
    predicted_class = torch.argmax(logits, dim=1).item()

    # Map the predicted class to the emotion label
    return inverse_label_map[predicted_class]


In [10]:
# Example usage: classify a previously recorded clip and print the label.
# NOTE: 'recorded_audio.wav' is written by the recording cell further down in
# this notebook, so on a fresh kernel this cell only works if that file
# already exists on disk.
audio_path = 'recorded_audio.wav'
predicted_emotion = predict_emotion(audio_path)
print(f"The predicted emotion is: {predicted_emotion}")

The predicted emotion is: disgusted


In [None]:
import sounddevice as sd
import numpy as np
import wavio

def record_audio(duration, fs=16000):
    """Capture a single-channel float32 clip via sounddevice.

    Args:
        duration: Length of the recording in seconds.
        fs: Sampling rate in Hz used for the capture.

    Returns:
        A ``(samples, fs)`` tuple: the recording flattened to one dimension
        and the sampling rate that was passed in.
    """
    print("Recording...")
    frame_count = int(duration * fs)
    recording = sd.rec(frame_count, samplerate=fs, channels=1, dtype='float32')
    sd.wait()  # block until the capture has completed
    print("Recording finished.")
    return recording.flatten(), fs

def save_audio(audio, fs, filename):
    """Write audio samples to a WAV file and report where they were saved.

    Args:
        audio: Sample data handed to ``wavio.write``.
        fs: Sampling rate in Hz.
        filename: Destination path of the WAV file.
    """
    # NOTE(review): this notebook passes float32 samples; confirm how the
    # installed wavio scales floating-point data to the 2-byte sample width.
    sample_width_bytes = 2  # two bytes per sample
    wavio.write(filename, audio, fs, sampwidth=sample_width_bytes)
    print(f"Audio saved as {filename}")

# Record audio for 5 seconds
duration = 5  # seconds
audio, fs = record_audio(duration)

# Save the recorded audio to a file so predict_emotion can load it by path
audio_path = 'recorded_audio.wav'
save_audio(audio, fs, audio_path)

# Make sure the model sits on an available device. A hard-coded 'cuda' would
# crash on machines without a GPU, so fall back to the CPU in that case.
model.to('cuda' if torch.cuda.is_available() else 'cpu')

# Predict the emotion of the recorded audio
predicted_emotion = predict_emotion(audio_path)
print(f"The predicted emotion is: {predicted_emotion}")


Recording...
Recording finished.
Audio saved as recorded_audio.wav
The predicted emotion is: disgusted
