<a href="https://colab.research.google.com/github/DimosAndronoudis/RT-Speech-Emotion-Recognition/blob/main/Real_Time_Speech_Emotion_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install sounddevice librosa scikit-learn joblib --quiet

import os
import librosa
import numpy as np
import sounddevice as sd
from scipy.io.wavfile import write
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import joblib


In [None]:
# Emotion label map based on RAVDESS naming convention
# Maps the two-digit emotion code used in the file names ("01".."08") to its
# label. The labels are listed in code order, so position i (1-based) gets the
# zero-padded key f"{i:02d}".
emotion_map = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

In [None]:
# Feature extraction
def extract_features(file):
    """Summarise an audio file as a vector of mean MFCC values.

    The audio is loaded with ``librosa.load`` using its default settings,
    13 MFCCs are computed, and the transposed MFCC matrix is averaged along
    its first axis.

    Args:
        file: Audio source passed straight to ``librosa.load``.

    Returns:
        The NumPy array produced by averaging the transposed MFCC matrix
        over axis 0 (one mean per coefficient for a 2-D MFCC matrix).
    """
    signal, sample_rate = librosa.load(file)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    # Transpose first so that the averaged axis (0) runs over the MFCC
    # matrix's second dimension.
    transposed = coefficients.T
    return np.mean(transposed, axis=0)

# Train the model using RAVDESS

def train_model():
    """Train an MLP emotion classifier from the WAV files in ``ravdess/``.

    Every ``.wav`` file (extension matched case-insensitively) in the
    ``ravdess`` directory is inspected. The third dash-separated field of
    the file name is looked up in ``emotion_map``; files whose name has
    fewer than three fields, or whose code is not in the map, are skipped
    and counted. Features come from ``extract_features``.

    The samples are split 80/20, an ``MLPClassifier`` with one hidden layer
    of 100 units is fitted, a classification report for the held-out part
    is printed, and the fitted model is saved to ``emotion_model.pkl``.

    Raises:
        FileNotFoundError: If the ``ravdess`` directory does not exist
            (raised by ``os.listdir``).
        ValueError: If fewer than two usable samples were found, since the
            data cannot be split into non-empty train and test sets.
    """
    print("📁 Training model from uploaded RAVDESS samples...")
    data_dir = "ravdess"
    features, labels = [], []
    skipped = 0
    # Sorted so the sample order does not depend on directory listing order.
    for file in sorted(os.listdir(data_dir)):
        if not file.lower().endswith(".wav"):
            continue
        parts = file.split("-")
        # The emotion code is the third dash-separated field; a name with
        # fewer fields cannot be labelled, so skip it instead of crashing.
        emotion = emotion_map.get(parts[2]) if len(parts) > 2 else None
        if not emotion:
            skipped += 1
            continue
        features.append(extract_features(os.path.join(data_dir, file)))
        labels.append(emotion)

    if skipped:
        print(f"Skipped {skipped} .wav file(s) without a recognisable emotion code.")

    # Fail early with an actionable message rather than an opaque error from
    # train_test_split on an empty or single-sample dataset.
    if len(features) < 2:
        raise ValueError(
            f"Need at least 2 labelled .wav files in '{data_dir}/' to train, "
            f"found {len(features)}."
        )

    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
    clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500)
    clf.fit(X_train, y_train)
    print("Accuracy on test set:")
    print(classification_report(y_test, clf.predict(X_test)))
    joblib.dump(clf, "emotion_model.pkl")
    print("Model saved as emotion_model.pkl")

# Real-time recording

def record_audio(duration=3, fs=22050, filename='realtime.wav'):
    """Record mono audio from the default input device and save it as WAV.

    Args:
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz, used for both capture and the written file.
        filename: Path of the WAV file to write.
    """
    print(f"🎙 Recording for {duration} seconds...")
    frame_count = int(duration * fs)
    samples = sd.rec(frame_count, samplerate=fs, channels=1)
    # Wait for the capture to finish before the buffer is written out.
    sd.wait()
    write(filename, fs, samples)
    print("✅ Done recording.")

# Real-time emotion prediction

def predict_emotion():
    """Record a short clip and print the emotion predicted for it.

    The saved classifier is loaded from ``emotion_model.pkl`` first, then a
    clip is recorded with ``record_audio`` into ``realtime.wav``, converted
    to features with ``extract_features`` and classified. The predicted
    label is printed.

    Raises:
        FileNotFoundError: If ``emotion_model.pkl`` does not exist (raised
            by ``joblib.load`` before any recording takes place).
    """
    # Load the model before recording so a missing model file fails
    # immediately instead of after the user has already recorded a clip.
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    model = joblib.load('emotion_model.pkl')

    # Single source of truth for the clip path, passed explicitly so this
    # function does not silently depend on record_audio's default filename.
    wav_path = 'realtime.wav'
    record_audio(filename=wav_path)

    # The classifier expects a 2-D array: one row per sample.
    features = extract_features(wav_path).reshape(1, -1)
    prediction = model.predict(features)[0]
    print(f'Predicted emotion: {prediction}')

# Uploading files
from google.colab import files

def upload_files():
    """Prompt for a Colab file upload and move the files into ``ravdess/``.

    Creates the ``ravdess`` directory if needed, opens the Colab upload
    dialog, and renames each uploaded file (keyed by name in the returned
    mapping) into that directory.
    """
    target_dir = "ravdess"
    print("Upload your RAVDESS .wav files into a 'ravdess' folder.")
    os.makedirs(target_dir, exist_ok=True)
    received = files.upload()
    # Iterating the mapping yields the uploaded file names.
    for filename in received:
        destination = os.path.join(target_dir, filename)
        os.rename(filename, destination)
    print("Files uploaded and moved to 'ravdess/'")

# Entry point for Colab
# Usage banner: one call, one line per step, separated by newlines.
print(
    "\n Ready for real-time emotion recognition in Colab",
    "1. Run upload_files() to upload RAVDESS WAVs",
    "2. Run train_model() to build your model",
    "3. Run predict_emotion() to start live predictions",
    sep="\n",
)
