# Emotion Detection from Voice using RAVDESS
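This notebook loads the RAVDESS audio files from Google Drive, extracts MFCC features from each recording, trains a random-forest classifier on them, and then predicts the emotion of uploaded audio clips.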

In [2]:
# Mount Google Drive inside the Colab VM at /content/drive; the dataset path
# used by the data-loading cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Installing dependencies
# %pip (instead of !pip) installs into the environment of the running kernel,
# and -q keeps the installer log out of the notebook output.
%pip install -q librosa scikit-learn resampy

Collecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)
Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: resampy
Successfully installed resampy-0.4.3


In [4]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

In [5]:
# Feature extraction function
def extract_features(file_path):
    """Summarise one audio file as the mean of its MFCCs.

    Parameters
    ----------
    file_path
        Location of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        One value per requested MFCC coefficient (40 are requested), each
        being that coefficient averaged over the whole recording.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
    # After the transpose axis 0 runs over frames, so the mean collapses time.
    return coefficients.T.mean(axis=0)

In [6]:

# Lookup from the emotion code (third dash-separated field of each file name)
# to the label used as the training target.
emotions = {
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
}
# Feature vectors and their labels, filled in parallel by the loop below.
X, y = [], []

dataset_path = '/content/drive/MyDrive/emotion_detection_notebook/ravdess'

print(f"Attempting to load data from: {dataset_path}")

if not os.path.exists(dataset_path):
    print(f"Error: Dataset path not found at {dataset_path}")

else:
    processed_count = 0
    skipped_count = 0
    error_count = 0
    for root, dirs, files in os.walk(dataset_path):
        # os.walk yields entries in whatever order the filesystem returns them.
        # Sorting the sub-directories in place (which steers the walk) and the
        # file names fixes the sample order, so the seeded train/test split
        # that follows selects the same files on every run.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.wav'):
                continue
            file_path = os.path.join(root, file)
            try:
                parts = file.split('-')
                if len(parts) <= 2:
                    # No third field, so there is no emotion code to read.
                    print(f"Skipping file with unexpected name format: {file}")
                    skipped_count += 1
                    continue

                emotion_code = parts[2]
                emotion = emotions.get(emotion_code)
                if not emotion:
                    print(f"Skipping file with unknown emotion code {emotion_code}: {file}")
                    skipped_count += 1
                    continue

                # Only append once extraction succeeded so X and y stay aligned.
                features = extract_features(file_path)
                X.append(features)
                y.append(emotion)
                processed_count += 1
            except Exception as e:
                # Best effort: report the failing file and keep loading the rest.
                print(f"Error processing file {file_path}: {e}")
                error_count += 1

    print("\nFinished data loading:")
    print(f"  Processed files: {processed_count}")
    print(f"  Skipped files: {skipped_count}")
    print(f"  Files with errors: {error_count}")
    print(f"  Total samples loaded (X, y): {len(X)}")

# Check if any data was loaded
if len(X) == 0:
    print("\nNo audio files loaded or processed.")
    print("Please check your dataset_path and file naming conventions.")
    print("Ensure the RAVDESS dataset is correctly placed in the specified path.")
elif len(X) < 2:
    # A single sample cannot be divided into a train part and a test part.
    print("\nOnly one sample loaded. Cannot perform train/test split.")
    print("Please ensure your dataset contains more than one audio file.")
else:
    # Train the model
    print("\nData loaded successfully. Proceeding with model training.")
    # Hold out 20% of the samples for evaluation; the seed keeps the split fixed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(f"Train set size: {len(X_train)}")
    print(f"Test set size: {len(X_test)}")

    # Seed the forest as well: its bootstrap sampling and feature selection are
    # random, so without random_state the reported accuracy and the saved model
    # would differ between runs even though the split itself is fixed.
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    # Persist the fitted classifier so the prediction cells can reload it.
    joblib.dump(model, "emotion_model.pkl")
    print("Model trained and saved to emotion_model.pkl")

Attempting to load data from: /content/drive/MyDrive/emotion_detection_notebook/ravdess

Finished data loading:
  Processed files: 1440
  Skipped files: 0
  Files with errors: 0
  Total samples loaded (X, y): 1440

Data loaded successfully. Proceeding with model training.
Train set size: 1152
Test set size: 288
Accuracy: 0.6701388888888888
Model trained and saved to emotion_model.pkl


In [7]:

# NOTE(review): same package list as the first install cell of this notebook;
# this cell looks redundant and is a candidate for removal.
%pip install -q librosa scikit-learn resampy



In [8]:

# streamlit is the only package here that the first install cell does not
# already cover. %pip installs into the running kernel's environment.
%pip install -q librosa scikit-learn resampy streamlit

Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [9]:

# NOTE(review): identical package list to the preceding install cell; this
# cell looks redundant and is a candidate for removal.
%pip install -q librosa scikit-learn resampy streamlit



In [10]:
import streamlit as st
import numpy as np
import librosa
import joblib
import os
import soundfile as sf
from sklearn.preprocessing import LabelEncoder

# Reload the classifier that the training cell wrote to disk with joblib.
# joblib files are pickle-based, so only load files you produced yourself.
model = joblib.load("emotion_model.pkl")

# Emotion labels used during training
EMOTIONS = [
    'angry', 'calm', 'disgust', 'fearful',
    'happy', 'neutral', 'sad', 'surprised',
]
# fit() returns the encoder itself, so construction and fitting can be chained.
# NOTE(review): `encoder` is not referenced by the prediction code shown in
# this notebook; confirm whether it is still needed.
encoder = LabelEncoder().fit(EMOTIONS)

def extract_features(file_path):
    """Build the feature vector expected by the saved emotion model.

    The classifier in this notebook is trained on the per-file mean of 40
    MFCCs computed at the file's native sampling rate. This function now
    produces exactly that vector. The previous version resampled the audio
    and appended chroma and mel-spectrogram means, which gave a longer
    vector that the trained model cannot accept.

    Parameters
    ----------
    file_path
        Location of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray or None
        The 40 averaged MFCC values, or ``None`` if the file could not be
        read or processed (the error is printed).
    """
    try:
        # sr=None keeps the native sampling rate, matching training.
        audio, sample_rate = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Average over frames so every file yields one fixed-length vector.
        return np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Callers test for None instead of catching exceptions.
        print(f"Error processing audio file: {e}")
        return None

In [None]:
import IPython.display as ipd
from google.colab import files

def extract_features(file_path):
    """Return the time-averaged MFCC vector of an audio file.

    Parameters
    ----------
    file_path
        Location of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        One averaged value for each of the 40 requested MFCC coefficients.
    """
    # Loading the audio file (sr=None keeps its original sampling rate)
    waveform, rate = librosa.load(file_path, sr=None)
    per_frame = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=40)
    # Put frames on the first axis, then average them away.
    time_major = per_frame.T
    return np.mean(time_major, axis=0)

# Open the Colab upload widget; the returned mapping is keyed by the name each
# uploaded file was saved under.
uploaded = files.upload()

for file_name in uploaded:
    temp_path = file_name
    print(f"Playing {temp_path}")
    ipd.display(ipd.Audio(temp_path))

    # Feature extraction can raise on unreadable or non-audio uploads. Catching
    # that here makes the "Could not extract features." branch reachable and
    # lets the loop continue with the remaining files instead of aborting.
    try:
        features = extract_features(temp_path)
    except Exception as e:
        print(f"Error processing audio file: {e}")
        features = None

    if features is not None:
        # The model expects a 2-D array: one row per sample.
        features = features.reshape(1, -1)
        prediction = model.predict(features)
        predicted_emotion = prediction[0]
        print(f"Predicted Emotion: {predicted_emotion.capitalize()}")
    else:
        print("Could not extract features.")

Saving 03-01-02-02-02-02-24.wav to 03-01-02-02-02-02-24 (1).wav
Playing 03-01-02-02-02-02-24 (1).wav


Predicted Emotion: Calm
