In [1]:
!pip install librosa gradio scikit-learn matplotlib


Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [3]:
from google.colab import files
files.upload()  # Select the `kaggle.json` file


Saving kaggle (3).json to kaggle (3).json


{'kaggle (3).json': b'{"username":"cyberwarriyo","key":"d5ef2841e7b558d30aaf60e19bd02ea8"}'}

In [7]:
# Step 1: Rename the uploaded file for compatibility
!mv "kaggle(3).json" kaggle.json

# Step 2: Create the .kaggle folder and move the token there
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Step 3: Install the Kaggle CLI if not already installed
!pip install -q kaggle

# Step 4: Download the RAVDESS dataset from Kaggle
!kaggle datasets download -d uwrfkaggler/ravdess-emotional-speech-audio

# Step 5: Unzip the downloaded dataset
!unzip -q ravdess-emotional-speech-audio.zip -d ravdess

# Optional: List files to verify
!ls ravdess


mv: cannot stat 'kaggle (3).json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio
License(s): CC-BY-NC-SA-4.0
ravdess-emotional-speech-audio.zip: Skipping, found more recently modified local copy (use --force to force download)
replace ravdess/Actor_01/03-01-01-01-01-01-01.wav? [y]es, [n]o, [A]ll, [N]one, [r]ename: Actor_01  Actor_06  Actor_11  Actor_16	Actor_21
Actor_02  Actor_07  Actor_12  Actor_17	Actor_22
Actor_03  Actor_08  Actor_13  Actor_18	Actor_23
Actor_04  Actor_09  Actor_14  Actor_19	Actor_24
Actor_05  Actor_10  Actor_15  Actor_20	audio_speech_actors_01-24


In [8]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [9]:
# Emotion codes from RAVDESS filenames
emotion_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}


In [10]:
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, duration=3, offset=0.5)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    return mfccs_mean


In [11]:
def load_data(base_dir='ravdess'):
    X, y = [], []
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.endswith(".wav"):
                emotion_code = file.split("-")[2]
                emotion = emotion_map.get(emotion_code)
                if emotion:
                    file_path = os.path.join(root, file)
                    features = extract_features(file_path)
                    X.append(features)
                    y.append(emotion)
    return np.array(X), np.array(y)

X, y = load_data()
print(f"Loaded {len(X)} samples.")


Loaded 2880 samples.


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

# Accuracy report
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

       angry       1.00      0.94      0.97        67
        calm       0.82      1.00      0.90        74
     disgust       0.93      0.95      0.94        83
     fearful       0.95      0.95      0.95        75
       happy       1.00      0.92      0.96        72
     neutral       0.93      0.69      0.79        39
         sad       0.95      0.92      0.93        76
   surprised       0.89      0.93      0.91        90

    accuracy                           0.93       576
   macro avg       0.93      0.91      0.92       576
weighted avg       0.93      0.93      0.93       576



In [13]:
import gradio as gr

def predict_emotion(audio_file):
    features = extract_features(audio_file)
    prediction = model.predict([features])[0]
    return f"Predicted Emotion: {prediction}"

interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(type="filepath", label="Upload or Record Your Voice"),
    outputs="text",
    title="🎤 Voice Emotion Detector",
    description="Detect emotions like happy, sad, angry, etc. from your voice!"
)

interface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4d0b0439ec156ce87d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


