In [2]:
pip install tensorflow librosa numpy pandas scikit-learn gradio

Collecting tensorflow
  Using cached tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting librosa
  Using cached librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting gradio
  Downloading gradio-5.20.0-py3-none-any.whl.metadata (16 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Using cached tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting 

In [8]:
pip install numpy==1.26.4

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Downloading numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
   ---------------------------------------- 0.0/15.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.5 MB ? eta -:--:--
    --------------------------------------- 0.3/15.5 MB ? eta -:--:--
   -- ------------------------------------- 0.8/15.5 MB 1.7 MB/s eta 0:00:09
   --- ------------------------------------ 1.3/15.5 MB 2.4 MB/s eta 0:00:06
   ---- ----------------------------------- 1.8/15.5 MB 2.4 MB/s eta 0:00:06
   ----- ---------------------------------- 2.1/15.5 MB 2.3 MB/s eta 0:00:06
   ------ --------------------------------- 2.6/15.5 MB 2.1 MB/s eta 0:00:07
   ------- -------------------------------- 2.9/15.5 MB 2.1 MB/s eta 0:00:07
   -------- ------------------------------- 3.1/15.5 MB 2.1 MB/s eta 0:00:07
   --------- ------------------------------ 3.7/15.5 MB 2.0 MB/s eta 0:00:06
   ---------- ----

  You can safely remove it manually.
  You can safely remove it manually.

[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
import gradio as gr
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from collections import Counter

# Configuration
DATA_PATH = "C:\\Users\\Sakthi\\Downloads\\archive"  # Update with your RAVDESS dataset path
MODEL_PATH = "emotion_voicemodel_s1.h5"
SAMPLE_RATE = 48000  # RAVDESS uses 48kHz
DURATION = 3  # seconds (adjust as needed)
NUM_MFCC = 40
MAX_PAD_LEN = 174  # Adjust based on your dataset

# RAVDESS emotion mapping
EMOTIONS = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

# Create a label encoder for emotions
EMOTION_TO_INT = {emotion: idx for idx, emotion in enumerate(EMOTIONS.values())}
INT_TO_EMOTION = {idx: emotion for emotion, idx in EMOTION_TO_INT.items()}

# Function to extract MFCC features
def extract_mfcc(file_path):
    audio, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=NUM_MFCC)
    pad_width = MAX_PAD_LEN - mfccs.shape[1]
    if pad_width < 0:
        mfccs = mfccs[:, :MAX_PAD_LEN]
    else:
        mfccs = np.pad(mfccs, pad_width=((0,0), (0, pad_width)), mode='constant')
    # Normalize MFCCs
    mfccs = (mfccs - np.mean(mfccs)) / np.std(mfccs)
    return mfccs

# Function to load RAVDESS dataset
def load_dataset():
    features = []
    labels = []
    
    # Loop through each actor folder
    for actor_folder in os.listdir(DATA_PATH):
        actor_path = os.path.join(DATA_PATH, actor_folder)
        if os.path.isdir(actor_path):  # Ensure it's a folder
            print(f"Processing actor: {actor_folder}")
            
            # Loop through each audio file in the actor folder
            for audio_file in os.listdir(actor_path):
                if audio_file.endswith('.wav'):
                    try:
                        # Parse filename
                        parts = audio_file.split('-')
                        print(f"Filename parts: {parts}")  # Debug print
                        modality = parts[0]
                        vocal_channel = parts[1]
                        emotion_code = parts[2]
                        intensity = parts[3]
                        statement = parts[4]
                        repetition = parts[5]
                        actor = parts[6].split('.')[0]  # Remove .wav extension
                        
                        # Only use speech audio files
                        if modality == '03' and vocal_channel == '01':
                            emotion = EMOTIONS.get(emotion_code, 'unknown')
                            if emotion != 'unknown':
                                file_path = os.path.join(actor_path, audio_file)
                                print(f"Processing file: {file_path} -> Emotion: {emotion}")
                                mfcc = extract_mfcc(file_path)
                                features.append(mfcc)
                                labels.append(EMOTION_TO_INT[emotion])  # Convert emotion to integer
                    except Exception as e:
                        print(f"Error processing {audio_file}: {str(e)}")
    
    print(f"Total files processed: {len(features)}")
    return np.array(features), np.array(labels)

# Function to create the model
def create_model(input_shape, num_classes):
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])
    return model

# Train and save model
def train_model():
    X, y = load_dataset()
    X = X[..., np.newaxis]  # Add channel dimension
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = create_model((NUM_MFCC, MAX_PAD_LEN, 1), len(EMOTIONS))
    history = model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test))
    
    # Check predictions on the test set
    y_pred = model.predict(X_test)
    y_pred_labels = np.argmax(y_pred, axis=1)
    for i in range(10):  # Print first 10 predictions
        print(f"Predicted: {INT_TO_EMOTION[y_pred_labels[i]]}, Actual: {INT_TO_EMOTION[y_test[i]]}")
    
    model.save(MODEL_PATH)

# Uncomment to train the model
train_model()

# Load pre-trained model
model = models.load_model(MODEL_PATH)

# Function to predict emotion
def predict_emotion(audio_file):
    # Process audio
    mfcc = extract_mfcc(audio_file)
    mfcc = mfcc[np.newaxis, ..., np.newaxis]  # Add batch and channel dimensions
    
    # Predict
    predictions = model.predict(mfcc)
    predicted_index = np.argmax(predictions)
    emotion = INT_TO_EMOTION[predicted_index]  # Convert integer to emotion string
    return emotion

# Create Gradio interface
interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Label(num_top_classes=3),
    title="RAVDESS Speech Emotion Recognition",
    description="Upload an audio file (~3s speech) or record using microphone to detect emotion.",
    examples=[
        ["examples/03-01-03-01-02-01-12.wav"],  # Happy example
        ["examples/03-01-05-02-01-02-07.wav"],  # Angry example
        ["examples/03-01-04-01-02-01-18.wav"]   # Sad example
    ]
)

interface.launch()

Processing actor: Actor_01
Filename parts: ['03', '01', '01', '01', '01', '01', '01.wav']
Processing file: C:\Users\Sakthi\Downloads\archive\Actor_01\03-01-01-01-01-01-01.wav -> Emotion: neutral
Filename parts: ['03', '01', '01', '01', '01', '02', '01.wav']
Processing file: C:\Users\Sakthi\Downloads\archive\Actor_01\03-01-01-01-01-02-01.wav -> Emotion: neutral
Filename parts: ['03', '01', '01', '01', '02', '01', '01.wav']
Processing file: C:\Users\Sakthi\Downloads\archive\Actor_01\03-01-01-01-02-01-01.wav -> Emotion: neutral
Filename parts: ['03', '01', '01', '01', '02', '02', '01.wav']
Processing file: C:\Users\Sakthi\Downloads\archive\Actor_01\03-01-01-01-02-02-01.wav -> Emotion: neutral
Filename parts: ['03', '01', '02', '01', '01', '01', '01.wav']
Processing file: C:\Users\Sakthi\Downloads\archive\Actor_01\03-01-02-01-01-01-01.wav -> Emotion: calm
Filename parts: ['03', '01', '02', '01', '01', '02', '01.wav']
Processing file: C:\Users\Sakthi\Downloads\archive\Actor_01\03-01-02-01-0



Predicted: disgust, Actual: disgust
Predicted: calm, Actual: calm
Predicted: fearful, Actual: calm
Predicted: calm, Actual: calm
Predicted: angry, Actual: angry
Predicted: calm, Actual: calm
Predicted: sad, Actual: calm
Predicted: fearful, Actual: fearful
Predicted: fearful, Actual: surprised
Predicted: calm, Actual: calm




* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\Sakthi\anaconda3\Lib\site-packages\gradio\queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Sakthi\anaconda3\Lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Sakthi\anaconda3\Lib\site-packages\gradio\blocks.py", line 2108, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Sakthi\anaconda3\Lib\site-packages\gradio\blocks.py", line 1655, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Sakthi\anaconda3\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thre

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
