# Import libraries and mount Google Drive

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')
import torchaudio
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import Wav2Vec2Model, Wav2Vec2Processor, Trainer , TrainingArguments, Wav2Vec2BertForSequenceClassification

In [None]:
# Mount Google Drive at /content/drive (google.colab is only available on Colab).
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the TESS dataset folder on the mounted Drive.
folder_path = "/content/drive/MyDrive/voice/archive (9)/TESS Toronto emotional speech set data"


In [None]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Set dataset path: root directory on the mounted Drive whose sub-folders hold the .wav clips
dataset_path = "/content/drive/MyDrive/voice/archive (9)/TESS Toronto emotional speech set data"

# Extract features

In [None]:
# Function to extract MFCC features
def extract_features(file_path, max_len=40):
    """Return a time-averaged MFCC vector for one audio file.

    The file is loaded at its native sampling rate (``sr=None``), 40 MFCCs
    are computed, and each coefficient is averaged over all frames.

    Args:
        file_path: Path of the audio file to load.
        max_len: Not referenced by the body; kept so callers that pass it
            keep working.

    Returns:
        NumPy array with the mean of each MFCC coefficient across frames.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
    return np.mean(coefficients.T, axis=0)

In [None]:
# Load dataset
# Walk every emotion sub-folder of `dataset_path` and collect one MFCC vector
# (features) plus one label (emotions) per .wav clip.
emotions = []
features = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the later train/test split) reproducible between runs.
for folder in sorted(os.listdir(dataset_path)):
    emotion_dir = os.path.join(dataset_path, folder)
    if not os.path.isdir(emotion_dir):
        continue

    # Folder names are expected to look like "<speaker>_<emotion>[_...]".
    name_parts = folder.split('_')
    if len(name_parts) < 2:
        continue  # not an emotion folder; skip instead of raising IndexError

    # Lower-case the label so folders that differ only in capitalisation
    # (e.g. "Fear" vs "fear") map to one class instead of two.
    emotion = name_parts[1].lower()

    for file in sorted(os.listdir(emotion_dir)):
        if file.endswith(".wav"):
            file_path = os.path.join(emotion_dir, file)
            features.append(extract_features(file_path))
            emotions.append(emotion)

# Convert to DataFrame

In [None]:
# Convert to DataFrame
# One row per audio clip: the feature columns followed by its emotion label.
data = pd.DataFrame(features).assign(emotion=emotions)

# Encode target labels
# `le` is kept so the integer codes can be mapped back to label names later.
le = LabelEncoder()
data = data.assign(emotion=le.fit_transform(data['emotion']))

# Split Data

In [None]:
# Split data
# Target is the encoded emotion column; every other column is a feature.
y = data['emotion']
X = data.loc[:, data.columns != 'emotion']

# 80 % train / 20 % test, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Scale features
# The scaler learns its statistics from the training split only; the same
# fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
import joblib
# Serialize the `scaler` object to scaler.pkl in the current working directory.
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [None]:
# Train Models
# Untrained candidate classifiers, keyed by the display name used in reports
# and in the saved file names.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    # `use_label_encoder` is a deprecated XGBoost argument that current
    # releases ignore (emitting an "unused parameter" warning), so it is
    # no longer passed.
    "XGBoost": XGBClassifier(eval_metric='mlogloss')
}


# Model Training

In [None]:
# Model Training & Evaluation
import joblib

# For every candidate: fit on the training split, report accuracy and the
# per-class metrics on the test split, then pickle the fitted estimator into
# the current working directory.
for name in models:
    estimator = models[name]
    estimator.fit(X_train, y_train)

    predictions = estimator.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    print(f"\n{name} Accuracy: {acc * 100:.2f}%")
    print(classification_report(y_test, predictions, target_names=le.classes_))

    # Save model (spaces in the display name become underscores)
    model_filename = f"{name.replace(' ', '_')}.pkl"
    joblib.dump(estimator, model_filename)
    print(f"{name} model saved as {model_filename}")



Random Forest Accuracy: 99.46%
              precision    recall  f1-score   support

        Fear       1.00      0.97      0.99        40
    Pleasant       1.00      0.96      0.98        47
         Sad       1.00      1.00      1.00        49
       angry       0.99      1.00      0.99        70
     disgust       1.00      1.00      1.00        89
        fear       1.00      1.00      1.00        30
       happy       0.97      1.00      0.99        77
     neutral       1.00      1.00      1.00        82
    pleasant       1.00      1.00      1.00        37
         sad       1.00      1.00      1.00        39

    accuracy                           0.99       560
   macro avg       1.00      0.99      0.99       560
weighted avg       0.99      0.99      0.99       560

Random Forest model saved as Random_Forest.pkl

Logistic Regression Accuracy: 99.46%
              precision    recall  f1-score   support

        Fear       1.00      1.00      1.00        40
    Pleasant   

In [None]:
# Function to predict emotion from new audio file
def predict_emotion(audio_file, model):
    """Score one audio file with a fitted classifier.

    The file's features are extracted with ``extract_features``, scaled with
    the module-level ``scaler``, and passed to ``model.predict_proba``.

    Args:
        audio_file: Path of the audio file to classify.
        model: Fitted estimator exposing ``predict_proba``.

    Returns:
        dict mapping each entry of ``le.classes_`` to its predicted
        probability expressed as a percentage.
    """
    row = extract_features(audio_file).reshape(1, -1)  # single-sample 2-D input
    row = scaler.transform(row)
    percentages = model.predict_proba(row)[0] * 100  # Get probability
    return {label: pct for label, pct in zip(le.classes_, percentages)}

# Model Testing

In [None]:
# Smoke-test the prediction helper on one clip of the dataset.
# The path is built from `dataset_path` instead of being repeated by hand
# (the hand-written copy started with a doubled "//").
# NOTE(review): this clip comes from the same dataset the models were fitted
# on, so it may have been part of the training split - it is a sanity check,
# not a held-out evaluation.
sample_path = os.path.join(dataset_path, "OAF_disgust", "OAF_back_disgust.wav")
emotion_scores = predict_emotion(sample_path, models['XGBoost'])
print(emotion_scores)


{'Fear': np.float32(0.0018480427), 'Pleasant': np.float32(0.0010227247), 'Sad': np.float32(0.0030001756), 'angry': np.float32(0.00082780677), 'disgust': np.float32(99.98108), 'fear': np.float32(0.0058437856), 'happy': np.float32(0.0012023264), 'neutral': np.float32(0.0019782935), 'pleasant': np.float32(0.0014203935), 'sad': np.float32(0.001788409)}


In [None]:
pip install gradio

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

# Gradio Dashboard

In [None]:
# Install dependencies if needed
# !pip install gradio librosa xgboost scikit-learn matplotlib

import gradio as gr
import librosa
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings("ignore")

# from google.colab import drive
# drive.mount('/content/drive')

# # Set dataset path from Google Drive
# dataset_path = "/content/drive/MyDrive/voice/archive (9)/TESS Toronto emotional speech set data"

# ----------- Feature Extraction ------------
def extract_features(file_path):
    """Return a time-averaged MFCC vector for one audio file.

    At most 3 seconds of audio are read, starting 0.5 seconds into the file.
    No ``sr`` is passed, so librosa's default sampling rate applies. 40 MFCCs
    are computed and each coefficient is averaged over all frames.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        NumPy array with the mean of each MFCC coefficient across frames.
    """
    audio, rate = librosa.load(file_path, duration=3, offset=0.5)
    per_frame = librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=40)
    return np.mean(per_frame.T, axis=0)

# ----------- Load and Process Dataset ------------
def load_data(data_dir=None):
    """Build scaled train/test feature matrices from a directory of .wav files.

    Every ``.wav`` file below the root directory is converted to a feature
    vector with ``extract_features``; its label is the text after the last
    underscore of the file name (lower-cased).

    The data is split *before* the scaler is fitted, and the scaler is fitted
    on the training split only, so no statistics of the test split leak into
    preprocessing.

    Args:
        data_dir: Root directory to scan. Defaults to the module-level
            ``dataset_path`` when omitted.

    Returns:
        ``([X_train, X_test, y_train, y_test], label_encoder, scaler)`` where
        the feature matrices are already scaled and the labels are the
        integer codes produced by ``label_encoder``.

    Raises:
        ValueError: If no ``.wav`` file is found under the root directory.
    """
    root = dataset_path if data_dir is None else data_dir

    features = []
    emotions = []

    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place fixes os.walk's traversal order, which keeps the
        # row order (and the seeded split below) reproducible.
        dirnames.sort()
        for file in sorted(filenames):
            stem, ext = os.path.splitext(file)
            if ext.lower() != '.wav':
                continue
            # File names are expected to end in "_<emotion>.wav".
            emotion = stem.split('_')[-1].lower()
            features.append(extract_features(os.path.join(dirpath, file)))
            emotions.append(emotion)

    if not features:
        raise ValueError(f"No .wav files found under {root!r}")

    X = np.array(features)
    y = np.array(emotions)

    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )

    # Fit on the training rows only; the test rows are merely transformed.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return [X_train, X_test, y_train, y_test], le, scaler

# Load dataset and train models
(X_train, X_test, y_train, y_test), label_encoder, scaler = load_data()

# Settings are spelled out so runs are repeatable: the Random Forest is seeded
# and Logistic Regression gets a larger iteration budget than its default.
# `use_label_encoder` is a deprecated XGBoost argument that current releases
# ignore, so it is no longer passed.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(max_iter=500),
    "XGBoost": XGBClassifier(eval_metric='mlogloss')
}

# Fitted estimators, keyed by the name offered in the dashboard drop-down.
trained_models = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    trained_models[name] = model

# ----------- Prediction Function ------------
import pandas as pd

def predict_emotion(audio_file, model_name):
    """Predict emotion probabilities for one audio file (Gradio callback).

    Args:
        audio_file: Path of the uploaded audio file, or ``None``/empty when
            nothing was uploaded.
        model_name: Key into ``trained_models`` selecting the classifier.

    Returns:
        ``(table, figure)``. ``table`` is a DataFrame with the columns
        "Emotion" and "Confidence (%)", sorted by descending confidence, and
        ``figure`` is a Matplotlib bar chart of the same values. When the
        model name is unknown or no audio was supplied, a short message
        string and ``None`` are returned instead.
    """
    # Imported locally so the block does not depend on an extra top-level import.
    from matplotlib.figure import Figure

    if model_name not in trained_models:
        return "Model not found.", None
    if not audio_file:
        # Without this guard a missing upload fails inside the audio loader.
        return "Please upload an audio file.", None

    model = trained_models[model_name]
    feature = extract_features(audio_file)
    feature_scaled = scaler.transform([feature])
    proba = model.predict_proba(feature_scaled)[0]
    # predict_proba columns follow model.classes_, so decode exactly those
    # codes rather than assuming they are 0..n-1.
    emotion_labels = label_encoder.inverse_transform(model.classes_)

    # Create DataFrame for sorting and table output
    df = (
        pd.DataFrame({
            "Emotion": emotion_labels,
            "Confidence (%)": (proba * 100).round(2)
        })
        .sort_values(by="Confidence (%)", ascending=False)
        .reset_index(drop=True)
    )

    # Bar chart with unique colors. The Figure is created directly instead of
    # through pyplot: pyplot keeps every figure it creates alive until it is
    # closed, which would leak one figure per request in a long-running app.
    fig = Figure()
    ax = fig.subplots()
    bar_colors = plt.cm.Set3(np.linspace(0, 1, len(proba)))
    ax.bar(df["Emotion"], df["Confidence (%)"], color=bar_colors)
    ax.set_ylabel('Probability (%)')
    ax.set_title('Emotion Prediction Confidence')
    ax.set_ylim([0, 100])
    ax.tick_params(axis='x', labelrotation=45)

    return df, fig

# ----------- Gradio Interface ------------
# Input widgets, in the order of predict_emotion's parameters:
# the audio file path first, then the name of the model to use.
input_widgets = [
    gr.Audio(type="filepath", label="Upload Audio (.wav)"),
    gr.Dropdown(choices=list(trained_models.keys()), label="Select Model"),
]

# Output widgets, in the order of predict_emotion's return values.
output_widgets = [
    gr.Text(label="Prediction Table"),
    gr.Plot(label="Emotion Probabilities (Bar Chart)"),
]

interface = gr.Interface(
    fn=predict_emotion,
    inputs=input_widgets,
    outputs=output_widgets,
    title="🎧 Voice Emotion Recognition Dashboard",
    description="Upload a .wav file and select a model to get emotion prediction with confidence chart.",
)

# Start the web app.
interface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://85b27ffcc415367e1c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


