In [13]:
# Core Audio Processing
import librosa
import soundfile as sf
import pyAudioAnalysis
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import MidTermFeatures

# Numerical Processing
import numpy as np
import pandas as pd

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Visualization
import matplotlib.pyplot as plt

# System utilities
import os
import warnings
warnings.filterwarnings('ignore')

# Emotion (02): Encoded as:
# 01 = neutral
# 02 = calm
# 03 = happy
# 04 = sad
# 05 = angry
# 06 = fearful
# 07 = disgust
# 08 = surprised.

In [15]:
# Define data path
data_path = "data/audio/actor1"
files = []

# Iterate through each file and extract features
for file in os.listdir(data_path):
    if file.endswith(".wav"):
        file_path = os.path.join(data_path, file)
        # Parse labels from filename
        parts = file.split("-")
        emotion = int(parts[2])  # Extract emotion from filename
        actor = int(parts[-1].split(".")[0])  # Extract actor ID
        files.append({"file_path": file_path, "emotion": emotion, "actor": actor})

# Create DataFrame
data = pd.DataFrame(files)
print(data.head(20))

                                     file_path  emotion  actor
0   data/audio/actor1/03-01-08-02-02-01-01.wav        8      1
1   data/audio/actor1/03-01-08-01-01-01-01.wav        8      1
2   data/audio/actor1/03-01-05-01-02-01-01.wav        5      1
3   data/audio/actor1/03-01-06-01-02-02-01.wav        6      1
4   data/audio/actor1/03-01-06-02-01-02-01.wav        6      1
5   data/audio/actor1/03-01-05-02-01-01-01.wav        5      1
6   data/audio/actor1/03-01-07-01-01-01-01.wav        7      1
7   data/audio/actor1/03-01-04-01-01-02-01.wav        4      1
8   data/audio/actor1/03-01-04-02-02-02-01.wav        4      1
9   data/audio/actor1/03-01-07-02-02-01-01.wav        7      1
10  data/audio/actor1/03-01-03-02-02-02-01.wav        3      1
11  data/audio/actor1/03-01-03-01-01-02-01.wav        3      1
12  data/audio/actor1/03-01-02-02-01-01-01.wav        2      1
13  data/audio/actor1/03-01-01-01-02-02-01.wav        1      1
14  data/audio/actor1/03-01-02-01-02-01-01.wav        2

In [16]:
# this will give you the exact type of emotion the audio is displaying

emotion_dict = {
    1: "neutral",
    2: "calm",
    3: "happy",
    4: "sad",
    5: "angry",
    6: "fearful",
    7: "disgust",
    8: "surprised"
}

data["emotion_label"] = data["emotion"].map(emotion_dict)


In [17]:
print(data.head())

                                    file_path  emotion  actor emotion_label
0  data/audio/actor1/03-01-08-02-02-01-01.wav        8      1     surprised
1  data/audio/actor1/03-01-08-01-01-01-01.wav        8      1     surprised
2  data/audio/actor1/03-01-05-01-02-01-01.wav        5      1         angry
3  data/audio/actor1/03-01-06-01-02-02-01.wav        6      1       fearful
4  data/audio/actor1/03-01-06-02-01-02-01.wav        6      1       fearful


In [21]:
# used to gather feautrues that will be used on the X test model
def extract_features(file_path):
    # Load the audio file
    y, sr = librosa.load(file_path, duration=2.5, offset=0.5)
    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Compute mean of each coefficient
    mfccs_mean = mfccs.mean(axis=1)
    return mfccs_mean

# Add features to DataFrame
data["features"] = data["file_path"].apply(extract_features)

print(data.head())

                                    file_path  emotion  actor emotion_label  \
0  data/audio/actor1/03-01-08-02-02-01-01.wav        8      1     surprised   
1  data/audio/actor1/03-01-08-01-01-01-01.wav        8      1     surprised   
2  data/audio/actor1/03-01-05-01-02-01-01.wav        5      1         angry   
3  data/audio/actor1/03-01-06-01-02-02-01.wav        6      1       fearful   
4  data/audio/actor1/03-01-06-02-01-02-01.wav        6      1       fearful   

                                            features  
0  [-514.731, 62.937397, -6.146049, 10.8779125, 0...  
1  [-602.8393, 72.09309, -0.67978925, 11.220813, ...  
2  [-506.93576, 63.804733, -2.4667234, 16.526533,...  
3  [-527.7185, 80.91389, -7.1170683, 21.611485, 7...  
4  [-330.14752, 54.85853, -23.322374, -1.3208342,...  


In [22]:

# Prepare features and labels
X = np.array(data["features"].tolist())
y = data["emotion"]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [20]:
# Train the model
clf = RandomForestClassifier()
clf.fit(X_train, y_train)

# Evaluate the model
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=list(emotion_dict.values())))

              precision    recall  f1-score   support

     neutral       0.00      0.00      0.00         1
        calm       0.67      1.00      0.80         2
       happy       0.00      0.00      0.00         1
         sad       0.25      1.00      0.40         1
       angry       0.50      1.00      0.67         1
     fearful       0.00      0.00      0.00         1
     disgust       1.00      1.00      1.00         1
   surprised       1.00      0.50      0.67         4

    accuracy                           0.58        12
   macro avg       0.43      0.56      0.44        12
weighted avg       0.59      0.58      0.53        12



Understanding the Model Used Chat to get a simple paragraph below

Your model is trying to recognize emotions like "neutral," "calm," and "happy," but it’s only getting some of them right. It’s great at recognizing "disgust" and okay with "calm" and "angry," but it completely failed to identify "neutral," "happy," and "fearful." Overall, it got 58% of things correct, which isn’t bad but shows room for improvement. The model struggles with emotions that might sound similar or didn’t have enough examples during training. To make it better, you might need more balanced data or better features to help the model understand tricky emotions.
