In [1]:
# %pip (not !pip) installs into the environment of the running kernel.
# librosa and soundfile are pinned to the versions the recorded install resolved,
# so a later re-run extracts features with the same library versions.
%pip install librosa==0.11.0 soundfile==0.13.1 numpy pandas scikit-learn matplotlib


Defaulting to user installation because normal site-packages is not writeable
Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting soundfile
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.1.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-1.0.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
   ---------------------------------------- 0.0/260.7 kB ? eta -:--:--
   ------------------ --------------------- 122.9/260.7 kB 3.6 MB/s eta 0:00:01
   ------------------------------- -------- 204.8/260.7 kB 2.5 MB/s eta 0:00:01
   ---------------------------------------- 260.7/260.7 kB 2.0 MB/s eta 0:00:00
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
  

In [6]:
import os
import librosa
import numpy as np
import pandas as pd
import glob

# Two-digit emotion code (as it appears in the file name) -> emotion label.
# Codes are assigned in order starting at "01".
emotion_map = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fear",
            "disgust",
            "surprise",
        ),
        start=1,
    )
}

def extract_mfcc(path, duration=3, offset=0.5, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    path : str
        Audio file to load with ``librosa.load``.
    duration : float, default 3
        Maximum number of seconds to read.
    offset : float, default 0.5
        Seconds to skip at the start of the file before reading.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute.

    Returns
    -------
    numpy.ndarray
        One value per coefficient: the mean of that coefficient over all frames.

    Raises
    ------
    ValueError
        If no samples remain after applying ``offset`` (for example the clip is
        shorter than ``offset``), since features of an empty signal are meaningless.
    """
    audio, sr = librosa.load(path, duration=duration, offset=offset)
    if audio.size == 0:
        raise ValueError(
            f"No audio samples loaded from {path!r} "
            f"(offset={offset}, duration={duration})"
        )

    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average them away.
    return np.mean(mfcc.T, axis=0)

def dedupe_by_basename(paths):
    """Return ``paths`` in sorted order, keeping one path per distinct file name.

    Sorting makes the result independent of the order glob happens to return;
    for a repeated file name the first path in sorted order is kept.
    """
    first_seen = {}
    for path in sorted(paths):
        first_seen.setdefault(os.path.basename(path), path)
    return list(first_seen.values())


def parse_ravdess_filename(file_name, emotions):
    """Parse the metadata fields encoded in a clip's file name.

    The name is split on ``-`` (``_`` is accepted as an alternative separator).
    Fields 3-6 are emotion code, intensity, statement and repetition; field 7
    (up to the first ``.``) is the actor id. Even actor ids are labelled
    ``"female"``, odd ones ``"male"``.

    Parameters
    ----------
    file_name : str
        Base name of the audio file, e.g. ``"03-01-05-01-02-01-12.wav"``.
    emotions : dict
        Mapping from emotion code to emotion label.

    Returns
    -------
    dict
        Keys ``emotion``, ``gender``, ``actor_id``, ``intensity``, ``statement``,
        ``repetition`` (in that order).

    Raises
    ------
    ValueError
        If the name has fewer than seven fields, the emotion code is unknown,
        or the actor id is not an integer.
    """
    parts = file_name.replace("_", "-").split("-")
    if len(parts) < 7:
        raise ValueError(f"expected at least 7 fields in {file_name!r}, got {len(parts)}")

    emotion_code, intensity, statement, repetition = parts[2:6]
    actor_text = parts[6].split(".")[0]

    if emotion_code not in emotions:
        raise ValueError(f"unknown emotion code {emotion_code!r} in {file_name!r}")
    try:
        actor_id = int(actor_text)
    except ValueError:
        raise ValueError(f"non-numeric actor id {actor_text!r} in {file_name!r}") from None

    return {
        "emotion": emotions[emotion_code],
        "gender": "female" if actor_id % 2 == 0 else "male",
        "actor_id": actor_id,
        "intensity": intensity,
        "statement": statement,
        "repetition": repetition,
    }


folder = "audio/"

# os.path.join avoids the doubled separator that f"{folder}/**/*.wav" produced.
found = glob.glob(os.path.join(folder, "**", "*.wav"), recursive=True)
print("Total wav files found:", len(found))

# NOTE(review): the saved output of this cell reports 2880 files. If the data is
# the RAVDESS speech set (1440 clips - TODO confirm), every clip is present twice
# under different sub-folders, and duplicated rows would leak between any later
# train/test split. Keep one path per file name.
files = dedupe_by_basename(found)
if len(files) != len(found):
    print("Duplicate file names ignored:", len(found) - len(files))

rows = []
skipped = []

for file_path in files:
    try:
        row = parse_ravdess_filename(os.path.basename(file_path), emotion_map)
    except ValueError as err:
        # One oddly named file should not abort the whole (slow) extraction.
        skipped.append((file_path, str(err)))
        continue

    mfcc_features = extract_mfcc(file_path)
    for i, val in enumerate(mfcc_features):
        row[f"mfcc_{i+1}"] = val

    rows.append(row)

if skipped:
    print("Skipped files with unparseable names:", len(skipped))
    for skipped_path, reason in skipped:
        print("  ", skipped_path, "->", reason)

df = pd.DataFrame(rows)
df.head()


Total wav files found: 2880


  "class": algorithms.Blowfish,


   emotion gender  actor_id intensity statement repetition      mfcc_1  \
0  neutral   male         1        01        01         01 -670.195435   
1  neutral   male         1        01        01         02 -660.230347   
2  neutral   male         1        01        02         01 -661.964478   
3  neutral   male         1        01        02         02 -657.722351   
4     calm   male         1        01        01         01 -694.579590   

      mfcc_2    mfcc_3     mfcc_4  ...   mfcc_31   mfcc_32   mfcc_33  \
0  65.063850  0.888954  14.715979  ... -2.351098 -2.504727 -3.151507   
1  63.325817 -2.630457  17.983355  ... -1.786414 -3.113372 -2.556752   
2  66.655869 -0.932158  14.899042  ... -2.264493 -2.643650 -2.937167   
3  65.035187  3.148672  15.666511  ... -2.918577 -2.849612 -3.591487   
4  72.531715  3.104562  17.112118  ... -2.507130 -1.405873 -2.290346   

    mfcc_34   mfcc_35   mfcc_36   mfcc_37   mfcc_38   mfcc_39   mfcc_40  
0 -2.190899 -3.801760 -1.813088 -1.261222 -2.144

In [14]:
# Persist the feature table next to the notebook.
# NOTE(review): the DataFrame index is written as the first, unnamed column;
# pass index=False here if downstream readers do not expect it.
csv_path = "AudioToDataset.csv"
df.to_csv(csv_path)

In [15]:
# Count missing values per column; every count should be zero before modelling.
missing_per_column = df.isna().sum()
missing_per_column


emotion       0
gender        0
actor_id      0
intensity     0
statement     0
repetition    0
mfcc_1        0
mfcc_2        0
mfcc_3        0
mfcc_4        0
mfcc_5        0
mfcc_6        0
mfcc_7        0
mfcc_8        0
mfcc_9        0
mfcc_10       0
mfcc_11       0
mfcc_12       0
mfcc_13       0
mfcc_14       0
mfcc_15       0
mfcc_16       0
mfcc_17       0
mfcc_18       0
mfcc_19       0
mfcc_20       0
mfcc_21       0
mfcc_22       0
mfcc_23       0
mfcc_24       0
mfcc_25       0
mfcc_26       0
mfcc_27       0
mfcc_28       0
mfcc_29       0
mfcc_30       0
mfcc_31       0
mfcc_32       0
mfcc_33       0
mfcc_34       0
mfcc_35       0
mfcc_36       0
mfcc_37       0
mfcc_38       0
mfcc_39       0
mfcc_40       0
dtype: int64

In [None]:
from sklearn.preprocessing import LabelEncoder
# Feature matrix: every column whose name contains "mfcc".
mfcc_frame = df.filter(like="mfcc")
X = mfcc_frame.values

# Integer-encode the emotion labels; `le` is kept so the fitted encoder can be reused later.
le = LabelEncoder()
y = le.fit_transform(df["emotion"].values)
