In [None]:
pip install numpy librosa tqdm

Note: you may need to restart the kernel to use updated packages.


**Imports**

In [None]:
import librosa
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

**Feature Extraction & Labelling**

In [None]:
def extract_mfcc_from_folder(audio_folder, label, n_mfcc=13, save_folder='mfcc_features'):
    """Summarise every WAV clip in a folder as one fixed-length MFCC feature vector.

    For each clip the MFCCs, their first-order deltas and their second-order
    deltas are stacked and averaged over time, giving ``3 * n_mfcc`` values
    per file.

    Parameters
    ----------
    audio_folder : str
        Directory scanned (non-recursively) for files whose name ends in
        ``.wav``, in any letter case.
    label : str
        Class label attached to every clip found in the folder.
    n_mfcc : int, default 13
        Number of MFCC coefficients requested from librosa.
    save_folder : str, default 'mfcc_features'
        Directory that is created if missing. This function does not write
        anything into it.

    Returns
    -------
    features : numpy.ndarray
        One row per successfully processed clip with ``3 * n_mfcc`` columns
        (an empty array when no clip was processed).
    labels : numpy.ndarray
        ``label`` repeated once per row of ``features``.
    filenames : list of str
        File names (not full paths), in the same order as the rows.

    Notes
    -----
    A clip that raises during loading or feature extraction is reported on
    stdout and skipped, so the three return values always stay aligned.
    """
    os.makedirs(save_folder, exist_ok=True)
    mfcc_features = []
    mfcc_labels = []
    mfcc_filenames = []

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore the exported CSV) reproducible across machines.
    # The extension test is case-insensitive so "clip.WAV" is not dropped.
    audio_files = sorted(
        f for f in os.listdir(audio_folder) if f.lower().endswith('.wav')
    )

    for filename in tqdm(audio_files, total=len(audio_files)):
        file_path = os.path.join(audio_folder, filename)
        try:
            # sr=None keeps the file's native sampling rate instead of resampling.
            y, sr = librosa.load(file_path, sr=None)

            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            delta = librosa.feature.delta(mfcc)
            delta2 = librosa.feature.delta(mfcc, order=2)

            # Stack to (3 * n_mfcc, frames), then average over the frame axis
            # so clips of any duration produce a vector of the same length.
            combined = np.vstack([mfcc, delta, delta2])
            combined_mean = np.mean(combined, axis=1)

            # Append all three together, only after extraction succeeded,
            # to keep features, labels and file names index-aligned.
            mfcc_features.append(combined_mean)
            mfcc_labels.append(label)
            mfcc_filenames.append(filename)

        except Exception as e:
            # Best effort: one unreadable clip must not abort the whole folder.
            print(f"Error with file {filename}: {e}")

    return np.array(mfcc_features), np.array(mfcc_labels), mfcc_filenames


In [None]:
# Google Drive can only be mounted from inside Google Colab. On a local Jupyter
# kernel the google.colab package is absent, so skip the mount instead of
# stopping the notebook with an ImportError.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping the Google Drive mount.")
else:
    drive.mount('/content/drive')

**`Check if the path has the files or not`**

In [None]:
# Folder holding the segmented .wav clips of one emotion.
# Change it to your own unzipped folder path.
path = r"C:\Users\N\Desktop\Haneen\semester_6\machine\Neutral_segmented-20250528T193702Z-1-001\Neutral_segmented"

# Show a count and a short preview rather than printing thousands of file names.
folder_entries = sorted(os.listdir(path))
print(f"{len(folder_entries)} entries found in: {path}")
print(folder_entries[:10])

**Run The Function**

In [None]:
# Reuse the folder checked in the previous cell (`path`) so the location only
# has to be changed in one place.
mfcc_features_N, mfcc_labels_N, mfcc_filenames_N = extract_mfcc_from_folder(
    audio_folder=path,
    label="Neutral",  # change the label to your emotion name
)

['neutral_aehhs_2492.wav', 'neutral_aehhs_2496.wav', 'neutral_aehhs_2497.wav', 'neutral_aehhs_2498.wav', 'neutral_aehhs_2501.wav', 'neutral_aehhs_2502.wav', 'neutral_aehhs_2503.wav', 'neutral_aehhs_2504.wav', 'neutral_aehhs_2510.wav', 'neutral_aehhs_2512.wav', 'neutral_aehhs_2517.wav', 'neutral_aehhs_2519.wav', 'neutral_aehhs_2520.wav', 'neutral_aehhs_2521.wav', 'neutral_aehhs_2522.wav', 'neutral_aehhs_2523.wav', 'neutral_aehhs_2525.wav', 'neutral_aehhs_2526.wav', 'neutral_aehhs_2527.wav', 'neutral_aehhs_2528.wav', 'neutral_aehhs_2530.wav', 'neutral_aehhs_2531.wav', 'neutral_aehhs_2532.wav', 'neutral_aehhs_2533.wav', 'neutral_aehhs_2534.wav', 'neutral_aehhs_2535.wav', 'neutral_aehhs_2536.wav', 'neutral_aehhs_2537.wav', 'neutral_aehhs_2538.wav', 'neutral_aehhs_2539.wav', 'neutral_aehhs_2540.wav', 'neutral_aehhs_2541.wav', 'neutral_aehhs_2542.wav', 'neutral_aehhs_2543.wav', 'neutral_aehhs_2544.wav', 'neutral_aehhs_2545.wav', 'neutral_aehhs_2546.wav', 'neutral_aehhs_2547.wav', 'neutral_ae

100%|██████████████████████████████████████████████████████████████████████████████| 5377/5377 [04:42<00:00, 19.03it/s]


**Make DataFrame**

In [None]:
# One row per clip: the numeric feature columns first, then the emotion label
# and the source file name. Built in a single expression so re-running the
# cell always yields the same frame.
df_N = pd.DataFrame(mfcc_features_N).assign(
    Label=mfcc_labels_N,
    File_Name=mfcc_filenames_N,
)
df_N

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,Label
0,-287.913910,137.617432,-48.041115,38.547523,-30.853277,14.626295,-19.773479,10.971231,2.434737,-2.971322,...,-0.072514,-0.007822,-0.056774,-0.032238,-0.000300,-0.042035,-0.024750,0.041571,0.029707,Neutral
1,-298.555847,156.171265,-44.378727,45.230450,-20.420759,15.900339,-8.539274,6.644521,6.259509,5.089580,...,0.024012,-0.025456,0.003031,-0.026426,-0.030050,-0.025796,-0.005405,0.024771,-0.031764,Neutral
2,-290.951080,153.542328,-45.234261,52.115536,-17.793053,22.480532,-0.441563,10.776849,7.421404,-0.233248,...,0.039484,-0.026120,0.011511,-0.034229,-0.003729,-0.004957,-0.017778,0.010978,-0.033453,Neutral
3,-247.839432,140.404282,-46.182117,54.252762,-36.094250,26.255520,-7.008230,11.256819,0.517748,2.440896,...,-0.018490,-0.019183,0.011821,0.032661,0.002949,-0.007409,-0.012526,0.019094,0.001497,Neutral
4,-263.575897,152.486343,-47.668419,43.964325,-25.803938,24.279560,-4.939576,11.105044,2.900494,0.552982,...,0.023680,-0.007798,-0.011303,-0.019882,-0.024838,-0.013442,-0.022744,-0.016016,0.003047,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5372,-498.436279,159.237000,7.903052,20.321167,25.864687,-1.748588,-7.840082,-10.808754,-7.788122,3.985581,...,-0.027343,0.039525,0.013109,0.090988,0.085414,-0.018278,-0.004096,0.030486,0.016958,Neutral
5373,-486.132080,170.607025,-13.703200,15.693749,30.111231,-6.446225,-10.353929,-8.388937,-7.234899,9.532722,...,-0.063137,0.071231,0.022671,0.023750,0.079690,0.020236,-0.025677,-0.016374,-0.003171,Neutral
5374,-486.100220,169.614944,1.905995,19.538179,34.465797,-2.659995,-14.332887,-11.590310,-5.092456,4.568852,...,-0.173831,0.020889,0.080085,0.031625,0.009646,-0.029498,-0.040934,-0.000679,0.007721,Neutral
5375,-561.171631,101.968582,12.137526,27.572105,35.925968,7.706303,-3.144443,-1.490591,3.301049,8.773347,...,-0.091937,-0.002488,0.000100,-0.030865,0.007337,0.031579,0.013130,0.019702,0.016550,Neutral


**Save the csv file**

In [None]:
# Output file name: change the suffix to the first letter of your emotion.
output_csv = "SER_N.csv"
df_N.to_csv(path_or_buf=output_csv, encoding='utf-8', index=False)
print(f"CSV saved to: {output_csv}")
# Afterwards, download the CSV from Jupyter and upload it to the shared
# Machine Learning Project folder on Drive.