# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import librosa
import os

In [2]:
data = pd.read_csv('Data\\Audio_Paths.csv')

In [3]:
data.head()

Unnamed: 0,path,aggressiveness,gender
0,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
1,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
2,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
3,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
4,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0


In [4]:
data.shape

(3737, 3)

In [5]:
def extract_mfcc(signal, sample_rate=22050, n_mfcc=13, n_fft=2048, hop_length=512):
    mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return np.mean(mfccs.T, axis=0)

In [12]:
# Create an empty list to store DataFrames
dfs = []

# Loop through each row in the 'data' DataFrame
for index, row in data.iterrows():
    audio_path = row['path']
    emotion_label = row['aggressiveness']
    
    # Load audio file using librosa
    signal, sample_rate = librosa.load(audio_path, sr=None)
    
    # Extract MFCCs
    mfccs = extract_mfcc(signal)
    
    # Create a DataFrame for the current row
    mfcc_row = dict(zip([f'mfcc_{i+1}' for i in range(13)], mfccs))
    mfcc_row['aggressiveness'] = emotion_label
    df_row = pd.DataFrame([mfcc_row])
    
    # Append the DataFrame to the list
    dfs.append(df_row)

# Concatenate all DataFrames in the list
result_df = pd.concat(dfs, ignore_index=True)

In [13]:
result_df.head()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,aggressiveness
0,-581.183044,61.775311,0.334518,8.486256,0.788418,11.279878,-6.216891,0.534891,2.526564,-0.257104,-0.805913,-0.91409,-2.37179,1
1,-574.668396,56.525585,-2.960935,8.754062,1.412463,7.794606,-5.879593,-1.438899,0.365388,-1.025561,-1.278394,-1.151001,-9.105648,1
2,-628.37793,69.173576,-5.179301,4.144491,-2.561467,12.802252,-1.245774,0.469771,1.913076,-0.159788,-1.885625,-7.068508,-4.552937,1
3,-577.23938,67.032509,5.281436,11.227827,3.101099,18.982237,-5.887117,1.355586,6.083599,1.585905,2.887235,3.16688,-1.952211,1
4,-540.839233,89.908417,8.10426,13.206378,1.85744,19.507902,-5.437987,3.933926,7.808056,0.80663,1.848405,-6.200817,-10.122912,1


In [14]:
result_df.tail()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,aggressiveness
3732,-661.857849,126.180717,32.795216,27.33172,38.340611,27.26009,20.444166,11.823015,3.446167,5.137197,6.194908,5.017681,4.732757,0
3733,-474.226379,121.006851,36.91412,23.742472,26.392221,22.669628,29.842886,24.106079,6.605823,1.159015,1.772958,1.598283,1.133266,0
3734,-475.885284,126.34127,21.797455,29.238136,42.882542,26.361355,28.54917,25.477819,8.707728,3.775768,1.275185,-1.682813,1.236163,0
3735,-437.635437,110.068451,21.290802,25.345015,42.493927,29.043299,14.357561,4.049996,3.74454,7.662204,1.872068,-0.824959,1.152825,0
3736,-687.12146,122.328445,36.989338,29.465334,33.644844,22.4174,21.621902,16.884426,7.653037,5.970741,3.264637,1.300912,3.126155,0


In [None]:
# Save the final DataFrame to a CSV file
result_df.to_csv('Data\\Extracted_MFCCs.csv', index=False)