### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import librosa

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Table of audio clips to process; the preview below shows the columns
# 'path', 'aggressiveness' and 'gender'.
# A forward slash is used as the separator because it resolves on Windows
# as well as on POSIX systems, whereas a literal backslash only works on Windows.
data = pd.read_csv('Data/Audio_Paths.csv')
data.head()

Unnamed: 0,path,aggressiveness,gender
0,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
1,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
2,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
3,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
4,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0


In [3]:
# Dimensions of the loaded path table as (rows, columns).
data.shape

(3737, 3)

In [4]:
def extract_mfcc(signal, sample_rate=22050, n_mfcc=13, n_fft=2048, hop_length=512):
    """Return the MFCCs of ``signal`` averaged over all analysis frames.

    Parameters
    ----------
    signal : audio samples, forwarded to ``librosa.feature.mfcc`` as ``y``.
    sample_rate : sampling rate of ``signal``, forwarded as ``sr``.
    n_mfcc : number of coefficients to request.
    n_fft : FFT window length passed through to librosa.
    hop_length : hop between successive frames passed through to librosa.

    Returns
    -------
    The mean over the first axis of the transposed coefficient matrix, i.e.
    one averaged value per coefficient (assuming librosa returns the matrix
    as (n_mfcc, frames) -- confirm against the librosa documentation).
    """
    coefficient_matrix = librosa.feature.mfcc(
        y=signal,
        sr=sample_rate,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    # Transpose first, then collapse the leading axis with a mean.
    transposed = coefficient_matrix.T
    return np.mean(transposed, axis=0)

In [5]:
# Build one feature record (a plain dict) per audio file and create the
# DataFrame once at the end, rather than creating a one-row DataFrame per
# file and concatenating thousands of them.
feature_rows = []

# Only the file path and the label are needed from each row of 'data'.
for audio_path, emotion_label in zip(data['path'], data['aggressiveness']):
    # Load a 3 s window that starts 0.5 s into the clip. Keep the sample rate
    # that librosa reports so every feature below is computed against the
    # rate the signal really has, instead of repeating a hard-coded 22050.
    signal, sample_rate = librosa.load(
        audio_path, duration=3, offset=0.5, res_type='kaiser_fast'
    )

    # Frame-averaged MFCCs. Column names are derived from the vector that was
    # actually returned, so they stay in sync with extract_mfcc's n_mfcc.
    mfccs = extract_mfcc(signal, sample_rate=sample_rate)
    features = {f'mfcc_{i}': value for i, value in enumerate(mfccs, start=1)}

    # Prosodic features
    features['Energy'] = np.sum(np.abs(signal) ** 2)                # sum of squared samples
    features['RMS Energy'] = librosa.feature.rms(y=signal)[0].mean()  # mean frame-wise RMS
    features['Intensity'] = np.max(np.abs(signal))                  # peak absolute amplitude

    # NOTE(review): some librosa releases appear to return the tempo as a
    # 1-element array rather than a scalar -- coerce it so the column always
    # holds plain floats. Confirm against the installed librosa version.
    tempo = librosa.beat.beat_track(y=signal, sr=sample_rate)[0]
    features['Tempo'] = float(np.atleast_1d(tempo)[0])

    features['aggressiveness'] = emotion_label
    feature_rows.append(features)

# One row per audio file, with a fresh 0..n-1 index.
result_df = pd.DataFrame(feature_rows)

In [6]:
# Preview the first rows of the extracted feature table.
result_df.head()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,Energy,RMS Energy,Intensity,Tempo,aggressiveness
0,-529.325562,56.663666,-16.689976,10.855888,-11.516223,-2.507931,-7.963574,-9.494851,-8.698105,-0.036879,-6.527728,-0.497543,-8.345603,11.823792,0.008446,0.158349,129.199219,1
1,-524.566345,46.049992,-16.909632,8.005425,-15.289064,-5.817219,-12.548594,-13.388479,-13.296664,-3.257975,-8.200781,-7.511373,-6.971085,16.5928,0.009717,0.193986,103.359375,1
2,-578.781677,55.583351,-28.597879,14.529802,-9.815604,-9.733731,-16.221111,-8.463774,-11.442408,3.533854,-13.033244,-1.500476,-7.094681,3.107532,0.00466,0.073526,129.199219,1
3,-528.167175,66.707397,-13.227976,17.01947,-7.077435,0.111632,1.81646,-6.667839,-7.203587,-0.767913,-7.369389,-2.316086,-6.510938,20.11928,0.011225,0.160906,151.999081,1
4,-478.662994,87.138832,-18.95657,15.749694,-7.600982,-5.742515,-18.325741,-18.957066,-12.290488,0.335987,-11.2963,-1.78571,-2.543035,19.658274,0.011913,0.105626,95.703125,1


In [7]:
# Preview the last rows of the extracted feature table.
result_df.tail()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,Energy,RMS Energy,Intensity,Tempo,aggressiveness
3732,-613.702148,139.660461,20.008741,51.572868,7.680692,1.41538,-2.625942,4.672389,-5.61787,-9.063158,-3.266775,-1.783394,1.193115,2.549872,0.006316,0.056227,89.102909,0
3733,-390.29303,138.370102,13.124186,45.005394,23.323807,0.625612,-28.448282,-6.591035,0.921653,-8.413386,-4.140822,-1.116925,-2.868539,352.784302,0.06678,0.366272,198.768029,0
3734,-399.292175,133.214966,27.326626,65.400734,13.676147,1.513119,-21.796337,-8.974982,-2.538327,-2.774736,0.336699,1.082639,0.850621,271.614258,0.06047,0.291087,135.999178,0
3735,-350.537628,127.240997,13.690526,54.141895,-4.421245,-12.553501,-4.786981,-6.029202,-17.027908,-1.308019,-1.887837,-1.837568,-2.846936,303.919342,0.053687,0.515085,143.554688,0
3736,-626.564514,137.321152,31.624714,38.048393,11.922296,6.42589,-5.977657,-7.365975,-10.130214,-11.877729,-7.921663,-0.222549,2.371424,2.297165,0.004622,0.036617,215.332031,0


In [8]:
# Save the final DataFrame to a CSV file (without the index column).
# A forward slash is used as the separator because it resolves on Windows
# as well as on POSIX systems, whereas a literal backslash only works on Windows.
result_df.to_csv('Data/Extracted_Features.csv', index=False)