### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import librosa

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the table of audio file paths with their aggressiveness and gender labels.
# A forward slash is used instead of '\\' so the relative path resolves on
# Windows, Linux and macOS alike (a backslash is not a separator on POSIX).
data = pd.read_csv('Data/Audio_Paths.csv')
data.head()

Unnamed: 0,path,aggressiveness,gender
0,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
1,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
2,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
3,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0
4,Audio Dataset/Female/Aggressive\03-01-05-01-01...,1,0


In [3]:
# Dimensions of the loaded path/label table as (rows, columns)
data.shape

(3737, 3)

In [4]:
# data augmentation
def noise(data):
    """Return a copy of ``data`` with additive Gaussian noise.

    The noise amplitude is a random fraction (uniform in [0, 0.035)) of the
    largest sample value in ``data``. The input array is not modified.

    Parameters
    ----------
    data : np.ndarray
        Audio samples. Any shape is accepted; noise is drawn independently
        for every element.

    Returns
    -------
    np.ndarray
        Array of the same shape as ``data`` with noise added.
    """
    amplitude = 0.035 * np.random.uniform() * np.amax(data)
    # Draw noise with the full shape of the input (not just its first axis) so
    # multi-channel arrays work too; for 1-D input the draws are unchanged.
    return data + amplitude * np.random.normal(size=data.shape)

def shift(data):
    """Circularly shift a signal by a random number of samples.

    The offset is drawn uniformly from [-5, 5), scaled by 1000 and truncated
    to an integer. Samples pushed past one end wrap around to the other,
    because the shift is done with ``np.roll``.
    """
    offset = int(1000 * np.random.uniform(low=-5, high=5))
    rolled = np.roll(data, offset)
    return rolled

def pitch(data, sampling_rate, n_steps=0.01):
    """Pitch-shift ``data`` using ``librosa.effects.pitch_shift``.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to transform.
    sampling_rate : int
        Sampling rate of ``data``, forwarded as ``sr``.
    n_steps : float, optional
        Number of (possibly fractional) steps to shift by, forwarded as-is.
        NOTE(review): the default of 0.01 looks like a very small shift -
        confirm it is intentional.

    Returns
    -------
    The value returned by ``librosa.effects.pitch_shift``.
    """
    shifted = librosa.effects.pitch_shift(
        y=data,
        sr=sampling_rate,
        n_steps=n_steps,
    )
    return shifted

def stretch(data, rate=0.9):
    """Time-stretch ``data`` using ``librosa.effects.time_stretch``.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to transform.
    rate : float, optional
        Stretch factor forwarded to librosa (default 0.9). Per the librosa
        documentation, a rate below 1 slows the signal down.

    Returns
    -------
    The value returned by ``librosa.effects.time_stretch``; its length will
    generally differ from that of ``data``.
    """
    stretched = librosa.effects.time_stretch(y=data, rate=rate)
    return stretched

In [5]:
from IPython.display import Audio

In [6]:
# Sample clip used to audition each augmentation below
path = 'denoised__agg_audio.wav'
# librosa.load returns the decoded samples and the sampling rate they are at
audio_data, sampling_rate = librosa.load(path)
# Play the untouched file straight from disk as the reference
Audio(path)

In [7]:
# Add random Gaussian noise to the sample clip and play the result
noise_data = noise(audio_data)
Audio(noise_data, rate=sampling_rate)

In [8]:
# Circularly shift the sample clip by a random offset and play the result
shift_data = shift(audio_data)
Audio(shift_data, rate=sampling_rate)

In [9]:
# Pitch-shift the sample clip (default n_steps) and play the result.
# Use the rate returned by librosa.load rather than a hard-coded 22050 so the
# transform and the playback always agree on the sampling rate.
pitch_data = pitch(audio_data, sampling_rate)
Audio(pitch_data, rate=sampling_rate)

In [10]:
# Time-stretch the sample clip with stretch()'s default rate and play the result
stretch_data = stretch(audio_data)
Audio(stretch_data, rate=sampling_rate)

In [11]:
def extract_mfcc(signal, sample_rate=22050, n_mfcc=13, n_fft=2048, hop_length=512):
    """Summarise a signal as one time-averaged MFCC vector.

    Parameters
    ----------
    signal : np.ndarray
        Audio samples.
    sample_rate : int, optional
        Sampling rate of ``signal``, forwarded to librosa as ``sr``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute.
    n_fft : int, optional
        FFT window size forwarded to librosa.
    hop_length : int, optional
        Hop length between frames forwarded to librosa.

    Returns
    -------
    np.ndarray
        Mean of the transposed MFCC matrix along its first axis, i.e. one
        value per coefficient (assuming librosa returns coefficients x frames
        for a mono signal).
    """
    coefficients = librosa.feature.mfcc(
        y=signal,
        sr=sample_rate,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    per_frame = coefficients.T
    return per_frame.mean(axis=0)

In [12]:
# Build the feature table: for every clip, extract features from the original
# signal and from four augmented variants (noise, shift, pitch, stretch).
SAMPLE_RATE = 22050   # rate every clip is loaded at; reused for all sr-dependent calls
N_MFCC = 13           # number of MFCC columns written per record
RANDOM_SEED = 42

# noise() and shift() draw from NumPy's global RNG; seed it so re-running this
# cell produces the same augmented features (and the same saved CSV).
np.random.seed(RANDOM_SEED)

mfcc_columns = [f'mfcc_{k + 1}' for k in range(N_MFCC)]
feature_columns = [*mfcc_columns, 'Energy', 'RMS Energy', 'Intensity', 'Tempo', 'aggressiveness']

# Collect plain dicts and build the DataFrame once at the end, instead of
# creating and concatenating one single-row DataFrame per record.
records = []

# Loop through each row in the 'data' DataFrame
for _, row in data.iterrows():
    # The stored paths mix '/' and '\\' separators; forward slashes are
    # accepted on every OS, so normalise before loading.
    audio_path = row['path'].replace('\\', '/')
    emotion_label = row['aggressiveness']

    # Load 3 s of audio starting 0.5 s in, at SAMPLE_RATE
    signal, _sr = librosa.load(audio_path, duration=3, offset=0.5, res_type='kaiser_fast', sr=SAMPLE_RATE)

    # Data augmentation (noise before shift keeps the RNG draw order stable)
    variants = [
        signal,
        noise(signal),
        shift(signal),
        pitch(signal, SAMPLE_RATE),
        stretch(signal),
    ]

    for audio in variants:
        # Time-averaged MFCCs
        mfccs = extract_mfcc(audio, sample_rate=SAMPLE_RATE, n_mfcc=N_MFCC)
        record = dict(zip(mfcc_columns, mfccs))

        # Prosodic features
        record['Energy'] = np.sum(np.abs(audio) ** 2)
        record['RMS Energy'] = librosa.feature.rms(y=audio)[0].mean()
        record['Intensity'] = np.max(np.abs(audio))
        tempo = librosa.beat.beat_track(y=audio, sr=SAMPLE_RATE)[0]
        # Some librosa versions return tempo as a 1-element array rather than
        # a scalar; coerce so the column always holds plain floats.
        record['Tempo'] = float(np.atleast_1d(tempo)[0])
        record['aggressiveness'] = emotion_label

        records.append(record)

# One DataFrame from all records; explicit columns pin the schema and order
result_df = pd.DataFrame(records, columns=feature_columns)

# Every input clip must contribute the original plus four augmented rows
assert len(result_df) == 5 * len(data), "unexpected number of feature rows"

In [13]:
# Preview the first rows of the extracted feature table
result_df.head()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,Energy,RMS Energy,Intensity,Tempo,aggressiveness
0,-529.325562,56.663666,-16.689976,10.855888,-11.516223,-2.507931,-7.963574,-9.494851,-8.698105,-0.036879,-6.527728,-0.497543,-8.345603,11.823792,0.008446,0.158349,129.199219,1
1,-291.862355,13.692,0.721914,0.840946,-2.631608,-3.273398,-3.800516,-5.06351,-4.178277,-1.223973,-1.139944,-2.740711,-1.950979,13.449061,0.011196,0.161483,123.046875,1
2,-530.501648,57.16465,-16.692572,10.879096,-11.512814,-2.454381,-8.066333,-9.450629,-8.646166,0.025993,-6.529346,-0.396756,-8.387289,11.823791,0.008424,0.158349,129.199219,1
3,-541.681519,57.992699,-17.564571,11.296426,-11.792108,-2.114906,-8.049675,-9.406855,-8.593161,0.401956,-6.543359,-0.257982,-8.379411,8.426271,0.007133,0.157719,123.046875,1
4,-562.736572,57.957283,-18.055918,12.096211,-13.936737,-2.030675,-9.9125,-9.270649,-9.474069,0.901388,-6.269834,0.47265,-8.715702,5.479378,0.005587,0.093388,112.347147,1


In [14]:
# Preview the last rows of the extracted feature table
result_df.tail()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,Energy,RMS Energy,Intensity,Tempo,aggressiveness
18680,-626.564514,137.321152,31.624714,38.048393,11.922296,6.42589,-5.977657,-7.365975,-10.130214,-11.877729,-7.921663,-0.222549,2.371424,2.297165,0.004622,0.036617,215.332031,0
18681,-434.613305,39.089725,26.419656,17.740579,8.198065,3.774308,-2.462914,-6.844988,-6.805207,-7.007169,-5.219066,-2.211095,-1.189206,2.36434,0.004794,0.036995,103.359375,0
18682,-624.590515,137.568665,31.128231,38.535488,11.72743,6.689298,-6.101525,-7.44786,-10.076317,-11.998583,-8.065612,-0.314167,2.396529,2.297165,0.00463,0.036617,89.102909,0
18683,-642.973816,139.158493,30.014904,38.826839,9.479768,6.076139,-8.152767,-9.016376,-10.832283,-13.136108,-8.191937,0.664295,2.437212,1.633471,0.003907,0.032603,73.828125,0
18684,-647.0224,139.629807,30.507399,37.194473,8.938483,6.816449,-7.243575,-9.673912,-10.650782,-12.975872,-8.053271,1.128771,2.852309,1.092219,0.003172,0.02482,67.999589,0


In [15]:
# Dimensions of the feature table as (rows, columns)
result_df.shape

(18685, 18)

In [16]:
# Save the final DataFrame to a CSV file (without the index column).
# A forward slash is used instead of '\\' so the relative path resolves on
# Windows, Linux and macOS alike (a backslash is not a separator on POSIX).
result_df.to_csv('Data/Extracted_Features(Augmented).csv', index=False)