In [1]:
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

In [2]:
file_path = 'B_Data_Frame.xlsx'
sheet_name = 'B-Ser'

df = pd.read_excel(file_path, sheet_name=sheet_name)

#df = df.iloc[1:5]


In [3]:
df

Unnamed: 0,File Path,File Name,Emotion
0,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-01-01-01-01-01.wav,4
1,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-01-01-01-02-01.wav,4
2,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-01-01-01-03-01.wav,4
3,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-01-01-02-01-01.wav,4
4,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-01-01-02-02-01.wav,4
...,...,...,...
1462,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-05-01-02-02-34.wav,7
1463,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-05-01-02-03-34.wav,7
1464,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-05-01-03-01-34.wav,7
1465,D:\Code\Project\BW\Beyond_Words3.0\dataset_ban...,03-01-05-01-03-02-34.wav,7


In [4]:
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate=rate)

def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)

def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor)



In [5]:
def extract_features(data, sample_rate):
    # ZCR
    result = np.array([])
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result=np.hstack((result, zcr)) # stacking horizontally

    # Chroma_stft
    stft = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma_stft)) # stacking horizontally

    #Chroma_cqt
    chroma_cqt = np.mean(librosa.feature.chroma_cqt(y=data, sr=sample_rate))
    result = np.hstack((result, chroma_cqt)) # stacking horizontally

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mfcc)) # stacking horizontally

    # Root Mean Square Value
    rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
    result = np.hstack((result, rms)) # stacking horizontally

    # MelSpectogram
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mel)) # stacking horizontally
    
    return result

def get_features(path):
    data, sample_rate = librosa.load(path)
    
    # without augmentation
    res1 = extract_features(data=data, sample_rate=sample_rate)
    result = np.array(res1)
    
    # data with noise
    noise_data = noise(data)
    res2 = extract_features(data=noise_data, sample_rate=sample_rate)
    result = np.vstack((result, res2)) # stacking vertically
    
    # data with stretching 
    stretch_data = stretch(data)
    res3 = extract_features(data=stretch_data, sample_rate=sample_rate)
    result = np.vstack((result, res3)) # stacking vertically

    #data with shift
    shift_data =  shift(data)
    res4 = extract_features(data=shift_data, sample_rate=sample_rate)
    result = np.vstack((result, res4)) # stacking vertically

    #data with pitch
    pitch_data = pitch(data, sampling_rate=sample_rate)
    res5 = extract_features(data=pitch_data, sample_rate=sample_rate)
    np.vstack((result, res5)) # stacking vertically
    
    return result

In [6]:
from tqdm import tqdm
extracted_feature, emotion = [], []

import time
start_time = time.time()
for i, j in tqdm( zip(df['File Path'], df['Emotion'])):
    feature = get_features(i)
    for k in feature:
        extracted_feature.append(k)
        emotion.append(j)

1467it [20:03,  1.22it/s]


In [7]:
len(extracted_feature[0])

163

In [8]:
end_time = time.time()

execution_time = end_time - start_time
print(f"Execution Time: {execution_time} seconds")

Execution Time: 1203.8066682815552 seconds


In [9]:
extracted_feature

[array([ 1.81929467e-01,  5.97161710e-01,  5.84451973e-01,  6.63637280e-01,
         7.38466740e-01,  6.84250534e-01,  5.92273712e-01,  6.05217338e-01,
         6.11733735e-01,  7.13361204e-01,  6.67309105e-01,  6.07958674e-01,
         6.17211342e-01,  5.01041830e-01, -5.09084290e+02,  3.85184555e+01,
        -2.52129817e+00,  1.40845025e+00,  1.10969276e+01, -6.42604494e+00,
        -1.16085577e+01, -8.25067997e+00, -8.73206425e+00, -6.36668873e+00,
        -2.96399522e+00, -9.46931839e+00, -6.37726307e+00, -3.69154334e+00,
        -2.59065700e+00,  3.52499038e-01, -6.59188604e+00, -3.67755949e-01,
        -6.18988705e+00, -2.22454286e+00,  2.21263152e-02,  7.09228379e-06,
         2.90403823e-05,  8.98791128e-04,  6.11701198e-02,  3.65081459e-01,
         2.69366235e-01,  4.42066461e-01,  7.24815011e-01,  2.30968669e-01,
         4.51332301e-01,  7.82239318e-01,  5.54717600e-01,  8.00691307e-01,
         1.99947047e+00,  3.86300611e+00,  6.12721539e+00,  7.13093424e+00,
         3.2

In [10]:
extracted_feature[0]

array([ 1.81929467e-01,  5.97161710e-01,  5.84451973e-01,  6.63637280e-01,
        7.38466740e-01,  6.84250534e-01,  5.92273712e-01,  6.05217338e-01,
        6.11733735e-01,  7.13361204e-01,  6.67309105e-01,  6.07958674e-01,
        6.17211342e-01,  5.01041830e-01, -5.09084290e+02,  3.85184555e+01,
       -2.52129817e+00,  1.40845025e+00,  1.10969276e+01, -6.42604494e+00,
       -1.16085577e+01, -8.25067997e+00, -8.73206425e+00, -6.36668873e+00,
       -2.96399522e+00, -9.46931839e+00, -6.37726307e+00, -3.69154334e+00,
       -2.59065700e+00,  3.52499038e-01, -6.59188604e+00, -3.67755949e-01,
       -6.18988705e+00, -2.22454286e+00,  2.21263152e-02,  7.09228379e-06,
        2.90403823e-05,  8.98791128e-04,  6.11701198e-02,  3.65081459e-01,
        2.69366235e-01,  4.42066461e-01,  7.24815011e-01,  2.30968669e-01,
        4.51332301e-01,  7.82239318e-01,  5.54717600e-01,  8.00691307e-01,
        1.99947047e+00,  3.86300611e+00,  6.12721539e+00,  7.13093424e+00,
        3.24963379e+00,  

In [11]:
extracted_feature[0][0]

0.18192946742957747

In [12]:
type(extracted_feature[0][0])

numpy.float64

In [13]:
len(extracted_feature)

5868

In [14]:
emotion

[4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,


In [15]:
len(emotion)

5868

In [16]:
#df['Features'] = extracted_feature
feature_set = pd.DataFrame(extracted_feature)
feature_set['Emotion_Label'] = emotion
feature_set

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,154,155,156,157,158,159,160,161,162,Emotion_Label
0,0.181929,0.597162,0.584452,0.663637,0.738467,0.684251,0.592274,0.605217,0.611734,0.713361,...,0.001424,0.000824,2.245705e-03,4.730351e-03,7.843456e-03,6.678044e-03,2.589605e-03,1.258858e-03,3.716425e-05,4
1,0.373793,0.752836,0.767736,0.811567,0.837361,0.801382,0.736421,0.722412,0.721140,0.749309,...,0.002378,0.001764,3.191589e-03,5.683203e-03,8.739583e-03,7.548534e-03,3.495810e-03,2.288241e-03,1.007357e-03,4
2,0.179078,0.581407,0.576625,0.674005,0.743081,0.647708,0.559811,0.605573,0.612860,0.715582,...,0.000581,0.000415,8.819485e-04,1.978945e-03,3.881180e-03,3.024464e-03,9.673847e-04,6.371708e-04,1.669109e-05,4
3,0.181940,0.609392,0.576482,0.649354,0.732143,0.703664,0.598831,0.607627,0.609744,0.704340,...,0.001424,0.000824,2.245646e-03,4.730394e-03,7.843437e-03,6.678062e-03,2.589596e-03,1.258859e-03,3.716428e-05,4
4,0.430646,0.690136,0.681142,0.712062,0.746599,0.718833,0.536348,0.523392,0.554430,0.522560,...,0.006053,0.014728,2.603143e-02,2.871043e-02,8.294137e-03,7.164728e-03,2.395335e-03,6.251118e-04,1.206434e-05,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5863,0.050528,0.671133,0.653239,0.666003,0.645347,0.701951,0.763060,0.787043,0.806889,0.784755,...,0.000024,0.000003,7.980836e-07,9.887120e-07,8.714226e-07,7.584952e-07,8.125273e-07,4.392478e-07,3.485117e-08,7
5864,0.152854,0.693109,0.692830,0.696784,0.694129,0.656598,0.631821,0.698296,0.724781,0.722576,...,0.000081,0.000035,4.049098e-05,8.601131e-06,3.937939e-06,2.715689e-06,3.084966e-06,2.419554e-06,3.091766e-08,7
5865,0.344638,0.736154,0.749546,0.767200,0.778888,0.770688,0.709407,0.764440,0.763983,0.760305,...,0.000425,0.000380,4.006812e-04,3.504132e-04,3.588180e-04,3.639401e-04,3.567266e-04,3.588423e-04,3.368800e-04,7
5866,0.156242,0.676020,0.659184,0.661553,0.672642,0.637998,0.627909,0.673449,0.698129,0.690269,...,0.000034,0.000014,1.760217e-05,4.374423e-06,1.535481e-06,1.221578e-06,1.183883e-06,1.036524e-06,1.610952e-08,7


In [17]:
feature_set.to_csv('B-Ser.csv', index=False)