In [32]:
import os
import random
import sys
import glob
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from tqdm import tqdm
import plotly.offline as py

In [33]:
pd.set_option('display.max_columns', None)
py.init_notebook_mode(connected=True)

In [34]:
def load_data(path):
    dir_list = os.listdir(path) #devuelve los nombres 'audio.wav' en una lista
    dir_list.sort()
    ## DataFrame to save metadata of Ravdess audio files
    df = pd.DataFrame(columns=['path', 'source', 'actor', 'gender', 'intensity', 'statement', 'repetition', 'emotion'])
    count = 0

    ## Iterating through the directory, reading audio files, and extracting information from the file name
    for f in os.listdir(path):
        filename = f.split('.')[0].split('-')
        if(len(filename)==7):
            path = path + f
            src = int(filename[1])
            actor = int(filename[-1])
            emotion = int(filename[2])
            if int(actor)%2 == 0:
                gender = "female"
            else:
                gender = "male"

            if filename[3] == '01':
                intensity = 0
            else:
                intensity = 1

            if filename[4] == '01':
                statement = 0
            else:
                statement = 1

            if filename[5] == '01':
                repeat = 0
            else:
                repeat = 1

        df.loc[count] = [path, src, actor, gender, intensity, statement, repeat, emotion]
        count += 1

    labels = []
    for i in range(len(df)):
        if df.emotion[i] == 1:
            label = "_neutral"
        elif df.emotion[i] == 3:
            label = "_happy"
        elif df.emotion[i] == 4:
            label = "_sad"
        elif df.emotion[i] == 5:
            label = "_angry"
        else:
            label = "_none"

        # Add gender to the label 
        labels.append(df.loc[i,'gender'] + label)

    df['label'] = labels
    return df

In [35]:
#os.getcwd()
df = load_data("C:\\Users\\HP\\Music\\audios\\")

In [36]:
sss = StratifiedShuffleSplit(n_splits=2, random_state=11, test_size=0.2)
for train_index, test_index in sss.split(df, df.label):
    df_train, df_test = df.iloc[train_index,:], df.iloc[test_index,:]

In [37]:
# df_train.index = df_train.path
# df_train = df_train.drop("path", axis=1)
# df_test.index = df_test.path
# df_test = df_test.drop("path", axis=1)
df_test

Unnamed: 0,path,source,actor,gender,intensity,statement,repetition,emotion,label
272,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,11,male,1,1,1,3,male_happy
162,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,20,female,0,1,0,3,female_happy
16,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,17,male,0,0,0,1,male_neutral
504,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,3,male,0,0,1,5,male_angry
585,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,12,female,1,0,0,5,female_angry
...,...,...,...,...,...,...,...,...,...
295,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,10,female,0,0,0,4,female_sad
34,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,11,male,0,0,1,1,male_neutral
134,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,15,male,0,0,1,3,male_happy
506,C:\Users\HP\Music\audios\03-01-01-01-01-01-01....,1,5,male,0,0,1,5,male_angry


In [38]:
def get_features(df):
    data = pd.DataFrame(columns=['feature'])
    label = pd.DataFrame(columns=['label'])
    name = pd.DataFrame(columns=['name'])
    for i in tqdm(range(df.shape[0])):
        try:
            x, sample_rate = librosa.load(df.path[i])

            ## Numpy array that will store all the features
            result=np.array([])

            ## MFCCs
            mfccs=np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))

            ## Chroma
            stft=np.abs(librosa.stft(x))
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))

            ## Mel Scale
            mel=np.mean(librosa.feature.melspectrogram(x, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
            label.at[i,'label'] = df.loc[i,'label']
            data.loc[i] = [result]
            name.at[i,'name'] = df.path[i].split('/')[-1]
        except:
            pass

    final_data = pd.DataFrame(data['feature'].values.tolist())
    final_data = pd.concat([final_data,label,name], axis=1)
    return final_data

In [39]:
train_data = get_features(df_train)


PySoundFile failed. Trying audioread instead.

100%|██████████| 536/536 [00:06<00:00, 83.32it/s] 


In [40]:
test_data = get_features(df_test)

100%|██████████| 134/134 [00:00<00:00, 287.55it/s]


In [41]:
train_data.to_csv(".\\train_data_set.csv", index=False)
test_data.to_csv(".\\test_data_set.csv", index=False)

In [28]:
train_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,label,name
0,-697.984192,55.228489,0.323863,12.775377,7.396148,0.866224,-3.550276,-2.828331,-11.305533,-2.524927,0.496104,-2.722926,1.6458,-3.431964,0.140357,1.073159,-4.355672,-1.583771,-1.593009,-0.659361,-6.200733,-0.260331,-2.573981,-4.295348,-1.897931,-0.609855,-5.058815,0.446144,-2.329739,0.316533,-2.197814,-1.967064,-2.85539,-1.703427,-3.328374,-1.431488,-1.195484,-1.674807,-3.587147,-1.461006,0.750464,0.789711,0.777893,0.782118,0.739672,0.680501,0.683999,0.728767,0.755843,0.7463,0.744283,0.722983,1e-06,5.6e-05,0.003625,0.020952,0.031409,0.01769,0.003454,0.018615,0.024681,0.020463,0.021967,0.010748,0.00795,0.007304,0.015113,0.036422,0.049862,0.012948,0.009532,0.0183,0.038848,0.013682,0.002041,0.003925,0.01242,0.018457,0.002774,0.000719,0.000857,0.000819,0.000315,0.000187,0.000199,0.000303,0.00063,0.00093,0.00063,0.000433,0.000215,0.000535,0.001823,0.001103,0.001451,0.003504,0.004621,0.001577,0.00056,0.000451,0.000814,0.000644,0.0004,0.000797,0.00089,0.000626,0.001031,0.000896,0.000849,0.000519,0.000437,0.000318,0.00025,0.001067,0.000913,6.4e-05,6.2e-05,0.000131,9.7e-05,0.000133,0.00011,0.000385,0.000655,0.000572,0.000628,0.000309,0.000189,8.7e-05,9.6e-05,8.9e-05,9.6e-05,0.000146,0.000175,0.000193,0.000269,0.000363,0.000307,0.000444,0.000116,5.6e-05,1.9e-05,1.2e-05,1.5e-05,2.3e-05,3.8e-05,4.9e-05,3.9e-05,8.3e-05,7.8e-05,4.6e-05,5.9e-05,3.1e-05,2.3e-05,2.7e-05,3.7e-05,3.2e-05,2e-05,1.6e-05,1.1e-05,8e-06,6e-06,4e-06,4e-06,5e-06,5e-06,7e-06,5e-06,3e-06,2e-06,3e-06,3e-06,3e-06,2e-06,2e-06,2e-06,4e-06,6e-06,4e-06,2e-06,1.240418e-07,male_neutral,C:\Users\HP\Music\audios\03-01-01-01-01-01-01.wav
