Import of libraries

In [8]:
from __future__ import print_function
# We'll need numpy for some mathematical operations
import numpy as np

# matplotlib for displaying the output
import matplotlib.pyplot as plt
%matplotlib inline

# and IPython.display for audio output
import IPython.display

# Librosa for audio
import librosa
# And the display module for visualization
import librosa.display

import parselmouth

from parselmouth.praat import call
from sklearn.decomposition import PCA
import statistics
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

Descriptive statistics for RAVDESS dataset

In [11]:
import pandas as pd
import os
parent_dir = "ravdess" #folder wich contain RAVDESS audio
def prepare_datadf(parent_dir): # a function whose parameter is the audio folder
    df = pd.DataFrame(columns = ['audio', 'emotion']) #dataframe columns
    
    for  fichier_audio in os.listdir(parent_dir): # for each element in the audio folder
        folder_path = os.path.join(parent_dir, fichier_audio) # path of each item  in the audio folder
        
       
        
        if(os.path.isdir(folder_path)): 
            audios = os.listdir(folder_path) #content of each emotional file
            for i in audios:
                if i.endswith('outNoise.wav'):
                    df = df.append(pd.DataFrame({'audio':[os.path.join(fichier_audio, i)], 'emotion':[fichier_audio]}), 
                           ignore_index=True) # here at df defined, with the columns we add the values:
                                            #the audio column will take the audios_path, 
                                            #and the emotion column will take the corresponding emotion, ie the name of the folder
    # Shuffling for randomness
    df = df.sample(frac=1.0).reset_index(drop=True)
    return df
datadf = prepare_datadf(parent_dir) #function call
display(datadf.head()) #dataframe display


array=datadf.values
audios=array[:,0]
emotions=array[:,1]
print(len(audios))

Unnamed: 0,audio,emotion
0,surprised/03-01-08-01-02-02-03_norm_outNoise.wav,surprised
1,fearfull/03-01-06-02-02-01-09_norm_outNoise.wav,fearfull
2,happy/03-01-03-01-01-01-01_norm_outNoise.wav,happy
3,sad/03-01-04-02-02-01-22_norm_outNoise.wav,sad
4,sad/03-01-04-01-01-02-15_norm_outNoise.wav,sad


1245


Table of features:

In [12]:
folder = parent_dir
def tableau(folder):
    daf = pd.DataFrame(columns = ['audio', 'pitch', 'duration', 'energy', 'zcr', 'mfccs', 'lpc', 'HNR',
                                  'localJitter', 'localabsoluteJitter',  'localShimmer', 'localdbShimmer',
                                  'f1_mean', 'f2_mean', 'f3_mean', 'f4_mean', 'rapJitter', 'ppq5Jitter', 'ddpJitter',
                                  'apq3Shimmer', 'aqpq5Shimmer', 'apq11Shimmer', 'ddaShimmer'])
    for audio_file in array[:,0]:
        if audio_file.endswith('.wav'):
            
            #features = extract_features(folder+'/'+audio_file)
            #all_features.append(features)
            y,sr = librosa.load(folder+'/'+ audio_file)
            
            zcr = librosa.feature.zero_crossing_rate(y)
            mean_zcr = np.mean(zcr)
            
            ############################coefficients cepstraux de merl##################################
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            
            mean_mfccs = np.mean(mfccs)
            
            duration = librosa.get_duration(y=y, sr=sr)
            energy = np.sum(y ** 2) / np.float64(len(y))
            
            lpc = librosa.core.lpc(y,16)
            mean_lpc = np.mean(lpc)
            
            sound = parselmouth.Sound(folder+'/'+ audio_file)
            pitch = call(sound, "To Pitch", 0.0, 75, 600)
            mean_pitch = call(pitch, "Get mean", 0, 0, "Hertz")
            
            
            harmonicity = call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
            HNR = call(harmonicity, "Get mean", 0, 0)
            
            pointProcess = call(sound, "To PointProcess (periodic, cc)", 75, 500)
            localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
            localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)

            localShimmer =  call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
            localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
                
            formants = call(sound, "To Formant (burg)", 0.0, 5, 5500, 0.025, 100)
            numPoints = call(pointProcess, "Get number of points")

            f1_list = []
            f2_list = []
            f3_list = []
            f4_list = []
    
            # Measure formants only at glottal pulses
            for point in range(0, numPoints):
                point += 1
                t = call(pointProcess, "Get time from index", point)
                f1 = call(formants, "Get value at time", 1, t, 'Hertz', 'Linear')
                f2 = call(formants, "Get value at time", 2, t, 'Hertz', 'Linear')
                f3 = call(formants, "Get value at time", 3, t, 'Hertz', 'Linear')
                f4 = call(formants, "Get value at time", 4, t, 'Hertz', 'Linear')
                f1_list.append(f1)
                f2_list.append(f2)
                f3_list.append(f3)
                f4_list.append(f4)
    
            f1_list = [f1 for f1 in f1_list if str(f1) != 'nan']
            f2_list = [f2 for f2 in f2_list if str(f2) != 'nan']
            f3_list = [f3 for f3 in f3_list if str(f3) != 'nan']
            f4_list = [f4 for f4 in f4_list if str(f4) != 'nan']

            f1_mean = statistics.mean(f1_list)
            f2_mean = statistics.mean(f2_list)
            f3_mean = statistics.mean(f3_list)
            f4_mean = statistics.mean(f4_list)

            rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
            ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
            ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)   
            
            apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
            aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
            apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
            ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    
            
            
        
        daf = daf.append(pd.DataFrame({'audio':[audio_file], 'pitch':[ mean_pitch], 
                                       'duration':[duration], 'energy' : [energy], 'zcr':[mean_zcr], 'mfccs':[mean_mfccs]
                                      , 'lpc' : [mean_lpc], 'HNR':[HNR], 
                                       'localJitter':[localJitter], 
                                       'localabsoluteJitter':[localabsoluteJitter],
                                       ' localShimmer':[ localShimmer], 
                                       'localdbShimmer ':[localdbShimmer], 'f1_mean': [f1_mean],
                                       'f2_mean': [f2_mean],  'f3_mean': [f3_mean],  'f4_mean': [f4_mean], 
                                       'rapJitter': [rapJitter], 'ppq5Jitter': [ppq5Jitter], 'ddpJitter': [ddpJitter],
                                      'apq3Shimmer': [apq3Shimmer], 'aqpq5Shimmer': [aqpq5Shimmer], 
                                       'apq11Shimmer': [apq11Shimmer], 'ddaShimmer': [ddaShimmer]}), 
                           ignore_index=True)
            
    daf = daf.sample(frac=1.0).reset_index(drop=True)
    return daf
datatab = tableau(folder) #function call
display(datatab.head()) #dataframe display

Unnamed: 0,audio,pitch,duration,energy,zcr,mfccs,lpc,HNR,localJitter,localabsoluteJitter,localShimmer,localdbShimmer,f1_mean,f2_mean,f3_mean,f4_mean,rapJitter,ppq5Jitter,ddpJitter,apq3Shimmer,aqpq5Shimmer,apq11Shimmer,ddaShimmer,localShimmer.1,localdbShimmer.1
0,happy/03-01-03-01-01-01-05_norm_outNoise.wav,312.884995,1.810023,0.00431,0.135473,-32.590759,0.012534,8.8602,0.026093,8.4e-05,,,764.739682,1430.784478,2811.970884,3884.923028,0.010365,0.013553,0.031094,0.04218,0.070405,0.147482,0.12654,0.130082,1.24558
1,angry/03-01-05-01-01-02-12_norm_outNoise.wav,421.676706,1.8,0.009174,0.2344,-29.843567,0.018213,9.646011,0.019639,5.2e-05,,,905.935889,1529.973238,2894.39221,3878.988298,0.008719,0.011385,0.026158,0.032729,0.056485,0.096604,0.098188,0.099149,1.07177
2,fearfull/03-01-06-02-02-01-08_norm_outNoise.wav,456.014742,1.750023,0.012485,0.154425,-30.855129,0.042919,12.726632,0.022635,5.6e-05,,,947.236633,1845.41748,2854.216811,3894.362678,0.008979,0.010229,0.026937,0.046715,0.079627,0.148138,0.140145,0.117336,1.122364
3,sad/03-01-04-02-02-02-11_norm_outNoise.wav,407.720607,1.510023,0.007935,0.166622,-31.849775,0.012379,8.834389,0.025657,6.7e-05,,,734.005782,1368.903973,2828.733649,3614.887371,0.009147,0.012589,0.027442,0.051377,0.082659,0.146088,0.154132,0.143107,1.160057
4,neutral/03-01-01-01-02-01-19_norm_outNoise.wav,264.3199,1.510023,0.019521,0.118001,-27.608801,0.012139,10.03586,0.022129,8.4e-05,,,800.221933,1210.292109,2843.735328,3470.265974,0.009831,0.011373,0.029493,0.03561,0.058161,0.083126,0.10683,0.100567,0.935237


Statistics

In [13]:


datatab.describe()



Unnamed: 0,pitch,duration,energy,zcr,mfccs,lpc,HNR,localJitter,localabsoluteJitter,f1_mean,f2_mean,f3_mean,f4_mean,rapJitter,ppq5Jitter,ddpJitter,apq3Shimmer,aqpq5Shimmer,apq11Shimmer,ddaShimmer,localShimmer,localdbShimmer
count,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0,1245.0
mean,396.134219,1.668629,0.010578,0.212129,-32.849146,0.028529,8.064108,0.026656,8.1e-05,876.279565,1567.838498,2907.739835,3887.755256,0.011784,0.01461,0.035351,0.051839,0.08058,0.140275,0.155517,0.13236,1.243623
std,79.842374,0.153923,0.004407,0.068399,4.26004,0.032158,2.35506,0.006581,2.9e-05,118.77931,187.471757,119.488063,150.281238,0.003728,0.004363,0.011184,0.012298,0.018278,0.043556,0.036895,0.021996,0.162034
min,169.498332,1.34,0.001651,0.052841,-42.839535,0.000508,1.600499,0.011455,2.5e-05,572.660676,1147.873846,2467.416287,3470.265974,0.003954,0.004554,0.011862,0.020258,0.031929,0.024427,0.060774,0.070948,0.758375
25%,339.088207,1.570023,0.007353,0.154948,-35.897427,0.013012,6.365071,0.022,6.2e-05,802.253562,1435.268719,2832.708847,3790.730061,0.009162,0.011578,0.027485,0.043656,0.068313,0.111723,0.130967,0.117299,1.12705
50%,421.326223,1.650023,0.009742,0.21439,-33.484226,0.019046,7.988277,0.026041,7.6e-05,864.589528,1542.040375,2909.015398,3888.290996,0.011364,0.014148,0.034092,0.051034,0.07951,0.133579,0.153102,0.132169,1.243422
75%,455.987887,1.750023,0.013338,0.264804,-30.188225,0.030821,9.666191,0.030344,9.3e-05,931.376332,1675.471146,2984.416065,3978.751344,0.013745,0.016923,0.041235,0.059272,0.09171,0.162461,0.177816,0.146595,1.351834
max,548.059431,2.4,0.027607,0.390638,-15.22758,0.294416,15.454258,0.063272,0.000277,1606.01582,2311.022117,3343.201503,4577.867169,0.039255,0.048262,0.117764,0.118027,0.158641,0.499732,0.354082,0.207887,1.764108
