In [1]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import librosa
import opensmile

In [2]:
def process_audio(audio, sra, feat, database):
    """Extract eGeMAPSv02 features from a single audio signal with openSMILE.

    The signal is optionally resampled (depending on ``database``), normalized
    with ``librosa.util.normalize`` (default settings) and then passed to
    ``opensmile.Smile.process_signal``.

    Parameters
    ----------
    audio : array-like
        Audio samples, as returned by ``librosa.load``.
    sra : int or float
        Sampling rate of ``audio`` in Hz.
    feat : str
        ``'long term'`` selects the Functionals feature level,
        ``'short term'`` selects the LowLevelDescriptors feature level.
    database : str
        ``'vosome'`` (signal is resampled to 44100 Hz), ``'ravdess'`` or
        ``'iemocap'`` (signal keeps its original sampling rate).

    Returns
    -------
    The object returned by ``smile.process_signal`` (callers in this notebook
    treat it as a pandas DataFrame).

    Raises
    ------
    ValueError
        If ``feat`` or ``database`` is not one of the supported values.
    """
    # CHANGE CONFIG OPTIONS
    # path: C:\Users\lugoza\AppData\Local\Continuum\anaconda3\envs\Helios\Lib\site-packages\opensmile\core\config\shared
    # file: FrameModeFunctionals.conf.inc
    # path: C:\Users\lugoza\AppData\Local\Continuum\anaconda3\envs\Helios\Lib\site-packages\opensmile\core\config\egemaps\v02
    # features with functionals frameMode=full
    # default (short-term): frameMode = full, frameSize = 0, frameStep = 0
    # mid-term: frameMode = fixed, frameSize = 5, frameStep = 3
    # details here: https://audeering.github.io/opensmile/get-started.html (search frameModeFunctionalsConf)

    # Validate the selectors before doing any work, so a typo fails with a
    # clear message instead of an unbound-variable error further down.
    if database not in ('vosome', 'ravdess', 'iemocap'):
        raise ValueError(
            "Unknown database %r; expected 'vosome', 'ravdess' or 'iemocap'" % (database,)
        )
    if feat not in ('long term', 'short term'):
        raise ValueError(
            "Unknown feat %r; expected 'long term' or 'short term'" % (feat,)
        )

    # Resample audio
    if database == 'vosome':
        fm = 44100
        # Sampling rates are passed by keyword: recent librosa releases no
        # longer accept them positionally.
        audio = librosa.resample(audio, orig_sr=sra, target_sr=fm)
    else:
        # 'ravdess' and 'iemocap' are processed at their native rate.
        fm = sra

    # Normalize audio
    audio = librosa.util.normalize(audio)

    # Feature extraction
    if feat == 'long term':
        feature_level = opensmile.FeatureLevel.Functionals
    else:
        feature_level = opensmile.FeatureLevel.LowLevelDescriptors
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=feature_level,
    )
    df_features = smile.process_signal(audio, fm)

    return df_features

## Long-term features

### EMOVOME

In [49]:
# Long-term (functionals) eGeMAPS features for every file in the VOSOME audio
# folder, exported as one Excel table with one row per file.
# path = "F:/Proyectos/1-HELIOS/0-Estudio-emociones-voz/0-Datos/Audios/BBBDD-Estudio completo/Completo-1000"
path = 'data/VOSOME/audios'
feat = 'long term'
database = 'vosome'
output_file = 'data/VOSOME/features/eGeMAPS_functionals.xlsx'

# Sorted so the row order of the exported table is reproducible
# (os.listdir returns entries in arbitrary order).
audio_files = sorted(os.listdir(path))
if not audio_files:
    # Fail loudly instead of reusing a df_features left over from another cell.
    raise FileNotFoundError('No audio files found in ' + repr(path))
# Create the output folder up front so a missing directory is not discovered
# only after the whole extraction has finished.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

print('[INFO] Computing long term features...')
tic = time.time()
names = []
feature_frames = []
for audio in audio_files:
    # The part of the file name before the first '.' must be an integer; it
    # becomes the row id.
    names.append(int(audio.split('.')[0]))
    signal_audio, sra = librosa.load(os.path.join(path, audio), sr=None)
    feature_frames.append(process_audio(signal_audio, sra, feat, database))
# Concatenate once at the end: growing the frame inside the loop copies all
# previous rows on every iteration.
df_features = pd.concat(feature_frames, axis=0, ignore_index=True)
df_features.insert(0,'id',names)
df_features.to_excel(output_file,index=False)
toc = time.time()
print('Duration:',round((toc-tic)/60,2),'min')

[INFO] Computing long term features...
Duration: 19.99 min


### RAVDESS

In [15]:
# Collect the path of every WAV file found one level below the RAVDESS audio
# root (one sub-folder per entry of the root).
data_path = 'data/RAVDESS/audios/'
files = []
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# resulting list (and anything derived from it) reproducible.
for actor in sorted(os.listdir(data_path)):
    actor_dir = data_path + actor + '/'
    # Skip stray files in the dataset root; listing them as folders would fail.
    if not os.path.isdir(actor_dir):
        continue
    for audio_file in sorted(os.listdir(actor_dir)):
        # Keep only WAV files so non-audio entries never reach the loader.
        if audio_file.lower().endswith('.wav'):
            # Paths keep '/' separators so the file name is always the last
            # '/'-separated component.
            files.append(actor_dir + audio_file)

In [16]:
# Long-term (functionals) eGeMAPS features for every path in `files`,
# exported as one Excel table with one row per file.
path = "data/RAVDESS/audios"  # not used below; the loop iterates over `files`
feat = 'long term'
database = 'ravdess'
output_file = 'data/RAVDESS/features/eGeMAPS_functionals.xlsx'

if not files:
    # Fail loudly instead of reusing a df_features left over from another cell.
    raise FileNotFoundError('No audio files listed in `files`')
# Create the output folder up front so a missing directory is not discovered
# only after the whole extraction has finished.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

print('[INFO] Computing long term features...')
tic = time.time()
names = []
feature_frames = []
for audio in files:
    # Row id = file name without directory and without anything after the
    # first '.'.
    names.append(audio.split('/')[-1].split('.')[0])
    signal_audio, sra = librosa.load(audio, sr=None)
    feature_frames.append(process_audio(signal_audio, sra, feat, database))
# Concatenate once at the end: growing the frame inside the loop copies all
# previous rows on every iteration.
df_features = pd.concat(feature_frames, axis=0, ignore_index=True)
df_features.insert(0,'id',names)
df_features.to_excel(output_file,index=False)
toc = time.time()
print('Duration:',round((toc-tic)/60,2),'min')

[INFO] Computing long term features...
Duration: 3.45 min


### IEMOCAP

In [13]:
# Long-term (functionals) eGeMAPS features for every WAV file under
# <path>/Session*/sentences/wav/<folder>/, exported as one Excel table with
# one row per file.
path = 'D:/lugoza/IEMOCAP/'
feat = 'long term'
database = 'iemocap'
output_file = 'data/IEMOCAP/features/eGeMAPS_functionals.xlsx'

# Create the output folder up front so a missing directory is not discovered
# only after the whole extraction has finished.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

print('[INFO] Computing long term features...')
tic = time.time()
names = []
feature_frames = []
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# row order of the exported table reproducible.
for session in sorted(folder for folder in os.listdir(path) if folder.startswith('Session')):
    print('- Session: ',session[-1])
    wav_root = path + session +'/sentences/wav/'
    for improv in sorted(os.listdir(wav_root)):
        improv_dir = wav_root + improv + '/'
        # Skip stray files next to the recording folders; listing them as
        # folders would fail.
        if not os.path.isdir(improv_dir):
            continue
        for audio in sorted(f for f in os.listdir(improv_dir) if f.endswith('.wav')):
            path_file = improv_dir + audio
            names.append(audio.split('.')[0])
            signal_audio, sra = librosa.load(path_file, sr=None)
            # Report any file whose sampling rate is not 16000 Hz.
            if sra != 16000:
                print(sra)
            feature_frames.append(process_audio(signal_audio, sra, feat, database))
if not feature_frames:
    # Fail loudly instead of reusing a df_features left over from another cell.
    raise FileNotFoundError('No .wav files found under ' + repr(path))
# Concatenate once at the end: growing the frame inside the loop copies all
# previous rows on every iteration. ignore_index gives a clean 0..n-1 index.
df_features = pd.concat(feature_frames, axis=0, ignore_index=True)
df_features.insert(0,'id',names)
df_features.to_excel(output_file,index=False)
toc = time.time()
print('Duration:',round((toc-tic)/60,2),'min')

[INFO] Computing long term features...
- Session:  1
- Session:  2
- Session:  3
- Session:  4
- Session:  5
Duration: 45.22 min
