# Solution Design - Feature Extraction

For this task, the following features are extracted from the dataset
> Mel (128) Features

> MFCC (40) Features

> Chroma(12) Features

## Environment Creation
(Kindly ignore if already performed)
* The code is designed to run in Intel DevCloud Jupyter Notebook environment which comes with predefined conda environments



In [6]:
# Cloning the existing Intel AI Analytics Toolkit Tensorflow environment
! conda create --name ser --clone tensorflow

# Activating the new virtual environment
! source activate ser

# Installing Python's Audio Processing Library LibROSA

# Upgrading NumPy to latest version to avoid conflicts
! pip install --user --upgrade numpy

# Installing LibROSA and checking version
! pip install --user librosa --force-reinstall
! python -c 'import librosa; print(librosa.__version__)'

# Installing the package plotly
! pip install --user plotly

# Creating a IPython kernel using the new conda environment
! python -m ipykernel install --user --name=ser

Source:      /glob/development-tools/versions/oneapi/2023.0/oneapi/intelpython/latest/envs/tensorflow
Destination: /home/u94139/.conda/envs/ser
Packages: 181
Files: 5328
Preparing transaction: done
Verifying transaction: done
Executing transaction: - 

    Installed package of scikit-learn can be accelerated using scikit-learn-intelex.
    More details are available here: https://intel.github.io/scikit-learn-intelex

    For example:

        $ conda install scikit-learn-intelex
        $ python -m sklearnex my_application.py

    

done
#
# To activate this environment, use
#
#     $ conda activate ser
#
# To deactivate an active environment, use
#
#     $ conda deactivate

Retrieving notices: ...working... done


In [1]:
## Importing libraries for dataset processing and feature extraction

import os
import random
import sys
import glob
import time
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import warnings
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from tqdm import tqdm
import plotly.offline as py

Once LibROSA is installed, load the required libraries for performing feature extraction by running the below cell:

In [2]:
# Notebook Settings

# Supress Warnings
warnings.simplefilter("ignore")
# Display all columns of the dataframe
pd.set_option('display.max_columns', None)

### Loading Dataset and Extraction Emotion and Gender from filename

In [3]:
# Function to load dataset and extracting information from the filename

# Loading Data
def labeler(base_dir_path):
    dir_list = os.listdir(base_dir_path)
    dir_list.sort()

    # RAVDESS dataset format
    df = pd.DataFrame(columns=['path', 'source', 'actor', 'gender', 'intensity', 'statement', 'repetition', 'emotion'])
    count = 0

    # Extracting information from filename
    for file in os.listdir(base_dir_path):
        filename = file.split('.')[0].split('-')
        if(len(filename)==7):
            path = base_dir_path + file
            src = int(filename[1])
            actor = int(filename[-1])
            emotion = int(filename[2])
            if int(actor)%2 == 0:
                gender = "female"
            else:
                gender = "male"

            if filename[3] == '01':
                intensity = 0
            else:
                intensity = 1

            if filename[4] == '01':
                statement = 0
            else:
                statement = 1

            if filename[5] == '01':
                repeat = 0
            else:
                repeat = 1

        df.loc[count] = [path, src, actor, gender, intensity, statement, repeat, emotion]
        count += 1

    labels = []
    for i in range(len(df)):
        if df.emotion[i] == 1:
            label = "_neutral"
        elif df.emotion[i] == 2:
            label = "_calm"
        elif df.emotion[i] == 3:
            label = "_happy"
        elif df.emotion[i] == 4:
            label = "_sad"
        elif df.emotion[i] == 5:
            label = "_angry"
        elif df.emotion[i] == 6:
            label = "_fearful"
        elif df.emotion[i] == 7:
            label = "_disgust"
        elif df.emotion[i] == 8:
            label = "_surprised"
        else:
            label = "_none"

        # Add gender to the label 
        labels.append(df.loc[i,'gender'] + label)

    df['label'] = labels
    
    return df

In [4]:
# Calling the labeler function
df = labeler("./DATASET/")

In [5]:
# Display the DataFrame
df

Unnamed: 0,path,source,actor,gender,intensity,statement,repetition,emotion,label
0,./DATASET/03-02-05-01-02-01-05.wav,2,5,male,0,1,0,5,male_angry
1,./DATASET/77-52-03-11-51-62-25.wav,52,25,male,1,1,1,3,male_happy
2,./DATASET/03-01-04-01-01-01-14.wav,1,14,female,0,0,0,4,female_sad
3,./DATASET/16-93-07-69-25-53-25.wav,93,25,male,1,1,1,7,male_disgust
4,./DATASET/24-77-01-74-45-69-25.wav,77,25,male,1,1,1,1,male_neutral
...,...,...,...,...,...,...,...,...,...
5247,./DATASET/03-02-03-01-02-01-07.wav,2,7,male,0,1,0,3,male_happy
5248,./DATASET/53-55-06-62-98-35-26.wav,55,26,female,1,1,1,6,female_fearful
5249,./DATASET/03-01-04-02-02-02-22.wav,1,22,female,1,1,1,4,female_sad
5250,./DATASET/53-33-03-90-81-83-25.wav,33,25,male,1,1,1,3,male_happy


In [6]:
# Stratified splitting into train and test sets
# Test size is 0.2

sss = StratifiedShuffleSplit(n_splits=2, random_state=11, test_size=0.2)
for train_index, test_index in sss.split(df, df.label):
    df_train, df_test = df.iloc[train_index,:], df.iloc[test_index,:]

In [7]:
#  Replace index of file with path to extract features from location

df_train.index = df_train.path
df_train = df_train.drop("path", axis=1)


df_test.index = df_test.path
df_test = df_test.drop("path", axis=1)

### Performing Feature Extraction

In [8]:
# Function to extract features such as MFCC, Mels and Chromas from audio

def extract_features(df):
    # features store all features extracted from the audio file
    # labels is the labels for the audio file
    # names is the name of the audio file
    features = pd.DataFrame(columns=['feature'])
    labels = pd.DataFrame(columns=['label'])
    names = pd.DataFrame(columns=['name'])
    
    for i in tqdm(range(df.shape[0])):
        x, sample_rate = librosa.load(df.index[i])
        
        # feature_set stores all features of the audio file
        feature_set = np.array([])
        
        # MFCC feature extraction
        # No. of MFCC Features = 40 (Default = 20)
        mfccs=np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
        feature_set=np.hstack((feature_set, mfccs))
        
        ## Chroma feature extraction
        # No. of Chroma Features = 12 (Always)
        stft=np.abs(librosa.stft(x))
        chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
        feature_set=np.hstack((feature_set, chroma))
        
        ## Mel feature extraction
        # No. of Mel Features = 128 (Default = 128)
        mel=np.mean(librosa.feature.melspectrogram(x, sr=sample_rate).T,axis=0)
        feature_set=np.hstack((feature_set, mel))
        
        # Total features = MFCC(40) + Chroma(12) + Mels(128) = 180
        labels.at[i, 'label'] = df.iloc[i, df.columns.get_loc('label')]
        features.loc[i] = [feature_set]
        names.at[i,'name'] = df.index[i].split('/')[-1]
        
        
    final_data = pd.DataFrame(features['feature'].values.tolist())
    final_data = pd.concat([final_data,labels,names], axis=1)
    
    return final_data

In [9]:
# Function to extract features from the audio files
# Note: This wil take some time since feature extraction is done from 5252 audio files

train_data = extract_features(df_train)
test_data = extract_features(df_test)

100%|██████████| 4201/4201 [11:59<00:00,  5.84it/s]
100%|██████████| 1051/1051 [02:49<00:00,  6.22it/s]


In [10]:
# Prining the shape of the extracted feature set

print(train_data.shape)
print(test_data.shape)

(4201, 182)
(1051, 182)


In [11]:
# Saving the features as CSV for easily executing and experimenting

train_data.to_csv("train_features.csv", index=False)
test_data.to_csv("test_features.csv", index=False)

#### END OF NOTEBOOK

#### _Citations_

###### Audio Features
* https://towardsdatascience.com/how-i-understood-what-features-to-consider-while-training-audio-files-eedfb6e9002b
* https://www.codespeedy.com/speech-emotion-recognition-in-python/

###### Base Paper
* http://www.ijasret.com/VolumeArticles/FullTextPDF/830_34.SPEECH_BASED_EMOTION_RECOGNITION.pdf