In [1]:
import sys
import os
# go to upper diretory
sys.path.append(os.path.abspath('./../../'))
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Audio_Sentiment_Analysis.utils.Configuration import Configuration
from sklearn.preprocessing import minmax_scale
import seaborn as sns
from tabulate import tabulate
import csv

In [35]:
AUDIO_DIR = f"{os.path.abspath('./../../')}/eNTERFACE05_Dataset/*/*/*/*.avi"
CONFIG_FILE = f"{os.path.abspath('./../../')}/Audio_Sentiment_Analysis/data/config.json"
EXTRACTED_FEATURES_FILE = 'extracted_features_ent05.csv'
FEATURES_TABLE_FILE = 'features_table_ent05.csv'
config = Configuration.load_json(CONFIG_FILE)

In [19]:
def load_data(proc_feat_dataset):
    sub_emo_data = defaultdict(lambda: defaultdict(list))

    with open(proc_feat_dataset) as dataset:
        reader = csv.DictReader(dataset)
        for row in reader:
            subject, emotion = row.pop("subject"), row.pop("emotion")
            sub_emo_data[subject][emotion].append(np.array(list(row.values()), dtype=np.float64))

    return sub_emo_data

In [30]:
# Load the processed features writen in the dataset
sub_emo_data = load_data(EXTRACTED_FEATURES_FILE)

In [47]:
def process_features(sub_emo_data):
    data = []

    for subject, sub_data in sub_emo_data.items():
        for emotion, features in sub_data.items():
            # mel_mean mel_min mel_max mel_var mel_std chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate mfcc1 mfcc2...
            features_arr = np.array(features)

            mean_feature_values = [
                int(subject),
                emotion,
                np.mean(features_arr[:,0]),
                np.mean(features_arr[:,1]),
                np.mean(features_arr[:,2]),
                np.mean(features_arr[:,3]),
                np.mean(features_arr[:,4]),
                np.mean(features_arr[:,5]),
                np.mean(features_arr[:,6]),
                np.mean(minmax_scale(features_arr[:,7], config.scale_range)),
                np.mean(features_arr[:,8]),
                np.mean(features_arr[:,9]),
                np.mean(minmax_scale(features_arr[:,10], config.scale_range)),
                np.mean(features_arr[:,11]),
                np.mean(features_arr[:,12]),
                np.mean(features_arr[:,13]),
            ]

            data.append(mean_feature_values)

    return data

In [111]:
# process the extracted features, group and scale them
data = process_features(sub_emo_data)
df = pd.DataFrame(data, columns=["Subject", "Emotion", "mel_mean", "mel_min", "mel_max", "mel_var", "mel_std", "chroma_stft", "rmse", "spectral_centroid", "spectral_bandwidth", "rolloff", "zero_crossing_rate", "mfcc1", "mfcc2", "mfcc3"])
df = df.sort_values(["Subject", "Emotion"], ascending = (True, True))
df = df.set_index('Subject')

df.head()

Unnamed: 0_level_0,Emotion,mel_mean,mel_min,mel_max,mel_var,mel_std,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,anger,5.448993,3.696495e-10,2593.246057,3196.644043,52.88504,0.351984,0.128167,0.503678,1734.487459,3468.808031,0.569907,-189.488507,119.051169,-38.191239
1,disgust,2.203773,3.467299e-10,1275.522046,629.788232,23.311563,0.396743,0.073791,0.557522,1952.310647,4287.188569,0.55296,-245.959027,100.330852,-27.615436
1,fear,2.062133,3.833569e-10,1171.522095,953.791917,24.186283,0.347003,0.06943,0.440377,1852.591857,3838.981285,0.393695,-261.97077,108.969914,-24.113346
1,happiness,3.596385,4.827979e-10,1385.045239,1395.55708,34.941695,0.373238,0.101833,0.27357,1917.113821,3988.430984,0.269389,-200.898444,107.469267,-30.913523
1,sadness,0.860076,3.66128e-10,618.971106,97.350327,9.813401,0.40585,0.043551,0.537237,1961.970025,4058.874966,0.57843,-302.475494,110.68093,-13.636983


In [102]:
headers = 'subject emotion mel_mean mel_min mel_max mel_var mel_std chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 4):
    headers += f' mfcc{i}'
headers = headers.split()

features_table = tabulate(df, headers=headers, tablefmt="tsv")

with open(FEATURES_TABLE_FILE, "w") as file:
    file.write(features_table)

In [110]:
df = pd.read_csv(FEATURES_TABLE_FILE, index_col="  subject", sep='\t')  
df.head()

Unnamed: 0_level_0,emotion,mel_mean,mel_min,mel_max,mel_var,mel_std,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,anger,5.44899,3.69649e-10,2593.25,3196.64,52.885,0.351984,0.128167,0.503678,1734.49,3468.81,0.569907,-189.489,119.051,-38.1912
1,disgust,2.20377,3.4673e-10,1275.52,629.788,23.3116,0.396743,0.073791,0.557522,1952.31,4287.19,0.55296,-245.959,100.331,-27.6154
1,fear,2.06213,3.83357e-10,1171.52,953.792,24.1863,0.347003,0.06943,0.440377,1852.59,3838.98,0.393695,-261.971,108.97,-24.1133
1,happiness,3.59639,4.82798e-10,1385.05,1395.56,34.9417,0.373238,0.101833,0.27357,1917.11,3988.43,0.269389,-200.898,107.469,-30.9135
1,sadness,0.860076,3.66128e-10,618.971,97.3503,9.8134,0.40585,0.043551,0.537237,1961.97,4058.87,0.57843,-302.475,110.681,-13.637
