In [1]:
import sys
import os
# go to upper diretory
sys.path.append(os.path.abspath('./../../'))
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Audio_Sentiment_Analysis.utils.Configuration import Configuration
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import minmax_scale
from tabulate import tabulate
import csv

In [2]:
AUDIO_DIR = f"{os.path.abspath('./../../')}/eNTERFACE05_Dataset/*/*/*/*.avi"
CONFIG_FILE = f"{os.path.abspath('./../../')}/Audio_Sentiment_Analysis/data/config.json"
EXTRACTED_FEATURES_FILE = 'extracted_features_ent05.csv'
FEATURES_TABLE_FILE = 'features_table_ent05.csv'
config = Configuration.load_json(CONFIG_FILE)

In [3]:
def load_data(proc_feat_dataset):
    sub_emo_data = defaultdict(lambda: defaultdict(list))

    with open(proc_feat_dataset) as dataset:
        reader = csv.DictReader(dataset)
        for row in reader:
            subject, emotion = row.pop("subject"), row.pop("emotion")
            sub_emo_data[subject][emotion].append(np.array(list(row.values()), dtype=np.float64))

    return sub_emo_data

In [4]:
# Load the processed features writen in the dataset
sub_emo_data = load_data(EXTRACTED_FEATURES_FILE)

In [5]:
def process_features(sub_emo_data):
    data = []

    for subject, sub_data in sub_emo_data.items():
        for emotion, features in sub_data.items():
            # mel_mean mel_min mel_max mel_var mel_std chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate mfcc1 mfcc2...
            features_arr = np.array(features)

            mean_feature_values = [
                int(subject),
                emotion,
                np.mean(features_arr[:,0]),
                np.mean(features_arr[:,1]),
                np.mean(features_arr[:,2]),
                np.mean(features_arr[:,3]),
                np.mean(features_arr[:,4]),
                np.mean(features_arr[:,5]),
                np.mean(features_arr[:,6]),
                np.mean(features_arr[:,7]),
                np.mean(features_arr[:,8]),
                np.mean(features_arr[:,9]),
                np.mean(features_arr[:,10]),
                np.mean(features_arr[:,11]),
                np.mean(features_arr[:,12]),
                np.mean(features_arr[:,13]),
            ]

            data.append(mean_feature_values)

    return data

In [6]:
# process the extracted features and group them
data = process_features(sub_emo_data)
df = pd.DataFrame(data, columns=["Subject", "Emotion", "mel_mean", "mel_min", "mel_max", "mel_var", "mel_std", "chroma_stft", "rmse", "spectral_centroid", "spectral_bandwidth", "rolloff", "zero_crossing_rate", "mfcc1", "mfcc2", "mfcc3"])
df = df.sort_values(["Subject", "Emotion"], ascending = (True, True))
df = df.set_index('Subject')

df.head()

Unnamed: 0_level_0,Emotion,mel_mean,mel_min,mel_max,mel_var,mel_std,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,anger,5.448993,3.696495e-10,2593.246057,3196.644043,52.88504,0.351984,0.128167,1696.680819,1734.487459,3468.808031,0.083923,-189.488507,119.051169,-38.191239
1,disgust,2.203773,3.467299e-10,1275.522046,629.788232,23.311563,0.396743,0.073791,2206.716963,1952.310647,4287.188569,0.137325,-245.959027,100.330852,-27.615436
1,fear,2.062133,3.833569e-10,1171.522095,953.791917,24.186283,0.347003,0.06943,1877.335301,1852.591857,3838.981285,0.095356,-261.97077,108.969914,-24.113346
1,happiness,3.596385,4.827979e-10,1385.045239,1395.55708,34.941695,0.373238,0.101833,1988.367154,1917.113821,3988.430984,0.1097,-200.898444,107.469267,-30.913523
1,sadness,0.860076,3.66128e-10,618.971106,97.350327,9.813401,0.40585,0.043551,1918.577138,1961.970025,4058.874966,0.104058,-302.475494,110.68093,-13.636983


In [7]:
headers = 'subject emotion mel_mean mel_min mel_max mel_var mel_std chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 4):
    headers += f' mfcc{i}'
headers = headers.split()

features_table = tabulate(df, headers=headers, tablefmt="tsv")

with open(FEATURES_TABLE_FILE, "w") as file:
    file.write(features_table)

In [13]:
df = pd.read_csv(FEATURES_TABLE_FILE, index_col="  subject", sep='\t')
df.columns = df.columns.str.replace(' ','')
df

Unnamed: 0_level_0,emotion,mel_mean,mel_min,mel_max,mel_var,mel_std,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,anger,5.448990,3.696490e-10,2593.2500,3196.64000,52.88500,0.351984,0.128167,1696.68,1734.49,3468.81,0.083923,-189.489,119.0510,-38.191200
1,disgust,2.203770,3.467300e-10,1275.5200,629.78800,23.31160,0.396743,0.073791,2206.72,1952.31,4287.19,0.137325,-245.959,100.3310,-27.615400
1,fear,2.062130,3.833570e-10,1171.5200,953.79200,24.18630,0.347003,0.069430,1877.34,1852.59,3838.98,0.095356,-261.971,108.9700,-24.113300
1,happiness,3.596390,4.827980e-10,1385.0500,1395.56000,34.94170,0.373238,0.101833,1988.37,1917.11,3988.43,0.109700,-200.898,107.4690,-30.913500
1,sadness,0.860076,3.661280e-10,618.9710,97.35030,9.81340,0.405850,0.043551,1918.58,1961.97,4058.87,0.104058,-302.475,110.6810,-13.637000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,disgust,0.589217,4.727090e-10,274.5870,48.64010,6.68675,0.365766,0.034502,2408.79,2211.46,4791.71,0.121484,-380.336,82.3475,-12.955400
44,fear,0.335655,5.098100e-10,306.4600,51.12740,6.06937,0.301355,0.024703,2673.86,2477.59,5385.03,0.121884,-421.365,52.6023,0.081014
44,happiness,0.822203,5.282050e-10,371.6600,79.57380,8.59276,0.374071,0.044010,2351.53,2084.56,4502.45,0.130231,-341.476,89.9554,-22.907800
44,sadness,0.104873,4.691030e-10,97.5576,3.24531,1.73329,0.341370,0.013326,2547.64,2432.42,5242.05,0.121909,-488.584,69.1828,4.590280


In [14]:
# Min Max Scaling all features
x = df.iloc[:,1:] #returns a numpy array
x_scaled = minmax_scale(x)
df.iloc[:,1:] = x_scaled
df

Unnamed: 0_level_0,emotion,mel_mean,mel_min,mel_max,mel_var,mel_std,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,anger,1.000000,0.128249,1.000000,1.000000,1.000000,0.282748,0.979684,0.120934,0.191202,0.207083,0.056304,0.932059,0.770914,0.406461
1,disgust,0.403131,0.108585,0.491141,0.197012,0.439705,0.524359,0.545622,0.460289,0.407633,0.496262,0.531329,0.756084,0.553731,0.522689
1,fear,0.377080,0.140011,0.450980,0.298370,0.456277,0.255861,0.510809,0.241136,0.308549,0.337885,0.158009,0.706186,0.653958,0.561177
1,happiness,0.659265,0.225332,0.533437,0.436568,0.660048,0.397478,0.769470,0.315010,0.372658,0.390693,0.285599,0.896506,0.636544,0.486443
1,sadness,0.155995,0.125228,0.237606,0.030450,0.183970,0.573518,0.304226,0.268575,0.417231,0.415584,0.235413,0.579965,0.673808,0.676311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,disgust,0.106178,0.216675,0.104617,0.015212,0.124733,0.357144,0.231990,0.594736,0.665130,0.674536,0.390420,0.337329,0.345093,0.683802
44,fear,0.059542,0.248508,0.116925,0.015990,0.113036,0.009452,0.153770,0.771100,0.929562,0.884188,0.393978,0.209472,0.000000,0.827072
44,happiness,0.149030,0.264291,0.142103,0.024889,0.160844,0.401975,0.307888,0.556638,0.539039,0.572325,0.468227,0.458427,0.433357,0.574425
44,sadness,0.017096,0.213581,0.036255,0.001011,0.030885,0.225454,0.062956,0.687120,0.884680,0.833665,0.394201,0.000000,0.192361,0.876629


In [15]:
# Querying dataframe grouping by emotions and calculating the respective mean values
df.groupby(['emotion']).mean()

Unnamed: 0_level_0,mel_mean,mel_min,mel_max,mel_var,mel_std,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3
emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
anger,0.359134,0.238364,0.3015,0.147347,0.309974,0.392969,0.49741,0.323288,0.379976,0.381431,0.224227,0.654736,0.621375,0.519308
disgust,0.197462,0.27539,0.185761,0.070037,0.195528,0.501445,0.327195,0.580165,0.561718,0.623529,0.517813,0.552483,0.434143,0.631407
fear,0.200796,0.285783,0.181468,0.093825,0.204595,0.390285,0.315311,0.495348,0.551179,0.569126,0.359803,0.505276,0.443825,0.648128
happiness,0.305899,0.284859,0.265305,0.142139,0.290448,0.446978,0.440928,0.498976,0.473157,0.527227,0.441551,0.624773,0.481167,0.55812
sadness,0.078426,0.267301,0.09004,0.017543,0.08559,0.558361,0.166214,0.489597,0.60251,0.601826,0.360235,0.362435,0.506211,0.729172
surprise,0.214658,0.30384,0.189707,0.077574,0.207815,0.502336,0.339113,0.381036,0.474921,0.46618,0.275215,0.526239,0.593515,0.639589
