In [2]:
%%capture
import sys
import os
# go to upper diretory
sys.path.append(os.path.abspath('./../../../'))
import matplotlib.pyplot as plt
import pandas as pd
from xgboost import XGBClassifier

AUDIO_DIR = f"{os.path.abspath('./../../../')}/IEMOCAP_Dataset"
EXTRACTED_FEATURES_FILE = f"{os.path.abspath('./../../../')}/Audio_Sentiment_Analysis/iemocap/data/preprocessed_extracted_features_iemocap.csv"
from joblib import dump

plt.rcParams['figure.dpi'] = 300

In [3]:
# Load the extracted-features table and key it by the 'File' column.
df = pd.read_csv(EXTRACTED_FEATURES_FILE).set_index('File')

# Cap 'Valence' at 5: entries for which `value <= 5` does not hold are replaced by 5.
valence = df['Valence']
df['Valence'] = valence.where(valence <= 5, 5)

print(f"Number of Audio Files: {len(df)}")

# Row count per (Emotion, Emotion_Id) pair, ordered by the numeric id.
# Left as the final expression so the notebook renders the table.
emotion_counts = df.groupby(['Emotion', 'Emotion_Id']).agg({'Emotion': ['count']})
emotion_counts.sort_values("Emotion_Id")

Number of Audio Files: 5531


Unnamed: 0_level_0,Unnamed: 1_level_0,Emotion
Unnamed: 0_level_1,Unnamed: 1_level_1,count
Emotion,Emotion_Id,Unnamed: 2_level_2
angry,0,1103
happy,1,1636
sad,2,1084
neutral,3,1708


In [4]:
# Features: every column from position 9 onward.
# Target: the single column at position 5, kept as a one-column DataFrame.
# NOTE(review): presumably position 5 is the Emotion_Id label — confirm against the CSV layout.
X = df.iloc[:, 9:]
y = df.iloc[:, 5:6]

# Hyperparameters gathered in one place; random_state pins the seed passed to XGBoost.
xgb_params = dict(
    n_estimators=512,
    max_depth=8,
    learning_rate=0.1,
    subsample=0.9,
    colsample_bytree=0.8,
    colsample_bylevel=0.8,
    random_state=1,
    n_jobs=-1,
)
model = XGBClassifier(**xgb_params)

# Fit on plain arrays; ravel() flattens the one-column target to 1-D.
model.fit(X.to_numpy(), y.to_numpy().ravel())

# Persist the fitted model; dump() is the last expression so its return value is displayed.
dump(model, 'traditional_model.pkl')


['traditional_model.pkl']

Data Stratification New Data

In [8]:
# Reload the features, order rows by (Emotion_Id, Gender), and key the frame by 'File'.
df = (
    pd.read_csv(EXTRACTED_FEATURES_FILE)
    .sort_values(['Emotion_Id', 'Gender'], ascending=[True, True])
    .set_index('File')
)

# Cap 'Valence' at 5: entries for which `value <= 5` does not hold are replaced by 5.
valence = df['Valence']
df['Valence'] = valence.where(valence <= 5, 5)

# Per-emotion acceptance rules on Activation / Valence / Dominance.
# A row labelled with a given emotion is kept only if it satisfies that
# emotion's rule; rows carrying any other label pass through untouched.
emotion_rules = [
    ('happy', lambda d: (d['Activation'] >= 2.5)
                        & (d['Valence'] >= 3)
                        & (d['Dominance'] >= 2.5)),
    ('sad', lambda d: (d['Activation'] >= 1.5)
                      & (d['Valence'] <= 2.5)
                      & (d['Dominance'] <= 3.5)),
    ('angry', lambda d: (d['Activation'] >= 3)
                        & (d['Valence'] <= 4.5)),
    ('neutral', lambda d: (d['Activation'] >= 1) & (d['Activation'] < 4)
                          & (d['Valence'] >= 2.5)
                          & (d['Dominance'] > 2)),
]
for emotion_label, within_bounds in emotion_rules:
    df = df[within_bounds(df) | (df['Emotion'] != emotion_label)]

# Keep only rows whose 'Duration' is at least 2.
df = df[df["Duration"].ge(2)]

print(f"Number of Audio Files: {len(df)}")

# Mean Activation / Valence / Dominance per emotion, then row counts per class.
mean_spec = {dimension: ['mean'] for dimension in ('Activation', 'Valence', 'Dominance')}
print(df.groupby(['Emotion']).agg(mean_spec))
print(df.groupby(['Emotion', 'Emotion_Id']).agg({'Emotion': ['count']}))

# Features: every column from position 9 onward.
# Target: the single column at position 5, kept as a one-column DataFrame.
# NOTE(review): presumably position 5 is the Emotion_Id label — confirm against the CSV layout.
X = df.iloc[:, 9:]
y = df.iloc[:, 5:6]

# Hyperparameters gathered in one place; random_state pins the seed passed to XGBoost.
xgb_params = dict(
    n_estimators=512,
    max_depth=8,
    learning_rate=0.1,
    subsample=0.9,
    colsample_bytree=0.8,
    colsample_bylevel=0.8,
    random_state=1,
    n_jobs=-1,
)
model = XGBClassifier(**xgb_params)

# Fit on plain arrays; ravel() flattens the one-column target to 1-D.
model.fit(X.to_numpy(), y.to_numpy().ravel())

# Persist the fitted model; dump() is the last expression so its return value is displayed.
dump(model, 'stratified_traditional_model.pkl')


Number of Audio Files: 3347
        Activation   Valence Dominance
              mean      mean      mean
Emotion                               
angry     3.865258  1.832395  4.096009
happy     3.546173  3.993541  3.426157
neutral   2.788662  3.009868  3.016146
sad       2.484163  2.048013  2.649824
                   Emotion
                     count
Emotion Emotion_Id        
angry   0              710
happy   1             1045
neutral 3              929
sad     2              663


['stratified_traditional_model.pkl']