In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import ast
import IPython
from sklearn.model_selection import GridSearchCV
from IPython.display import Audio, display
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.naive_bayes import GaussianNB
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler

**importing features from preprocessing notebook**

In [None]:
# Load the feature table produced by the preprocessing notebook and
# print a quick overview: first row, column names, column dtypes.
df = pd.read_csv('../../dataset/features.csv')

divider = '------------------------------------------------'
print(df.head(1), divider, df.columns, divider, df.dtypes, sep='\n')

In [None]:
# Basic data-quality summary: missing values per column, number of
# fully duplicated rows, and the overall table shape.
missing_per_column = df.isna().sum()
duplicate_rows = df.duplicated().sum()

print(f"na values\n {missing_per_column}")
print('------------------------------------------------')
print(f"number of duplicated values:\n {duplicate_rows}")
print('------------------------------------------------')
print(f"shape:\n {df.shape}")

In [None]:
# Show how often each emotion level occurs, then remove every row whose
# level is the literal "X".
level_counts = df['Emotion levels'].value_counts()
print('Emotion level value counts\n', level_counts)

is_level_x = df['Emotion levels'] == "X"
idx = df.loc[is_level_x].index
print(idx)

df = df.drop(index=idx)

In [None]:
# The emotion levels stay unbalanced for now; later in the notebook they
# are collapsed into a specified/unspecified flag.
emotion_level_counts = df['Emotion levels'].value_counts()
emotion_level_counts

In [None]:
# Class distribution of the target; the author's observation is that the
# emotions are approximately evenly distributed.
emotion_counts = df['Emotion'].value_counts()
emotion_counts

**encoding**

In [None]:
# Gender becomes a binary flag: 'Male' -> 1, every other value -> 0.
df['Gender'] = df['Gender'].map(lambda gender: int(gender == 'Male'))

# One-hot encode the emotion level (first category dropped) and store the
# indicator columns as integers rather than booleans.
df = pd.get_dummies(df, columns=['Emotion levels'], drop_first=True)
dummy_cols = [name for name in df.columns if name.startswith('Emotion levels_')]
df[dummy_cols] = df[dummy_cols].astype(int)

# Integer code for each emotion label; this is the prediction target.
emotion_mapping = {
    label: code
    for code, label in enumerate(['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU'])
}
df['emotion_numeric'] = df['Emotion'].map(emotion_mapping)

print(df.head())


In [None]:
# Inspect the row labelled 1 to check the encoded columns.
row_one = df.loc[1]
print(row_one)

In [None]:
# The text label 'Emotion' is no longer needed: it has already been
# encoded into the integer column 'emotion_numeric'.
df = df.drop('Emotion', axis=1)

In [None]:
# Check the column dtypes after encoding.
column_types = df.dtypes
print(column_types)

In [None]:
# Expand every list-valued column into one scalar column per list element
# (named '<column>_<position>'), padding shorter lists with zeros.
list_columns = ['mfcc_mean', 'mfcc_delta_mean', 'mfcc_delta2_mean', 'mel_spec_db_mean']


def _as_list(cell):
    """Parse a stringified value and wrap a bare number into a one-element list."""
    if isinstance(cell, str):
        cell = ast.literal_eval(cell)
    if isinstance(cell, (float, int)):
        cell = [cell]
    return cell


for col in list_columns:
    if col not in df.columns:
        print(f"skipping {col}, not found")
        continue

    print(f"\nworking on: {col}")

    # string -> list, number -> single-element list
    parsed = df[col].apply(_as_list)

    # longest list in this column decides how many new columns are created
    max_len = parsed.apply(len).max()

    # right-pad shorter lists with zeros so every row has max_len entries
    padded = [values + [0] * (max_len - len(values)) for values in parsed]

    expanded = pd.DataFrame(
        padded,
        index=df.index,
        columns=[f'{col}_{i}' for i in range(max_len)],
    )

    # swap the original list column for its expanded version
    df = pd.concat([df.drop(columns=[col]), expanded], axis=1)

    print(f"expanding {max_len} new columns.")

print("\nall columns edited properly")


In [None]:
# Inspect the row labelled 0 after the list columns were expanded.
first_row = df.loc[0]
print(first_row)

In [None]:
# Check the column dtypes after the expansion.
column_types = df.dtypes
print(column_types)

Standardization

In [None]:
# Candidate columns for scaling: everything stored as float64 or int64.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

# 'emotion_numeric' is the prediction target, so it is left unscaled.
numeric_cols = numeric_cols[numeric_cols != 'emotion_numeric']

print("cols for standardization:", list(numeric_cols))

# NOTE(review): the scaler is fitted on all rows, before the train/test
# split performed later in the notebook, so test-set statistics leak into
# the training features. Consider fitting it on the training split only.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values

print("\ndata after the standardization process:")
print(df[numeric_cols].head())

In [None]:
# Inspect the row labelled 0 after standardization.
first_row = df.loc[0]
print(first_row)

In [None]:
# Histogram (with KDE overlay) for a handful of selected feature columns,
# laid out on a 3x3 grid.
feature_cols = (
    ['spectral_centroid_mean', 'zcr_mean']
    + [f'mfcc_delta_mean_{i}' for i in range(3)]
    + [f'mel_spec_db_mean_{i}' for i in range(3)]
)

fig = plt.figure(figsize=(15, 10))
for position, col in enumerate(feature_cols, start=1):
    ax = fig.add_subplot(3, 3, position)
    sns.histplot(df[col], kde=True, bins=30, ax=ax)
    ax.set_title(f'Original {col}')
fig.tight_layout()
plt.show()

In [None]:
# 'Location' is not used as a model feature, so it is removed.
df = df.drop('Location', axis=1)

In [None]:
# Separate the target (y) from the feature matrix (X).
target_name = 'emotion_numeric'
y = df[target_name]
X = df.drop(columns=[target_name])

In [None]:
# 80/20 split, stratified on the target so both parts keep the same class
# proportions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# 5-fold cross-validated search over the number of trees in the forest.
param_grid = {'n_estimators': [100, 150, 200, 220, 250, 270, 300, 305, 310, 320]}
rf = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(rf, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

# Score the best estimator found by the search on the held-out test split.
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Output recorded from an earlier run:
#   Best parameters: {'n_estimators': 310}
#   Best cross-validation score: 0.4450619139343395
#   Test Accuracy: 0.4432505036937542

In [None]:
# Random forest with the tree count selected by the grid search above.
rf1 = RandomForestClassifier(random_state=42, n_estimators=310)
rf1.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred = rf1.predict(X_test)
rf1_accuracy = accuracy_score(y_test, y_pred)
rf1_report = classification_report(y_test, y_pred)
print("Accuracy:", rf1_accuracy)
print("\nClassification Report:\n", rf1_report)

In [None]:
# Confusion matrix of the random forest; tick labels follow the integer
# codes 0..5 assigned in emotion_mapping.
cmrf1 = confusion_matrix(y_test, y_pred)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmrf1, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Random Forest Confusion Matrix")
plt.show()

In [None]:
# Gaussian naive Bayes baseline on the expanded feature set.
nb_model1 = GaussianNB().fit(X_train, y_train)
y_pred = nb_model1.predict(X_test)

nb1_accuracy = accuracy_score(y_test, y_pred)
nb1_report = classification_report(y_test, y_pred)
print("accuracy:", nb1_accuracy)
print("classification report:\n", nb1_report)

In [None]:
# Confusion matrix of the naive Bayes model; tick labels follow the
# integer codes 0..5 assigned in emotion_mapping.
cmnb1 = confusion_matrix(y_test, y_pred)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmnb1, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("GaussianNB Matrix")
plt.show()

In [None]:
# Fully connected network: three hidden ReLU layers with dropout and a
# 6-way softmax output (one unit per emotion class).
input_dim = X_train.shape[1]

model1 = Sequential([
    Dense(256, activation='relu', input_shape=(input_dim,)),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='softmax')
])

# 'balanced' weights are inversely proportional to class frequency; the
# dict maps class position -> weight, which matches the labels because
# the classes are encoded as 0..5.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights = dict(enumerate(class_weights))

model1.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train exactly once. The previous version called fit() twice, so the
# second call continued training the already-fitted model without class
# weights and the reported metrics came from that second run.
# NOTE(review): the test split doubles as the validation set for early
# stopping, which makes the test metrics optimistic; consider carving a
# validation set out of the training data instead.
history = model1.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    class_weight=class_weights,
    verbose=1
)

# Predicted class = index of the largest softmax probability.
y_pred = model1.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nclassification report:\n")
print(classification_report(y_test, y_pred_classes))

cmnn1 = confusion_matrix(y_test, y_pred_classes)

plt.figure(figsize=(8,6))
sns.heatmap(cmnn1, annot=True, fmt="d", cmap="Blues",
            xticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'],
            yticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("NN Matrix")
plt.show()


In [None]:
# 5-fold cross-validated search over C, gamma and kernel for the SVM.
param_grid = {
    'C': [0.1, 1, 5, 10, 15],
    'gamma': ['scale', 'auto', 0.01, 0.001],
    'kernel': ['rbf', 'poly', 'sigmoid'],
}
svc = SVC()

grid_search = GridSearchCV(svc, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

# Score the best estimator found by the search on the held-out test split.
best_svc = grid_search.best_estimator_
y_pred_svm = best_svc.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

cmsvc1 = confusion_matrix(y_test, y_pred_svm)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmsvc1, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('SVM Confusion Matrix')
plt.show()

# Output recorded from an earlier run:
#   Best parameters: {'C': 15, 'gamma': 0.001, 'kernel': 'rbf'}
#   Best cross-validation score: 0.48437892033387664
#   Test Accuracy: 0.5023505708529215

In [None]:
# SVM refitted with the hyper-parameters chosen by the grid search above.
svm_model1 = SVC(C=15, gamma=0.001, kernel='rbf')
svm_model1.fit(X_train, y_train)
y_pred_svm = svm_model1.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

# Confusion matrix; tick labels follow the integer codes 0..5 assigned in
# emotion_mapping.
cmsvc1 = confusion_matrix(y_test, y_pred_svm)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmsvc1, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('SVM Confusion Matrix')
plt.show()

In [None]:
# Linear discriminant analysis on the expanded feature set.
lda_model1 = LinearDiscriminantAnalysis()
lda_model1.fit(X_train, y_train)

y_pred_lda = lda_model1.predict(X_test)

print("accuraccy LDA:", accuracy_score(y_test, y_pred_lda))
print(classification_report(y_test, y_pred_lda))

cmlda1 = confusion_matrix(y_test, y_pred_lda)

# Label the axes with the emotion names (ordered by their integer codes
# 0..5), as every other confusion matrix in the notebook does; without
# them the heatmap only shows bare class indices.
emotion_labels = ['ANG','DIS','FEA','HAP','SAD','NEU']

plt.figure(figsize=(8,6))
sns.heatmap(cmlda1, annot=True, fmt='d', cmap='Blues',
            xticklabels=emotion_labels,
            yticklabels=emotion_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('LDA Confusion Matrix')
plt.show()


In the next segment, a different representation of the data is used: each column whose values were lists is replaced by the mean of its list, since the high dimensionality of the expanded features may be affecting the models' results. Models with the same architecture are trained on this data, and then I attempt to find the optimal parameters.

In [None]:
# Start the second experiment from the raw feature table again and remove
# every row whose emotion level is the literal "X".
df = pd.read_csv('../../dataset/features.csv')

is_level_x = df['Emotion levels'] == "X"
idx = df.loc[is_level_x].index
print(idx)

df = df.drop(index=idx)

In [None]:
# Gender becomes a binary flag: 'Male' -> 1, every other value -> 0.
df['Gender'] = df['Gender'].map(lambda gender: int(gender == 'Male'))

# One-hot encode the emotion level (first category dropped) and store the
# indicator columns as integers rather than booleans.
df = pd.get_dummies(df, columns=['Emotion levels'], drop_first=True)
dummy_cols = [name for name in df.columns if name.startswith('Emotion levels_')]
df[dummy_cols] = df[dummy_cols].astype(int)

# Integer code for each emotion label; this is the prediction target.
emotion_mapping = {
    label: code
    for code, label in enumerate(['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU'])
}
df['emotion_numeric'] = df['Emotion'].map(emotion_mapping)

print(df.head())

In [None]:
# The text label is redundant now that 'emotion_numeric' holds its code.
df = df.drop('Emotion', axis=1)

In [None]:
# Display the row labelled 0 before the list columns are averaged.
first_row = df.loc[0]
first_row

In [None]:
# Replace every list-valued column by the mean of its list.
list_columns = ['mfcc_mean', 'mfcc_delta_mean', 'mfcc_delta2_mean', 'mel_spec_db_mean']


def _list_mean(cell):
    """Return the mean of a (possibly stringified) list; 0 for anything else."""
    parsed = ast.literal_eval(cell) if isinstance(cell, str) else cell
    if isinstance(parsed, list) and len(parsed) > 0:
        return np.mean(parsed)
    # empty lists and non-list values are mapped to 0
    return 0


for col in list_columns:
    df[col] = df[col].apply(_list_mean)

In [None]:
# Display the row labelled 0 after the list columns were averaged.
first_row = df.loc[0]
first_row

In [None]:
# Candidate columns for scaling: everything stored as float64 or int64.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

# 'emotion_numeric' is the prediction target, so it is left unscaled.
numeric_cols = numeric_cols[numeric_cols != 'emotion_numeric']

print("cols for standardization:", list(numeric_cols))

# NOTE(review): the scaler is fitted on all rows, before the train/test
# split performed later in the notebook, so test-set statistics leak into
# the training features. Consider fitting it on the training split only.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values

print("\ndata after the standardization process:")
print(df[numeric_cols].head())

In [None]:
# 'Location' is not used as a model feature, so it is removed.
df = df.drop('Location', axis=1)

In [None]:
# Separate the target (y) from the feature matrix (X).
target_name = 'emotion_numeric'
y = df[target_name]
X = df.drop(columns=[target_name])

In [None]:
# 80/20 split, stratified on the target so both parts keep the same class
# proportions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Random forest on the averaged features, using the same tree count as in
# the first experiment.
rf2 = RandomForestClassifier(random_state=42, n_estimators=310)
rf2.fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred = rf2.predict(X_test)
rf2_accuracy = accuracy_score(y_test, y_pred)
rf2_report = classification_report(y_test, y_pred)
print("Accuracy:", rf2_accuracy)
print("\nClassification Report:\n", rf2_report)

In [None]:
# Confusion matrix of the second random forest; tick labels follow the
# integer codes 0..5 assigned in emotion_mapping.
cmrf2 = confusion_matrix(y_test, y_pred)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmrf2, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Random Forest Confusion Matrix")
plt.show()

In [None]:
# Gaussian naive Bayes baseline on the averaged feature set.
nb_model2 = GaussianNB().fit(X_train, y_train)
y_pred = nb_model2.predict(X_test)

nb2_accuracy = accuracy_score(y_test, y_pred)
nb2_report = classification_report(y_test, y_pred)
print("accuracy:", nb2_accuracy)
print("classification report:\n", nb2_report)

In [None]:
# Confusion matrix of the second naive Bayes model; tick labels follow
# the integer codes 0..5 assigned in emotion_mapping.
cmnb2 = confusion_matrix(y_test, y_pred)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmnb2, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("GaussianNB Matrix")
plt.show()

In [None]:
# Same network architecture as in the first experiment, trained on the
# averaged feature set.
input_dim = X_train.shape[1]

model2 = Sequential([
    Dense(256, activation='relu', input_shape=(input_dim,)),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='softmax')
])

# 'balanced' weights are inversely proportional to class frequency; the
# dict maps class position -> weight, which matches the labels because
# the classes are encoded as 0..5.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights = dict(enumerate(class_weights))

model2.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train exactly once. The previous version called fit() twice, so the
# second call continued training the already-fitted model without class
# weights and the reported metrics came from that second run.
# NOTE(review): the test split doubles as the validation set for early
# stopping, which makes the test metrics optimistic; consider carving a
# validation set out of the training data instead.
history = model2.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    class_weight=class_weights,
    verbose=1
)

# Predicted class = index of the largest softmax probability.
y_pred = model2.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nclassification report:\n")
print(classification_report(y_test, y_pred_classes))

cmnn2 = confusion_matrix(y_test, y_pred_classes)

plt.figure(figsize=(8,6))
sns.heatmap(cmnn2, annot=True, fmt="d", cmap="Blues",
            xticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'],
            yticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("NN Matrix")
plt.show()


In [None]:
# SVM on the averaged features, reusing the hyper-parameters found for
# the first experiment.
svm_model2 = SVC(C=15, gamma=0.001, kernel='rbf')
svm_model2.fit(X_train, y_train)
y_pred_svm = svm_model2.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

# Confusion matrix; tick labels follow the integer codes 0..5 assigned in
# emotion_mapping.
cmsvc2 = confusion_matrix(y_test, y_pred_svm)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmsvc2, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('SVM Confusion Matrix')
plt.show()

In [None]:
# Linear discriminant analysis on the averaged feature set.
lda_model2 = LinearDiscriminantAnalysis()
lda_model2.fit(X_train, y_train)

y_pred_lda = lda_model2.predict(X_test)

print("accuraccy LDA:", accuracy_score(y_test, y_pred_lda))
print(classification_report(y_test, y_pred_lda))

cmlda2 = confusion_matrix(y_test, y_pred_lda)

# Label the axes with the emotion names (ordered by their integer codes
# 0..5), as every other confusion matrix in the notebook does; without
# them the heatmap only shows bare class indices.
emotion_labels = ['ANG','DIS','FEA','HAP','SAD','NEU']

plt.figure(figsize=(8,6))
sns.heatmap(cmlda2, annot=True, fmt='d', cmap='Blues',
            xticklabels=emotion_labels,
            yticklabels=emotion_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('LDA Confusion Matrix')
plt.show()

Comparing results

In [None]:
# Side-by-side confusion matrices: random forest on the expanded features
# (left) versus the averaged features (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmrf1, "Random Forest - First Confusion Matrix"),
    (cmrf2, "Random Forest - Second Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()


In [None]:
# Side-by-side confusion matrices: naive Bayes on the expanded features
# (left) versus the averaged features (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmnb1, "Naive Bayes - First Confusion Matrix"),
    (cmnb2, "Naive Bayes - Second Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side confusion matrices: neural network on the expanded
# features (left) versus the averaged features (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmnn1, "NN - First Confusion Matrix"),
    (cmnn2, "NN - Second Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side confusion matrices: SVM on the expanded features (left)
# versus the averaged features (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmsvc1, "SVC - First Confusion Matrix"),
    (cmsvc2, "SVC - Second Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side confusion matrices: LDA on the expanded features (left)
# versus the averaged features (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmlda1, "LDA - First Confusion Matrix"),
    (cmlda2, "LDA - Second Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()

Finding optimal parameters for the updated dataset

In [None]:
# 5-fold cross-validated search over the number of trees, this time on
# the averaged feature set.
param_grid = {'n_estimators': [100, 150, 200, 220, 250, 270, 300, 305, 310, 320]}
rf2opt = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(rf2opt, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

# Score the best estimator found by the search on the held-out test split.
best_rf2opt = grid_search.best_estimator_
y_pred = best_rf2opt.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

In [None]:
# Confusion matrix of the tuned random forest; tick labels follow the
# integer codes 0..5 assigned in emotion_mapping.
cmrf2opt = confusion_matrix(y_test, y_pred)
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cmrf2opt, annot=True, fmt='d', cmap='Blues',
    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Random Forest Confusion Matrix")
plt.show()

In [None]:
# Side-by-side confusion matrices on the averaged features: random forest
# with the earlier tree count (left) versus the re-tuned one (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmrf2, "Random forest - First Confusion Matrix"),
    (cmrf2opt, "Random forest - Optimal Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
# 5-fold cross-validated search over C, gamma and kernel for the SVM on
# the averaged feature set.
param_grid = {
    'C': [0.1, 1, 5, 10, 15],
    'gamma': ['scale', 'auto', 0.01, 0.001],
    'kernel': ['rbf', 'poly', 'sigmoid']
}

svc2opt = SVC()

# Search over the estimator created in this cell. The previous version
# passed `svc`, a variable left over from an earlier cell, so this cell
# failed on a fresh kernel unless that cell had run, and `svc2opt` was
# never used.
grid_search = GridSearchCV(
    estimator=svc2opt,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

# Score the best estimator found by the search on the held-out test split.
best_svc2opt = grid_search.best_estimator_
y_pred_svm = best_svc2opt.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

cmsvc2opt = confusion_matrix(y_test, y_pred_svm)
plt.figure(figsize=(8,6))
sns.heatmap(cmsvc2opt, annot=True, fmt='d', cmap='Blues',
            xticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'],
            yticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('SVM Confusion Matrix')
plt.show()

In [None]:
# Side-by-side confusion matrices on the averaged features: SVM with the
# earlier hyper-parameters (left) versus the re-tuned ones (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmsvc2, "SVC - First Confusion Matrix"),
    (cmsvc2opt, "SVC - Optimal Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()

Balancing the emotion-level data (levels collapsed into specified/unspecified)

In [None]:
# Third experiment: rebuild the feature table from the raw CSV, this time
# collapsing the emotion level into a single binary column.
df = pd.read_csv('../../dataset/features.csv')

# Remove every row whose emotion level is the literal "X".
idx = df[df['Emotion levels'] == "X"].index
print(idx)

df = df.drop(idx)

# Gender becomes a binary flag: 'Male' -> 1, every other value -> 0.
df['Gender'] = df['Gender'].apply(lambda x: 1 if x == 'Male' else 0)
# Emotion level becomes 0 for 'XX' and 1 for any other value
# (presumably 'XX' marks an unspecified level - TODO confirm).
df['Emotion levels'] = df['Emotion levels'].apply(lambda x: 0 if x == 'XX' else 1)

# Integer code for each emotion label; this is the prediction target.
emotion_mapping = {
    'ANG': 0,
    'DIS': 1,
    'FEA': 2,
    'HAP': 3,
    'SAD': 4,
    'NEU': 5
}

df['emotion_numeric'] = df['Emotion'].map(emotion_mapping)

df = df.drop(columns=['Emotion'])

# Replace every list-valued column by the mean of its list; empty lists
# and non-list values are mapped to 0.
list_columns = ['mfcc_mean', 'mfcc_delta_mean', 'mfcc_delta2_mean', 'mel_spec_db_mean']

for col in list_columns:
    df[col] = df[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

for col in list_columns:
    df[col] = df[col].apply(lambda x: np.mean(x) if isinstance(x, list) and len(x) > 0 else 0)

# Standardize every float64/int64 column except the target.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

numeric_cols = numeric_cols.drop('emotion_numeric') if 'emotion_numeric' in numeric_cols else numeric_cols

print("cols for standardization:", list(numeric_cols))

scaler = StandardScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

print("\ndata after the standardization process:")
print(df[numeric_cols].head())

df = df.drop(columns=['Location'])

# Rebuild the feature matrix, the target and the train/test split from the
# re-encoded table. Without this step the model trained afterwards would
# silently reuse X_train/X_test from the previous experiment and never see
# the binary emotion-level column.
X = df.drop(columns=['emotion_numeric'])
y = df['emotion_numeric']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# Same network architecture as in the earlier experiments.
# NOTE(review): X_train/X_test/y_train/y_test must have been rebuilt from
# the re-encoded df before this cell runs; otherwise the split from the
# previous experiment is reused - verify.
input_dim = X_train.shape[1]

model3 = Sequential([
    Dense(256, activation='relu', input_shape=(input_dim,)),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='softmax')
])

# 'balanced' weights are inversely proportional to class frequency; the
# dict maps class position -> weight, which matches the labels because
# the classes are encoded as 0..5.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights = dict(enumerate(class_weights))

model3.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train exactly once. The previous version called fit() twice, so the
# second call continued training the already-fitted model without class
# weights and the reported metrics came from that second run.
# NOTE(review): the test split doubles as the validation set for early
# stopping, which makes the test metrics optimistic; consider carving a
# validation set out of the training data instead.
history = model3.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    class_weight=class_weights,
    verbose=1
)

# Predicted class = index of the largest softmax probability.
y_pred = model3.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nclassification report:\n")
print(classification_report(y_test, y_pred_classes))

cmnn3 = confusion_matrix(y_test, y_pred_classes)

plt.figure(figsize=(8,6))
sns.heatmap(cmnn3, annot=True, fmt="d", cmap="Blues",
            xticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'],
            yticklabels=['ANG','DIS','FEA','HAP','SAD','NEU'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("NN Matrix")
plt.show()


In [None]:
# Side-by-side confusion matrices: neural network from the first
# experiment (left) versus the one trained for the binary emotion-level
# experiment (right).
emotion_labels = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']
panels = [
    (cmnn1, "NN - First Confusion Matrix"),
    (cmnn3, "NN - Updated Data Confusion Matrix"),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
for ax, (matrix, title) in zip(axes, panels):
    sns.heatmap(
        matrix, annot=True, fmt='d', cmap='Blues',
        xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

plt.tight_layout()
plt.show()