In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import seaborn as sns
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense,BatchNormalization, Dropout
from keras.optimizers import SGD

In [2]:
# Location of the feature CSV, relative to this notebook. Each row holds the
# numbered feature columns followed by the `age`, `gender` and `emotion` columns.
dataset_path = '../../data/features_age_emotion.csv'
data = pd.read_csv(dataset_path)

In [3]:
# Preview the first rows of the raw frame (feature columns, then age / gender / emotion).
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2853,2854,2855,2856,2857,2858,2859,age,gender,emotion
0,0.01709,0.024902,0.028809,0.04541,0.074707,0.100098,0.119141,0.11084,0.088867,0.067383,...,,,,,,,,51,male,anger
1,0.182129,0.256836,0.350098,0.351074,0.281738,0.240234,0.17041,0.11084,0.089844,0.068359,...,,,,,,,,51,male,anger
2,0.015625,0.029297,0.070801,0.104004,0.132324,0.149414,0.131836,0.109863,0.092773,0.07666,...,,,,,,,,51,male,anger
3,0.07666,0.11377,0.167969,0.180176,0.17334,0.166992,0.136719,0.109863,0.091309,0.075684,...,,,,,,,,51,male,anger
4,0.015137,0.031738,0.036621,0.041504,0.037109,0.026855,0.029297,0.029297,0.035645,0.067383,...,,,,,,,,51,male,disgust


In [4]:
# Bucket each numeric age into a decade label.
# The edges sit on decade boundaries and `right=False` makes every bin
# left-closed / right-open: [20, 30) -> 'twenties', ..., [70, 80) -> 'seventies'.
# The previous edges (20, 29, 39, ..., 79) combined with `right=False` placed
# ages 29, 39, 49, ... in the *next* decade and excluded age 79 altogether.
# Ages outside [20, 80) become NaN.
AGE_BIN_EDGES = [20, 30, 40, 50, 60, 70, 80]
AGE_LABELS = ['twenties', 'thirties', 'forties', 'fifties', 'sixties', 'seventies']
data['age'] = pd.cut(data['age'], bins=AGE_BIN_EDGES, labels=AGE_LABELS, right=False)

In [5]:
# Replace every NaN in the frame with 0 (the raw preview shows NaN in the
# trailing feature columns).
# NOTE(review): this is applied to all columns, including the categorical `age`
# and the `gender` label — confirm that zero-filling those is intended.
data = data.replace(np.nan, 0)

In [6]:
# Re-check the preview after binning the ages and zero-filling missing values.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2853,2854,2855,2856,2857,2858,2859,age,gender,emotion
0,0.01709,0.024902,0.028809,0.04541,0.074707,0.100098,0.119141,0.11084,0.088867,0.067383,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,male,anger
1,0.182129,0.256836,0.350098,0.351074,0.281738,0.240234,0.17041,0.11084,0.089844,0.068359,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,male,anger
2,0.015625,0.029297,0.070801,0.104004,0.132324,0.149414,0.131836,0.109863,0.092773,0.07666,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,male,anger
3,0.07666,0.11377,0.167969,0.180176,0.17334,0.166992,0.136719,0.109863,0.091309,0.075684,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,male,anger
4,0.015137,0.031738,0.036621,0.041504,0.037109,0.026855,0.029297,0.029297,0.035645,0.067383,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,male,disgust


In [7]:
# Encode the gender strings as integers for the binary target:
# "female" -> 0 and "male" -> 1; any other value is left untouched.
data['gender'] = data['gender'].replace({"female": 0, "male": 1})

In [8]:
# Confirm that `gender` now holds the integer codes.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2853,2854,2855,2856,2857,2858,2859,age,gender,emotion
0,0.01709,0.024902,0.028809,0.04541,0.074707,0.100098,0.119141,0.11084,0.088867,0.067383,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,1,anger
1,0.182129,0.256836,0.350098,0.351074,0.281738,0.240234,0.17041,0.11084,0.089844,0.068359,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,1,anger
2,0.015625,0.029297,0.070801,0.104004,0.132324,0.149414,0.131836,0.109863,0.092773,0.07666,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,1,anger
3,0.07666,0.11377,0.167969,0.180176,0.17334,0.166992,0.136719,0.109863,0.091309,0.075684,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,1,anger
4,0.015137,0.031738,0.036621,0.041504,0.037109,0.026855,0.029297,0.029297,0.035645,0.067383,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,fifties,1,disgust


In [9]:
# Number of rows per emotion label, to check the class balance.
data['emotion'].value_counts()

emotion
anger           5592
happiness       5368
anxiety/fear    5360
sadness         5332
disgust         5268
neutral         4664
Name: count, dtype: int64

In [10]:
# encoded_df = pd.get_dummies(data['emotion'])
# df_encoded = pd.concat([data, encoded_df], axis=1)
# df_encoded.head()

In [11]:
# encoded_df = pd.get_dummies(data['age'])
# df_encoded = pd.concat([df_encoded, encoded_df], axis=1)
# df_encoded.head()
# The one-hot encoding above is disabled; the frame is used as-is.
# NOTE: plain assignment binds a second name to the same DataFrame (no copy),
# so any in-place change made through `df_encoded` is also visible via `data`.
df_encoded = data

In [12]:
# Keep only the feature columns and the gender label.
# The frame is derived from `data` with a non-mutating drop rather than
# `inplace=True`: `data` keeps its `age` / `emotion` columns, and the cell can be
# re-run without a KeyError for columns that were already removed.
df_encoded = data.drop(columns=['age', 'emotion'])

In [13]:
# Show the columns from position 2860 onward; only `gender` should remain
# after the feature columns.
# NOTE(review): 2860 is a hard-coded feature count — update it if the feature
# extraction changes.
df_encoded.iloc[:,2860:].head()

Unnamed: 0,gender
0,1
1,1
2,1
3,1
4,1


In [14]:
# Sanity-check the frame size: (rows, feature columns + the gender column).
df_encoded.shape

(31584, 2861)

In [16]:
# Feature matrix: every column except the gender label, as a NumPy array.
X = df_encoded.drop(columns='gender').to_numpy()

In [17]:
# Gender labels as a 2-D, single-column array.
# NOTE(review): the later cells visible here use `y_gender`, not `y` — confirm
# whether this variable is still needed.
y = df_encoded[['gender']].values

In [18]:
# X = df_encoded.iloc[:, :-13].values  # Audio features
# Gender labels as a single-column 2-D array. The column is selected by name
# instead of by position (`iloc[:, -1:]`) so that a change in column order cannot
# silently substitute a different column as the training target.
y_gender = df_encoded[['gender']].values  # Gender labels
# y_emotion = df_encoded.iloc[:, -12:-6].values  # Emotion labels
# y_age = df_encoded.iloc[:, -6:].values  # Age labels

In [19]:
# y_emotion

In [20]:
# Inspect the label array (one 0/1 value per row).
y_gender

array([[1],
       [1],
       [1],
       ...,
       [0],
       [0],
       [0]])

In [21]:
# y_age

In [22]:
# First split: 70 % of the rows for training, 30 % held out.
X_train, X_val_test, y_gender_train, y_gender_val_test = train_test_split(X, y_gender, test_size=0.3, random_state=30)
# Second split: halve the held-out rows. train_test_split returns
# (first part, second part) per array, so the first half is used as the test
# set and the second half as the validation set.
X_test, X_val, y_gender_test, y_gender_val = train_test_split(X_val_test, y_gender_val_test, test_size=0.5, random_state=30)


In [23]:
# MLP trunk: input -> Dense(2048) -> BatchNorm -> Dense(1024) -> Dense(512)
# -> Dense(64) -> Dropout(0.25). All Dense layers use ReLU.
input_layer = Input(shape=(X_train.shape[1],))

# Widest layer first, normalised before the narrower layers.
hidden_layer = Dense(2048, activation='relu')(input_layer)
hidden_layer = BatchNormalization()(hidden_layer)

# Progressively narrower fully connected layers.
for units in (1024, 512, 64):
    hidden_layer = Dense(units, activation='relu')(hidden_layer)

# Regularise the final hidden representation.
hidden_layer = Dropout(0.25)(hidden_layer)

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-09-07 23:47:34.043964: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-09-07 23:47:34.043994: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [24]:
# Output head: one sigmoid unit per label column (`y_gender_train.shape[1]`).
# The layer is named 'gender' so the loss / metric / target dicts can refer to
# it by key.
gender_output = Dense(y_gender_train.shape[1], activation='sigmoid', name='gender')(hidden_layer)

In [25]:
# emotion_output = Dense(y_emotion_train.shape[1], activation='softmax', name='emotion')(hidden_layer)

In [26]:
# age_output = Dense(y_age_train.shape[1], activation='softmax', name='age')(hidden_layer)

In [27]:
# Assemble the functional model from the input tensor to the single gender output.
model = Model(inputs=input_layer, outputs=[gender_output])


In [28]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2860)]            0         
                                                                 
 dense (Dense)               (None, 2048)              5859328   
                                                                 
 batch_normalization (BatchN  (None, 2048)             8192      
 ormalization)                                                   
                                                                 
 dense_1 (Dense)             (None, 1024)              2098176   
                                                                 
 dense_2 (Dense)             (None, 512)               524800    
                                                                 
 dense_3 (Dense)             (None, 64)                32832     
                                                             

In [29]:
# SGD with Nesterov momentum and a small per-update learning-rate decay.
# NOTE(review): `decay` is a legacy optimizer argument, and SGD is imported from
# `keras` while the layers come from `tensorflow.keras` — confirm both against
# the installed Keras / TensorFlow version.
opt = SGD(learning_rate=0.0005, decay=1e-6, momentum=0.9, nesterov=True)

# Loss and metric are keyed by the output layer's name ('gender').
model.compile(
    optimizer=opt,
    loss={'gender': 'binary_crossentropy'},
    metrics={'gender': 'accuracy'},
)

In [30]:
# Train for a fixed 200 epochs in mini-batches of 32, reporting loss and
# accuracy on the validation split after every epoch. Targets are keyed by the
# output layer's name ('gender').
model.fit(
    X_train,
    {'gender': y_gender_train},
    validation_data=(X_val, {'gender': y_gender_val}),
    epochs=200,
    batch_size=32,
)

Epoch 1/200


2023-09-07 23:47:34.824023: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-09-07 23:47:35.081711: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-09-07 23:47:45.215710: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

<keras.callbacks.History at 0x2dbbbb6d0>

In [31]:
# Persist the trained gender classifier in the `.keras` format.
# The parent directory is created first (no-op when it already exists) so that a
# missing output folder cannot make the save fail after the long training run.
model_path = Path("../../pre-trained_models/Individual Models/gender_MLP_model.keras")
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save(str(model_path))

In [33]:
# Evaluate the model
losses = model.evaluate(X_test, {'gender': y_gender_test,
                                 'emotion': y_emotion_test,
                                 'age': y_age_test})
print('Gender Loss:', losses[1])
print('Emotion Loss:', losses[2])
print('Age Loss:', losses[3])
print('Gender Accuracy:', losses[4])
print('Emotion Accuracy:', losses[5])
print('Age Accuracy:', losses[6])


NameError: name 'y_emotion_test' is not defined

In [None]:
from sklearn.metrics import confusion_matrix, f1_score
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Model outputs (sigmoid activations of the 'gender' head) for the test features.
y_pred = model.predict(X_test)

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold.
# The model has only the gender head, so `y_pred[0]` would select the first
# *sample* rather than the first output; the whole prediction array is
# thresholded instead. `np.asarray(...).reshape(-1, 1)` gives one row per sample
# whether `predict` returned a bare array or a one-element list of arrays.
# The emotion / age predictions of the former multi-task version are dropped
# because this model has no such outputs.
gender_prob = np.asarray(y_pred).reshape(-1, 1)
y_gender_pred = (gender_prob > 0.5).astype(int)


In [None]:
# Distinct classes the model actually predicted (a single value would mean the
# classifier collapsed to one class). `y_age_pred` does not exist for this
# gender-only model, so the gender predictions are inspected instead.
set(y_gender_pred.ravel().tolist())

In [None]:
# Shape of the true test labels, for comparison with the predictions.
y_gender_test.shape

In [None]:
# Shape of the predicted labels; it should match the true-label shape.
y_gender_pred.shape

In [None]:
# Confusion matrix for the gender classifier (rows = true, columns = predicted).
# `labels=[0, 1]` pins the row/column order to the encoding used earlier
# (0 = female, 1 = male) and guarantees a 2x2 matrix even if one class is never
# predicted. The emotion / age matrices were removed: their inputs are not
# defined for this gender-only model.
gender_cm = confusion_matrix(y_gender_test.ravel(), y_gender_pred.ravel(), labels=[0, 1])

In [None]:
# Macro-averaged F1 over the two gender classes. The emotion / age scores were
# removed: their inputs are not defined for this gender-only model.
gender_f1 = f1_score(y_gender_test.ravel(), y_gender_pred.ravel(), average='macro')

In [None]:
# Tick labels in class-index order. Gender was encoded as female -> 0 and
# male -> 1, and the confusion matrix orders its rows/columns by ascending
# class, so index 0 must read 'Female' (the previous ['Male', 'Female'] order
# swapped the two classes on the plot).
labels_gender = ['Female', 'Male']

# Only the gender matrix exists for this model; the emotion and age panels of
# the former multi-task version referenced undefined matrices.
fig, ax = plt.subplots(figsize=(7, 6))
sns.heatmap(gender_cm, annot=True, cmap='Blues', fmt='d', ax=ax)
# Heatmap cells are centred at integer + 0.5.
ax.set_xticks(np.arange(len(labels_gender)) + 0.5)
ax.set_yticks(np.arange(len(labels_gender)) + 0.5)
ax.set_xticklabels(labels_gender)
ax.set_yticklabels(labels_gender)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Gender Confusion Matrix')

plt.tight_layout()
plt.show()

In [None]:
# Report the macro F1 of the gender classifier; the emotion / age scores are
# not computed for this gender-only model.
print('Gender F1 Score:', gender_f1)