In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler  # Using RandomOverSampler
from collections import Counter

# Load the dataset
df = pd.read_csv("Raw_MAIN_DATASET_IMPUTATION_after_deleting_stai_badge_activity_step_goal.csv")

# Preprocess the data
df = df.drop(['id', 'date'], axis=1)

# Separate features and target
X = df.drop('Emotions', axis=1)
y = df['Emotions']

# Convert categorical target to numeric using label encoding
le = LabelEncoder()
y = le.fit_transform(y)

# Check the initial class distribution
print("Class distribution before oversampling:", Counter(y))

# Oversampling to handle class imbalance using RandomOverSampler
ros = RandomOverSampler(random_state=42)
X, y = ros.fit_resample(X, y)

# Check the class distribution after oversampling
print("Class distribution after oversampling:", Counter(y))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Feature Selection
selector = SelectKBest(f_classif, k=60)
X_train = selector.fit_transform(X_train, y_train)
X_val = selector.transform(X_val)
X_test = selector.transform(X_test)

# Define the model
model = Sequential([
    Dense(512, activation='linear', input_shape=(X_train.shape[1],)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(256, activation='linear'),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(len(np.unique(y)), activation='softmax')  # Output layer (number of unique emotions)
])

# Compile the model with Adam optimizer
optimizer = Adam(learning_rate=0.00005)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, 
                    epochs=600,  # Increased epochs
                    validation_data=(X_val, y_val), 
                    batch_size=32, 
                    callbacks=[early_stopping])

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train)
val_loss, val_accuracy = model.evaluate(X_val, y_val)
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Make predictions on the test set
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)

# Convert numeric predictions back to emotion labels
y_test_pred_labels = le.inverse_transform(y_test_pred_classes)
y_test_true_labels = le.inverse_transform(y_test)

# Calculate and print accuracy
test_acc = accuracy_score(y_test_true_labels, y_test_pred_labels)
print(f"Test Accuracy with emotion labels: {test_acc * 100:.2f}%")


2024-09-24 19:18:25.491243: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-24 19:18:25.570434: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-24 19:18:25.643910: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-24 19:18:25.665823: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-24 19:18:25.705821: I tensorflow/core/platform/cpu_feature_guar

Class distribution before oversampling: Counter({0: 5443, 3: 587, 5: 480, 4: 451, 8: 207, 7: 204, 6: 59, 1: 2, 2: 1})
Class distribution after oversampling: Counter({0: 5443, 3: 5443, 7: 5443, 8: 5443, 4: 5443, 5: 5443, 6: 5443, 1: 5443, 2: 5443})


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-09-24 19:18:33.360612: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 27ms/step - accuracy: 0.2681 - loss: 2.6107 - val_accuracy: 0.5304 - val_loss: 1.3980
Epoch 2/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 24ms/step - accuracy: 0.4305 - loss: 1.8353 - val_accuracy: 0.5820 - val_loss: 1.2252
Epoch 3/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 23ms/step - accuracy: 0.4633 - loss: 1.6448 - val_accuracy: 0.6185 - val_loss: 1.1267
Epoch 4/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 24ms/step - accuracy: 0.4952 - loss: 1.5095 - val_accuracy: 0.6415 - val_loss: 1.0612
Epoch 5/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 24ms/step - accuracy: 0.5084 - loss: 1.4209 - val_accuracy: 0.6611 - val_loss: 1.0057
Epoch 6/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - accuracy: 0.5345 - loss: 1.3287 - val_accuracy: 0.6759 - val_loss: 0.9631
Epoch 7/60

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler  # Using RandomOverSampler
from collections import Counter
from sklearn.metrics import classification_report, precision_score, f1_score, confusion_matrix

# Load the dataset
df = pd.read_csv("Raw_MAIN_DATASET_IMPUTATION_after_deleting_stai_badge_activity_step_goal.csv")

# Preprocess the data
df = df.drop(['id', 'date'], axis=1)

# Separate features and target
X = df.drop('Emotions', axis=1)
y = df['Emotions']

# Convert categorical target to numeric using label encoding
le = LabelEncoder()
y = le.fit_transform(y)

# Check the initial class distribution
print("Class distribution before oversampling:", Counter(y))

# Oversampling to handle class imbalance using RandomOverSampler
ros = RandomOverSampler(random_state=42)
X, y = ros.fit_resample(X, y)

# Check the class distribution after oversampling
print("Class distribution after oversampling:", Counter(y))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Feature Selection
selector = SelectKBest(f_classif, k=60)
X_train = selector.fit_transform(X_train, y_train)
X_val = selector.transform(X_val)
X_test = selector.transform(X_test)

# Define the model
model = Sequential([
    Dense(512, activation='linear', input_shape=(X_train.shape[1],)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(256, activation='linear'),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(len(np.unique(y)), activation='softmax')  # Output layer (number of unique emotions)
])

# Compile the model with Adam optimizer
optimizer = Adam(learning_rate=0.00005)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, 
                    epochs=600,  # Increased epochs
                    validation_data=(X_val, y_val), 
                    batch_size=32, 
                    callbacks=[early_stopping])

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train)
val_loss, val_accuracy = model.evaluate(X_val, y_val)
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Make predictions on the test set
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)

# Convert numeric predictions back to emotion labels
y_test_pred_labels = le.inverse_transform(y_test_pred_classes)
y_test_true_labels = le.inverse_transform(y_test)

# Calculate and print accuracy
test_acc = accuracy_score(y_test_true_labels, y_test_pred_labels)
print(f"Test Accuracy with emotion labels: {test_acc * 100:.2f}%")
precision = precision_score(y_test_true_labels, y_test_pred_labels, average='weighted')  # Weighted precision
f1 = f1_score(y_test_true_labels, y_test_pred_labels, average='weighted')  # Weighted F1 score

print(f"Weighted Precision: {precision * 100:.2f}%")
print(f"Weighted F1 Score: {f1 * 100:.2f}%")

# Detailed classification report
report = classification_report(y_test_true_labels, y_test_pred_labels)
print("\nClassification Report:\n", report)

# Confusion matrix to calculate TP and TN for each emotion class
conf_matrix = confusion_matrix(y_test_true_labels, y_test_pred_labels)
print("\nConfusion Matrix:\n", conf_matrix)

# Calculate TP, TN, FP, FN for each class
n_classes = len(le.classes_)
for idx, emotion in enumerate(le.classes_):
    # True Positives (TP): Diagonal elements
    TP = conf_matrix[idx, idx]

    # False Positives (FP): Sum of the current column except the diagonal element
    FP = conf_matrix[:, idx].sum() - TP

    # False Negatives (FN): Sum of the current row except the diagonal element
    FN = conf_matrix[idx, :].sum() - TP

    # True Negatives (TN): Total samples - (TP + FP + FN)
    TN = conf_matrix.sum() - (TP + FP + FN)

    print(f"Emotion: {emotion}")
    print(f"True Positives (TP): {TP}")
    print(f"True Negatives (TN): {TN}")
    print(f"False Positives (FP): {FP}")
    print(f"False Negatives (FN): {FN}")
    print("-" * 30)


2024-10-04 10:41:12.492720: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-04 10:41:12.573866: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-04 10:41:12.700507: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-04 10:41:12.724436: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-04 10:41:12.786765: I tensorflow/core/platform/cpu_feature_guar

Class distribution before oversampling: Counter({0: 5443, 3: 587, 5: 480, 4: 451, 8: 207, 7: 204, 6: 59, 1: 2, 2: 1})
Class distribution after oversampling: Counter({0: 5443, 3: 5443, 7: 5443, 8: 5443, 4: 5443, 5: 5443, 6: 5443, 1: 5443, 2: 5443})


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-10-04 10:41:19.231032: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 32ms/step - accuracy: 0.2734 - loss: 2.5893 - val_accuracy: 0.5131 - val_loss: 1.3697
Epoch 2/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 31ms/step - accuracy: 0.4249 - loss: 1.8404 - val_accuracy: 0.5773 - val_loss: 1.2007
Epoch 3/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 32ms/step - accuracy: 0.4648 - loss: 1.6500 - val_accuracy: 0.6183 - val_loss: 1.1102
Epoch 4/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 30ms/step - accuracy: 0.4975 - loss: 1.4844 - val_accuracy: 0.6401 - val_loss: 1.0499
Epoch 5/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 32ms/step - accuracy: 0.5126 - loss: 1.4009 - val_accuracy: 0.6545 - val_loss: 1.0005
Epoch 6/600
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 32ms/step - accuracy: 0.5331 - loss: 1.3305 - val_accuracy: 0.6724 - val_loss: 0.9584
Epoch 7/60