In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler  # Using RandomOverSampler
from collections import Counter

# Load the dataset
df = pd.read_csv("Raw_MAIN_DATASET_IMPUTATION_after_deleting_stai_badge_activity_step_goal.csv")

# Preprocess the data
df = df.drop(['id', 'date'], axis=1)

# Separate features and target
X = df.drop('Emotions', axis=1)
y = df['Emotions']

# Convert categorical target to numeric using label encoding
le = LabelEncoder()
y = le.fit_transform(y)

# Check the initial class distribution
print("Class distribution before oversampling:", Counter(y))

# Oversampling to handle class imbalance using RandomOverSampler
ros = RandomOverSampler(random_state=42)
X, y = ros.fit_resample(X, y)

# Check the class distribution after oversampling
print("Class distribution after oversampling:", Counter(y))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Feature Selection
selector = SelectKBest(f_classif, k=60)
X_train = selector.fit_transform(X_train, y_train)
X_val = selector.transform(X_val)
X_test = selector.transform(X_test)

# Define the model
model = Sequential([
    Dense(512, activation='linear', input_shape=(X_train.shape[1],)),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(256, activation='linear'),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.4),
    Dense(len(np.unique(y)), activation='softmax')  # Output layer (number of unique emotions)
])

# Compile the model with Adam optimizer
optimizer = Adam(learning_rate=0.00005)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, 
                    epochs=400,  # Increased epochs
                    validation_data=(X_val, y_val), 
                    batch_size=32, 
                    callbacks=[early_stopping])

# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train, y_train)
val_loss, val_accuracy = model.evaluate(X_val, y_val)
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Make predictions on the test set
y_test_pred = model.predict(X_test)
y_test_pred_classes = np.argmax(y_test_pred, axis=1)

# Convert numeric predictions back to emotion labels
y_test_pred_labels = le.inverse_transform(y_test_pred_classes)
y_test_true_labels = le.inverse_transform(y_test)

# Calculate and print accuracy
test_acc = accuracy_score(y_test_true_labels, y_test_pred_labels)
print(f"Test Accuracy with emotion labels: {test_acc * 100:.2f}%")

2024-09-24 19:17:57.614164: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-24 19:17:57.691610: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-24 19:17:57.761716: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-24 19:17:57.789332: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-24 19:17:57.831786: I tensorflow/core/platform/cpu_feature_guar

Class distribution before oversampling: Counter({0: 5443, 3: 587, 5: 480, 4: 451, 8: 207, 7: 204, 6: 59, 1: 2, 2: 1})
Class distribution after oversampling: Counter({0: 5443, 3: 5443, 7: 5443, 8: 5443, 4: 5443, 5: 5443, 6: 5443, 1: 5443, 2: 5443})


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-09-24 19:18:05.590650: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/400
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 28ms/step - accuracy: 0.2635 - loss: 2.6603 - val_accuracy: 0.5227 - val_loss: 1.3624
Epoch 2/400
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 26ms/step - accuracy: 0.4179 - loss: 1.8901 - val_accuracy: 0.5791 - val_loss: 1.2079
Epoch 3/400
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 25ms/step - accuracy: 0.4629 - loss: 1.6434 - val_accuracy: 0.6078 - val_loss: 1.1276
Epoch 4/400
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 25ms/step - accuracy: 0.4866 - loss: 1.5285 - val_accuracy: 0.6295 - val_loss: 1.0671
Epoch 5/400
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - accuracy: 0.5117 - loss: 1.4161 - val_accuracy: 0.6574 - val_loss: 1.0152
Epoch 6/400
[1m980/980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - accuracy: 0.5336 - loss: 1.3340 - val_accuracy: 0.6683 - val_loss: 0.9764
Epoch 7/40