In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load the processed data from your pipeline
#
try:
    X_train = pd.read_csv('X_train_final.csv')
    y_train = pd.read_csv('y_train_final.csv')
    X_test = pd.read_csv('X_test_final.csv')
    y_test = pd.read_csv('y_test_final.csv')

    print("Data loaded successfully!")
except FileNotFoundError:
    print("Error: Make sure the files 'X_train_final.csv', 'y_train_final.csv',")
    print("'X_test_final.csv', and 'y_test_final.csv' are in the same directory.")

# Get the number of features (input dimensions) for the model
# This is crucial for building the first layer
input_dim = X_train.shape[1]
print(f"Number of features (input dimension): {input_dim}")

Data loaded successfully!
Number of features (input dimension): 12


In [2]:
# Initialize the Sequential model
model = Sequential()

# Add the Input and First Hidden Layer
# 'input_shape' must match the number of features from Step 1
model.add(Dense(units=64, activation='relu', input_shape=(input_dim,)))
model.add(Dropout(0.3))  # Dropout helps prevent overfitting

# Add a Second Hidden Layer
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))

# Add the Output Layer
# We use 1 unit and 'sigmoid' activation for binary classification
# (it will output a probability between 0 and 1)
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
# 'adam' is an efficient optimizer
# 'binary_crossentropy' is the standard loss function for 2-class problems
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy']) # Monitor accuracy as required

# Print a summary of the model's architecture
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
# Train the model
# 'epochs' is the number of times to go through the entire dataset
# 'batch_size' is the number of samples to process before updating weights
print("\nStarting model training...")

history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test)  # Use test data for validation
)

print("Model training complete!")


Starting model training...
Epoch 1/50
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7656 - loss: 0.4806 - val_accuracy: 0.8334 - val_loss: 0.3815
Epoch 2/50
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8460 - loss: 0.3689 - val_accuracy: 0.8339 - val_loss: 0.3771
Epoch 3/50
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8460 - loss: 0.3604 - val_accuracy: 0.8326 - val_loss: 0.3762
Epoch 4/50
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8436 - loss: 0.3691 - val_accuracy: 0.8343 - val_loss: 0.3697
Epoch 5/50
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8493 - loss: 0.3588 - val_accuracy: 0.8335 - val_loss: 0.3735
Epoch 6/50
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8510 - loss: 0.3581 - val_accuracy: 0.8334 - val_loss: 0.3704


In [4]:
# 1. Get predictions (as probabilities)
y_pred_probs = model.predict(X_test)

# 2. Convert probabilities to binary class labels (0 or 1)
y_pred = (y_pred_probs > 0.5).astype(int)

# 3. Calculate and print the required metrics
print("\n--- Model Evaluation Results ---")

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classification Report (includes Precision, Recall, F1-Score)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Not Depressed (0)', 'Depressed (1)']))

[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

--- Model Evaluation Results ---
Accuracy: 82.24%

Confusion Matrix:
[[1692  651]
 [ 340 2898]]

Classification Report:
                   precision    recall  f1-score   support

Not Depressed (0)       0.83      0.72      0.77      2343
    Depressed (1)       0.82      0.89      0.85      3238

         accuracy                           0.82      5581
        macro avg       0.82      0.81      0.81      5581
     weighted avg       0.82      0.82      0.82      5581

