In [3]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    log_loss,
    roc_curve,
    auc,
)
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [4]:
# Define the base directory for saving results
base_dir = "K_FOLD/NB/"
os.makedirs(base_dir, exist_ok=True)  # Create the directory if it doesn't exist

In [5]:
# Load dataset
df = pd.read_csv("features.csv")  # Replace with your actual file

In [4]:
# Separate features and labels
X = df.iloc[:, 2:].values  # Feature vectors
y = df.iloc[:, 1].values   # Labels (man/woman)

In [5]:
# Encode labels to binary (0 and 1)
label_encoder = LabelBinarizer()
y = label_encoder.fit_transform(y).ravel()

In [6]:
# Initialize KFold for 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [7]:
# Initialize lists to store metrics for each fold
accuracies = []
precisions = []
recalls = []
f1_scores = []
train_losses = []
test_losses = []
fprs = []
tprs = []
auc_scores = []

In [8]:
# Plotting setup
plt.figure(figsize=(18, 6))

<Figure size 1800x600 with 0 Axes>

<Figure size 1800x600 with 0 Axes>

In [9]:
def create_model(input_dim):
    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])
    return model

In [10]:
# Perform 5-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Create and train the neural network model
    model = create_model(X_train.shape[1])
    history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=0)

    # Plot accuracy and loss vs. epoch for this fold
    plt.figure(figsize=(8, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history["accuracy"], label="Train Accuracy")
    plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title(f"Fold {fold}: Accuracy vs. Epoch")
    plt.legend()
    plt.grid()

    plt.subplot(1, 2, 2)
    plt.plot(history.history["loss"], label="Train Loss")
    plt.plot(history.history["val_loss"], label="Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title(f"Fold {fold}: Loss vs. Epoch")
    plt.legend()
    plt.grid()

    plt.tight_layout()
    plt.savefig(os.path.join(base_dir, f"fold_{fold}_accuracy_loss.png"))  # Save the plot as an image
    plt.close()  # Close the plot to free up memory

    # Predictions
    y_pred_proba = model.predict(X_test).ravel()
    y_pred = (y_pred_proba > 0.5).astype(int)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

    # Calculate precision, recall, and F1-score
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

    # Calculate training loss (log loss)
    train_loss = log_loss(y_train, model.predict(X_train))
    train_losses.append(train_loss)

    # Calculate test loss (log loss)
    test_loss = log_loss(y_test, y_pred_proba)
    test_losses.append(test_loss)

    # Calculate ROC curve and AUC
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    fprs.append(fpr)
    tprs.append(tpr)
    auc_scores.append(roc_auc)

    # Print results for each fold
    print(f"Fold {fold} Results:")
    print(f"Test Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"Training Loss: {train_loss:.4f}")
    print(f"Test Loss: {test_loss:.4f}")
    print(f"AUC: {roc_auc:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
    print("-" * 50)

    # Save classification report for this fold to a text file
    with open(os.path.join(base_dir, f"fold_{fold}_classification_report.txt"), "w") as f:
        f.write(f"Fold {fold} Results:\n")
        f.write(f"Test Accuracy: {accuracy:.2f}\n")
        f.write(f"Precision: {precision:.4f}\n")
        f.write(f"Recall: {recall:.4f}\n")
        f.write(f"F1-Score: {f1:.4f}\n")
        f.write(f"Training Loss: {train_loss:.4f}\n")
        f.write(f"Test Loss: {test_loss:.4f}\n")
        f.write(f"AUC: {roc_auc:.4f}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
        f.write("-" * 50 + "\n")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Fold 1 Results:
Test Accuracy: 0.87
Precision: 0.8592
Recall: 0.8894
F1-Score: 0.8741
Training Loss: 0.0000
Test Loss: 1.0015
AUC: 0.9456
Classification Report:
              precision    recall  f1-score   support

         man       0.89      0.86      0.87       202
       women       0.86      0.89      0.87       199

    accuracy                           0.87       401
   macro avg       0.87      0.87      0.87       401
weighted avg       0.87      0.87      0.87       401

--------------------------------------------------


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Fold 2 Results:
Test Accuracy: 0.89
Precision: 0.8585
Recall: 0.9239
F1-Score: 0.8900
Training Loss: 0.0009
Test Loss: 0.9317
AUC: 0.9451
Classification Report:
              precision    recall  f1-score   support

         man       0.92      0.85      0.89       204
       women       0.86      0.92      0.89       197

    accuracy                           0.89       401
   macro avg       0.89      0.89      0.89       401
weighted avg       0.89      0.89      0.89       401

--------------------------------------------------


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Fold 3 Results:
Test Accuracy: 0.86
Precision: 0.8610
Recall: 0.8519
F1-Score: 0.8564
Training Loss: 0.0000
Test Loss: 1.4098
AUC: 0.9285
Classification Report:
              precision    recall  f1-score   support

         man       0.87      0.88      0.87       211
       women       0.86      0.85      0.86       189

    accuracy                           0.86       400
   macro avg       0.86      0.86      0.86       400
weighted avg       0.86      0.86      0.86       400

--------------------------------------------------


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Fold 4 Results:
Test Accuracy: 0.88
Precision: 0.8744
Recall: 0.8832
F1-Score: 0.8788
Training Loss: 0.0006
Test Loss: 1.6168
AUC: 0.9290
Classification Report:
              precision    recall  f1-score   support

         man       0.89      0.88      0.88       203
       women       0.87      0.88      0.88       197

    accuracy                           0.88       400
   macro avg       0.88      0.88      0.88       400
weighted avg       0.88      0.88      0.88       400

--------------------------------------------------


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Fold 5 Results:
Test Accuracy: 0.92
Precision: 0.9426
Recall: 0.8995
F1-Score: 0.9206
Training Loss: 0.0000
Test Loss: 0.8061
AUC: 0.9531
Classification Report:
              precision    recall  f1-score   support

         man       0.88      0.93      0.91       181
       women       0.94      0.90      0.92       219

    accuracy                           0.92       400
   macro avg       0.91      0.92      0.91       400
weighted avg       0.92      0.92      0.92       400

--------------------------------------------------


In [11]:
# Plot ROC Curve
plt.figure(figsize=(8, 6))
for i in range(5):
    plt.plot(fprs[i], tprs[i], lw=2, label=f"Fold {i+1} (AUC = {auc_scores[i]:.2f})")
plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve (5-Fold Cross-Validation)")
plt.legend(loc="lower right")
plt.grid()
plt.savefig(os.path.join(base_dir, "roc_curve.png"))
plt.close()

In [12]:
# Calculate average metrics across all folds
avg_accuracy = np.mean(accuracies)
avg_precision = np.mean(precisions)
avg_recall = np.mean(recalls)
avg_f1 = np.mean(f1_scores)
avg_train_loss = np.mean(train_losses)
avg_test_loss = np.mean(test_losses)
avg_auc = np.mean(auc_scores)

In [1]:
# Save results to a text file
with open(os.path.join(base_dir, "K_FOLD_NB_results.txt"), "w") as f:
    f.write("Average Results Across 5 Folds:\n")
    f.write(f"Average Test Accuracy: {avg_accuracy:.2f}\n")
    f.write(f"Average Precision: {avg_precision:.4f}\n")
    f.write(f"Average Recall: {avg_recall:.4f}\n")
    f.write(f"Average F1-Score: {avg_f1:.4f}\n")
    f.write(f"Average Training Loss: {avg_train_loss:.4f}\n")
    f.write(f"Average Test Loss: {avg_test_loss:.4f}\n")
    f.write(f"Average AUC: {avg_auc:.4f}\n")

NameError: name 'os' is not defined