In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    log_loss,
    roc_curve,
    auc,
)
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from sklearn.svm import SVC

In [3]:
# Define the base directory for saving results
base_dir = "K_FOLD/SVM/"
os.makedirs(base_dir, exist_ok=True)  # Create the directory if it doesn't exist

In [4]:
# Load dataset
df = pd.read_csv("features.csv")  # Replace with your actual file

In [5]:
# Separate features and labels
X = df.iloc[:, 2:].values  # Feature vectors
y = df.iloc[:, 1].values   # Labels (man/woman)

In [6]:
# Encode labels to binary (0 and 1)
label_encoder = LabelBinarizer()
y = label_encoder.fit_transform(y).ravel()

In [7]:
# Standardize features (important for SVM)
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
# Initialize KFold for 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [9]:
# Initialize lists to store metrics for each fold
accuracies = []
precisions = []
recalls = []
f1_scores = []
train_losses = []
test_losses = []
fprs = []
tprs = []
auc_scores = []

In [10]:
# Perform 5-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Create and train the SVM model
    model = SVC(kernel="linear", probability=True, random_state=42)  # You can change the kernel if needed
    model.fit(X_train, y_train)

    # Predictions
    y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probabilities for the positive class
    y_pred = model.predict(X_test)  # Binary predictions

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

    # Calculate precision, recall, and F1-score
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

    # Calculate training loss (log loss)
    train_loss = log_loss(y_train, model.predict_proba(X_train)[:, 1])
    train_losses.append(train_loss)

    # Calculate test loss (log loss)
    test_loss = log_loss(y_test, y_pred_proba)
    test_losses.append(test_loss)

    # Calculate ROC curve and AUC
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    fprs.append(fpr)
    tprs.append(tpr)
    auc_scores.append(roc_auc)

    # Print results for each fold
    print(f"Fold {fold} Results:")
    print(f"Test Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"Training Loss: {train_loss:.4f}")
    print(f"Test Loss: {test_loss:.4f}")
    print(f"AUC: {roc_auc:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
    print("-" * 50)

    # Save classification report for this fold to a text file
    with open(os.path.join(base_dir, f"fold_{fold}_classification_report.txt"), "w") as f:
        f.write(f"Fold {fold} Results:\n")
        f.write(f"Test Accuracy: {accuracy:.2f}\n")
        f.write(f"Precision: {precision:.4f}\n")
        f.write(f"Recall: {recall:.4f}\n")
        f.write(f"F1-Score: {f1:.4f}\n")
        f.write(f"Training Loss: {train_loss:.4f}\n")
        f.write(f"Test Loss: {test_loss:.4f}\n")
        f.write(f"AUC: {roc_auc:.4f}\n")
        f.write("Classification Report:\n")
        f.write(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
        f.write("-" * 50 + "\n")

Fold 1 Results:
Test Accuracy: 0.89
Precision: 0.8923
Recall: 0.8744
F1-Score: 0.8832
Training Loss: 0.0126
Test Loss: 0.2833
AUC: 0.9505
Classification Report:
              precision    recall  f1-score   support

         man       0.88      0.90      0.89       202
       women       0.89      0.87      0.88       199

    accuracy                           0.89       401
   macro avg       0.89      0.89      0.89       401
weighted avg       0.89      0.89      0.89       401

--------------------------------------------------


KeyboardInterrupt: 

In [None]:
# Plot ROC Curve
plt.figure(figsize=(8, 6))
for i in range(5):
    plt.plot(fprs[i], tprs[i], lw=2, label=f"Fold {i+1} (AUC = {auc_scores[i]:.2f})")
plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve (5-Fold Cross-Validation)")
plt.legend(loc="lower right")
plt.grid()
plt.savefig(os.path.join(base_dir, "roc_curve.png"))
plt.close()

In [None]:
# Calculate average metrics across all folds
avg_accuracy = np.mean(accuracies)
avg_precision = np.mean(precisions)
avg_recall = np.mean(recalls)
avg_f1 = np.mean(f1_scores)
avg_train_loss = np.mean(train_losses)
avg_test_loss = np.mean(test_losses)
avg_auc = np.mean(auc_scores)

In [None]:
# Save results to a text file
with open(os.path.join(base_dir, "K_FOLD_results.txt"), "w") as f:
    f.write("Average Results Across 5 Folds:\n")
    f.write(f"Average Test Accuracy: {avg_accuracy:.2f}\n")
    f.write(f"Average Precision: {avg_precision:.4f}\n")
    f.write(f"Average Recall: {avg_recall:.4f}\n")
    f.write(f"Average F1-Score: {avg_f1:.4f}\n")
    f.write(f"Average Training Loss: {avg_train_loss:.4f}\n")
    f.write(f"Average Test Loss: {avg_test_loss:.4f}\n")
    f.write(f"Average AUC: {avg_auc:.4f}\n")