In [1]:
import os
import sys
import json
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Ensure the API folder is accessible for importing
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

from API.model.LogisticRegression import LogisticRegression


In [2]:
# Load datasets
train_path = r"..\API\data\processed\train_data.csv"
test_path = r"..\API\data\processed\test_data.csv"

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

# Split features and labels
X_train = train.drop(columns=["class"]).values
y_train = train["class"].values

X_test = test.drop(columns=["class"]).values
y_test = test["class"].values


In [3]:
param_grid = {
    "C": [0.01, 0.1, 1.0, 10.0, 100.0],
    "max_iter": [100, 300, 500, 1000]
}

best_score = 0
best_params = {}


In [11]:
for C_val in param_grid["C"]:
    for max_iter_val in param_grid["max_iter"]:
        print(f"Training with C={C_val}, max_iter={max_iter_val}...")

        # Initialize model
        model = LogisticRegression(C=C_val, max_iter=max_iter_val)
        model.train(X_train, y_train)

        # Predict
        y_pred = model.predict(X_test)

        # Evaluate
        acc = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average="weighted")
        recall = recall_score(y_test, y_pred, average="weighted")
        f1 = f1_score(y_test, y_pred, average="weighted")

        print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}\n")

        if acc > best_score:
            best_score = acc
            best_params = {"C": C_val, "max_iter": max_iter_val}


Training with C=0.01, max_iter=100...
Accuracy: 0.8855, Precision: 0.8855, Recall: 0.8855, F1: 0.8854

Training with C=0.01, max_iter=300...
Accuracy: 0.8855, Precision: 0.8855, Recall: 0.8855, F1: 0.8854

Training with C=0.01, max_iter=500...
Accuracy: 0.8855, Precision: 0.8855, Recall: 0.8855, F1: 0.8854

Training with C=0.01, max_iter=1000...
Accuracy: 0.8855, Precision: 0.8855, Recall: 0.8855, F1: 0.8854

Training with C=0.1, max_iter=100...
Accuracy: 0.8867, Precision: 0.8868, Recall: 0.8867, F1: 0.8866

Training with C=0.1, max_iter=300...
Accuracy: 0.8867, Precision: 0.8868, Recall: 0.8867, F1: 0.8866

Training with C=0.1, max_iter=500...
Accuracy: 0.8867, Precision: 0.8868, Recall: 0.8867, F1: 0.8866

Training with C=0.1, max_iter=1000...
Accuracy: 0.8867, Precision: 0.8868, Recall: 0.8867, F1: 0.8866

Training with C=1.0, max_iter=100...
Accuracy: 0.8871, Precision: 0.8872, Recall: 0.8871, F1: 0.8870

Training with C=1.0, max_iter=300...
Accuracy: 0.8871, Precision: 0.8872, Re

In [12]:
best_hparam_path = r"..\API\model\Hyperparams\logistic_regression_hparam.json"
os.makedirs(os.path.dirname(best_hparam_path), exist_ok=True)

with open(best_hparam_path, "w") as f:
    json.dump(best_params, f, indent=4)

print(f"Best hyperparameters saved to: {best_hparam_path}")
print(f"Best Parameters: {best_params} | Accuracy: {best_score:.4f}")


Best hyperparameters saved to: ..\API\model\Hyperparams\logistic_regression_hparam.json
Best Parameters: {'C': 10.0, 'max_iter': 100} | Accuracy: 0.8875


In [9]:
# Re-import the model class to ensure latest version is used
from API.model.LogisticRegression import LogisticRegression

# Initialize the model (which now uses best hyperparameters from JSON)
model = LogisticRegression()
model.train(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation metrics
from sklearn.metrics import classification_report, confusion_matrix

acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average="weighted")
recall = recall_score(y_test, y_pred, average="weighted")
f1 = f1_score(y_test, y_pred, average="weighted")

print(f"Final Evaluation with Best Hyperparameters:")
print(f"Test Accuracy: {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Final Evaluation with Best Hyperparameters:
Test Accuracy: 0.8871
Precision: 0.8872
Recall: 0.8871
F1 Score: 0.8870

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.86      0.88      2368
           1       0.88      0.91      0.89      2671

    accuracy                           0.89      5039
   macro avg       0.89      0.89      0.89      5039
weighted avg       0.89      0.89      0.89      5039

Confusion Matrix:
 [[2046  322]
 [ 247 2424]]
