In [1]:
"""
Logistic Regression - TensorFlow/Keras Implementation
Using Keras Sequential API for high-level model building.
"""

import numpy as np
import json
import sys
sys.path.append('../..')

# TensorFlow/Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


# Self created utilities
from utils.metrics import accuracy, precision, recall, f1_score, auc_score
from utils.performance import track_performance
from utils.visualization import (
    plot_cost_curve,
    plot_confusion_matrix,
    plot_roc_curve,
    plot_feature_importance
)

# Reproducibility: seed Python's `random`, NumPy and TensorFlow in one call.
# tf.random.set_seed() alone only seeds TensorFlow's own generator; depending on
# the Keras version, weight initialisers may draw from the other RNGs as well.
SEED = 113
tf.keras.utils.set_random_seed(SEED)

# Load preprocessed data. Per the upstream preprocessing step the features are
# already scaled and the training split is SMOTE-balanced to 50/50.
DATA_DIR = '../../data/processed/logistic_regression'
X_train = np.load(f'{DATA_DIR}/X_train.npy')
X_test = np.load(f'{DATA_DIR}/X_test.npy')
y_train = np.load(f'{DATA_DIR}/y_train.npy')
y_test = np.load(f'{DATA_DIR}/y_test.npy')

# Fail early, with a readable message, if the exported splits are inconsistent
# (otherwise the mismatch only surfaces later inside model.fit / evaluation).
assert X_train.shape[0] == y_train.shape[0], "X_train and y_train row counts differ"
assert X_test.shape[0] == y_test.shape[0], "X_test and y_test row counts differ"
assert X_train.shape[1] == X_test.shape[1], "train and test feature counts differ"

# Load metadata for feature names (explicit encoding so the read does not
# depend on the platform's default locale)
with open(f'{DATA_DIR}/preprocessing_info.json', encoding='utf-8') as f:
    meta = json.load(f)
feature_names = meta['feature_names']

# Quick summary of what was loaded: sizes and positive-class share per split
print(f"Training: {X_train.shape[0]:,} samples, {X_train.shape[1]} features")
print(f"Test: {X_test.shape[0]:,} samples")
print(f"Class balance - Train: {np.mean(y_train):.1%} fraud")
print(f"Class balance - Test: {np.mean(y_test):.1%} fraud")

Training: 454,902 samples, 30 features
Test: 56,962 samples
Class balance - Train: 50.0% fraud
Class balance - Test: 0.2% fraud


In [None]:
# Model Definition

# Hyperparameters, named so they are easy to find and tune.
# The values are chosen to match the other implementations being compared.
LEARNING_RATE = 0.1
EPOCHS = 1000

# Take the input width from the data instead of hard-coding it, so the cell
# keeps working if the preprocessing step adds or drops features.
n_features = X_train.shape[1]

# Keras Sequential API - simplest way to build a neural network
# Input layer + one Dense unit with sigmoid activation = logistic regression
model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compile: specify optimizer, loss, and metrics
# - plain SGD with the shared learning rate
# - binary_crossentropy is the standard loss for binary classification
model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Model Summary
model.summary()

# Training

# Train with performance tracking
with track_performance() as perf:
    history = model.fit(
        X_train, y_train,
        epochs=EPOCHS,
        batch_size=len(X_train),    # Full batch gradient descent (matches other implementations)
        shuffle=False,  # One batch holds every sample, so shuffling only adds per-epoch overhead to the timing
        verbose=0   # Suppress per-epoch output; a summary is printed below
    )

print("\nTraining complete!")
print(f"Time: {perf['time']:.2f} sec | Memory: {perf['memory']:.2f} MB")
print(f"Final loss: {history.history['loss'][-1]:.6f}")


Training complete!
Time: 52.61 sec | Memory: 108.13 MB
Final loss: 0.057443
