# Cleaned Data

In [None]:
import sys

# Put the parent directory at the front of the import path so that the
# `src` package imported below can be found (presumably it lives one level
# up from this notebook — confirm against the repo layout).
sys.path.insert(0, '..')

from src.data_loader import load_clean_data

# Location of the cleaned dataset, relative to the notebook's working directory.
clean_csv_path = '../Cleaned Data/fraudClean.csv'
df = load_clean_data(clean_csv_path)

# Show the first rows in full width, followed by the frame's dimensions.
head_text = df.head().to_string()
print(head_text)
print(df.shape)

# Prepare Training and Test Sets

In [None]:
from src.preprocessing import prepare_model_data

# prepare_model_data yields four objects, unpacked here as train features,
# test features, train labels and test labels (roles inferred from the
# variable names — confirm in src.preprocessing).
model_splits = prepare_model_data(df)
X_train_combined, X_test_combined, Y_train, Y_test = model_splits

# Preview the first rows of the training features without column truncation.
train_preview = X_train_combined.head()
print(train_preview.to_string())

# Anomaly Detection (IsolationForest)

In [None]:
# No model is fitted in this cell: prepare_model_data has already added an
# "isolation_forest" column to X_train_combined / X_test_combined, described
# in the original notebook as the IsolationForest anomaly score.
# NOTE(review): value_counts() is most informative for discrete values —
# confirm whether this column is a continuous score or a -1/1 style flag.
iso_column = X_train_combined["isolation_forest"]
print(iso_column.value_counts())

# Logistic Regression

In [None]:
from src.models import train_logistic_regression, evaluate_model
from src.visualization import plot_confusion_matrix

# Labels used for the evaluation report and the plot heading.
log_reg_name = "Logistic Regression"
log_reg_plot_title = "Logistic Regression – Confusion Matrix"

# Fit on the training split, then evaluate on the held-out split.
# log_accuracy keeps whatever evaluate_model returns (presumably the
# accuracy — confirm in src.models).
log_reg = train_logistic_regression(X_train_combined, Y_train)
log_accuracy = evaluate_model(
    log_reg,
    X_test_combined,
    Y_test,
    model_name=log_reg_name,
)

# Test-set predictions feed the confusion-matrix plot.
Y_pred_log_reg = log_reg.predict(X_test_combined)
plot_confusion_matrix(Y_test, Y_pred_log_reg, title=log_reg_plot_title)

# Decision Tree

In [None]:
from src.models import train_decision_tree

# Labels used for the evaluation report and the plot heading.
tree_name = "Decision Tree"
tree_plot_title = "Decision Tree – Confusion Matrix"

# Fit on the training split, then evaluate on the held-out split.
# tree_accuracy keeps whatever evaluate_model returns (presumably the
# accuracy — confirm in src.models).
decision_tree = train_decision_tree(X_train_combined, Y_train)
tree_accuracy = evaluate_model(
    decision_tree,
    X_test_combined,
    Y_test,
    model_name=tree_name,
)

# Test-set predictions feed the confusion-matrix plot.
y_pred_dt = decision_tree.predict(X_test_combined)
plot_confusion_matrix(Y_test, y_pred_dt, title=tree_plot_title)

# MLP Classifier

In [None]:
from src.models import train_mlp

# Labels used for the evaluation report and the plot heading.
mlp_name = "MLP"
mlp_plot_title = "MLP – Confusion Matrix"

# Fit on the training split, then evaluate on the held-out split.
# mlp_accuracy keeps whatever evaluate_model returns (presumably the
# accuracy — confirm in src.models).
mlp = train_mlp(X_train_combined, Y_train)
mlp_accuracy = evaluate_model(
    mlp,
    X_test_combined,
    Y_test,
    model_name=mlp_name,
)

# Test-set predictions feed the confusion-matrix plot.
y_pred_mlp = mlp.predict(X_test_combined)
plot_confusion_matrix(Y_test, y_pred_mlp, title=mlp_plot_title)

# Random Forest

In [None]:
from src.models import train_random_forest

# Labels used for the evaluation report and the plot heading.
rf_name = "Random Forest"
rf_plot_title = "Random Forest – Confusion Matrix"

# Fit on the training split, then evaluate on the held-out split.
# rf_accuracy keeps whatever evaluate_model returns (presumably the
# accuracy — confirm in src.models).
random_forest = train_random_forest(X_train_combined, Y_train)
rf_accuracy = evaluate_model(
    random_forest,
    X_test_combined,
    Y_test,
    model_name=rf_name,
)

# Test-set predictions feed the confusion-matrix plot.
y_pred_rf = random_forest.predict(X_test_combined)
plot_confusion_matrix(Y_test, y_pred_rf, title=rf_plot_title)

# Ensemble Model (DT + MLP + RF)

In [None]:
from src.models import train_ensemble

# Labels used for the evaluation report and the plot heading.
ensemble_name = "Ensemble"
ensemble_plot_title = "Ensemble – Confusion Matrix"

# The ensemble is built from the three models trained in earlier cells,
# passed in the same order as before: decision tree, MLP, random forest.
# Those cells must have been run first.
base_models = (decision_tree, mlp, random_forest)
ensemble = train_ensemble(X_train_combined, Y_train, *base_models)

# ensemble_accuracy keeps whatever evaluate_model returns (presumably the
# accuracy — confirm in src.models).
ensemble_accuracy = evaluate_model(
    ensemble,
    X_test_combined,
    Y_test,
    model_name=ensemble_name,
)

# Test-set predictions feed the confusion-matrix plot.
y_pred_ensemble = ensemble.predict(X_test_combined)
plot_confusion_matrix(Y_test, y_pred_ensemble, title=ensemble_plot_title)