# Loading Dataset

In [1]:
# Loading Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier, LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score, f1_score

# Load the first dataset
df = pd.read_csv('/content/drive/MyDrive/AAA_datasets/thesis/creditcard_2023.csv')

# Drop the 'id' column as it's not needed
df = df.drop('id', axis=1)

# Shuffle and split the dataset into training and testing sets
X = df.drop('Class', axis=1)
y = df['Class']

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Scale the 'Amount' column
scaler = StandardScaler()
X_train['Amount'] = scaler.fit_transform(X_train[['Amount']])
X_test['Amount'] = scaler.transform(X_test[['Amount']])  # Use the same scaler for the test set

### Train supervised models

In [2]:
# Train supervised models
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



### Train unsupervised models

##### LOF only supports fit_predict; it's included in test prediction step

In [3]:
# Train unsupervised models
iso_forest = IsolationForest(contamination=0.01, random_state=42, n_estimators=100)
iso_forest.fit(X_train)

# Predict on training set for each model

In [4]:
# Predict on training set for each model
xgb_probs = xgb.predict_proba(X_train)[:, 1]  # Probability of class 1 (fraud)
xgb_preds = np.where(xgb_probs >= 0.9, 1, 0)
knn_preds = knn.predict(X_train)

iso_preds = iso_forest.predict(X_train)
iso_preds = np.where(iso_preds == -1, 1, 0)

lof_test = LocalOutlierFactor(n_neighbors=20, contamination=0.01)
lof_preds_test = lof_test.fit_predict(X_train)
lof_preds = np.where(lof_preds_test == -1, 1, 0)

### Evaluate each model on the training set

In [5]:
# Evaluate each model on the training set
def evaluate_model(y_true, y_pred, model_name):
    print(f"{model_name} Evaluation:")
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, target_names=["Legit", "Fraud"]))
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("ROC AUC Score:", roc_auc_score(y_true, y_pred))
    print("\n")

# Evaluate each model
evaluate_model(y_train, xgb_preds, "XGBoost")
evaluate_model(y_train, knn_preds, "KNN")
evaluate_model(y_train, iso_preds, "Isolation Forest")
evaluate_model(y_train, lof_preds, "Local Outlier Factor")

XGBoost Evaluation:
[[227452      0]
 [     4 227448]]
              precision    recall  f1-score   support

       Legit       1.00      1.00      1.00    227452
       Fraud       1.00      1.00      1.00    227452

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904

Accuracy: 0.9999912069359689
ROC AUC Score: 0.9999912069359689


KNN Evaluation:
[[227091    361]
 [     1 227451]]
              precision    recall  f1-score   support

       Legit       1.00      1.00      1.00    227452
       Fraud       1.00      1.00      1.00    227452

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904

Accuracy: 0.9992042277051861
ROC AUC Score: 0.9992042277051861


Isolation Forest Evaluation:
[[227331    121]
 [223025   4427]]
              precision    recall  f1-score   

# Combine predictions using majority vote for hybrid model

In [6]:
# Combine predictions using majority vote for hybrid model
final_preds = []
for i in range(len(X_train)):
    vote_score = (
        (1 if xgb_preds[i] == 1 else 0) * 1.5 +
        (1 if knn_preds[i] == 1 else 0) * 1.5 +
        (1 if iso_preds[i] == 1 else 0) * 1 +
        (1 if lof_preds[i] == 1 else 0) * 1
    )
    final_preds.append(1 if vote_score >= 2.5 else 0)

# Evaluate results on the hybrid model
print("Hybrid Model Evaluation on Training Set:")
evaluate_model(y_train, final_preds, "Hybrid Model")

Hybrid Model Evaluation on Training Set:
Hybrid Model Evaluation:
[[227212    240]
 [     5 227447]]
              precision    recall  f1-score   support

       Legit       1.00      1.00      1.00    227452
       Fraud       1.00      1.00      1.00    227452

    accuracy                           1.00    454904
   macro avg       1.00      1.00      1.00    454904
weighted avg       1.00      1.00      1.00    454904

Accuracy: 0.9994614248280956
ROC AUC Score: 0.9994614248280957




Recommended Evaluation Metrics (Training):
1. Accuracy

Measures the overall correctness of the model. Good for balanced datasets, but may be misleading when fraud cases are rare.

2. Precision (Fraud = class 1)

Out of all transactions predicted as fraud, how many were truly fraud? Important when you want to minimize false alarms (false positives).

3. Recall (Fraud = class 1)

Out of all actual fraud cases, how many did the model correctly identify? Important when you want to catch as many frauds as possible.

4. F1-Score (Fraud = class 1)

Harmonic mean of Precision and Recall. Useful when you need a balance between false positives and false negatives.

5. ROC-AUC (Receiver Operating Characteristic – Area Under Curve)

Measures model's ability to distinguish between fraud and legit transactions across thresholds. AUC closer to 1.0 = better overall performance.

### Compare fraud rates on training dataset

In [7]:
# Compare fraud rates on training dataset
print("Fraud rate predicted by each model on training dataset:")
print("XGBoost:", sum(xgb_preds), "frauds")
print("KNN:", sum(knn_preds), "frauds")
print("Isolation Forest:", sum(iso_preds), "frauds")
print("LOF:", sum(lof_preds), "frauds")
print(f"Final hybrid fraud detection rate (train set): {sum(final_preds) / len(final_preds):.4f}")

# Macro and Weighted F1-Score Calculations for training dataset
print("Macro F1 (Train):", f1_score(y_train, final_preds, average='macro'))
print("Weighted F1 (Train):", f1_score(y_train, final_preds, average='weighted'))

Fraud rate predicted by each model on training dataset:
XGBoost: 227448 frauds
KNN: 227812 frauds
Isolation Forest: 4548 frauds
LOF: 4550 frauds
Final hybrid fraud detection rate (train set): 0.5005
Macro F1 (Train): 0.9994614246843672
Weighted F1 (Train): 0.9994614246843672


-----

# Predict using trained models on the test dataset

In [8]:
# Predict using trained models on the test dataset
xgb_probs_test = xgb.predict_proba(X_test)[:, 1]
xgb_preds_test = np.where(xgb_probs_test >= 0.9, 1, 0)
knn_preds_test = knn.predict(X_test)
iso_preds_test = np.where(iso_forest.predict(X_test) == -1, 1, 0)
lof_preds_test = np.where(lof_test.fit_predict(X_test) == -1, 1, 0)  # Use the same LOF model

### Evaluate each model on the test dataset

In [9]:
# Evaluate each model on the test dataset
print("Evaluation on Test Dataset:")
evaluate_model(y_test, xgb_preds_test, "XGBoost")
evaluate_model(y_test, knn_preds_test, "KNN")
evaluate_model(y_test, iso_preds_test, "Isolation Forest")
evaluate_model(y_test, lof_preds_test, "Local Outlier Factor")

Evaluation on Test Dataset:
XGBoost Evaluation:
[[56848    15]
 [    2 56861]]
              precision    recall  f1-score   support

       Legit       1.00      1.00      1.00     56863
       Fraud       1.00      1.00      1.00     56863

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

Accuracy: 0.9998505179114714
ROC AUC Score: 0.9998505179114714


KNN Evaluation:
[[56673   190]
 [    1 56862]]
              precision    recall  f1-score   support

       Legit       1.00      1.00      1.00     56863
       Fraud       1.00      1.00      1.00     56863

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

Accuracy: 0.9983205247700614
ROC AUC Score: 0.9983205247700614


Isolation Forest Evaluation:
[[56831    32]
 [55745  1118]]
              precision    rec

# Voting ensemble for the test dataset

In [10]:
# Voting ensemble for the test dataset
final_preds_test = []
for i in range(len(X_test)):
    vote_score = (
        (1 if xgb_preds_test[i] == 1 else 0) * 1.5 +
        (1 if knn_preds_test[i] == 1 else 0) * 1.5 +
        (1 if iso_preds_test[i] == 1 else 0) * 1 +
        (1 if lof_preds_test[i] == 1 else 0) * 1
    )
    final_preds_test.append(1 if vote_score >= 2.5 else 0)

# Evaluate on test dataset for hybrid model
print("Hybrid Model Evaluation on Test Dataset:")
evaluate_model(y_test, final_preds_test, "Hybrid Model")

Hybrid Model Evaluation on Test Dataset:
Hybrid Model Evaluation:
[[56793    70]
 [    3 56860]]
              precision    recall  f1-score   support

       Legit       1.00      1.00      1.00     56863
       Fraud       1.00      1.00      1.00     56863

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

Accuracy: 0.9993581063257303
ROC AUC Score: 0.9993581063257303




Recommended Evaluation Metrics (Training):
1. Accuracy

Measures the overall correctness of the model. Good for balanced datasets, but may be misleading when fraud cases are rare.

2. Precision (Fraud = class 1)

Out of all transactions predicted as fraud, how many were truly fraud? Important when you want to minimize false alarms (false positives).

3. Recall (Fraud = class 1)

Out of all actual fraud cases, how many did the model correctly identify? Important when you want to catch as many frauds as possible.

4. F1-Score (Fraud = class 1)

Harmonic mean of Precision and Recall. Useful when you need a balance between false positives and false negatives.

5. ROC-AUC (Receiver Operating Characteristic – Area Under Curve)

Measures model's ability to distinguish between fraud and legit transactions across thresholds. AUC closer to 1.0 = better overall performance.

### Compare fraud rates on test dataset

In [11]:
# Compare fraud rates on test dataset
print("Fraud rate predicted by each model on test dataset:")
print("XGBoost:", sum(xgb_preds_test), "frauds")
print("KNN:", sum(knn_preds_test), "frauds")
print("Isolation Forest:", sum(iso_preds_test), "frauds")
print("LOF:", sum(lof_preds_test), "frauds")
print(f"Final hybrid fraud detection rate (test set): {sum(final_preds_test) / len(final_preds_test):.4f}")

# Macro and Weighted F1-Score Calculations for test dataset
print("Macro F1 (Test):", f1_score(y_test, final_preds_test, average='macro'))
print("Weighted F1 (Test):", f1_score(y_test, final_preds_test, average='weighted'))

Fraud rate predicted by each model on test dataset:
XGBoost: 56876 frauds
KNN: 57052 frauds
Isolation Forest: 1150 frauds
LOF: 1138 frauds
Final hybrid fraud detection rate (test set): 0.5006
Macro F1 (Test): 0.9993581061029415
Weighted F1 (Test): 0.9993581061029415


-----

# Fraud Ratio Comparison Between Predicted and Actual for test dataset

In [12]:
# Fraud Ratio Comparison Between Predicted and Actual for test dataset
print("Actual fraud ratio (Test):", y_test.mean())
print("Predicted fraud ratio (Hybrid Test):", sum(final_preds_test) / len(final_preds_test))

Actual fraud ratio (Test): 0.5
Predicted fraud ratio (Hybrid Test): 0.5005891352900832
