In [3]:
import os
os.chdir('c:/Users/austi/Project/Advanced-Financial-Fraud-detection/notebooks')

# 📦 Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# 🧾 1. Load cleaned dataset
df = pd.read_csv("../data/paysim_cleaned.csv")  

# 🎯 2. Encode and drop non-numeric columns
df['type'] = df['type'].astype('category').cat.codes
X = df.drop(['isFraud', 'nameOrig', 'nameDest'], axis=1)
y = df['isFraud']

# 📊 3. Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 🧪 4. Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# 🔁 5. Simulate new incoming data from test set
X_test_part1, X_test_new, y_test_part1, y_test_new = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# 🧠 6. Train initial logistic regression model
log_reg_base = LogisticRegression(max_iter=1000, random_state=42)
log_reg_base.fit(X_train, y_train)
y_pred_base = log_reg_base.predict(X_test_part1)

print("📉 Baseline Logistic Regression Performance:")
print(classification_report(y_test_part1, y_pred_base, digits=4))
print("ROC AUC:", roc_auc_score(y_test_part1, y_pred_base))

# 🔄 7. Adaptive learning - retrain with new data
X_adapted = pd.concat([pd.DataFrame(X_train), pd.DataFrame(X_test_new)], axis=0)
y_adapted = pd.concat([y_train, y_test_new], axis=0)

log_reg_adapted = LogisticRegression(max_iter=1000, random_state=42)
log_reg_adapted.fit(X_adapted, y_adapted)
y_pred_adapted = log_reg_adapted.predict(X_test_part1)

print("\n📈 Adapted Logistic Regression Performance:")
print(classification_report(y_test_part1, y_pred_adapted, digits=4))
print("ROC AUC:", roc_auc_score(y_test_part1, y_pred_adapted))


📉 Baseline Logistic Regression Performance:
              precision    recall  f1-score   support

           0     0.9990    1.0000    0.9995    157101
           1     1.0000    0.1405    0.2464       185

    accuracy                         0.9990    157286
   macro avg     0.9995    0.5703    0.6230    157286
weighted avg     0.9990    0.9990    0.9986    157286

ROC AUC: 0.5702702702702702

📈 Adapted Logistic Regression Performance:
              precision    recall  f1-score   support

           0     0.9992    1.0000    0.9996    157101
           1     0.9016    0.2973    0.4472       185

    accuracy                         0.9991    157286
   macro avg     0.9504    0.6486    0.7234    157286
weighted avg     0.9991    0.9991    0.9989    157286

ROC AUC: 0.648629552653079
