# 🕹️ Real-Time Fraud Detection Simulation
This notebook simulates streaming fraud detection using a trained Random Forest model.

In [1]:
import pandas as pd
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# === 1. Load Cleaned Dataset ===
df = pd.read_csv('../data/paysim_cleaned.csv')

# === 2. Drop Unused Columns ===
cols_to_drop = ['nameOrig', 'nameDest', 'type']  # IDs, categorical strings
X = df.drop(['isFraud'] + cols_to_drop, axis=1)
y = df['isFraud']

# === 3. Split into Training and Simulation Sets (80% train, 20% simulate) ===
X_train, X_sim, y_train, y_sim = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Save simulation data to a new CSV
sim_data = df.loc[X_sim.index]
sim_data.to_csv('../data/simulation_data.csv', index=False)
print("📁 Saved 20% simulation data to '../data/simulation_data.csv'")

# === 4. Scale Training Data ===
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# === 5. Train Random Forest Model ===
rf = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
rf.fit(X_train_scaled, y_train)

# === 6. Evaluate on Train Set (optional) ===
y_pred = rf.predict(X_train_scaled)
print("📊 Model Performance on Train Set:")
print(classification_report(y_train, y_pred))

# === 7. Save Model and Scaler ===
model_dir = '../models'
os.makedirs(model_dir, exist_ok=True)

joblib.dump(rf, os.path.join(model_dir, 'random_forest_model.pkl'))
joblib.dump(scaler, os.path.join(model_dir, 'standard_scaler.pkl'))
print("✅ Model and scaler saved in '../models/'")


📁 Saved 20% simulation data to '../data/simulation_data.csv'
📊 Model Performance on Train Set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    837946
           1       1.00      1.00      1.00       914

    accuracy                           1.00    838860
   macro avg       1.00      1.00      1.00    838860
weighted avg       1.00      1.00      1.00    838860

✅ Model and scaler saved in '../models/'


In [2]:
import os
os.chdir('c:/Users/austi/Project/Advanced-Financial-Fraud-detection/notebooks')
import pandas as pd
import time
import joblib
from sklearn.preprocessing import StandardScaler
from email.mime.text import MIMEText
import smtplib

# Set working directory if needed
# os.chdir('c:/Users/austi/Project/Advanced-Financial-Fraud-detection/notebooks')

# === 1. Load Simulation Data ===
df = pd.read_csv('../data/simulation_data.csv')

# === 2. Drop unused columns to match training ===
cols_to_drop = ['nameOrig', 'nameDest', 'type']
X_sim = df.drop(['isFraud'] + cols_to_drop, axis=1)

# === 3. Load saved scaler and model ===
scaler = joblib.load('../models/standard_scaler.pkl')
rf = joblib.load('../models/random_forest_model.pkl')

# === 4. Scale simulation features ===
X_scaled = scaler.transform(X_sim)

# === 5. Email Alert Function (update with your credentials) ===
def send_fraud_alert(transaction):
    sender = "cherianaustin16@gmail.com"
    recipient = "austincherian12@gmail.com"
    app_password = "sord qxez diaf fxcs"  # Use app password from Gmail settings

    subject = "🚨 Fraud Detected!"
    body = f"""
    A fraudulent transaction was detected:
    - Amount: {transaction.get('amount', 0)}
    - Sender: {transaction.get('nameOrig', 'N/A')}
    - Receiver: {transaction.get('nameDest', 'N/A')}
    - Type: {transaction.get('type', 'N/A')}
    """
    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = recipient

    try:
        server = smtplib.SMTP('smtp.gmail.com', 587)
        server.starttls()
        server.login(sender, app_password)
        server.send_message(msg)
        server.quit()
        print(f"📤 Alert sent for transaction: {transaction.get('amount')}")
    except Exception as e:
        print("❌ Email failed:", e)

# === 6. Start Simulation ===
print("▶️ Starting real-time fraud detection simulation...")

for i in range(len(df)):
    new_data = X_scaled[i].reshape(1, -1)
    prediction = rf.predict(new_data)[0]

    if prediction == 1:
        send_fraud_alert(df.iloc[i])
    else:
        print(f"✅ Transaction {i+1}: Not Fraud")

    time.sleep(1)  

print("✅ Simulation complete!")


▶️ Starting real-time fraud detection simulation...
✅ Transaction 1: Not Fraud
✅ Transaction 2: Not Fraud
✅ Transaction 3: Not Fraud
✅ Transaction 4: Not Fraud
✅ Transaction 5: Not Fraud
✅ Transaction 6: Not Fraud
✅ Transaction 7: Not Fraud
✅ Transaction 8: Not Fraud
✅ Transaction 9: Not Fraud
✅ Transaction 10: Not Fraud
✅ Transaction 11: Not Fraud
✅ Transaction 12: Not Fraud
✅ Transaction 13: Not Fraud
✅ Transaction 14: Not Fraud
✅ Transaction 15: Not Fraud
✅ Transaction 16: Not Fraud
✅ Transaction 17: Not Fraud
✅ Transaction 18: Not Fraud
✅ Transaction 19: Not Fraud
✅ Transaction 20: Not Fraud
✅ Transaction 21: Not Fraud
✅ Transaction 22: Not Fraud
✅ Transaction 23: Not Fraud
✅ Transaction 24: Not Fraud
✅ Transaction 25: Not Fraud
✅ Transaction 26: Not Fraud
✅ Transaction 27: Not Fraud
✅ Transaction 28: Not Fraud
✅ Transaction 29: Not Fraud
✅ Transaction 30: Not Fraud
✅ Transaction 31: Not Fraud
✅ Transaction 32: Not Fraud
✅ Transaction 33: Not Fraud
✅ Transaction 34: Not Fraud
✅ Tra

KeyboardInterrupt: 