In [5]:
# 📦 Import Libraries
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder
import joblib

# Training script: fits an unsupervised IsolationForest on the processed
# transaction features, flags anomalous rows as fraud, and writes the fraud
# report, the fitted model and the fitted label encoders to disk.

# Single project root so every input/output location is derived from one place.
project_root = r"C:\Users\sinha\OneDrive\Documents\Projects\Bihar Hackathon\bank-fraud-detection"

# Load the final feature dataset.
df = pd.read_csv(os.path.join(project_root, "data", "processed", "final_dataset.csv"))

# Feature columns used for training as they appear in the dataset.
features = [
    'Amount (INR)', 'hour', 'day_of_week', 'is_weekend', 'is_large_amount',
    'same_bank_transfer', 'time_gap_seconds', 'receiver_repeat_count', 'avg_txn_amt_sender'
]

# Label-encode the categorical columns and add them to the feature list.
# The fitted encoders are kept per column: without them the integer codes the
# model was trained on cannot be reproduced when scoring new data.
label_cols = ['sender_bank', 'receiver_bank', 'time_of_day']
encoders = {}
for col in label_cols:
    le = LabelEncoder()
    # astype(str) is applied whatever the column dtype is, so categorical
    # columns need no separate conversion step.
    df[col] = le.fit_transform(df[col].astype(str))
    encoders[col] = le
    features.append(col)

# Train the Isolation Forest (unsupervised anomaly detection).
# contamination=0.05 sets the proportion of rows the model treats as outliers;
# random_state is fixed so repeated runs give the same result.
model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
model.fit(df[features])

# Predict anomalies. IsolationForest.predict returns 1 for inliers and -1 for
# outliers; remap so that 1 = fraud and 0 = normal.
df['is_fraud'] = model.predict(df[features])
df['is_fraud'] = df['is_fraud'].map({1: 0, -1: 1})

# Ensure the output directories exist.
report_path = os.path.join(project_root, "reports")
model_path = os.path.join(project_root, "models")

os.makedirs(report_path, exist_ok=True)
os.makedirs(model_path, exist_ok=True)

# Save the fraud report (only rows flagged as fraud).
fraud_df = df[df['is_fraud'] == 1]
fraud_df.to_csv(os.path.join(report_path, "fraud_report.csv"), index=False)

# Save the model together with the encoders needed to prepare new data for it.
joblib.dump(model, os.path.join(model_path, "fraud_model.pkl"))
joblib.dump(encoders, os.path.join(model_path, "label_encoders.pkl"))

print("✅ Model training complete. Fraud report and model saved.")


✅ Model training complete. Fraud report and model saved.


