In [2]:
# 📦 Import Libraries
import pandas as pd
import numpy as np

# 📂 Read the cleaned transactions CSV (absolute path on the author's machine)
transactions = pd.read_csv(
    filepath_or_buffer=r"C:\Users\sinha\OneDrive\Documents\Projects\Bihar Hackathon\bank-fraud-detection\data\processed\cleaned_transactions.csv"
)

# 🧠 Feature Engineering

# Time-based features: parse the timestamp column, then derive clock and
# calendar fields from it
transactions['Timestamp'] = pd.to_datetime(transactions['Timestamp'])
transactions['hour'] = transactions['Timestamp'].dt.hour
transactions['day_of_week'] = transactions['Timestamp'].dt.weekday  # Monday=0 ... Sunday=6
# Saturday (5) and Sunday (6) are flagged as weekend (1), everything else 0
transactions['is_weekend'] = transactions['day_of_week'].isin({5, 6}).astype(int)
# Bucket the hour into four six-hour bands
transactions['time_of_day'] = pd.cut(
    x=transactions['hour'],
    bins=[0, 6, 12, 18, 24],
    right=False,  # left-closed bins: [0, 6), [6, 12), [12, 18), [18, 24)
    labels=['Night', 'Morning', 'Afternoon', 'Evening'],
)

# Amount flag: 1 when 'Amount (INR)' is strictly greater than 10000, else 0
transactions['is_large_amount'] = transactions['Amount (INR)'].gt(10000).astype(int)

# Bank-match flag: 1 when sender_bank equals receiver_bank on the row, else 0
transactions['same_bank_transfer'] = (
    transactions['sender_bank'].eq(transactions['receiver_bank']).astype(int)
)

# Frequency features: sort in place by sender, then timestamp, so that the
# previous row inside a sender group is that sender's preceding transaction
transactions.sort_values(by=['Sender UPI ID', 'Timestamp'], inplace=True)
transactions['prev_txn_time'] = (
    transactions.groupby('Sender UPI ID')['Timestamp'].shift()
)
# Seconds elapsed since the sender's previous transaction; rows with no
# computable gap (e.g. a sender's first transaction) get the sentinel 999999
transactions['time_gap_seconds'] = (
    transactions['Timestamp']
    .sub(transactions['prev_txn_time'])
    .dt.total_seconds()
    .fillna(999999)
)

# Receiver repeat count: number of earlier rows (in the current row order)
# with the same sender/receiver pair; the first occurrence of a pair gets 0
transactions['receiver_repeat_count'] = (
    transactions
    .groupby(by=['Sender UPI ID', 'Receiver UPI ID'])
    .cumcount()
)

# Per-sender rolling mean of 'Amount (INR)' over a window of up to 5 rows
# ending at (and including) the current row; min_periods=1 produces a value
# from the first row of each sender onward
transactions['avg_txn_amt_sender'] = (
    transactions.groupby('Sender UPI ID')['Amount (INR)']
    .rolling(5, min_periods=1)
    .mean()
    .droplevel(0)  # drop the group-key level so assignment aligns on the row index
)

# Replace any remaining missing values in the numeric columns with 0
numeric_cols = transactions.select_dtypes(include='number').columns
transactions[numeric_cols] = transactions[numeric_cols].fillna(value=0)

# A categorical column only accepts fill values that are registered categories,
# so add 'Unknown' first and then use it for the missing entries
transactions['time_of_day'] = transactions['time_of_day'].cat.add_categories('Unknown')
transactions['time_of_day'] = transactions['time_of_day'].fillna('Unknown')

# ✅ Write the feature-enriched table to disk, omitting the row index
transactions.to_csv(
    path_or_buf=r"C:\Users\sinha\OneDrive\Documents\Projects\Bihar Hackathon\bank-fraud-detection\data\processed\final_dataset.csv",
    index=False,
)

print("✅ Feature engineering complete. Saved to final_dataset.csv")


✅ Feature engineering complete. Saved to final_dataset.csv
