In [None]:
# ===============================
# Feature Engineering
# ===============================

import os
import socket
import struct

import numpy as np
import pandas as pd

# Load the raw transaction log and the IP-range -> country lookup table.
fraud = pd.read_csv("../data/raw/Fraud_Data.csv")
ip_map = pd.read_csv("../data/raw/IpAddress_to_Country.csv")

# Parse both timestamp columns into datetimes so the .dt accessor and
# datetime arithmetic can be used on them.
fraud = fraud.assign(
    signup_time=pd.to_datetime(fraud['signup_time']),
    purchase_time=pd.to_datetime(fraud['purchase_time']),
)

# IP conversion
def ip_to_int(ip):
    """Convert an IPv4 address to its unsigned 32-bit integer value.

    Args:
        ip: Either a dotted-quad string such as ``"1.2.3.4"`` or a number
            that already holds the integer form of the address. A
            fractional part on a numeric value is discarded.

    Returns:
        The address as a Python ``int`` in the range ``0 .. 2**32 - 1``.

    Raises:
        OSError: If ``ip`` is a string that ``socket.inet_aton`` rejects.
        ValueError: If ``ip`` is numeric but NaN or outside the IPv4 range.
        OverflowError: If ``ip`` is an infinite float.
    """
    if isinstance(ip, str):
        # "!I" = network byte order (big-endian), unsigned 32-bit integer.
        return struct.unpack("!I", socket.inet_aton(ip))[0]

    # socket.inet_aton() only accepts str and raises TypeError otherwise,
    # which is what happens when read_csv parses the column as numeric.
    # Treat a non-string value as an address that is already in integer
    # form and truncate any fractional part.
    value = int(ip)
    if not 0 <= value <= 0xFFFFFFFF:
        raise ValueError(f"IPv4 address out of range: {ip!r}")
    return value

# Use an explicit int64 for every IP column: IPv4 values reach 2**32 - 1,
# which overflows the 32-bit integer that ``astype(int)`` selects on some
# platforms, and merge_asof requires both join keys to share one dtype.
fraud['ip_int'] = fraud['ip_address'].apply(ip_to_int).astype('int64')

ip_map['lower'] = ip_map['lower_bound_ip_address'].astype('int64')
ip_map['upper'] = ip_map['upper_bound_ip_address'].astype('int64')
# merge_asof needs both inputs sorted on their join key.
ip_map = ip_map.sort_values('lower')

# Merge IP to country
# direction='backward' attaches to each transaction the range with the
# largest 'lower' bound that is <= ip_int.
fraud = pd.merge_asof(
    fraud.sort_values('ip_int'),
    ip_map[['lower', 'upper', 'country']],
    left_on='ip_int',
    right_on='lower',
    direction='backward',
)

# The backward match only guarantees lower <= ip_int, so keep just the rows
# whose IP also lies at or below the matched range's upper bound. Rows with
# no match have a NaN 'upper', compare False and are dropped too.
# .copy() yields an independent frame so that adding columns to it later
# does not raise SettingWithCopyWarning.
fraud = fraud[fraud['ip_int'] <= fraud['upper']].copy()

# Time features
purchase_ts = fraud['purchase_time']

# Hour of day and weekday index (Monday=0 ... Sunday=6) of the purchase.
fraud['hour'] = purchase_ts.dt.hour
fraud['day_of_week'] = purchase_ts.dt.weekday
# 1 when the purchase fell on Saturday (5) or Sunday (6), otherwise 0.
fraud['is_weekend'] = fraud['day_of_week'].isin([5, 6]).astype(int)

# Time since signup
# Seconds elapsed between account creation and the purchase.
fraud['time_since_signup'] = (
    purchase_ts.sub(fraud['signup_time']).dt.total_seconds()
)

# Transaction velocity
# Sort so that each user's purchases are in chronological order; a
# time-based rolling window requires a monotonic datetime index.
fraud = fraud.sort_values(['user_id', 'purchase_time'])


def _tx_count_24h(purchase_times):
    """Count one user's purchases in the 24h window ending at each purchase.

    Args:
        purchase_times: Chronologically sorted purchase timestamps of a
            single user (one group of the groupby below).

    Returns:
        A float Series aligned to ``purchase_times.index`` holding, for each
        purchase, the number of that user's purchases inside the trailing
        24-hour window (the purchase itself included).
    """
    # An offset window such as '24h' is only accepted when the rolled object
    # has a datetime-like index, so roll over a marker series indexed by the
    # timestamps rather than over the timestamp values themselves.
    marker = pd.Series(1.0, index=pd.DatetimeIndex(purchase_times))
    counts = marker.rolling('24h').count()
    # Re-attach the group's original index so transform() can align rows.
    return pd.Series(counts.to_numpy(), index=purchase_times.index)


fraud['tx_count_24h'] = fraud.groupby('user_id')['purchase_time'].transform(
    _tx_count_24h
)

# Device & IP risk
# Number of distinct user accounts seen on the same device, broadcast back
# onto every row that shares that device.
user_ids_by_device = fraud.groupby('device_id')['user_id']
fraud['users_per_device'] = user_ids_by_device.transform('nunique')

# Same count, keyed on the raw IP address instead of the device.
user_ids_by_ip = fraud.groupby('ip_address')['user_id']
fraud['users_per_ip'] = user_ids_by_ip.transform('nunique')

# Save processed data
# Create the output folder first: to_csv() does not create missing parent
# directories and fails if the folder is absent.
os.makedirs("../data/processed", exist_ok=True)
# index=False keeps the DataFrame index out of the output file.
fraud.to_csv("../data/processed/fraud_processed.csv", index=False)

print("Feature engineering complete.")
