In [None]:
import pandas as pd

# Load datasets
# Read the two raw CSV inputs: the fraud records and the IP-range lookup table.
fraud_data, ip_data = (
    pd.read_csv(csv_path)
    for csv_path in ('data/Fraud_Data.csv', 'data/IpAddress_to_Country.csv')
)

# Feature Engineering: Time-Based Features
# Parse both timestamp columns so datetime arithmetic and the .dt accessor work.
for ts_col in ('signup_time', 'purchase_time'):
    fraud_data[ts_col] = pd.to_datetime(fraud_data[ts_col])

# Hours elapsed between signing up and making the purchase.
signup_to_purchase = fraud_data['purchase_time'].sub(fraud_data['signup_time'])
fraud_data['time_since_signup'] = signup_to_purchase.dt.total_seconds().div(3600)

# Calendar components of the purchase timestamp (weekday: Monday=0 ... Sunday=6).
fraud_data['hour_of_day'] = fraud_data['purchase_time'].dt.hour
fraud_data['day_of_week'] = fraud_data['purchase_time'].dt.weekday

# Transaction Frequency and Velocity
# Number of rows recorded for each user_id, broadcast back onto every row.
fraud_data['transaction_count'] = fraud_data.groupby('user_id')['user_id'].transform('count')

# Hours since the same user's previous purchase.  diff() works in row order,
# so sort chronologically first; otherwise an unsorted input file would yield
# negative or meaningless gaps.  The result keeps the original index labels,
# so the assignment re-aligns it with fraud_data's current row order.
# A user's first purchase has no predecessor and stays NaN.
hours_since_previous = (
    fraud_data.sort_values('purchase_time')
    .groupby('user_id')['purchase_time']
    .diff()
    .dt.total_seconds()
    / 3600
)
fraud_data['transaction_velocity'] = hours_since_previous

# Geolocation: Merge with IP data
# Attach to every transaction the ip_data row whose
# [lower_bound_ip_address, upper_bound_ip_address] range contains its
# ip_address.  DataFrame.merge only performs equality joins on column labels
# (a callable `right_on` raises KeyError), so the range lookup is done with
# merge_asof followed by an upper-bound check.
#
# Cast explicitly to int64: the platform-default `int` can be 32-bit, which
# cannot hold the full IPv4 range, and merge_asof requires both keys to have
# the same dtype.
fraud_data['ip_address'] = fraud_data['ip_address'].astype('int64')
ip_data['lower_bound_ip_address'] = ip_data['lower_bound_ip_address'].astype('int64')
ip_data['upper_bound_ip_address'] = ip_data['upper_bound_ip_address'].astype('int64')

# merge_asof needs both frames sorted on their keys; remember each row's
# original position so the transaction order can be restored afterwards.
ip_sorted_fraud = fraud_data.assign(
    _row_order=range(len(fraud_data))
).sort_values('ip_address')
geo_matched = pd.merge_asof(
    ip_sorted_fraud,
    ip_data.sort_values('lower_bound_ip_address'),
    left_on='ip_address',
    right_on='lower_bound_ip_address',
    direction='backward',  # nearest range starting at or below the address
)

# The nearest lower bound is only a real match when the address is also at or
# below that range's upper bound; otherwise blank the looked-up columns so the
# row looks like an unmatched row of a left join.
# NOTE(review): assumes the ranges in ip_data do not overlap and that ip_data
# shares no column names with fraud_data -- confirm against the data.
outside_range = geo_matched['ip_address'] > geo_matched['upper_bound_ip_address']
for geo_col in ip_data.columns:
    geo_matched[geo_col] = geo_matched[geo_col].mask(outside_range)

# Back to the original row order with a fresh 0..n-1 index, as a plain merge
# would have produced.
fraud_data = (
    geo_matched.sort_values('_row_order')
    .drop(columns='_row_order')
    .reset_index(drop=True)
)

# Categorical Encoding
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each so the indicator columns are not perfectly collinear.
categorical_cols = ['source', 'browser', 'sex']
fraud_data = pd.get_dummies(
    fraud_data,
    columns=categorical_cols,
    drop_first=True,
)

# Save processed data
# Write the engineered frame to CSV, leaving the DataFrame index out of the file.
processed_csv_path = 'data/processed_fraud_data.csv'
fraud_data.to_csv(processed_csv_path, index=False)