# Fraud Detection Analysis Notebook
## Import and Setup

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from scripts.data_processor import DataProcessor

# Create the project's DataProcessor; every load/process call below goes
# through this single instance.
processor = DataProcessor()

# Fetch the raw inputs in one call, then unpack them into the three names
# the rest of the notebook refers to.
raw_frames = processor.load_raw_data()
fraud_raw, ip_raw, credit_raw = raw_frames

# Derive the processed frames: the fraud data is processed together with
# the IP table, the credit data on its own.
fraud_processed = processor.process_fraud_data(fraud_raw, ip_raw)
credit_processed = processor.process_credit_data(credit_raw)

## Data Processing Pipeline

In [None]:
# Data processing pipeline for the e-commerce fraud frame.
#
# NOTE: this cell was previously stored as ONE physical line containing
# literal "\n" sequences. Because that line began with '#', Python treated
# the whole cell as a comment and none of it ran. It is restored here as
# real statements.

# Handle missing values
# Rows without a user id or either timestamp are dropped. .copy() makes the
# result an independent frame so the column assignments below never write
# into a view of the previous frame.
fraud_processed = fraud_processed.dropna(
    subset=['user_id', 'signup_time', 'purchase_time']
).copy()
fraud_processed['device_id'] = fraud_processed['device_id'].fillna('unknown')

# Clean data types
fraud_processed['signup_time'] = pd.to_datetime(fraud_processed['signup_time'])
fraud_processed['purchase_time'] = pd.to_datetime(fraud_processed['purchase_time'])

# Merge with geolocation data
# 'lower_bound_ip_address' is the start of an IP range, so an exact-equality
# merge only matches addresses that happen to equal a range start. Use an
# as-of join instead: each transaction picks the range with the greatest
# lower bound that is <= its ip_address.
fraud_processed['ip_address'] = fraud_processed['ip_address'].astype('int64')

# merge_asof needs both keys sorted, free of NaN, and of the same dtype.
ip_ranges = ip_raw.dropna(subset=['lower_bound_ip_address']).copy()
ip_ranges['lower_bound_ip_address'] = (
    ip_ranges['lower_bound_ip_address'].astype('int64')
)
ip_ranges = ip_ranges.sort_values('lower_bound_ip_address')

# Like the previous how='left' merge, every transaction row is kept; rows
# with no range at or below their address get NaN in the ip_raw columns.
# Side effect: rows come back ordered by ip_address with a fresh index.
fraud_processed = pd.merge_asof(
    fraud_processed.sort_values('ip_address'),
    ip_ranges,
    left_on='ip_address',
    right_on='lower_bound_ip_address',
    direction='backward',
)

# NOTE(review): assumes ip_raw also carries an 'upper_bound_ip_address'
# column closing each range - confirm against the IP table. When it is
# present, blank the geolocation columns for addresses that fall past the
# end of the range they were matched to (gaps between ranges, or addresses
# beyond the last range).
if 'upper_bound_ip_address' in fraud_processed.columns:
    within_range = (
        fraud_processed['ip_address'] <= fraud_processed['upper_bound_ip_address']
    )
    geo_columns = [
        column for column in ip_ranges.columns
        if column in fraud_processed.columns
    ]
    for column in geo_columns:
        fraud_processed[column] = fraud_processed[column].where(within_range)

# Feature engineering
# transaction_velocity = number of non-null purchase_value entries per user.
fraud_processed['transaction_velocity'] = (
    fraud_processed.groupby('user_id')['purchase_value'].transform('count')
)
fraud_processed['hour_of_day'] = fraud_processed['purchase_time'].dt.hour
fraud_processed['day_of_week'] = fraud_processed['purchase_time'].dt.dayofweek

# Normalize numerical features
# Standard z-score: (value - mean) / sample standard deviation.
purchase_mean = fraud_processed['purchase_value'].mean()
purchase_std = fraud_processed['purchase_value'].std()
fraud_processed['scaled_purchase_value'] = (
    (fraud_processed['purchase_value'] - purchase_mean) / purchase_std
)

## Exploratory Data Analysis

In [None]:
# Fraud data: bar chart of row counts per value of the 'class' column.
fraud_fig, fraud_ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=fraud_processed, x='class', ax=fraud_ax)
fraud_ax.set_title('Class Distribution in E-commerce Data')
plt.show()

# Credit card data: box plot of 'scaled_amount' grouped by 'Class'.
credit_fig, credit_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=credit_processed, x='Class', y='scaled_amount', ax=credit_ax)
credit_ax.set_title('Transaction Amount Distribution by Class')
plt.show()