Initial ideas:
- feature joining last transaction data with current one (time of purchase, location)
- additional info based on the location of the transaction - if previous purchase was made in Portugal 2h ago, is it possible that the current one is made in Poland? probably not

In [37]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [38]:
df_transactions = pd.read_json('../data/transactions.json', lines=True)
df_users = pd.read_csv('../data/users.csv')
df_merchants = pd.read_csv('../data/merchants.csv')

In [39]:
df_transactions.head(1)

Unnamed: 0,transaction_id,timestamp,user_id,merchant_id,amount,channel,currency,device,location,payment_method,is_international,session_length_seconds,is_first_time_merchant,is_fraud
0,TX000000,2022-06-17 23:28:00,U14804,M0314,130.03,in-store,EUR,Android,"{'lat': 40.057938, 'long': 14.959737}",debit_card,1,145,0,0


In [40]:
df_merchants.head(1)

Unnamed: 0,merchant_id,category,country,trust_score,number_of_alerts_last_6_months,avg_transaction_amount,account_age_months,has_fraud_history
0,M0001,travel,Austria,1.0,3,97.23,84,0


In [41]:
df_users.head(1)

Unnamed: 0,user_id,age,sex,education,primary_source_of_income,sum_of_monthly_installments,sum_of_monthly_expenses,country,signup_date,risk_score
0,U00001,56,Other,High School,Employment,477.69,243.18,Finland,2021-04-01,0.571079


## 🕒 Temporal Features

| Feature Name                                | Format             | Use-Case                                                                 |
|--------------------------------------------|--------------------|--------------------------------------------------------------------------|
| hour of day                                 | Integer (0–23)     | Captures time-of-day behavior; useful for detecting night-time fraud     |
| day of week                                 | Integer (0–6)      | Weekly behavioral patterns; certain days may have higher fraud rates     |
| is weekend                                  | Boolean            | Distinguishes weekend activity; weekends often have different behavior   |
| month of transaction                        | Integer (1–12)     | Seasonal trends; useful for detecting periodic fraud campaigns           |
| time since last transaction - user-level    | Float (seconds)    | Captures user activity frequency; useful for behavioral profiling        |
| time since last transaction - merchant-level| Float (seconds)    | Captures merchant popularity; good for modeling merchant-specific fraud  |
| time since last user-merchant interaction   | Float (seconds)    | Detects anomalies in specific user-merchant interaction patterns         |

---

## 📊 Spatial Features

In [44]:
# --------- Temporal data feature engineering

df_transactions['hour'] = df_transactions['timestamp'].dt.hour
df_transactions['day_of_week'] = df_transactions['timestamp'].dt.dayofweek  # 0=Monday
df_transactions['is_weekend'] = df_transactions['day_of_week'] >= 5
df_transactions['month'] = df_transactions['timestamp'].dt.month

# Time Since Last Transaction: User-level
df_transactions = df_transactions.sort_values(['user_id', 'timestamp'])
df_transactions['time_since_last_user_txn'] = (
    df_transactions.groupby('user_id')['timestamp']
    .diff()
    .dt.total_seconds()
)

# Time Since Last Transaction: Merchant-level
df_transactions = df_transactions.sort_values(['merchant_id', 'timestamp'])
df_transactions['time_since_last_merchant_txn'] = (
    df_transactions.groupby('merchant_id')['timestamp']
    .diff()
    .dt.total_seconds()
)

#TODO: time since last transaction: user-merchant level

In [43]:
# --------- Geo-data feature engineering
# Extract latitude and longitude into separate columns
df_transactions['latitude'] = df_transactions['location'].apply(lambda x: x['lat'])
df_transactions['longitude'] = df_transactions['location'].apply(lambda x: x['long'])
df_transactions.drop(columns=['location'], inplace=True)

### To Be Continued...

df_transactions.head()

Unnamed: 0,transaction_id,timestamp,user_id,merchant_id,amount,channel,currency,device,payment_method,is_international,...,is_first_time_merchant,is_fraud,hour,day_of_week,is_weekend,month,time_since_last_user_txn,time_since_last_merchant_txn,latitude,longitude
140364,TX140364,2022-01-02 12:10:00,U08165,M0001,30.22,in-store,EUR,Android,credit_card,1,...,1,0,12,6,True,1,,,67.627756,21.454305
193565,TX193565,2022-01-02 18:47:00,U15044,M0001,72.3,in-store,EUR,Android,mobile_payment,1,...,1,0,18,6,True,1,,23820.0,69.229369,22.964022
416618,TX416618,2022-01-04 05:25:00,U14129,M0001,92.05,online,EUR,Android,debit_card,1,...,0,0,5,1,False,1,,124680.0,40.468651,14.784945
461100,TX461100,2022-01-05 01:42:00,U10192,M0001,7.57,in-store,EUR,Android,bank_transfer,1,...,0,0,1,2,False,1,,73020.0,65.759649,-8.348409
324183,TX324183,2022-01-05 21:38:00,U12936,M0001,52.13,mobile,EUR,Android,debit_card,1,...,1,0,21,2,False,1,,71760.0,49.963953,37.133362
