# Feature Engineering

In [1]:
# Read the processed fraud transactions (the file that already carries the
# `country` column) into the working DataFrame used by every cell below.
import pandas as pd

fraud_df = pd.read_csv(
    filepath_or_buffer='../data/processed/fraud_with_country.csv',
)


In [3]:
# Preview the first five rows to sanity-check the loaded columns.
fraud_df.head(n=5)

Unnamed: 0,user_id,signup_time,purchase_time,purchase_value,device_id,source,browser,sex,age,ip_address,class,country
0,22058,2015-02-24 22:55:49,2015-04-18 02:47:11,34,QVPSPJUOCKZAR,SEO,Chrome,M,39,732758368,0,Japan
1,333320,2015-06-07 20:39:50,2015-06-08 01:38:54,16,EOGFQPIZPYXFZ,Ads,Chrome,F,53,350311387,0,United States
2,1359,2015-01-01 18:52:44,2015-01-01 18:52:45,15,YSSKYOSJHPPLJ,SEO,Opera,M,53,2621473820,1,United States
3,150084,2015-04-28 21:13:25,2015-05-04 13:54:50,44,ATGTXKYKUDUQN,SEO,Safari,M,41,3840542443,0,Unknown
4,221365,2015-07-21 07:09:52,2015-09-09 18:40:53,39,NAUITBZFJKHWW,Ads,Safari,M,45,415583117,0,United States


🔹 1. Extract hour_of_day & day_of_week

In [4]:
# Convert purchase_time from the strings read out of the CSV into datetimes,
# so the `.dt` accessor and timestamp arithmetic work in later cells.
fraud_df['purchase_time'] = pd.to_datetime(
    fraud_df['purchase_time'],
    format='%Y-%m-%d %H:%M:%S',
)

In [7]:
# Convert signup_time from the strings read out of the CSV into datetimes,
# using the same explicit format as purchase_time.
fraud_df['signup_time'] = pd.to_datetime(
    fraud_df['signup_time'],
    format='%Y-%m-%d %H:%M:%S',
)

In [5]:
# Calendar features derived from the purchase timestamp.
# hour_of_day is 0-23; day_of_week is 0-6 with Monday = 0.
purchase_dt = fraud_df['purchase_time'].dt
fraud_df['hour_of_day'] = purchase_dt.hour
fraud_df['day_of_week'] = purchase_dt.dayofweek

🔹 2. Calculate time_since_signup

In [8]:
# Elapsed time between account signup and the purchase, expressed in seconds.
signup_to_purchase = fraud_df['purchase_time'] - fraud_df['signup_time']
fraud_df['time_since_signup'] = signup_to_purchase.dt.total_seconds()

🔹 3. Transaction count per user

In [9]:
# Number of rows (transactions) recorded for each user_id, broadcast back onto
# every row belonging to that user.
# NOTE(review): every preview row shows a count of 1 -- confirm user_id actually
# repeats in this dataset, otherwise this feature is constant.
txns_by_user = fraud_df.groupby('user_id')['user_id']
fraud_df['user_txn_count'] = txns_by_user.transform('count')

🔹 4. Unique devices per user

In [10]:
# Number of distinct device_id values seen for each user_id.
devices_per_user = fraud_df.groupby('user_id')['device_id'].nunique()

# Keep the tidy (user_id, user_device_count) lookup table available under its
# original name for any later inspection.
device_counts = devices_per_user.reset_index(name='user_device_count')

# Attach the count with an index-aligned column assignment instead of a merge.
# A merge is not idempotent: re-running the cell would join onto a frame that
# already has `user_device_count`, producing `user_device_count_x` / `_y`
# columns. Mapping user_id -> count simply overwrites the column on a re-run.
fraud_df['user_device_count'] = fraud_df['user_id'].map(devices_per_user)

💾 Save new version

In [11]:
# Persist the feature-enriched frame; index=False keeps the row index out of
# the written file.
fraud_df.to_csv(
    path_or_buf='../data/processed/fraud_features.csv',
    index=False,
)

In [12]:
# Preview the first five rows to confirm the engineered columns are present.
fraud_df.head(n=5)

Unnamed: 0,user_id,signup_time,purchase_time,purchase_value,device_id,source,browser,sex,age,ip_address,class,country,hour_of_day,day_of_week,time_since_signup,user_txn_count,user_device_count
0,22058,2015-02-24 22:55:49,2015-04-18 02:47:11,34,QVPSPJUOCKZAR,SEO,Chrome,M,39,732758368,0,Japan,2,5,4506682.0,1,1
1,333320,2015-06-07 20:39:50,2015-06-08 01:38:54,16,EOGFQPIZPYXFZ,Ads,Chrome,F,53,350311387,0,United States,1,0,17944.0,1,1
2,1359,2015-01-01 18:52:44,2015-01-01 18:52:45,15,YSSKYOSJHPPLJ,SEO,Opera,M,53,2621473820,1,United States,18,3,1.0,1,1
3,150084,2015-04-28 21:13:25,2015-05-04 13:54:50,44,ATGTXKYKUDUQN,SEO,Safari,M,41,3840542443,0,Unknown,13,0,492085.0,1,1
4,221365,2015-07-21 07:09:52,2015-09-09 18:40:53,39,NAUITBZFJKHWW,Ads,Safari,M,45,415583117,0,United States,18,2,4361461.0,1,1
