In [1]:
import pandas as pd

In [2]:
# Read the country-enriched fraud data into the working DataFrame `df`
df = pd.read_csv(filepath_or_buffer="../data/Fraud_Data_With_Country.csv")

In [3]:
# Parse both timestamp columns in place so that datetime arithmetic and the
# `.dt` accessor can be used on them below.
for ts_col in ('signup_time', 'purchase_time'):
    df[ts_col] = pd.to_datetime(df[ts_col])

# Seconds elapsed between a user's signup and their purchase; computed once
# here and reused for two of the derived columns.
purchased_at = df['purchase_time']
seconds_to_purchase = (purchased_at - df['signup_time']).dt.total_seconds()

# Feature 1 -- number of rows (transactions) recorded for each user_id,
# broadcast back onto every row of that user.
df['user_txn_count'] = df.groupby('user_id')['user_id'].transform('count')

# Feature 2 -- signup-to-purchase delay in seconds (velocity).
df['txn_time_sec'] = seconds_to_purchase

# Feature 3 -- per-user mean of the signup-to-purchase delay.
df['user_avg_txn_time'] = (
    df.groupby('user_id')['txn_time_sec'].transform('mean')
)

# Features 4 and 5 -- calendar components of the purchase timestamp.
df['hour_of_day'] = purchased_at.dt.hour
df['day_of_week'] = purchased_at.dt.dayofweek

# Feature 6 -- same values as txn_time_sec, exposed under a second name.
df['time_since_signup'] = seconds_to_purchase



In [4]:

# Persist the DataFrame (original columns plus the engineered features) as CSV.
# index=False keeps the row index out of the written file.
df.to_csv(path_or_buf="../data/feature_engineered_data.csv", index=False)
# Confirmation message for the notebook reader
print("✅ Feature-engineered dataset saved to: ../data/feature_engineered_data.csv")

✅ Feature-engineered dataset saved to: ../data/feature_engineered_data.csv


In [6]:
# Summarise the in-memory feature-engineered DataFrame `df`:
# its column names, its first 5 rows, and its shape.
print("Columns in the feature-engineered dataset:")
print(df.columns.tolist())
print("\nFirst 5 rows of the feature-engineered dataset:")
print(df.head())
# Display the shape of the dataset as a (rows, columns) tuple.
# The original cell printed only the heading and never the shape itself.
print("\nShape of the feature-engineered dataset:")
print(df.shape)




Columns in the feature-engineered dataset:
['user_id', 'signup_time', 'purchase_time', 'purchase_value', 'device_id', 'source', 'browser', 'sex', 'age', 'ip_address', 'class', 'ip_address_int', 'country', 'user_txn_count', 'txn_time_sec', 'user_avg_txn_time', 'hour_of_day', 'day_of_week', 'time_since_signup']

First 5 rows of the feature-engineered dataset:
   user_id         signup_time       purchase_time  purchase_value  \
0    22058 2015-02-24 22:55:49 2015-04-18 02:47:11              34   
1   333320 2015-06-07 20:39:50 2015-06-08 01:38:54              16   
2     1359 2015-01-01 18:52:44 2015-01-01 18:52:45              15   
3   150084 2015-04-28 21:13:25 2015-05-04 13:54:50              44   
4   221365 2015-07-21 07:09:52 2015-09-09 18:40:53              39   

       device_id source browser sex  age    ip_address  class  ip_address_int  \
0  QVPSPJUOCKZAR    SEO  Chrome   M   39  7.327584e+08      0       732758368   
1  EOGFQPIZPYXFZ    Ads  Chrome   F   53  3.503114e+08   