## FE - Strategy - 1

In [8]:
import pandas as pd
import numpy as np

# Read the cleaned transactions parquet file into memory.
original_df = pd.read_parquet(path="../0 - Data/2 - Clean/clean_transactions.pq")

# Show the first two rows as a quick sanity check of the load.
original_df.head(n=2)

Unnamed: 0,User,Card,Year,Month,Day,Amount,Use Chip,Merchant Name,Merchant City,Merchant State,...,Zipcode,Latitude,Longitude,Per Capita Income - Zipcode,Yearly Income - Person,Total Debt,FICO Score,Num Credit Cards,Datetime,Is Fraud
0,0,0,2018,1,2,130.95,Chip Transaction,5817218446178736267,La Verne,CA,...,91750,34.15,-117.76,29278.0,59696.0,127613.0,787,5,2018-01-02 06:28:00,0
1,0,0,2018,1,2,130.95,Chip Transaction,5817218446178736267,La Verne,CA,...,91750,34.15,-117.76,29278.0,59696.0,127613.0,787,5,2018-01-02 06:28:00,0


Drop Columns with Low Correlation to Fraud

In [9]:
# Columns removed before any features are engineered.
columns_to_drop = [
    "Merchant City",
    "Year",
    "Month",
    "Day",
    "Person",
    "Zip",
    "CARD INDEX",
    "Card Number",
    "CVV",
    "Expires",
    "Address",
    "Apartment",
    "City",
    "State",
    "Zipcode",
    "Card on Dark Web",
]

# drop() returns a new frame, so original_df itself is left unchanged.
# A KeyError is raised if any listed column is absent.
df = original_df.drop(columns=columns_to_drop)
df.head(n=2)

Unnamed: 0,User,Card,Amount,Use Chip,Merchant Name,Merchant State,MCC,Errors?,Card Brand,Card Type,...,Gender,Latitude,Longitude,Per Capita Income - Zipcode,Yearly Income - Person,Total Debt,FICO Score,Num Credit Cards,Datetime,Is Fraud
0,0,0,130.95,Chip Transaction,5817218446178736267,CA,5912,0,Visa,Debit,...,Female,34.15,-117.76,29278.0,59696.0,127613.0,787,5,2018-01-02 06:28:00,0
1,0,0,130.95,Chip Transaction,5817218446178736267,CA,5912,0,Visa,Debit,...,Female,34.15,-117.76,29278.0,59696.0,127613.0,787,5,2018-01-02 06:28:00,0


Time Based Features

In [10]:
# Make sure 'Datetime' is a real datetime column; values that cannot be
# parsed are turned into NaT because of errors='coerce'.
df['Datetime'] = pd.to_datetime(df['Datetime'], errors='coerce')

# Derive calendar components from the timestamp.
when = df['Datetime'].dt
df['Hour'] = when.hour
df['DayOfWeek'] = when.dayofweek  # Monday=0, Sunday=6
df['MonthOfYear'] = when.month

Time Since the Last Transaction

In [11]:
# Order rows chronologically within each user so diff() compares a
# transaction with the one immediately before it.
df = df.sort_values(by=['User', 'Datetime'])

# Minutes elapsed since the same user's previous transaction.
# Missing gaps (e.g. a user's first transaction, which has no predecessor)
# are filled with 0.
df['TimeSinceLastTransaction'] = (
    df.groupby('User')['Datetime']
    .diff()
    .dt.total_seconds()
    .div(60)
    .fillna(0)
)

df.head(n=2)

Unnamed: 0,User,Card,Amount,Use Chip,Merchant Name,Merchant State,MCC,Errors?,Card Brand,Card Type,...,Yearly Income - Person,Total Debt,FICO Score,Num Credit Cards,Datetime,Is Fraud,Hour,DayOfWeek,MonthOfYear,TimeSinceLastTransaction
6780,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,59696.0,127613.0,787,5,2018-01-01 05:36:00,0,5,0,1,0.0
6781,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,59696.0,127613.0,787,5,2018-01-01 05:36:00,0,5,0,1,0.0


Average Transaction Amount / Week / Month

In [12]:
# Per-user trailing average of the transaction amount over calendar windows.
#
# An integer window (rolling(window=7)) averages the last 7 *transactions*,
# not the last 7 days, which does not match the Week/Month feature names.
# Offset windows ('7D' / '30D') average every transaction of the same user
# that falls in the trailing 7 / 30 days, the current transaction included.
#
# Requirements:
#   * df is sorted by ['User', 'Datetime'] (done when TimeSinceLastTransaction
#     is computed), so the grouped result lines up row-for-row with df and can
#     be assigned positionally via to_numpy().
#   * 'Datetime' contains no NaT values -- time-based rolling is expected to
#     reject them; verify before running on uncleaned data.
amount_by_user = df.set_index('Datetime').groupby('User')['Amount']

df['AvgTransactionAmountWeek'] = (
    amount_by_user.rolling('7D', min_periods=1).mean().to_numpy()
)
df['AvgTransactionAmountMonth'] = (
    amount_by_user.rolling('30D', min_periods=1).mean().to_numpy()
)

df.head(2)

Unnamed: 0,User,Card,Amount,Use Chip,Merchant Name,Merchant State,MCC,Errors?,Card Brand,Card Type,...,FICO Score,Num Credit Cards,Datetime,Is Fraud,Hour,DayOfWeek,MonthOfYear,TimeSinceLastTransaction,AvgTransactionAmountWeek,AvgTransactionAmountMonth
6780,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,787,5,2018-01-01 05:36:00,0,5,0,1,0.0,123.97,123.97
6781,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,787,5,2018-01-01 05:36:00,0,5,0,1,0.0,123.97,123.97


Transaction Amount to Credit Card Limit Ratio

In [13]:
# Transaction amount as a fraction of the card's credit limit.
# A credit limit of 0 would make the division return +/-inf; masking the
# zero denominator turns those rows into NaN (missing) instead, which
# imputers and most models can handle.  Non-zero limits are unaffected.
df['AmountToCreditLimitRatio'] = df['Amount'] / df['Credit Limit'].mask(df['Credit Limit'] == 0)

Debt to Income Ratio

In [14]:
# Total debt relative to the person's yearly income.
# A yearly income of 0 would make the division return +/-inf; masking the
# zero denominator yields NaN (missing) for those rows instead.  Rows with a
# non-zero income are unaffected.
df['DebtToIncomeRatio'] = df['Total Debt'] / df['Yearly Income - Person'].mask(df['Yearly Income - Person'] == 0)

Card Usage Ratio

In [15]:
# Number of credit cards held divided by the 'Cards Issued' count.
df['CardUsageRatio'] = df['Num Credit Cards'].div(df['Cards Issued'])

Income to Spending Ratio / Zipcode / Person

In [16]:
# Income relative to the size of this transaction, using both the zipcode's
# per-capita income and the person's own yearly income.
# A transaction amount of 0 would make both divisions return inf; masking the
# zero denominator yields NaN (missing) for those rows instead.  Rows with a
# non-zero amount are unaffected.
# NOTE(review): negative amounts, if present in the data, produce negative
# ratios -- confirm that is the intended encoding.
nonzero_amount = df['Amount'].mask(df['Amount'] == 0)
df['IncomeToSpendingRatioZip'] = df['Per Capita Income - Zipcode'] / nonzero_amount
df['IncomeToSpendingRatioPerson'] = df['Yearly Income - Person'] / nonzero_amount

Years to Retirement

In [17]:
# Retirement age minus current age; the result is negative once the
# current age exceeds the retirement age.
df['YearsToRetirement'] = df['Retirement Age'].sub(df['Current Age'])

Account Age

In [18]:
# Parse the account opening date so it can be subtracted from the timestamp.
df['Acct Open Date'] = pd.to_datetime(df['Acct Open Date'])

# Whole days between the account opening and the transaction timestamp.
df['Account Age (Days)'] = df['Datetime'].sub(df['Acct Open Date']).dt.days

Age Groups

In [19]:
# Bucket the current age into labelled, right-inclusive bands:
# (0, 25], (25, 35], (35, 45], (45, 60], (60, inf).
# The last edge is open-ended (np.inf) so that ages above 100 still land in
# '60+'; with a hard upper edge of 100 pd.cut would return NaN for them.
df['Age Group'] = pd.cut(
    df['Current Age'],
    bins=[0, 25, 35, 45, 60, np.inf],
    labels=['18-25', '26-35', '36-45', '46-60', '60+'],
)

Retirement

In [20]:
# Flag rows where the current age has reached or passed the retirement age.
df['Is Retired'] = df['Current Age'].ge(df['Retirement Age'])

# Preview the frame with all engineered columns.
df.head()

Unnamed: 0,User,Card,Amount,Use Chip,Merchant Name,Merchant State,MCC,Errors?,Card Brand,Card Type,...,AvgTransactionAmountMonth,AmountToCreditLimitRatio,DebtToIncomeRatio,CardUsageRatio,IncomeToSpendingRatioZip,IncomeToSpendingRatioPerson,YearsToRetirement,Account Age (Days),Age Group,Is Retired
6780,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,123.97,0.005103,2.137714,2.5,236.170041,481.535855,13,5601,46-60,False
6781,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,123.97,0.005643,2.137714,2.5,236.170041,481.535855,13,1371,46-60,False
6782,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Debit,...,123.97,0.002671,2.137714,2.5,236.170041,481.535855,13,5298,46-60,False
6783,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Visa,Credit,...,123.97,0.009998,2.137714,5.0,236.170041,481.535855,13,5479,46-60,False
6784,0,3,123.97,Swipe Transaction,2027553650310142703,CA,5541,0,Mastercard,Debit (Prepaid),...,123.97,4.4275,2.137714,5.0,236.170041,481.535855,13,3409,46-60,False


#### Save the Featured Data

In [21]:
# Write the engineered feature table to a parquet file.
df.to_parquet(path="../0 - Data/3 - featured/ft_strategy_1.pq")