In [None]:
import sys

# Make the project helpers importable from a fresh kernel.
# The imports below use the package-qualified form `scripts.<module>`, which
# only resolves when the directory that CONTAINS `scripts/` (here `..`) is on
# sys.path; adding `../scripts` alone exposes its modules as top-level names
# (e.g. `load_data`) but not as `scripts.load_data`. Both entries are kept so
# either import style works, and the membership check keeps re-running this
# cell from piling up duplicate sys.path entries.
for _extra_path in ('..', '../scripts'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

from scripts.load_data import load_data
from scripts.feature_engineering import create_aggregate_features, extract_temporal_features, encode_categorical, handle_missing_values, scale_features
import scorecardpy as sc


In [None]:

# Read the raw dataset from the project's data directory (path is relative
# to the notebook's working directory).
raw_csv_path = '../data/data.csv'
data = load_data(raw_csv_path)


In [None]:

# Step 1: Build the aggregate features from the loaded data and preview them.
# NOTE(review): agg_features is not referenced again in the cells visible
# here — confirm whether it is meant to be merged back into `data`.
agg_features = create_aggregate_features(data)

print("Aggregated features:")
agg_preview = agg_features.head()
print(agg_preview)


In [None]:

# Step 2: Add the temporal features to `data` and preview the new columns
# next to the timestamp column.
# `data` is rebound here because the following cells keep working on it.
data = extract_temporal_features(data)

print("Data with extracted temporal features:")
temporal_preview_columns = ['TransactionStartTime', 'transaction_hour', 'transaction_day']
print(data[temporal_preview_columns].head())


In [None]:

# Step 3: Replace missing values in the categorical columns with the
# placeholder label 'Other' (no encoding is performed in this cell).
for column_name in ('ProductCategory', 'ChannelId'):
    data[column_name] = data[column_name].fillna('Other')


In [None]:

# Step 4: Weight of Evidence (WoE) binning with scorecardpy.
# First split the data 70/30 into train and test partitions; the fixed seed
# keeps the split reproducible across runs.
split_frames = sc.split_df(data, 'FraudResult', ratio=0.7, seed=999)
train, test = split_frames.values()

# Fit the WoE bins on the training partition only, for the categorical
# columns, with 'FraudResult' as the target.
categorical_columns = ['ProductCategory', 'ChannelId']
woe_bins = sc.woebin(train, x=categorical_columns, y='FraudResult')


In [None]:

# Step 5: Apply the bins fitted on the training data to both partitions
# (train first, then test).
train_woe, test_woe = (
    sc.woebin_ply(partition, woe_bins) for partition in (train, test)
)


In [None]:

# Step 6: Inspect the WoE-transformed training data and the Information
# Value (IV) of the WoE features.
print("WoE-transformed training data:")
print(train_woe.head())

# Score only the columns produced by the WoE transformation. Bins were fitted
# for the categorical columns alone, so calling sc.iv() without `x` would also
# score every other non-target column left in train_woe (raw timestamps and
# the like), which is not meaningful and can be slow.
# scorecardpy's woebin_ply is documented to name its outputs `<column>_woe`;
# if no such column is found, `x=None` falls back to scoring all columns.
woe_columns = [
    column for column in train_woe.columns if str(column).endswith('_woe')
]

print("Information Value of features:")
iv_values = sc.iv(train_woe, y='FraudResult', x=woe_columns or None)
print(iv_values)

