# Credit Card Fraud Detection – Feature Engineering

# 1. Import Required Libraries¶

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


# 2. Load Preprocessed Data

In [2]:
X_train = pd.read_csv('../data/preprocessed/X_train.csv')
X_test = pd.read_csv('../data/preprocessed/X_test.csv')
y_train = pd.read_csv('../data/preprocessed/y_train.csv')
y_test = pd.read_csv('../data/preprocessed/y_test.csv')

print(X_train.shape, X_test.shape)


(226980, 30) (56746, 30)


# 3. Review Current Features

In [3]:
X_train.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount_scaled,Hour
0,2.238954,-1.724499,-2.151484,-2.577803,0.993668,3.565492,-1.785957,0.860122,-1.264003,1.567867,...,-0.149574,-0.049333,0.278442,0.684735,-0.219028,-0.159167,0.03792,-0.049932,0.137472,16.0
1,-1.315062,1.630783,0.597001,-0.038359,-0.40458,-0.965712,0.212249,0.735381,-1.267926,-0.482635,...,-0.238898,-0.946773,0.323904,0.515632,-0.713,-0.266503,-0.017794,0.051058,-0.209119,22.0
2,1.908801,0.021184,-2.087997,0.12931,1.161468,0.605244,-0.022371,0.180296,0.283819,-0.497766,...,0.293609,1.095842,-0.044874,-1.689517,0.106098,0.007758,0.045164,-0.053068,-0.098808,11.0
3,1.811257,0.316556,0.316751,3.880231,0.048454,1.020163,-0.734868,0.233651,0.681423,1.146705,...,0.138869,0.700422,0.174064,0.702997,-0.212523,-0.010018,-0.01774,-0.038006,-0.066242,1.0
4,1.358817,-1.120881,0.550266,-1.547659,-1.19495,0.275448,-1.201843,0.212889,-2.094285,1.492821,...,-0.340972,-0.636442,0.252758,-0.34416,-0.064282,-0.439622,0.062524,0.013095,0.026608,8.0


# 4. Interaction Features (High-Risk Feature Interactions)

In [4]:
X_train['V14_V17_interaction'] = X_train['V14'] * X_train['V17']
X_test['V14_V17_interaction'] = X_test['V14'] * X_test['V17']


In [5]:
X_train['V14_V17_interaction']

0        -0.119590
1        -0.228902
2        -0.546972
3        -0.589048
4        -0.035154
            ...   
226975   -0.022001
226976    0.004638
226977    0.136498
226978    0.022490
226979   -0.124266
Name: V14_V17_interaction, Length: 226980, dtype: float64

In [6]:
X_test['V14_V17_interaction']

0       -0.010055
1       -0.317451
2        1.155403
3       -0.117355
4       -1.464542
           ...   
56741   -0.187663
56742   -0.546461
56743    0.043213
56744   -0.042556
56745    0.323842
Name: V14_V17_interaction, Length: 56746, dtype: float64

# 5. Transaction Amount Risk Bucketing

In [7]:
def amount_risk_bucket(amount):
    if amount < 10:
        return 0
    elif amount < 100:
        return 1
    elif amount < 500:
        return 2
    else:
        return 3

X_train['Amount_risk_bucket'] = X_train['Amount_scaled'].apply(amount_risk_bucket)
X_test['Amount_risk_bucket'] = X_test['Amount_scaled'].apply(amount_risk_bucket)


In [8]:
X_train['Amount_risk_bucket'] 

0         0
1         0
2         0
3         0
4         0
         ..
226975    0
226976    0
226977    0
226978    0
226979    0
Name: Amount_risk_bucket, Length: 226980, dtype: int64

In [9]:
X_test['Amount_risk_bucket']

0        0
1        0
2        0
3        0
4        0
        ..
56741    0
56742    0
56743    0
56744    0
56745    0
Name: Amount_risk_bucket, Length: 56746, dtype: int64

# 6. Log Transformation for Stability

In [10]:
X_train['Amount_log'] = np.log1p(X_train['Amount_scaled'])
X_test['Amount_log'] = np.log1p(X_test['Amount_scaled'])


In [11]:
X_train['Amount_log']

0         0.128809
1        -0.234607
2        -0.104037
3        -0.068538
4         0.026260
            ...   
226975    0.880629
226976    1.463733
226977    0.187941
226978   -0.270099
226979   -0.145481
Name: Amount_log, Length: 226980, dtype: float64

In [12]:
X_test['Amount_log']

0       -0.158549
1       -0.327258
2        0.672424
3        1.349023
4       -0.337498
           ...   
56741   -0.326105
56742   -0.183375
56743   -0.197627
56744   -0.347649
56745   -0.088777
Name: Amount_log, Length: 56746, dtype: float64

# 7. Statistical Aggregation Features (Row-Wise)

In [13]:
pca_cols = [col for col in X_train.columns if col.startswith('V')]

X_train['pca_mean'] = X_train[pca_cols].mean(axis=1)
X_train['pca_std'] = X_train[pca_cols].std(axis=1)

X_test['pca_mean'] = X_test[pca_cols].mean(axis=1)
X_test['pca_std'] = X_test[pca_cols].std(axis=1)


# 8. Feature Scaling (Newly Created Features)

In [14]:
scaler = StandardScaler()

new_features = [
    'V14_V17_interaction',
    'Amount_log',
    'pca_mean',
    'pca_std'
]

X_train[new_features] = scaler.fit_transform(X_train[new_features])
X_test[new_features] = scaler.transform(X_test[new_features])


# 9. Final Feature Validation

In [15]:
X_train.isnull().sum().sum(), X_test.isnull().sum().sum()

(0, 0)

# 10. Save Feature-Engineered Data

In [16]:
X_train.to_csv('../data/preprocessed/X_train_fe.csv', index=False)
X_test.to_csv('../data/preprocessed/X_test_fe.csv', index=False)

# What This Feature Engineering Notebook Achieves