# Feature Engineering

This notebook creates meaningful, robust, and interpretable features for
credit risk modeling. The features are built from domain knowledge using
ratio-based metrics, interaction terms, transformations, ordinal encodings,
and binary threshold flags, while aiming to avoid proxies for protected attributes.


In [1]:
import pandas as pd
import numpy as np

# Read the German credit dataset from the raw-data directory.
# NOTE(review): this was previously described as the cleaned output of the
# EDA step, but the path points at data/raw — confirm which file is intended.
df = (
    pd.read_csv("../data/raw/german_credit.csv")
    # A stray "Unnamed: 0" index column may or may not be present;
    # errors="ignore" turns the drop into a no-op when it is absent.
    .drop(columns=["Unnamed: 0"], errors="ignore")
)


In [2]:
# Ratio features: divide the requested credit by Duration and by Age.
# NOTE(review): a zero in Duration or Age would yield inf here — assumes both
# columns are strictly positive; confirm upstream.
# NOTE(review): the second ratio depends directly on Age, while the notebook
# intro says protected-attribute proxies are avoided — confirm it is acceptable.

# 1. Credit amount per unit of Duration (presumably months — verify units)
df["credit_amount_per_month"] = df["Credit amount"].div(df["Duration"])

# 2. Credit amount divided by Age
df["credit_amount_per_age"] = df["Credit amount"].div(df["Age"])


In [3]:
# Interaction features: element-wise products of pairs of columns.

# 3. Age × Duration
df["age_duration_interaction"] = df["Age"].mul(df["Duration"])

# 4. Job × Credit amount
# NOTE(review): Job appears to be an integer code (1 and 2 in the preview);
# any row coded 0 would get a product of 0 regardless of amount — confirm
# that a plain product is meaningful for this encoding.
df["job_credit_interaction"] = df["Job"].mul(df["Credit amount"])


In [4]:
# Monotonic transforms that compress large values.

# 5. log(1 + x) of Credit amount; log1p keeps a zero amount finite
df["log_credit_amount"] = df["Credit amount"].pipe(np.log1p)

# 6. Square root of Duration
df["sqrt_duration"] = df["Duration"].pipe(np.sqrt)


In [5]:
# Ordinal encodings: each account level is replaced by its 1-based rank in
# the ordered list below. Series.map leaves NaN for rows whose value is
# missing or not listed (visible as NaN scores in the preview).
# NOTE(review): decide whether NaN should be imputed or treated as its own
# level before modelling.

# 7. Saving accounts
saving_map = {
    level: rank
    for rank, level in enumerate(["little", "moderate", "quite rich", "rich"], start=1)
}
df["saving_account_score"] = df["Saving accounts"].map(saving_map)

# 8. Checking account
checking_map = {
    level: rank
    for rank, level in enumerate(["little", "moderate", "rich"], start=1)
}
df["checking_account_score"] = df["Checking account"].map(checking_map)


In [6]:
# Binary threshold flags: 1 when the value is strictly above the column
# median, otherwise 0.
# NOTE(review): the medians are computed over every row in df; if a
# train/test split happens later, confirm this does not leak test data.

# 9. High credit flag
df["high_credit_flag"] = (
    df["Credit amount"].gt(df["Credit amount"].median()).astype(int)
)

# 10. Long duration flag
df["long_duration_flag"] = (
    df["Duration"].gt(df["Duration"].median()).astype(int)
)


In [7]:
# Preview the first five rows, including the engineered columns.
df.head(n=5)


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,credit_amount_per_month,credit_amount_per_age,age_duration_interaction,job_credit_interaction,log_credit_amount,sqrt_duration,saving_account_score,checking_account_score,high_credit_flag,long_duration_flag
0,67,male,2,own,,little,1169,6,radio/TV,194.833333,17.447761,402,2338,7.064759,2.44949,,1.0,0,0
1,22,female,2,own,little,moderate,5951,48,radio/TV,123.979167,270.5,1056,11902,8.691483,6.928203,1.0,2.0,1,1
2,49,male,1,own,little,,2096,12,education,174.666667,42.77551,588,2096,7.648263,3.464102,1.0,,0,0
3,45,male,2,free,little,little,7882,42,furniture/equipment,187.666667,175.155556,1890,15764,8.972464,6.480741,1.0,1.0,1,1
4,53,male,2,free,little,little,4870,24,car,202.916667,91.886792,1272,9740,8.491055,4.898979,1.0,1.0,1,1
