# In [2]:
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the cleaned loan dataset that every feature below is derived from.
# NOTE(review): this is an absolute path on one developer's machine, so the
# script only runs where that exact file exists.
clean_loan_csv = "/Users/faisaldernawi/Desktop/Training_And_Development/GW_Bootcamp/Homework/Project_4/Loan_Approval_Classification-/Resources/clean_loan_data.csv"
clean_loan_data = pd.read_csv(clean_loan_csv)

# 1. New feature: requested loan amount divided by the applicant's income.
# NOTE(review): the column is named "debt_to_income_ratio" but is computed from
# loan_amnt only, i.e. it is a loan-to-income ratio. A zero income produces
# inf (or NaN for 0/0) rather than raising.
clean_loan_data["debt_to_income_ratio"] = clean_loan_data["loan_amnt"].div(
    clean_loan_data["person_income"]
)

# 2. One-hot encode the categorical columns. drop_first=True drops the first
# level of each column so the remaining dummy columns are not redundant.
one_hot_columns = [
    "person_home_ownership",
    "loan_intent",
    "person_gender",
    "person_education",
]
clean_loan_data = pd.get_dummies(
    clean_loan_data, columns=one_hot_columns, drop_first=True
)

# 3. Encode the binary previous-defaults flag as 1 ("Yes") / 0 ("No").
# Series.map turns any value other than "Yes"/"No" into NaN.
yes_no_codes = {"Yes": 1, "No": 0}
clean_loan_data["previous_loan_defaults_on_file"] = clean_loan_data[
    "previous_loan_defaults_on_file"
].map(yes_no_codes)

# 4. Categorize credit scores into five named bands.
# pd.cut builds right-closed intervals, so without include_lowest=True the
# first band would be (300, 580] and a score of exactly 300 would silently
# become NaN. Scores below 300 or above 850 still map to NaN.
credit_score_bins = [300, 580, 670, 740, 800, 850]
credit_score_labels = ["Poor", "Fair", "Good", "Very Good", "Excellent"]
clean_loan_data["credit_score_category"] = pd.cut(
    clean_loan_data["credit_score"],
    bins=credit_score_bins,
    labels=credit_score_labels,
    include_lowest=True,
)

# 5. Log-transform the loan amount to reduce skewness. log1p(x) = log(1 + x),
# so a zero amount maps to 0 instead of -inf.
clean_loan_data["log_loan_amnt"] = np.log1p(clean_loan_data["loan_amnt"])

# 6. Yearly interest payment: loan amount times interest rate, divided by 100.
# NOTE(review): the division by 100 assumes loan_int_rate is stored as a
# percentage (e.g. 11.5 rather than 0.115) — confirm against the data.
clean_loan_data["yearly_interest_payment"] = (
    clean_loan_data["loan_amnt"] * clean_loan_data["loan_int_rate"]
) / 100

# 7. Loan approval indicator: 1 where loan_status == 0, otherwise 0.
# Uses a vectorised comparison instead of a per-row Python lambda; the
# resulting values are the same.
# NOTE(review): this column is a deterministic function of loan_status, so it
# must not be fed to a model as a feature if loan_status is the target. Also
# confirm that loan_status == 0 really means "approved" in this dataset.
clean_loan_data["loan_approved"] = (
    clean_loan_data["loan_status"].eq(0).astype("int64")
)

# 8. Ratio of person_emp_exp to person_age.
clean_loan_data["emp_age_ratio"] = (
    clean_loan_data["person_emp_exp"] / clean_loan_data["person_age"]
)

# 9. Bucket the loan amount into five labelled ranges.
# The intervals are right-closed: (0, 5000], (5000, 15000], ...,
# (50000, 100000]; amounts outside (0, 100000] become NaN.
bins = [0, 5000, 15000, 30000, 50000, 100000]
labels = ["Very Low", "Low", "Medium", "High", "Very High"]
clean_loan_data["loan_amount_category"] = pd.cut(
    x=clean_loan_data["loan_amnt"], bins=bins, labels=labels
)

# 10. "Credit utilization" heuristic: loan amount over ten times the credit
# score. NOTE(review): this is not utilization in the usual balance/limit
# sense; it is a project-specific ratio.
clean_loan_data["credit_utilization"] = clean_loan_data["loan_amnt"].div(
    clean_loan_data["credit_score"].mul(10)
)

# 11. Interaction term: loan amount times interest rate.
# NOTE(review): this is exactly 100 x yearly_interest_payment, so the two
# columns are perfectly collinear.
clean_loan_data["loan_amount_interest_interaction"] = clean_loan_data[
    "loan_amnt"
].mul(clean_loan_data["loan_int_rate"])

# 12. Loan tenure estimate: credit history length multiplied by 12.
# NOTE(review): presumably converts years to months — confirm the unit of
# cb_person_cred_hist_length.
clean_loan_data["loan_tenure_estimate"] = clean_loan_data[
    "cb_person_cred_hist_length"
].mul(12)

# 13. Standardize the income column with a freshly fitted StandardScaler.
# The double brackets select a one-column DataFrame, which is the 2-D input
# the scaler expects.
# NOTE(review): the scaler is fitted on every row of the frame; if a
# train/test split happens later, fit it on the training rows only.
scaler = StandardScaler()
income_frame = clean_loan_data[["person_income"]]
clean_loan_data["scaled_income"] = scaler.fit_transform(income_frame)

# 14. Yearly interest payment as a fraction of income.
clean_loan_data["interest_income_ratio"] = clean_loan_data[
    "yearly_interest_payment"
].div(clean_loan_data["person_income"])

# 15. Flag young professionals: age below 30 AND income above 40,000,
# stored as 1/0.
is_under_30 = clean_loan_data["person_age"].lt(30)
earns_over_40k = clean_loan_data["person_income"].gt(40000)
clean_loan_data["young_professional"] = (is_under_30 & earns_over_40k).astype(int)
