In [1]:
# Cell 1: Setup and imports
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Set style for prettier plots
# 'seaborn-v0_8' is handed to matplotlib as the style to apply, then seaborn's
# colour palette is set to 'husl'. Both are global plotting settings, which is
# why they live in the setup cell rather than next to an individual figure.
# NOTE(review): keep this call order - both calls may touch the colour cycle,
# so swapping them could change which palette wins; confirm before reordering.
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# Confirmation message so the reader can see the setup cell ran to completion.
print("✅ Setup complete! Ready to build our credit approval system.")

✅ Setup complete! Ready to build our credit approval system.


In [6]:
# Cell 2: Generate sample credit data for our fictional bank
#
# Produces four independent, uniformly distributed feature arrays with one
# value per customer: annual_income, credit_score, years_employed and
# current_debt. The order of the np.random.uniform calls matters: with a fixed
# seed, reordering them would change every generated value.
#
# The legacy global seed is kept on purpose: switching to a different random
# generator would produce different numbers than the ones already recorded in
# this notebook's outputs.
np.random.seed(42)  # For reproducibility - same data each time we run

# Number of customers in our historical data (10-15 years worth)
n_samples = 500

# (low, high) bounds for each feature, defined once so the sampling code and
# the sanity check below cannot drift apart.
INCOME_RANGE = (20000, 150000)    # dollars - realistic salary range
CREDIT_SCORE_RANGE = (300, 850)   # typical credit score range
YEARS_EMPLOYED_RANGE = (0.5, 25)  # recent graduates to seasoned professionals
DEBT_RANGE = (0, 80000)           # dollars - credit cards, car loans, etc.

print("Generating historical credit data from our fictional bank...")
print(f"Creating data for {n_samples} past customers (2010-2024)...")

# Feature 1: Annual Income (in dollars)
annual_income = np.random.uniform(*INCOME_RANGE, n_samples)

# Feature 2: Credit Score
credit_score = np.random.uniform(*CREDIT_SCORE_RANGE, n_samples)

# Feature 3: Years Employed
years_employed = np.random.uniform(*YEARS_EMPLOYED_RANGE, n_samples)

# Feature 4: Current Debt (in dollars)
current_debt = np.random.uniform(*DEBT_RANGE, n_samples)

# Sanity check: every feature has exactly one value per customer and stays
# inside its declared bounds, so a bad edit to the ranges fails here instead
# of silently skewing the approval rule in later cells.
assert all(
    values.shape == (n_samples,) and low <= values.min() and values.max() <= high
    for values, (low, high) in (
        (annual_income, INCOME_RANGE),
        (credit_score, CREDIT_SCORE_RANGE),
        (years_employed, YEARS_EMPLOYED_RANGE),
        (current_debt, DEBT_RANGE),
    )
), "a generated feature has the wrong length or falls outside its declared range"

print(" Generated 4 features for each customer:")
print(f" - Annual Income: ${annual_income.min():,.0f} to ${annual_income.max():,.0f}")
print(f" - Credit Score: {credit_score.min():.0f} to {credit_score.max():.0f}")
print(
    f" - Years Employed: {years_employed.min():.1f} to {years_employed.max():.1f} years"
)
print(f" - Current Debt: ${current_debt.min():,.0f} to ${current_debt.max():,.0f}")

Generating historical credit data from our fictional bank...
Creating data for 500 pasts customers (2010-2024)...
 Generated 4 features for each customer:
 - Annual Income: $20,658 to $149,085
 - Credit Score: 303 to 850
 - Years Employed: 0.6 to 25.0 years
 - Current Debt: $257 to $79,868


In [None]:
# Cell 3: Create approval decisions based on a realistic rule
print("\n Creating historical approval/denial decisions ...\n")

# Decision rule: simple but realistic.
# Strong applicants combine high income, a high credit score, long employment
# and low debt.
#
# To build a "credit worthiness score", each factor is first rescaled to a
# common 0-100 range so that every factor contributes on the same footing.

# Income (higher is better): min-max scaled over the incomes in the data.
income_floor = annual_income.min()
income_span = annual_income.max() - income_floor
income_score = (annual_income - income_floor) / income_span * 100

# Credit score (higher is better): rescaled from the fixed 300-850 scale to
# 0-100. Unlike the other factors, this uses the scale's fixed bounds, not the
# observed minimum and maximum.
credit_floor = 300
credit_span = 850 - credit_floor
credit_score_normalized = (credit_score - credit_floor) / credit_span * 100

# Employment (longer is better): min-max scaled over the observed tenures.
tenure_floor = years_employed.min()
tenure_span = years_employed.max() - tenure_floor
employment_score = (years_employed - tenure_floor) / tenure_span * 100