In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

In [2]:
# Load the preprocessed loan dataset.
# The CSV carries an 'Unnamed: 0' column (visible in the df.head() preview);
# presumably a row index written out by an earlier export - TODO confirm.
# It is not a feature, so drop it; errors='ignore' keeps this cell working
# if the file is later regenerated without that column.
df = (
    pd.read_csv('processed_data.csv')
    .drop(columns='Unnamed: 0', errors='ignore')
)

In [3]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,loan_status,issue_d,addr_state,annual_inc,dti,emp_length,home_ownership,verification_status,loan_amnt,term,...,revol_bal,revol_util,pub_rec,pub_rec_bankruptcies,mort_acc,inq_last_6mths,target,credit_hist_len,unemployment_rate,delinq_rate
0,Fully Paid,2015-12-01,PA,55000.0,5.91,10,MORTGAGE,Not Verified,3600.0,36 months,...,2765.0,29.7,0.0,0.0,1.0,1.0,0,12.334018,5.0,2.01
1,Fully Paid,2015-12-01,SD,65000.0,16.06,10,MORTGAGE,Not Verified,24700.0,36 months,...,21470.0,19.2,0.0,0.0,4.0,4.0,0,16.0,5.0,2.01
2,Fully Paid,2015-12-01,IL,63000.0,10.78,10,MORTGAGE,Not Verified,20000.0,60 months,...,7869.0,56.2,0.0,0.0,5.0,0.0,0,15.331964,5.0,2.01
3,Fully Paid,2015-12-01,PA,104433.0,25.37,3,MORTGAGE,Source Verified,10400.0,60 months,...,21929.0,64.5,0.0,0.0,6.0,3.0,0,17.500342,5.0,2.01
4,Fully Paid,2015-12-01,GA,34000.0,10.2,4,RENT,Source Verified,11950.0,36 months,...,8822.0,68.4,0.0,0.0,0.0,0.0,0,28.167009,5.0,2.01


In [6]:
# Report the frame's dimensions and how many rows are missing either
# FICO range bound.
print(f"Shape: {df.shape}")
print(f"\nFICO low nulls: {df['fico_range_low'].isna().sum()}")
print(f"FICO high nulls: {df['fico_range_high'].isna().sum()}")

Shape: (1345309, 29)

FICO low nulls: 0
FICO high nulls: 0


In [17]:
# Midpoint of the reported FICO range, used as the single score per loan.
df['fico_mid'] = df['fico_range_low'].add(df['fico_range_high']).div(2)

def assign_fico_tier(score, bounds=(700, 725, 750),
                     labels=('standard', 'good', 'very_good', 'exceptional')):
    """Map a FICO score to its tier label.

    Tiers are half-open intervals. With the defaults, a score below 700 is
    'standard', [700, 725) is 'good', [725, 750) is 'very_good', and 750 or
    above is 'exceptional'.

    Parameters
    ----------
    score : float or None
        The FICO score to classify.
    bounds : sequence of numbers, optional
        Ascending exclusive upper bounds separating consecutive tiers.
    labels : sequence of str, optional
        Tier names, lowest tier first; must have ``len(bounds) + 1`` entries.

    Returns
    -------
    str or None
        The tier label, or ``None`` when ``score`` is missing (``None`` or
        NaN). Without this guard a NaN fails every ``<`` comparison and
        would silently be labelled with the top tier.

    Raises
    ------
    ValueError
        If ``labels`` does not contain exactly one more entry than ``bounds``.
    """
    if len(labels) != len(bounds) + 1:
        raise ValueError("labels must contain exactly one more entry than bounds")

    # NaN is the only value that is not equal to itself.
    if score is None or score != score:
        return None

    # The first bound the score falls below decides the tier.
    for upper, label in zip(bounds, labels):
        if score < upper:
            return label
    return labels[-1]

# Label every loan with its FICO tier, one score at a time.
df['fico_tier'] = df['fico_mid'].map(assign_fico_tier)

# Tier sizes, followed by the observed span of midpoint scores.
print(df['fico_tier'].value_counts())
print(f"\nFICO mid range: {df['fico_mid'].min()} – {df['fico_mid'].max()}")

fico_tier
standard       820806
good           289809
very_good      128094
exceptional    106600
Name: count, dtype: int64

FICO mid range: 627.0 – 847.5


In [18]:
# Per-tier row count and default rate (mean of `target`, shown as a
# percentage rounded to 2 decimals), listed from lowest to highest tier.
tier_summary = (
    df.groupby('fico_tier')['target']
    .agg(row_count='count', default_rate='mean')
    .reindex(['standard', 'good', 'very_good', 'exceptional'])
    .assign(default_rate=lambda t: t['default_rate'].mul(100).round(2))
)
print(tier_summary)

             row_count  default_rate
fico_tier                           
standard        820806         23.59
good            289809         16.88
very_good       128094         12.90
exceptional     106600          8.89


In [22]:
# Cell 5 — Build imputation table
# Per-tier median of each bureau column, rounded to 4 decimals and
# keyed as {tier: {column: median}}.
bureau_cols = [
    'open_acc',
    'total_acc',
    'revol_bal',
    'revol_util',
    'pub_rec',
    'pub_rec_bankruptcies',
    'mort_acc',
    'inq_last_6mths',
    'credit_hist_len',
]

imputation_table = (
    df.groupby('fico_tier')[bureau_cols]
    .median()
    .round(4)
    .reindex(['standard', 'good', 'very_good', 'exceptional'])
    .to_dict(orient='index')
)

# Pretty-print the table for inspection.
print(json.dumps(imputation_table, indent=2))

{
  "standard": {
    "open_acc": 10.0,
    "total_acc": 23.0,
    "revol_bal": 10865.0,
    "revol_util": 59.6,
    "pub_rec": 0.0,
    "pub_rec_bankruptcies": 0.0,
    "mort_acc": 1.0,
    "inq_last_6mths": 0.0,
    "credit_hist_len": 14.3299
  },
  "good": {
    "open_acc": 11.0,
    "total_acc": 23.0,
    "revol_bal": 13194.0,
    "revol_util": 48.3,
    "pub_rec": 0.0,
    "pub_rec_bankruptcies": 0.0,
    "mort_acc": 1.0,
    "inq_last_6mths": 0.0,
    "credit_hist_len": 15.0856
  },
  "very_good": {
    "open_acc": 11.0,
    "total_acc": 24.0,
    "revol_bal": 11909.0,
    "revol_util": 35.3,
    "pub_rec": 0.0,
    "pub_rec_bankruptcies": 0.0,
    "mort_acc": 1.0,
    "inq_last_6mths": 0.0,
    "credit_hist_len": 15.5811
  },
  "exceptional": {
    "open_acc": 11.0,
    "total_acc": 24.0,
    "revol_bal": 7422.0,
    "revol_util": 17.7,
    "pub_rec": 0.0,
    "pub_rec_bankruptcies": 0.0,
    "mort_acc": 1.0,
    "inq_last_6mths": 0.0,
    "credit_hist_len": 16.334
  }
}


In [23]:
# Write the imputation table to disk as indented JSON.
output_path = 'fico_imputation_table.json'
with open(output_path, mode='w') as out_file:
    out_file.write(json.dumps(imputation_table, indent=2))

print(f"Saved to {output_path}")

Saved to fico_imputation_table.json
