In [3]:
# --- FINAL NOTEBOOK 3 BLOCK ---

# Step 1: Upload zips
from google.colab import files
uploaded = files.upload()  # select both models.zip and full_data.zip

# Step 2: Unzip models.zip
import zipfile, os


def _pick_uploaded_zip(names, keyword, exclude=()):
    """Return the first uploaded filename that contains ``keyword``.

    Matching is by substring because Colab may rename a re-uploaded file
    (e.g. "models.zip" -> "models (2).zip").

    Args:
        names: iterable of uploaded filenames.
        keyword: substring that identifies the wanted archive.
        exclude: filenames that must not be returned (already claimed).

    Raises:
        FileNotFoundError: if no uploaded filename matches.
    """
    candidates = [n for n in names if keyword in n and n not in exclude]
    if not candidates:
        raise FileNotFoundError(
            f"No uploaded file with '{keyword}' in its name; "
            f"uploaded files: {list(names)}"
        )
    return candidates[0]


models_zip = _pick_uploaded_zip(uploaded.keys(), "models")
with zipfile.ZipFile(models_zip, 'r') as zip_ref:
    zip_ref.extractall("models")

# Step 3: Unzip full_data.zip
# Exclude the models archive so a name such as "models_data.zip" cannot be
# picked for both roles.
data_zip = _pick_uploaded_zip(uploaded.keys(), "data", exclude=(models_zip,))
with zipfile.ZipFile(data_zip, 'r') as zip_ref:
    zip_ref.extractall("data")

# Step 4: Find CSV files inside data folder
import glob
# Sorted so that the file chosen below does not depend on filesystem order.
csv_files = sorted(glob.glob("data/**/*.csv", recursive=True))

# Step 5: Load datasets dynamically
import pandas as pd


def _load_split(tag):
    """Read the first CSV in ``csv_files`` whose path contains ``tag``.

    Args:
        tag: substring identifying the split, e.g. "X_train".

    Returns:
        The CSV contents as a pandas DataFrame.

    Raises:
        FileNotFoundError: if no extracted CSV path contains ``tag``.
    """
    matches = [f for f in csv_files if tag in f]
    if not matches:
        raise FileNotFoundError(
            f"No CSV containing '{tag}' found under data/; found: {csv_files}"
        )
    return pd.read_csv(matches[0])


X_train = _load_split("X_train")
X_test  = _load_split("X_test")
y_train = _load_split("y_train")
y_test  = _load_split("y_test")

# Step 6: Align X_test with training features
# The model is scored on exactly the training columns, in training order.
feature_cols = list(X_train.columns)

# reindex() below zero-fills columns absent from X_test and drops columns the
# training set never had; report both so a schema mismatch is not silent.
missing_in_test = [c for c in feature_cols if c not in X_test.columns]
extra_in_test = [c for c in X_test.columns if c not in feature_cols]
if missing_in_test:
    print(f"WARNING: X_test lacks {len(missing_in_test)} training column(s), "
          f"filled with 0: {missing_in_test}")
if extra_in_test:
    print(f"WARNING: X_test has {len(extra_in_test)} column(s) not seen in "
          f"training, dropped for scoring: {extra_in_test}")

X_test_aligned = X_test.reindex(columns=feature_cols, fill_value=0)

# Step 7: Load all models from models folder
import joblib
# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code. Only upload model archives from a source you trust.
# Sorted so the load order does not depend on directory listing order.
model_files = sorted(f for f in os.listdir("models") if f.endswith(".pkl"))
models = {}
for mf in model_files:
    path = os.path.join("models", mf)
    # splitext strips only the final ".pkl", so "xgb.v2.pkl" keeps the key
    # "xgb.v2" instead of being truncated to "xgb" (and possibly colliding).
    models[os.path.splitext(mf)[0]] = joblib.load(path)

# Example: access xgb_calibrated
if 'xgb_calibrated' not in models:
    raise KeyError(
        f"'xgb_calibrated' not found in models/; available: {list(models)}"
    )
xgb_cal = models['xgb_calibrated']

# Step 8: Score the aligned test set with the calibrated XGB model.
# Column index 1 of predict_proba is kept as the probability used downstream
# (presumably the positive/default class — confirm against the model's classes_).
y_proba_xgb = xgb_cal.predict_proba(X_test_aligned)[:, 1]

# Step 9: Results frame = aligned features + true label + predicted probability
results = X_test_aligned.assign(Actual=y_test.values, PD=y_proba_xgb)

# Short run summary
print("✅ All done! Models loaded, predictions made, results DataFrame created.")
print("Available models:", list(models))
print("Results preview:")
print(results.head())

Saving models.zip to models (2).zip
Saving full_data.zip to full_data.zip
✅ All done! Models loaded, predictions made, results DataFrame created.
Available models: ['xgb_calibrated']
Results preview:
   Account_Balance  Duration_of_Credit_monthly  \
0                1                          15   
1                4                           4   
2                4                          12   
3                4                          12   
4                1                          48   

   Payment_Status_of_Previous_Credit  Purpose  Credit_Amount  \
0                                  2        0           2511   
1                                  4        0           3380   
2                                  2        2           1574   
3                                  4        3            930   
4                                  2        9           4308   

   Value_Savings_Stocks  Length_of_current_employment  Instalment_per_cent  \
0                     1             

Business Integration – Risk Scores & Expected Loss

In [4]:
# ---------------------------------------------------------------
# Business integration: risk score, risk band and expected loss
# ---------------------------------------------------------------

import pandas as pd

# Flat Loss Given Default assumption (45%) applied to every account.
LGD = 0.45

results = (
    # Start from the raw test features and attach the true label plus the
    # calibrated XGBoost probability, used here as PD (Probability of Default).
    X_test.assign(Actual=y_test.values, PD=y_proba_xgb)
    .assign(
        # 0–1000 score where a higher value means a lower PD.
        Risk_Score=lambda frame: (1 - frame['PD']) * 1000,
        # Bucket PD into five bands; the lowest edge is -0.01 so PD == 0
        # still falls inside band A.
        Risk_Band=lambda frame: pd.cut(
            frame['PD'],
            bins=[-0.01, 0.02, 0.05, 0.1, 0.2, 1.0],
            labels=['A (Excellent)', 'B (Good)', 'C (Fair)', 'D (Poor)', 'E (Very Poor)'],
        ),
        # Exposure at Default is taken to be the credit amount.
        EAD=lambda frame: frame['Credit_Amount'],
    )
    # Expected Loss = PD × LGD × EAD
    .assign(Expected_Loss=lambda frame: frame['PD'] * LGD * frame['EAD'])
)

# Preview only the derived risk columns
summary_cols = ['PD', 'Risk_Score', 'Risk_Band', 'EAD', 'Expected_Loss']
print(results[summary_cols].head())

# Export the full frame for the Power BI dashboard
results.to_csv("predictions_with_risk.csv", index=False)
print("\nSaved predictions_with_risk.csv for Power BI.")

         PD  Risk_Score      Risk_Band   EAD  Expected_Loss
0  0.320749  679.250549  E (Very Poor)  2511     362.430839
1  0.999330    0.670195  E (Very Poor)  3380    1519.980553
2  0.995276    4.723787  E (Very Poor)  1574     704.954113
3  0.999852    0.147700  E (Very Poor)   930     418.438182
4  0.035135  964.864746       B (Good)  4308      68.113226

Saved predictions_with_risk.csv for Power BI.


In [5]:
# Colab-only helper module; provides the browser upload/download bridge.
from google.colab import files
# Send the CSV written by the previous cell to the user's browser as a download.
files.download("predictions_with_risk.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>