# Model Evaluation & Profit Optimization

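Evaluates the saved risk model on the test split, sweeps the approval threshold to find the value that maximizes total profit, and explains the model's predictions with SHAP.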

In [None]:
import sys
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import shap

sys.path.append(os.path.abspath('../src'))
from data_utils import load_data, split_data
from features import create_features
from model import RiskModel
from inference import calculate_expected_profit

# --- Load the raw data and rebuild the evaluation split ---
target = 'is_default'
df = load_data('../data/raw/synthetic_bnpl_data.csv')
df_features = create_features(df)

# X is the same object as df_features (no copy is made), so attaching the
# label column here also adds it to df_features.
X = df_features
X[target] = df[target]

# Only the second element returned by split_data is used; the first is
# discarded (presumably the training portion -- verify in data_utils).
_, X_test_full = split_data(X, target)

# Separate the feature matrix from the label column of the test split.
X_test = X_test_full.drop(columns=[target])
y_test = X_test_full[target]

# NOTE(review): RiskModel appears to load a saved model from this path --
# confirm against src/model.py.
model = RiskModel('../models/lightgbm_model.pkl')

In [None]:
# Profit sweep: for every candidate threshold, approve each test row whose
# predicted probability is strictly below the threshold, then total the
# realized profit of the approved rows.
FEE_RATE = 0.02  # fee earned on an approved, non-defaulted row, as a fraction of its amount

# assumes predict_proba returns one default probability per row of X_test, in
# row order -- TODO confirm against RiskModel. Flattened so that a column
# vector of shape (n, 1) is handled as well.
probs = np.asarray(model.predict_proba(X_test)).ravel()

# Per-row quantities do not depend on the threshold, so look them up once by
# test-set index label instead of once per (threshold, row) pair.
amounts = df.loc[X_test.index, 'amount'].to_numpy()
defaulted = y_test.loc[X_test.index].to_numpy() == 1

# Outcome if approved: lose the full amount on default, otherwise earn the fee.
profit_if_approved = np.where(defaulted, -amounts, amounts * FEE_RATE)

# 100 evenly spaced points on [0, 1], i.e. a spacing of 1/99.
thresholds = np.linspace(0, 1, 100)

# A rejected row contributes nothing, so each threshold's total is the sum
# over the approved subset only.
profits = [profit_if_approved[probs < t].sum() for t in thresholds]

# np.argmax returns the first maximum, i.e. the lowest threshold on ties.
best_threshold = thresholds[np.argmax(profits)]
print(f'Best Threshold: {best_threshold:.2f}')

plt.figure(figsize=(10, 5))
plt.plot(thresholds, profits)
plt.xlabel('Threshold')
plt.ylabel('Total Profit')
plt.title('Profit vs Threshold')
# Mark the profit-maximizing threshold on the curve.
plt.axvline(best_threshold, color='r', linestyle='--')
plt.show()

In [None]:
# Explain the test-set predictions with SHAP.
# model.model is handed straight to TreeExplainer; presumably it is the
# tree model wrapped by RiskModel -- verify in src/model.py.
explainer = shap.TreeExplainer(model.model)
shap_values = explainer.shap_values(X_test)

# NOTE(review): depending on the shap version and model type, shap_values may
# be a single array or one array per class, which changes the kind of summary
# plot drawn -- confirm the output is the intended one.
shap.summary_plot(shap_values, features=X_test)