In [10]:
# ====================== FINAL MODEL ENGINEERING ======================
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
import pickle

class LoanApprovalModel(BaseEstimator, ClassifierMixin):
    """Probabilistic classifier gated by hard-coded lending business rules.

    Each row is first checked against the rejection rules (``_reject_*``
    methods). Only rows that pass every rule are scored: they are approved
    (``1``) when the wrapped model's positive-class probability is at least
    ``threshold`` and rejected (``0``) otherwise.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict_proba``.
    threshold : float, default=0.7
        Minimum positive-class probability required for approval.
    scaler : object or None, default=None
        Fitted transformer applied to the numeric columns before scoring.
        The business rules always read the unscaled input.
    num_cols : list of str or None, default=None
        Columns passed through ``scaler``. When ``None``, the columns are
        looked up from a module-level ``num_cols`` variable (the original
        notebook behaviour) and then from ``scaler.feature_names_in_``.

    Attributes
    ----------
    rejection_reasons : list of str
        One message per *rejected* row of the most recent ``predict`` call,
        in row order. Approved rows contribute no entry, so this list is not
        index-aligned with the returned predictions.
    """

    def __init__(self, model, threshold=0.7, scaler=None, num_cols=None):
        self.model = model
        self.threshold = threshold
        self.scaler = scaler
        self.num_cols = num_cols
        self.rejection_reasons = []

    def _scaled_columns(self):
        """Return the list of columns that must be passed through the scaler."""
        # getattr: instances pickled before ``num_cols`` existed lack the attribute.
        cols = getattr(self, 'num_cols', None)
        if cols is None:
            # Legacy behaviour: the notebook defines ``num_cols`` as a global.
            cols = globals().get('num_cols')
        if cols is None:
            # Available when the scaler was fitted on a DataFrame.
            cols = getattr(self.scaler, 'feature_names_in_', None)
        if cols is None:
            raise ValueError(
                "Cannot determine which columns to scale; pass num_cols to "
                "LoanApprovalModel"
            )
        return list(cols)

    def predict(self, X):
        """Return 0/1 approval decisions for every row of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Unscaled feature rows. Must contain the ``age``, ``Principal``
            and ``education`` columns used by the business rules.

        Returns
        -------
        numpy.ndarray
            One entry per row: ``1`` for approved, ``0`` for rejected.

        Raises
        ------
        AttributeError
            If the wrapped model has no ``predict_proba``.
        ValueError
            If scoring fails or returns a different number of rows than ``X``.
        """
        if not hasattr(self.model, 'predict_proba'):
            raise AttributeError("Wrapped model must implement predict_proba()")

        if self.scaler is not None:
            cols = self._scaled_columns()
            X_scaled = X.copy()
            X_scaled[cols] = self.scaler.transform(X[cols])
        else:
            X_scaled = X

        try:
            proba = self.model.predict_proba(X_scaled)[:, 1]
        except Exception as e:
            raise ValueError(f"Prediction failed: {str(e)}") from e

        if len(proba) != len(X):
            raise ValueError(
                f"Prediction failed: got {len(proba)} probabilities for {len(X)} rows"
            )

        self.rejection_reasons = []
        preds = []
        # Checked in order; the first rule that fires records its reason.
        rules = (
            self._reject_underage,
            self._reject_risky_young_adults,
            self._reject_large_young_loans,
        )

        # Pair rows with probabilities by POSITION. The DataFrame index holds
        # labels (arbitrary after a train/test split), so it must not be used
        # to index the positional ``proba`` array.
        for (_, row), p in zip(X.iterrows(), proba):
            if any(rule(row) for rule in rules):
                preds.append(0)
            elif p >= self.threshold:
                preds.append(1)
            else:
                self.rejection_reasons.append(f"Low confidence ({p:.2f} < {self.threshold})")
                preds.append(0)

        return np.array(preds)

    def _reject_underage(self, row):
        """Reject applicants under 18 requesting more than 300; record the reason."""
        if row['age'] < 18 and row['Principal'] > 300:
            self.rejection_reasons.append("Minors cannot borrow >$300")
            return True
        return False

    def _reject_risky_young_adults(self, row):
        """Reject applicants under 21 requesting more than 1000 with education < 2.

        NOTE(review): assumes ``education`` is ordinally encoded with values
        below 2 meaning "no college degree" -- confirm against the encoder.
        """
        if (row['age'] < 21 and
                row['Principal'] > 1000 and
                row['education'] < 2):
            self.rejection_reasons.append("Young adults without college degree limited to $1000")
            return True
        return False

    def _reject_large_young_loans(self, row):
        """Reject applicants under 25 requesting more than 2000; record the reason."""
        if row['Principal'] > 2000 and row['age'] < 25:
            self.rejection_reasons.append("Borrowers under 25 limited to $2000")
            return True
        return False

# ====================== MODEL PACKAGING ======================
# Wrap the tuned gradient-boosting model together with its scaler.
# 0.65 is a deliberately conservative approval threshold.
final_model = LoanApprovalModel(model=best_gb, threshold=0.65, scaler=scaler)

# ====================== COMPREHENSIVE VALIDATION ======================
from sklearn.metrics import classification_report, confusion_matrix

# 1. Standard Metrics
# Score the held-out split once; y_pred is reused by later sections.
y_pred = final_model.predict(X_test)
print("=== Standard Metrics ===")
# Per-class report first, then the raw confusion matrix.
for summarize in (classification_report, confusion_matrix):
    print(summarize(y_test, y_pred))

# 2. Demographic Fairness
from fairlearn.metrics import MetricFrame
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score


def _false_positive_rate(y_true, y_pred):
    """Return FP / (FP + TN), or NaN when the slice has no negative samples."""
    # labels=[0, 1] forces a 2x2 matrix even when a group contains only one
    # class; without it the [0, 1] lookup raises on a 1x1 matrix.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    negatives = cm[0].sum()
    if negatives == 0:
        return float("nan")
    return cm[0, 1] / negatives


# Human-readable group labels; assumes Gender is encoded 0 = Male, 1 = Female.
demo_groups = X_test['Gender'].map({0: 'Male', 1: 'Female'})
metrics = {
    'accuracy': accuracy_score,
    'precision': precision_score,
    'false_positive_rate': _false_positive_rate,
}

# Evaluate every metric separately for each gender group.
fairness_report = MetricFrame(
    metrics=metrics,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=demo_groups
)
print("\n=== Fairness Report ===")
print(fairness_report.by_group)

# 3. Business Rule Verification
# Hand-built applicants: the first three each trip one rejection rule of
# LoanApprovalModel; the last passes all rules and is decided by the model.
_case_columns = ['Principal', 'terms', 'age', 'education', 'Gender']
_case_rows = [
    (1000, 30, 17, 0, 0),  # under 18 asking for more than 300
    (1500, 30, 19, 1, 1),  # under 21, more than 1000, education below 2
    (2500, 90, 22, 2, 0),  # under 25 asking for more than 2000
    (800, 30, 35, 3, 1),   # no rule applies
]
test_cases = pd.DataFrame(_case_rows, columns=_case_columns)

test_preds = final_model.predict(test_cases)
print("\n=== Business Rule Test Cases ===")
print("Predictions:", test_preds)
# rejection_reasons reflects only this most recent predict() call.
print("Rejection Reasons:", final_model.rejection_reasons)

# ====================== DEPLOYMENT PACKAGE ======================
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Descriptive metadata shipped alongside the model artifact.
model_metadata = {
    'version': '2.0.0',
    # NOTE(review): this is the date the package is built, which is not
    # necessarily the date the model was trained.
    'training_date': pd.Timestamp.now().strftime('%Y-%m-%d'),
    'performance_metrics': {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'fairness_report': fairness_report.by_group.to_dict()
    },
    # The loan limits below mirror the constants hard-coded in
    # LoanApprovalModel's rule methods; keep them in sync by hand.
    'business_rules': {
        'max_minor_loan': 300,
        'max_young_adult_loan': 1000,
        'max_young_borrower_loan': 2000,
        'min_age': 18,
        # Read from the packaged model so the metadata cannot drift from
        # the threshold actually used at prediction time.
        'confidence_threshold': final_model.threshold
    }
}

# NOTE(review): the artifact is written regardless of the deployment
# checklist result; gate this step if failing models must not be shipped.
# Unpickling requires LoanApprovalModel to be importable under the same
# module path, and pickle files must only be loaded from trusted sources.
with open('production_loan_model.pkl', 'wb') as f:
    pickle.dump({
        'model': final_model,
        'metadata': model_metadata,
        'scaler': scaler,
        'education_map': education_map,
        'features': list(X.columns),
        'num_cols': num_cols
    }, f)

from sklearn.metrics import accuracy_score

# Evaluate each release gate up front so the report below stays readable.
_accuracy_ok = accuracy_score(y_test, y_pred) > 0.7
# True only when no applicant under 18 received an approval.
_no_minors_approved = not ((X_test['age'] < 18) & (y_pred == 1)).any()
_group_accuracy = fairness_report.by_group['accuracy']
_gender_gap_ok = abs(_group_accuracy['Male'] - _group_accuracy['Female']) < 0.05
# Expected outcomes for the four hand-built business-rule test cases.
# array_equal also returns False (instead of failing) on a length mismatch.
_test_cases_ok = np.array_equal(test_preds, [0, 0, 0, 1])
# Reflects the most recent predict() call on final_model.
_reasons_logged = bool(final_model.rejection_reasons)

print(f"""
=== DEPLOYMENT CHECKLIST ===
1. Model accuracy >70 % : {_accuracy_ok}
2. No minors approved: {_no_minors_approved}
3. Gender bias <5 % : {_gender_gap_ok}
4. All test cases passed: {_test_cases_ok}
5. Rejection reasons logged: {_reasons_logged}
""")

=== Standard Metrics ===
              precision    recall  f1-score   support

           0       0.45      1.00      0.62        40
           1       1.00      0.20      0.33        60

    accuracy                           0.52       100
   macro avg       0.73      0.60      0.48       100
weighted avg       0.78      0.52      0.45       100

[[40  0]
 [48 12]]

=== Fairness Report ===
        accuracy  precision  false_positive_rate
Gender                                          
Female  0.454545        1.0                  0.0
Male    0.528090        1.0                  0.0

=== Business Rule Test Cases ===
Predictions: [0 0 0 1]
Rejection Reasons: ['Minors cannot borrow >$300', 'Young adults without college degree limited to $1000', 'Borrowers under 25 limited to $2000']

=== DEPLOYMENT CHECKLIST ===
1. Model accuracy >70 % : False
2. No minors approved: False
3. Gender bias <5 % : False
4. All test cases passed: True  # Verify test cases
5. Rejection reasons logged: True

