In [1]:
import pandas as pd
import joblib
import os

In [2]:
def predict_loan_default(new_data, model_dir='model_artifacts'):
    """
    Complete prediction pipeline for new loan applications

    Parameters:
    new_data: DataFrame with same structure as training data
    model_dir: Directory containing saved model artifacts

    Returns:
    predictions: Binary predictions (0/1)
    probabilities: Prediction probabilities
    """

    model = joblib.load(os.path.join(model_dir, 'loan_default_xgb_model.pkl'))
    num_imputer = joblib.load(os.path.join(model_dir, 'numerical_imputer.pkl'))
    cat_imputer = joblib.load(os.path.join(model_dir, 'categorical_imputer.pkl'))
    selected_features = joblib.load(os.path.join(model_dir, 'selected_features.pkl'))
    threshold = joblib.load(os.path.join(model_dir, 'optimal_threshold.pkl'))
    numerical_features = joblib.load(os.path.join(model_dir, 'numerical_features.pkl'))
    categorical_features = joblib.load(os.path.join(model_dir, 'categorical_features.pkl'))

    new_data_processed = new_data.copy()

    if 'ID' in new_data_processed.columns:
        new_data_processed = new_data_processed.drop('ID', axis=1)

    new_data_processed[numerical_features] = num_imputer.transform(new_data_processed[numerical_features])
    new_data_processed[categorical_features] = cat_imputer.transform(new_data_processed[categorical_features])

    new_data_encoded = pd.get_dummies(new_data_processed, columns=categorical_features, drop_first=True)

    for feature in selected_features:
        if feature not in new_data_encoded.columns:
            new_data_encoded[feature] = 0

    new_data_final = new_data_encoded[selected_features]

    probabilities = model.predict_proba(new_data_final)[:, 1]
    predictions = (probabilities >= threshold).astype(int)

    return predictions, probabilities

In [14]:
new_applications = pd.DataFrame([{
    'year': 2025,
    'Gender': 'Male',                  # options: 'Male', 'Female', 'Joint', 'Sex Not Available'
    'age': '35-44',                    # options: '18-24', '25-34', '35-44', '45-54', '55-64', '65-74', '>74'
    'Region': 'central',               # options: 'north', 'south', 'central', etc.
    
    'loan_amount': 1400000,            # INR, could be 1,400,000
    'loan_purpose': 'p3',              # example: 'p1', 'p2', 'p3', 'p4'
    'loan_type': 'type3',              # example: 'type1', 'type2', 'type3'
    'loan_limit': 'ncf',               # example: 'cf', 'ncf'
    
    'income': 0,                  # annual income in INR (₹9,50,000)
    'Credit_Score': 305,               # typical CIBIL score range 300–900
    'open_credit': 'opc',              # 'opc', 'nopc'
    'business_or_commercial': 'nob/c', # 'b/c', 'nob/c'
    
    'property_value': 2200000,         # INR, could be 2,200,000
    'construction_type': 'sb',         # 'sb', 'mh'
    'occupancy_type': 'pr',            # 'pr' (primary residence), 'sr' (second home), 'ir' (investment)
    'Secured_by': 'home',              # 'home', 'land'
    'total_units': '1U',               # '1U', '2U', '3U', '4U'
    'Security_Type': 'direct'          # 'direct', 'Indriect'
}])


In [15]:
predictions, probabilities = predict_loan_default(new_applications)

for i, (pred, prob) in enumerate(zip(predictions, probabilities)):
    print(f"Applicant {i+1}: Predicted Default: {'Yes' if pred else 'No'} (Score: {prob:.3f})")

Applicant 1: Predicted Default: Yes (Score: 0.987)
