In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Evaluation metrics for each candidate model, entered by hand and keyed by
# model name. Values are fractions in [0, 1].
data = {
    'Decision Tree':  {'Accuracy': 0.93, 'ROC-AUC': 0.65, 'Recall': 0.11, 'Precision': 0.25},
    'Neural Network': {'Accuracy': 0.94, 'ROC-AUC': 0.79, 'Recall': 0.39, 'Precision': 0.47},
    'XGBoost':        {'Accuracy': 0.86, 'ROC-AUC': 0.91, 'Recall': 0.82, 'Precision': 0.39},
    'Random Forest':  {'Accuracy': 0.80, 'ROC-AUC': 0.77, 'Recall': 0.56, 'Precision': 0.16},
    'kNN':            {'Accuracy': 0.81, 'ROC-AUC': 0.82, 'Recall': 0.56, 'Precision': 0.17}
}

# One row per model. Naming the index before reset_index() turns the dict keys
# into a regular 'Model Name' column in a single expression, so the frame is
# never mutated after creation (no rename(..., inplace=True)) and re-running
# the cell always yields the same result.
df_results = (
    pd.DataFrame.from_dict(data, orient='index')
    .rename_axis('Model Name')
    .reset_index()
)

# Dictionary of the Top 3 features found for EACH model.
# Feature names use one spelling throughout (snake_case) so that the same
# feature is recognisably the same across models; the Neural Network entry
# previously read "debt-to-income", which made it look like a distinct feature.
feature_importance = {
    "Decision Tree": ["debt_to_income", "credit_score", "payment_punctuality"],
    "Neural Network": ["credit_score", "savings_ratio", "debt_to_income"],
    "XGBoost": ["credit_score", "business", "feedback"],
    "Random Forest": ["credit_score", "savings_ratio", "debt_to_income"],
    "kNN": ["credit_score", "savings_ratio", "debt_to_income"]
}

# VISUAL TABLE FOR PRESENTATION
# The two metrics that get visual emphasis (blue gradient + green maximum).
_emphasis_cols = ['ROC-AUC', 'Recall']

# Per-column display formats: ROC-AUC stays a two-decimal score, the other
# metrics are rendered as percentages.
_cell_formats = {
    'Accuracy': '{:.2%}',
    'ROC-AUC': '{:.2f}',
    'Recall': '{:.2%}',
    'Precision': '{:.2%}',
}

# The gradient is applied before the max highlight, same order as before.
styled_table = (
    df_results.style
    .background_gradient(cmap='Blues', subset=_emphasis_cols)
    .highlight_max(color='lightgreen', subset=_emphasis_cols)
    .format(_cell_formats)
    .set_caption("Sterling Financial: Model Performance Comparison")
)

print("Visualizing Table...\n")
display(styled_table)

Visualizing Table...



Unnamed: 0,Model Name,Accuracy,ROC-AUC,Recall,Precision
0,Decision Tree,93.00%,0.65,11.00%,25.00%
1,Neural Network,94.00%,0.79,39.00%,47.00%
2,XGBoost,86.00%,0.91,82.00%,39.00%
3,Random Forest,80.00%,0.77,56.00%,16.00%
4,kNN,81.00%,0.82,56.00%,17.00%


In [4]:
import torch
import os
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig
from google.colab import userdata
from huggingface_hub import login

# Read the Hugging Face access token from the Colab secret named HF_TOKEN and
# log in with it (no token is hard-coded in the notebook).
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token)

# 1. LOAD MISTRAL 7B (4-bit quantization for T4 GPU)
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 weights with double quantization; computation runs in float16.
quant_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Release cached CUDA blocks before the checkpoint is loaded.
torch.cuda.empty_cache()

# device_map="auto" lets the library decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Model Ready!")

# 2. DEFINE THE PROMPT FUNCTION
def _join_with_and(items):
    """Join strings as English prose: 'A', 'A and B', 'A, B and C'."""
    items = list(items)
    if len(items) <= 1:
        return "".join(items)
    return ", ".join(items[:-1]) + " and " + items[-1]


def generate_interpretation_prompt(df_results, feature_importance):
    """Build the Mistral-instruct prompt asking for a risk-focused model audit.

    Every model-specific statement in the TASK section (model count, winning
    model and its metrics, the "accuracy trap" models, the winner's distinctive
    features) is derived from the arguments instead of being hard-coded, so the
    prompt cannot contradict the table it embeds when the metrics change.

    Selection rules:
      * Winner: the row with the highest 'Recall'; ties are broken by the
        higher 'ROC-AUC'.
      * Accuracy trap: every model whose 'Accuracy' is higher than the
        winner's while its 'Recall' is lower, in table order.
      * Unique features: features listed for the winner that are listed for
        no other model.

    Args:
        df_results: pandas DataFrame with one row per model and at least the
            columns 'Model Name', 'Accuracy', 'ROC-AUC' and 'Recall'. Metric
            values are expected to be fractions (0.82 is rendered as "82%").
        feature_importance: mapping of model name -> list of top feature
            names. It is embedded verbatim (its ``str()``) in the prompt.

    Returns:
        The prompt string, wrapped in ``[INST]`` / ``[/INST]`` markers.

    Raises:
        ValueError: if a required column is missing or the frame has no rows.
    """
    required = ['Model Name', 'Accuracy', 'ROC-AUC', 'Recall']
    missing = [col for col in required if col not in df_results.columns]
    if missing:
        raise ValueError(f"df_results is missing required column(s): {missing}")
    if df_results.empty:
        raise ValueError("df_results must contain at least one model")

    n_models = len(df_results)

    # Winner = best Recall (the metric the prompt declares decisive),
    # ROC-AUC as tie-breaker.
    best = df_results.sort_values(['Recall', 'ROC-AUC'], ascending=False).iloc[0]
    best_name = best['Model Name']
    best_auc = f"{float(best['ROC-AUC']):.2f}"
    best_recall = f"{float(best['Recall']):.0%}"

    # Task 1: models that look better on Accuracy but are worse on Recall.
    trap = df_results[
        (df_results['Accuracy'] > best['Accuracy'])
        & (df_results['Recall'] < best['Recall'])
    ]
    if trap.empty:
        trap_task = (
            f"Explain why a high Accuracy score alone would not justify "
            f"choosing a model over {best_name} when its Recall is lower."
        )
    else:
        trap_models = _join_with_and(
            f"{name} ({float(acc):.0%} Acc)"
            for name, acc in zip(trap['Model Name'], trap['Accuracy'])
        )
        trap_recalls = _join_with_and(f"{float(rec):.0%}" for rec in trap['Recall'])
        if len(trap) == 1:
            trap_task = (
                f"Explain why the {trap_models} is a POOR model for our bank "
                f"given its low Recall ({trap_recalls})."
            )
        else:
            trap_task = (
                f"Explain why the {trap_models} are POOR models for our bank "
                f"given their low Recall ({trap_recalls})."
            )

    # Task 3: features that only the winner relies on.
    best_features = list(feature_importance.get(best_name, []))
    other_features = {
        feat
        for name, feats in feature_importance.items()
        if name != best_name
        for feat in feats
    }
    unique_features = [feat for feat in best_features if feat not in other_features]
    if unique_features:
        quoted = _join_with_and(f"'{feat}'" for feat in unique_features)
        feature_task = (
            f"Interpret why {best_name}'s unique focus on {quoted} "
            f"makes it more effective than the others."
        )
    elif best_features:
        quoted = _join_with_and(f"'{feat}'" for feat in best_features)
        feature_task = (
            f"Interpret how {best_name}'s top features ({quoted}) relate to its "
            f"performance, given that other models also rely on them."
        )
    else:
        feature_task = (
            f"Interpret which input features most plausibly drive {best_name}'s performance."
        )

    prompt = f"""
[INST]
SYSTEM ROLE: You are the Chief Risk Officer (CRO) at Sterling Financial.
OBJECTIVE: Audit these {n_models} models and recommend the one that provides the best FINANCIAL SAFETY.

CRITICAL CONTEXT:
In banking, the cost of a missed default (False Negative) is catastrophic.
You must ignore 'Accuracy' if the 'Recall' is low. A model that misses defaults is useless.
INPUT DATA (Final Model Results):
{df_results.to_string(index=False)}

TOP FEATURES PER MODEL:
{feature_importance}

TASK:
1. Identify the "Accuracy Trap": {trap_task}
2. The Winning Model: Justify why {best_name} is the definitive winner. Highlight its {best_auc} ROC-AUC and {best_recall} Recall. Explain how catching {best_recall} of defaulters protects the bank's capital.
3. Feature Insight: {feature_task}
4. Business Decision: Formally recommend {best_name} for deployment.
[/INST]
"""
    return prompt

# 3. RUN INTERPRETATION
final_prompt = generate_interpretation_prompt(df_results, feature_importance)

# Sampling is enabled below (do_sample=True), so seed torch's RNG to make a
# Restart & Run All produce the same report on the same hardware/software.
torch.manual_seed(42)

# The model is loaded with device_map="auto", so send the inputs to the device
# the model reports rather than assuming "cuda".
inputs = tokenizer(final_prompt, return_tensors="pt").to(model.device)

print("Analyzing Results...")

gen_config = GenerationConfig(
    max_new_tokens=2000,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    # Same value generate() falls back to on its own; setting it explicitly
    # silences the "Setting `pad_token_id` to `eos_token_id`" warning.
    pad_token_id=tokenizer.eos_token_id,
)
outputs = model.generate(
    **inputs,
    generation_config=gen_config
)

# generate() returns prompt tokens followed by the new tokens. Slice the
# prompt off by length instead of splitting the decoded text on "[/INST]":
# the split would silently drop part of the report if the model ever emitted
# that marker in its own answer.
prompt_length = inputs["input_ids"].shape[-1]
interpretation = tokenizer.decode(
    outputs[0][prompt_length:], skip_special_tokens=True
).strip()

print("\n" + "="*60)
print("FINAL STRATEGIC REPORT (GENERATED BY MISTRAL 7B)")
print("="*60)
print(interpretation)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Model Ready!
Analyzing Results...

FINAL STRATEGIC REPORT (GENERATED BY MISTRAL 7B)
1. The "Accuracy Trap":
The Decision Tree and Neural Network models have high accuracies but very low recall rates. This means that they are likely to miss many defaults, which is catastrophic in banking. Even though they have high accuracy, they are not providing the necessary financial safety for the bank. Therefore, these models should not be used for default prediction in banking.

2. The Winning Model:
XGBoost has the highest ROC-AUC score of 0.91 and a recall rate of 82%. This means that it is the most effective model for default prediction in banking. Catching 82% of defaulters protects the bank's capital by reducing the risk of losses due to missed defaults.

3. Feature Insight:
XGBoost's unique focus on 'business' and 'feedback' makes it more effective than the others. These features provide valuable information about the borrower's financial behavior and can help predict the likelihood of defa