In [None]:
import sys
import os

# Directory that contains the `src` package imported by the cells below.
# Fill this in before running the notebook.
project_root = r'' ## Give path of your root directory
# Guarded append: re-running this cell must not keep stacking duplicate
# entries onto sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
from src.llm_integration import OPENROUTER_MODEL, explain_prediction, suggest_recovery_actions_llm, generate_recovery_report


from src.data_loader import load_data
from src.model import load_model, predict_risk_scores, assign_recovery_strategy
import pandas as pd

In [None]:
# Load your trained model.
# Paths are assembled with os.path.join so the separator matches the host OS
# (the hard-coded backslash form only resolves on Windows).
rf_model = load_model(os.path.join('outputs', 'models', 'random_forest_risk_model.joblib'))

# Load your segmented and flagged data
df_full = load_data(os.path.join('data', 'processed', 'segmented_and_flagged_data.csv'))


# Feature columns passed to the model in the cells below.
# NOTE(review): order presumably has to match the training column order,
# since importances are paired with these names by position -- confirm.
X_features_for_rf = [
    'Age', 'Monthly_Income', 'Num_Dependents', 'Loan_Amount', 'Loan_Tenure',
    'Interest_Rate', 'Collateral_Value', 'Outstanding_Loan_Amount', 'Monthly_EMI',
    'Num_Missed_Payments', 'Days_Past_Due', 'Collection_Attempts',
    'Loan_to_income_ratio', 'EMI_to_income_ratio', 'Has_Collateral',
    'Gender', 'Payment_History', 'Loan_Type', 'Legal_Action_Taken',
    'Employment_Type_Salaried', 'Employment_Type_Self-Employed',
    'Collection_Method_Debt Collectors', 'Collection_Method_Legal Notice', 'Collection_Method_Settlement Offer'
]


feature_importances_raw = rf_model.feature_importances_
# zip() stops at the shorter input, so a mismatched feature list would
# silently drop or mislabel importances. Fail loudly instead.
if len(feature_importances_raw) != len(X_features_for_rf):
    raise ValueError(
        f"Model exposes {len(feature_importances_raw)} feature importances but "
        f"X_features_for_rf lists {len(X_features_for_rf)} features."
    )
feature_importance_map = dict(zip(X_features_for_rf, feature_importances_raw))


# Keep the five (name, importance) pairs with the largest importance,
# in descending order.
top_n_features = sorted(feature_importance_map.items(), key=lambda item: item[1], reverse=True)[:5]
top_n_features_dict = dict(top_n_features)

Model loaded from C:\Loan Recovery ML Project\outputs\models\random_forest_risk_model.joblib


In [None]:
print("--- Example 1: High-Risk Borrower Explanation ---")

# Pick one flagged borrower reproducibly. The sample size is capped by the
# pool size because DataFrame.sample(1) raises ValueError on an empty frame,
# which would make the `.empty` branch below unreachable.
high_risk_pool = df_full[df_full['High_Risk_Flag'] == 1]
high_risk_borrowers = high_risk_pool.sample(min(1, len(high_risk_pool)), random_state=1)
if high_risk_borrowers.empty:
    print("No high-risk borrowers found in the dataset to sample.")
else:
    sample_borrower = high_risk_borrowers.iloc[0]

    # Subset of borrower fields forwarded to the LLM helper as a plain dict.
    borrower_data_for_llm = sample_borrower[['Monthly_Income', 'Loan_Amount', 'Num_Missed_Payments', 'EMI_to_income_ratio', 'Loan_to_income_ratio']].to_dict()
    # Wrap the single row back into a one-row DataFrame for the model helper;
    # element [0] of its result is this borrower's score.
    predicted_risk_score = predict_risk_scores(rf_model, pd.DataFrame([sample_borrower[X_features_for_rf]]))
    predicted_strategy = assign_recovery_strategy(predicted_risk_score[0])
    segment_info = sample_borrower['Segment_Name']

    explanation = explain_prediction(
        borrower_data_for_llm,
        predicted_risk_score[0],
        predicted_strategy,
        top_n_features_dict,
        segment_info
    )
    print(f"\nBorrower ID: {sample_borrower['Borrower_ID']}")
    print(f"Segment: {segment_info}")
    print(f"Predicted Risk Score: {predicted_risk_score[0]:.2f}")
    print(f"Assigned Strategy: {predicted_strategy}")
    print("\nLLM Explanation:")
    print(explanation)

--- Example 1: High-Risk Borrower Explanation ---

Borrower ID: BRW_122
Segment: High Missed Payments, Behavioral Risk
Predicted Risk Score: 0.99
Assigned Strategy: Immediate legal notices & aggressive recovery attempts

LLM Explanation:
Based on the provided details and the prediction of high risk (Score: 0.99), here are the reasons why the borrower has been segmented into the "High Missed Payments, Behavioral Risk" category and the suggested strategy of "Immediate legal notices & aggressive recovery attempts":


2. **Loan Amount (855351)**: The loan amount is substantial, making it a significant risk if the borrower defaults. The high loan amount contributes 0.17 to the prediction score.

3. **EMI to Income Ratio (0.139)**: The Equated Monthly Installment (EMI) to income ratio is relatively low at approximately 13.9%. While this might seem manageable, it's important to consider other factors.

4. **Loan to Income Ratio (19.606)**: The loan to income ratio is high, meaning the borrowe

In [None]:
# Example 2 reuses the borrower, score, strategy and segment produced by the
# Example 1 cell, so it only runs when that cell actually sampled a borrower.
if high_risk_borrowers.empty:
    print("Skipping Example 2 as no high-risk borrower was sampled for explanation.")
else:
    print("\n--- Example 2: Nuanced Recovery Actions ---")
    llm_suggestions = suggest_recovery_actions_llm(
        borrower_data_for_llm, predicted_risk_score[0], predicted_strategy, segment_info
    )
    # Heading and suggestions on consecutive lines.
    print("\nLLM Suggested Actions:", llm_suggestions, sep="\n")


--- Example 2: Nuanced Recovery Actions ---

LLM Suggested Actions:
Given the borrower's high risk score and profile, here are some specific, actionable recovery strategies:

**Immediate Actions:**

- **Send a Formal Demand Letter:** Dispatch a registered letter demanding full payment within a specified timeframe (e.g., 7 days). Clearly state the consequences of non-payment.
- **Initiate Daily Contact Attempts:** Utilize multiple channels (calls, SMS, emails) to reach the borrower. Ensure compliance with local regulations and fair debt collection practices.
- **Asset Check:** Verify if the borrower has any assets (property, vehicles, investments) that can be used as collateral or seized if necessary.

**Alternative Strategies:**

- **Negotiated Settlement:** Offer a settlement plan that the borrower can realistically follow. This could include:
  - A lump-sum settlement for a reduced amount.
  - A short-term repayment plan with lower EMIs.
  - Extending the loan tenure to reduce the m

In [None]:
print("\n--- Example 3: Daily Recovery Report ---")
# Draw up to five flagged borrowers (fewer if the pool is smaller) and work on
# a copy so the new columns below are not written into df_full.
flagged_cases = df_full[df_full['High_Risk_Flag'] == 1]
simulated_daily_cases = flagged_cases.sample(min(5, len(flagged_cases)), random_state=42).copy()

if simulated_daily_cases.empty:
    print("No high-risk borrowers found to generate a daily report.")
else:
    # Score every sampled case, then map each score to a strategy.
    simulated_daily_cases['Risk_Score_Pred'] = predict_risk_scores(rf_model, simulated_daily_cases[X_features_for_rf])
    simulated_daily_cases['Recovery_Strategy_Assigned'] = simulated_daily_cases['Risk_Score_Pred'].apply(assign_recovery_strategy)

    # Report field name -> source column in simulated_daily_cases.
    report_field_sources = {
        'Borrower_ID': 'Borrower_ID',
        'Risk_Score': 'Risk_Score_Pred',
        'Segment_Name': 'Segment_Name',
        'Recovery_Strategy': 'Recovery_Strategy_Assigned',
        'Monthly_Income': 'Monthly_Income',
        'Loan_Amount': 'Loan_Amount',
        'Num_Missed_Payments': 'Num_Missed_Payments',
        'EMI_to_income_ratio': 'EMI_to_income_ratio',
    }
    # One dict per sampled borrower, in row order.
    report_data = [
        {field: case[source] for field, source in report_field_sources.items()}
        for _, case in simulated_daily_cases.iterrows()
    ]

    daily_report = generate_recovery_report(report_data)
    print("\nDaily Recovery Report:")
    print(daily_report)


--- Example 3: Daily Recovery Report ---

Daily Recovery Report:
**Daily Loan Recovery Report - 2025-07-30 15:52:11 IST**

**Overview:**
Today, 5 high-risk borrowers were identified, all requiring immediate legal notices and aggressive recovery attempts. The common theme among these cases is a high EMI ratio and significant loan amounts relative to their incomes, indicating a high loan burden and critical risk. Notably, some borrowers have a high risk score despite having no missed payments, emphasizing the importance of considering multiple risk factors.

**Common Characteristics:**

1. **High EMI Ratio:** All cases have an EMI ratio above 0.01, with some as high as 0.15, indicating a significant portion of their income goes toward loan repayments.

2. **High Loan Burden:** Loan amounts are substantial relative to their monthly incomes, with one borrower having a loan amount nearly 10 times their monthly income.

3. **Missed Payments:** While some borrowers have multiple missed payme