In [1]:
import datetime
import json
import logging
import os
import re
import tempfile
from typing import Tuple, Dict, Any, Optional, List

import joblib

import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text

# Optional ML libs (used for retrain_from_logs)
try:
    import lightgbm as lgb
except Exception:
    lgb = None

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# transformers (loaded lazily)
from transformers import pipeline as hf_pipeline

# Configure root logging at INFO level and create the logger used by the pipeline helpers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("chatbot_pipeline")


In [2]:
# Colab only: mount Google Drive; the default MODEL_PATH below points into the mounted drive.
from google.colab import drive
drive.mount('/content/drive')

# Runtime configuration. Every value can be overridden through an environment variable
# of the same name; the literals are fallbacks.
MODEL_PATH = os.environ.get("MODEL_PATH", "/content/drive/MyDrive/Colab Notebooks/loan_default_predictor.pkl")
# NOTE(review): the fallback URL carries placeholder credentials; supply real ones via
# the DB_URL environment variable rather than editing this literal.
DB_URL = os.environ.get("DB_URL", "postgresql://username:password@localhost:5432/chatbot_db")
# Presumably the number of logged interactions that triggers a retrain - TODO confirm
# against the code that reads it (not visible here).
RETRAIN_THRESHOLD = int(os.environ.get("RETRAIN_THRESHOLD", "1000"))
# Hugging Face model id used as the default for get_sentiment_pipeline().
DEFAULT_SENTIMENT_MODEL = os.environ.get("DEFAULT_SENTIMENT_MODEL", "j-hartmann/emotion-english-distilroberta-base")

Mounted at /content/drive


In [3]:
# Action space of the contact policy. Position is significant: probability vectors
# are built in this order and sample_action() maps a sampled index back through it.
ACTIONS = [
    "no_contact", "soft_reminder", "reminder_payment",
    "offer_plan_low", "offer_plan_high",
    "escalate_call", "senior_agent", "legal_notice",
]

# Exploration rate and policy tag, both overridable from the environment.
EPSILON = float(os.getenv("POLICY_EPSILON", "0.05"))
POLICY_REFERENCE = os.getenv("POLICY_REFERENCE", "heuristic_v1")

# Risk-model state: no estimator and empty feature lists until a loader fills them.
_model = None
MODEL_FEATURES: List[str] = []
CATEGORICAL_FEATURES: List[str] = []


In [4]:
# Cache for Hugging Face pipelines; every slot starts empty and is filled on first use.
_HF_PIPELINES = dict.fromkeys(("sentiment", "zero_shot", "llm"))

# Intent classifier objects; all unset until assigned elsewhere.
INTENT_VECT = INTENT_CLF = INTENT_LABELS = None

# Set to a model id to enable LLM templating; None leaves it off.
LLM_MODEL_NAME = None

def load_model(path: str = MODEL_PATH) -> Tuple[Any, list, list]:
    """Load the pickled risk-model bundle and publish it to the module globals.

    The file is read with joblib and is expected to be a mapping with the keys
    "model", "features" and "categorical". A missing or None feature list is
    normalised to an empty list so callers can always take len() of it.

    Args:
        path: Location of the joblib file. Defaults to MODEL_PATH.

    Returns:
        Tuple of (model, feature names, categorical feature names).

    Raises:
        FileNotFoundError: If nothing exists at `path`.
    """
    global _model, MODEL_FEATURES, CATEGORICAL_FEATURES
    if not os.path.exists(path):
        raise FileNotFoundError(f"Model file not found at {path}")
    # SECURITY: joblib.load unpickles the file and can run arbitrary code;
    # only point this at model files from a trusted source.
    data = joblib.load(path)
    _model = data.get("model")
    # `or []` also covers a stored None, which would otherwise break len() below.
    MODEL_FEATURES = data.get("features") or []
    CATEGORICAL_FEATURES = data.get("categorical") or []
    logger.info("Loaded model from %s. Feature count: %d", path, len(MODEL_FEATURES))
    return _model, MODEL_FEATURES, CATEGORICAL_FEATURES

def get_engine(db_url: str = DB_URL):
    """Build a SQLAlchemy engine for `db_url`, created with `future=True`."""
    engine = create_engine(db_url, future=True)
    return engine


In [5]:
# ---------- HF PIPELINE HELPERS (lazy) ----------
def get_sentiment_pipeline(model_name: str = DEFAULT_SENTIMENT_MODEL):
    """Return the cached emotion-classification pipeline, building it on first call.

    `model_name` is only consulted while the cache slot is empty; once a pipeline
    has been built, later calls return it whatever name they pass.
    """
    cached = _HF_PIPELINES["sentiment"]
    if cached is not None:
        return cached

    logger.info(f"Loading emotion pipeline ({model_name})")
    # return_all_scores=True asks for a score per emotion class, not just the top one.
    # NOTE(review): newer transformers releases may flag this argument as deprecated
    # in favour of top_k - confirm against the installed version.
    cached = hf_pipeline(
        "text-classification",
        model=model_name,
        return_all_scores=True,
    )
    _HF_PIPELINES["sentiment"] = cached
    return cached

def get_zero_shot_pipeline():
    """Return the cached zero-shot classification pipeline, building it on first call."""
    cached = _HF_PIPELINES["zero_shot"]
    if cached is not None:
        return cached

    logger.info("Loading zero-shot classification pipeline (facebook/bart-large-mnli)")
    cached = hf_pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    _HF_PIPELINES["zero_shot"] = cached
    return cached

# Model id of the pipeline currently stored in _HF_PIPELINES["llm"]; None until one is loaded.
_LLM_LOADED_MODEL_NAME: Optional[str] = None

def get_llm_pipeline(model_name: str):
    """Return a text-generation pipeline for `model_name`, reusing the cached one when possible.

    The pipeline is rebuilt only when the cache is empty or when a different
    model id is requested than the one last loaded. The loaded id is tracked
    separately from the LLM_MODEL_NAME config flag, which this function never
    writes; comparing against that flag caused a reload on every call whenever
    it did not equal the requested id.

    Args:
        model_name: Hugging Face model id, or None to disable the LLM.

    Returns:
        The pipeline object, or None when `model_name` is None.
    """
    global _LLM_LOADED_MODEL_NAME
    if model_name is None:
        return None
    if _HF_PIPELINES["llm"] is None or _LLM_LOADED_MODEL_NAME != model_name:
        logger.info(f"Loading LLM pipeline: {model_name}")
        _HF_PIPELINES["llm"] = hf_pipeline("text-generation", model=model_name)
        # Record the name only after a successful load so a failure retries next time.
        _LLM_LOADED_MODEL_NAME = model_name
    return _HF_PIPELINES["llm"]

In [6]:
# ---------- SENTIMENT (EMOTION) ANALYZER ----------
def analyze_sentiment(text: str) -> Tuple[str, float]:
    """Classify the dominant emotion of `text`.

    Returns:
        (label, score): the lower-cased label of the highest-scoring class and
        that score. Blank or non-string input, and any failure inside the
        pipeline, yield ("neutral", 0.0).
    """
    fallback = ("neutral", 0.0)
    if not isinstance(text, str) or text.strip() == "":
        return fallback

    try:
        classifier = get_sentiment_pipeline()
        # Only the first 512 characters are scored; [0] takes the first element
        # of the pipeline output, which is iterated as a list of label/score dicts.
        class_scores = classifier(text[:512])[0]
        best = max(class_scores, key=lambda entry: entry['score'])
        return best['label'].lower(), float(best['score'])
    except Exception:
        logger.exception("Emotion analysis failed; returning neutral")
        return fallback

def detect_persona(sentiment_label: str, message: str, user_features: Optional[dict] = None) -> str:
    """Guess a customer persona from the emotion label, the message and account features.

    Rules are tried in order and the first match wins:
      1. joy/love/surprise with at most one missed payment       -> "cooperative"
      2. anger/fear/sadness with "!" or a message under 40 chars  -> "aggressive"
      3. "?", "don't understand" or "how" anywhere in the message -> "confused"
      4. two or more missed payments, or response time > 48       -> "evasive"
      5. anything else                                             -> "neutral"

    The text checks in rule 3 are plain substring tests on the lower-cased message.
    """
    stripped = (message or "").strip()
    lowered = stripped.lower()
    profile = user_features or {}
    missed_count = int(profile.get("MissedPayments", 0) or 0)
    hours_to_reply = profile.get("ResponseTimeHours", None)

    upbeat = sentiment_label in ("joy", "love", "surprise")
    distressed = sentiment_label in ("anger", "fear", "sadness")

    if upbeat and missed_count <= 1:
        return "cooperative"

    if distressed and ("!" in stripped or len(stripped) < 40):
        return "aggressive"

    if "?" in stripped or any(cue in lowered for cue in ("don't understand", "how")):
        return "confused"

    if missed_count >= 2:
        return "evasive"
    if hours_to_reply is not None and hours_to_reply > 48:
        return "evasive"

    return "neutral"


In [7]:
#---------- STRATEGY RECOMMENDATION (legacy/helper mapping) ----------
# Maps each policy action to a strategy record holding a "code" and a "description".
ACTION_TO_STRATEGY = {
    action: {"code": code, "description": description}
    for action, code, description in (
        ("no_contact", "monitor", "Continue monitoring."),
        ("soft_reminder", "soft_reminder", "Soft reminder via app/email."),
        ("reminder_payment", "reminder", "SMS + payment link."),
        ("offer_plan_low", "offer_plan_low", "Offer low-leniency repayment plan."),
        ("offer_plan_high", "offer_plan_high", "Offer high-leniency plan with discount."),
        ("escalate_call", "escalate_call", "Schedule agent call."),
        ("senior_agent", "senior_agent", "Escalate to senior agent."),
        ("legal_notice", "legal_notice", "Initiate legal review / notice (guarded)."),
    )
}

def recommend_strategy(risk_score: float, persona: str, sentiment_label: str) -> Dict[str, str]:
    """Legacy deterministic mapping from risk, persona and sentiment to a strategy.

    Retained for backwards compatibility; policy sampling does not use it.

    Tiers:
      * risk >= 0.8       -> chosen by persona, defaulting to immediate outreach
      * 0.5 <= risk < 0.8 -> empathetic reminder for anger/fear/sadness, plain reminder otherwise
      * 0.3 <= risk < 0.5 -> soft reminder
      * anything else     -> monitor

    Returns:
        Dict with the keys "code" and "description".
    """
    if risk_score >= 0.8:
        high_risk_plays = (
            ("cooperative", "offer_plan_high", "Offer structured repayment plan with possible small discount."),
            ("evasive", "escalate_call", "Schedule personalized call and frequent reminders."),
            ("aggressive", "senior_agent", "Escalate to senior agent for sensitive handling."),
        )
        for who, code, description in high_risk_plays:
            if persona == who:
                return {"code": code, "description": description}
        return {"code": "contact_high", "description": "Immediate outreach via call and SMS."}

    if 0.5 <= risk_score < 0.8:
        distressed = sentiment_label in ("anger", "fear", "sadness")
        if distressed:
            return {"code": "empathetic_reminder", "description": "Send empathetic SMS + email with options."}
        return {"code": "reminder", "description": "Send reminder SMS + email with payment link."}

    if 0.3 <= risk_score < 0.5:
        return {"code": "soft_reminder", "description": "Soft reminder via app notification and email."}

    return {"code": "monitor", "description": "No immediate action — continue monitoring."}

# ---------- PHASE-2: Policy / Action Probabilities & Sampling ----------
from math import exp

def score_to_action_probs(risk_score: float, persona: str, profile: dict) -> Dict[str, Any]:
    """Turn a risk score and persona into a softmax distribution over ACTIONS.

    Steps:
      1. Start every action at 0.0 and apply the baseline scores of the risk tier.
      2. Apply persona-specific adjustments.
      3. Set "legal_notice" to -999.0 when the profile has fewer than three
         missed payments, which drives its probability to effectively zero.
      4. Softmax the scores, subtracting the maximum first for numerical stability.

    Args:
        risk_score: Risk estimate; tier boundaries are 0.3, 0.5 and 0.8.
        persona: Persona label such as "cooperative" or "evasive".
        profile: Mapping read for "MissedPayments".

    Returns:
        Dict with "probs" (list in the iteration order of the score dict),
        "policy_reason" (descriptive string) and "base_scores" (the raw scores).
    """
    scores = dict.fromkeys(ACTIONS, 0.0)

    # 1) Baseline preferences for the risk tier
    if risk_score >= 0.8:
        tier_scores = {
            "offer_plan_high": 3.0 if persona == "cooperative" else 1.0,
            "escalate_call": 2.0 if persona in ("evasive", "neutral") else 0.5,
            "senior_agent": 2.5 if persona == "aggressive" else 0.1,
            "legal_notice": 0.5,
        }
    elif 0.5 <= risk_score < 0.8:
        tier_scores = {
            "reminder_payment": 2.5,
            "offer_plan_low": 1.8 if persona == "cooperative" else 0.6,
            "escalate_call": 0.8 if persona == "evasive" else 0.2,
        }
    elif 0.3 <= risk_score < 0.5:
        tier_scores = {"soft_reminder": 2.0, "reminder_payment": 0.5}
    else:
        tier_scores = {"no_contact": 2.0, "soft_reminder": 0.5}
    for action, value in tier_scores.items():
        scores[action] = value

    # 2) Persona adjustments stacked on top of the tier baseline
    persona_bumps = (
        ("confused", (("offer_plan_low", 0.5), ("reminder_payment", 0.5))),
        ("aggressive", (("senior_agent", 1.0), ("offer_plan_high", -0.5))),
        ("evasive", (("escalate_call", 1.0),)),
    )
    for who, bumps in persona_bumps:
        if persona == who:
            for action, delta in bumps:
                scores[action] += delta

    # 3) Guard: legal notice is suppressed below three missed payments
    missed_count = int(profile.get("MissedPayments", 0) or 0)
    if missed_count < 3:
        scores["legal_notice"] = -999.0

    # 4) Softmax
    raw = list(scores.values())
    peak = max(raw)
    weights = [exp(value - peak) for value in raw]
    total = sum(weights)
    return {
        "probs": [weight / total for weight in weights],
        "policy_reason": f"heuristic_risk:{risk_score:.2f}_persona:{persona}",
        "base_scores": scores,
    }

def sample_action(probs: List[float], epsilon: float = EPSILON) -> Tuple[str, float, List[float]]:
    """Sample one action with epsilon-uniform exploration.

    With probability `epsilon` the draw is made from a uniform distribution over
    all actions; otherwise it is made from the normalised `probs`.

    Args:
        probs: Weights aligned with ACTIONS; they are renormalised to sum to 1.
        epsilon: Chance of taking the uniform branch.

    Returns:
        (action, propensity, final_probs): the sampled action name, its
        probability under the distribution that was actually sampled from, and
        that distribution as a list.
    """
    policy = np.asarray(probs, dtype=float)
    policy = policy / policy.sum()
    n_actions = len(policy)

    # Draw order matters for reproducibility: exploration coin first, then the action.
    explore = np.random.rand() < epsilon
    final_probs = np.ones_like(policy) / n_actions if explore else policy

    chosen = int(np.random.choice(np.arange(n_actions), p=final_probs))
    return ACTIONS[chosen], float(final_probs[chosen]), final_probs.tolist()

In [9]:
# === Persona-Adaptive Chatbot Loop ===
import random

# Templates for persona + sentiment tone adaptation
# RESPONSE_TEMPLATES = {
#     "cooperative": {
#         "POSITIVE": [
#             "Glad to hear from you! Let's work out a suitable repayment plan together.",
#             "That's great. We can arrange something that works for both of us."
#         ],
#         "NEGATIVE": [
#             "I understand this might be stressful. We can discuss options to ease the process.",
#             "I hear your concerns. Let's explore how we can make this manageable."
#         ],
#         "NEUTRAL": [
#             "Thanks for your update. Could we discuss a date for the repayment?",
#             "Alright, let's finalize the details for your payment."
#         ]
#     },
#     "evasive": {
#         "POSITIVE": [
#             "It's important we finalize your repayment plan soon to avoid penalties.",
#             "I'm happy you're positive. Let's lock in a payment date today."
#         ],
#         "NEGATIVE": [
#             "I understand, but delaying further could increase charges.",
#             "It's best we address this now before the situation escalates."
#         ],
#         "NEUTRAL": [
#             "Please confirm when you can make the payment.",
#             "We need your confirmation to proceed with your repayment plan."
#         ]
#     },
#     "aggressive": {
#         "POSITIVE": [
#             "I appreciate your willingness to cooperate. Let's move forward constructively.",
#             "That's good to hear. Let's set a repayment date."
#         ],
#         "NEGATIVE": [
#             "I understand you're upset. My goal is to help you avoid penalties.",
#             "We can find a resolution if we work together."
#         ],
#         "NEUTRAL": [
#             "Let's focus on finding a solution to your repayment.",
#             "Could we talk about your repayment schedule?"
#         ]
#     },
#     "confused": {
#         "POSITIVE": [
#             "I'm glad you're feeling positive. I can clarify any doubts you have.",
#             "Great! Let me explain the next steps clearly."
#         ],
#         "NEGATIVE": [
#             "I understand things might be unclear. Let me walk you through your loan details.",
#             "Let's go step-by-step so you understand the repayment process."
#         ],
#         "NEUTRAL": [
#             "Do you have any questions about your repayment schedule?",
#             "Let me explain how the repayment process works."
#         ]
#     }
# }

# def chatbot_loop():
#     print("Loan Recovery Chatbot (type 'exit' to quit)")
#     while True:
#         user_msg = input("\nCustomer: ")
#         if user_msg.lower() == "exit":
#             print("Chatbot: Thank you for your time. Goodbye!")
#             break

#         # Example features for demo (replace with actual customer features)
#         features = {
#             "MissedPayments": random.randint(0, 3),
#             "ResponseTimeHours": random.randint(1, 72)
#         }
#         risk_score = random.uniform(0, 1)

#         # Step 1: Sentiment Analysis
#         sentiment_label, _ = analyze_sentiment(user_msg)

#         # Step 2: Persona Detection
#         persona = detect_persona(sentiment_label, user_msg, features)

#         # Step 3: Strategy Recommendation
#         strategy = recommend_strategy(risk_score, persona, sentiment_label)

#         # Step 4: Select chatbot response
#         response_list = RESPONSE_TEMPLATES.get(persona, {}).get(sentiment_label, [])
#         if response_list:
#             reply = random.choice(response_list)
#         else:
#             reply = "Let's discuss your repayment plan."

#         print(f"Chatbot ({persona}, {sentiment_label}): {reply}")
#         print(f"💡 Suggested Recovery Strategy: {strategy['description']} (Risk Score: {risk_score:.2f})")

# Run chatbot loop
# chatbot_loop()

'RESPONSE_TEMPLATES = {\n    "cooperative": {\n        "POSITIVE": [\n            "Glad to hear from you! Let\'s work out a suitable repayment plan together.",\n            "That\'s great. We can arrange something that works for both of us."\n        ],\n        "NEGATIVE": [\n            "I understand this might be stressful. We can discuss options to ease the process.",\n            "I hear your concerns. Let\'s explore how we can make this manageable."\n        ],\n        "NEUTRAL": [\n            "Thanks for your update. Could we discuss a date for the repayment?",\n            "Alright, let\'s finalize the details for your payment."\n        ]\n    },\n    "evasive": {\n        "POSITIVE": [\n            "It\'s important we finalize your repayment plan soon to avoid penalties.",\n            "I\'m happy you\'re positive. Let\'s lock in a payment date today."\n        ],\n        "NEGATIVE": [\n            "I understand, but delaying further could increase charges.",\n            

In [10]:
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine


In [11]:
# Logger for the DB-backed chatbot section of the notebook.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("persona_chatbot_db")

# ---------- Envs / Paths ----------
# Each setting falls back to the literal when the environment variable is unset.
# These assignments replace any values bound to the same names earlier in the notebook.
DB_URL = os.getenv("DB_URL", "postgresql://username:password@localhost:5432/chatbot_db")
MODEL_PATH = os.getenv("MODEL_PATH", "./loan_default_predictor.pkl")
DEFAULT_SENTIMENT_MODEL = os.getenv(
    "DEFAULT_SENTIMENT_MODEL",
    "distilbert-base-uncased-finetuned-sst-2-english",
)

In [12]:
# ---------- Optional transformers ----------
try:
    from transformers import pipeline as hf_pipeline
except Exception:
    hf_pipeline = None

# ---------- Risk model (optional) ----------
# Fresh risk-model state: empty feature lists and no estimator until one is loaded.
MODEL_FEATURES: List[str] = []
CATEGORICAL_FEATURES: List[str] = []
_model = None

def load_risk_model(path: str = MODEL_PATH):
    """Best-effort load of the risk-model bundle; never raises.

    On success the module-level `_model` is rebound and MODEL_FEATURES /
    CATEGORICAL_FEATURES are refilled in place, so existing references to those
    lists see the new contents.

    Args:
        path: Location of the joblib file. Defaults to MODEL_PATH.

    Returns:
        The loaded model, or None when the file is missing or loading fails.
        In both of those cases a log entry is written.
    """
    global _model, MODEL_FEATURES, CATEGORICAL_FEATURES
    if os.path.exists(path):
        try:
            # NOTE(review): joblib.load unpickles the file - only use trusted model files.
            bundle = joblib.load(path)
            _model = bundle.get("model", None)
            MODEL_FEATURES[:] = bundle.get("features", []) or []
            CATEGORICAL_FEATURES[:] = bundle.get("categorical", []) or []
            log.info(
                "Loaded risk model. Features: %d; Categorical: %d",
                len(MODEL_FEATURES),
                len(CATEGORICAL_FEATURES),
            )
            return _model
        except Exception as e:
            log.exception("Failed loading risk model: %s", e)
            return None

    log.warning("Risk model not found at %s; will use heuristic risk.", path)
    return None

In [13]:
def compute_risk(state: Dict[str, Any]) -> float:
    """Estimate default risk for one customer state.

    When a model and its feature list are loaded, the model scores a single-row
    frame built from `state`: `predict_proba` is preferred, otherwise `predict`
    clipped to [0, 1]. If that path is unavailable or raises, a heuristic is
    used instead: MissedPayments / 4 plus DelaysDays / 120, capped at 1, plus a
    penalty of 0.15 for SentimentScore below -0.3 or 0.05 for any other negative
    sentiment, with the total clamped to [0, 1].

    Args:
        state: Mapping of feature name to value; missing keys are treated as
            None for the model and as 0 for the heuristic.

    Returns:
        Risk as a float.
    """
    model_ready = _model is not None and MODEL_FEATURES
    if model_ready:
        try:
            frame = pd.DataFrame([{name: state.get(name, None) for name in MODEL_FEATURES}])
            # Cast the declared categorical columns to pandas' category dtype
            for column in CATEGORICAL_FEATURES:
                if column in frame:
                    frame[column] = frame[column].astype("category")
            if hasattr(_model, "predict_proba"):
                return float(_model.predict_proba(frame)[:, 1][0])
            raw_prediction = float(_model.predict(frame)[0])
            return float(min(max(raw_prediction, 0.0), 1.0))
        except Exception as e:
            log.warning("Model prediction failed (%s); falling back to heuristic.", e)

    # Heuristic: missed payments and delay days dominate; negative sentiment adds a little
    missed, delays, sentiment = (
        float(state.get(key, 0) or 0)
        for key in ("MissedPayments", "DelaysDays", "SentimentScore")
    )
    base = min(1.0, (missed / 4.0) + (delays / 120.0))
    if sentiment < -0.3:
        penalty = 0.15
    elif sentiment < 0:
        penalty = 0.05
    else:
        penalty = 0.0
    return float(max(0.0, min(1.0, base + penalty)))

In [14]:
# ---------- Sentiment (optional) ----------
# Sentiment pipeline cache; built on the first successful call to analyze_sentiment.
_sentiment_pipe = None

def analyze_sentiment(text: str) -> Tuple[str, float]:
    """Return label (POSITIVE/NEGATIVE/NEUTRAL) and signed score (-1..1).

    The score is the classifier confidence with a sign attached: positive for
    a POSITIVE label, zero for NEUTRAL, negative for any other label.

    Blank or non-string input, a missing transformers install (`hf_pipeline`
    is None) and any pipeline failure all yield ("NEUTRAL", 0.0). Failures are
    logged rather than swallowed silently.

    Args:
        text: Customer message; only the first 512 characters are scored.

    Returns:
        (label, signed_score).
    """
    global _sentiment_pipe
    if not isinstance(text, str) or not text.strip():
        return "NEUTRAL", 0.0
    if hf_pipeline is None:
        return "NEUTRAL", 0.0
    try:
        if _sentiment_pipe is None:
            _sentiment_pipe = hf_pipeline("sentiment-analysis", model=DEFAULT_SENTIMENT_MODEL)
        res = _sentiment_pipe(text[:512])[0]
        label = str(res.get("label", "NEUTRAL")).upper()
        score = float(res.get("score", 0.0))
        if label == "POSITIVE":
            signed = score
        elif label == "NEUTRAL":
            # A confident "neutral" must not be reported as negative sentiment.
            signed = 0.0
        else:
            signed = -score
        return label, signed
    except Exception:
        log.warning("Sentiment analysis failed; returning NEUTRAL", exc_info=True)
        return "NEUTRAL", 0.0

In [15]:
# ---------- Persona detection (LLM zero-shot with heuristic fallback) ----------
# Candidate labels handed to the zero-shot classifier.
POSSIBLE_PERSONAS = "cooperative aggressive confused evasive neutral".split()
# Zero-shot pipeline cache; built on first use.
_zero_shot = None

# Words that mark a message as aggressive in the heuristic.
NEGATIVE_WORDS = set("terrible worst angry useless hate annoyed mad".split())
# Regexes (whole-word/phrase) that mark a message as evasive in the heuristic.
EVADE_PATTERNS = [
    rf"\b{phrase}\b"
    for phrase in ("i don't know", "not sure", "later", "skip", "maybe")
]

def detect_persona_heuristic(user_text: str, sentiment_label: str = "NEUTRAL", features: Optional[Dict[str, Any]] = None) -> str:
    """Rule-based persona guess from the message, sentiment label and account features.

    Rules are tried in order and the first match wins:
      1. sentiment_label == "POSITIVE"                         -> "cooperative"
      2. a NEGATIVE_WORDS term or "!" in the message            -> "aggressive"
      3. any EVADE_PATTERNS regex matches                       -> "evasive"
      4. "?", the words "how"/"help", or "don't understand"     -> "confused"
      5. MissedPayments >= 2 or ResponseTimeHours > 48          -> "evasive"
      6. anything else                                           -> "neutral"

    Keywords are compared against whole words, so "made" is not read as "mad",
    "whatever" as "hate", or "show" as "how".

    Args:
        user_text: Customer message; None is treated as empty.
        sentiment_label: Upper-case sentiment label.
        features: Optional mapping read for "MissedPayments" and "ResponseTimeHours".

    Returns:
        One of "cooperative", "aggressive", "evasive", "confused", "neutral".
    """
    # Fold the typographic apostrophe so "don’t" matches the ASCII phrases below.
    txt = (user_text or "").lower().replace("\u2019", "'").strip()
    features = features or {}
    words = set(re.findall(r"[a-z]+", txt))

    if sentiment_label == "POSITIVE":
        return "cooperative"
    if not words.isdisjoint(NEGATIVE_WORDS) or "!" in txt:
        return "aggressive"
    if any(re.search(p, txt) for p in EVADE_PATTERNS):
        return "evasive"
    if "?" in txt or not words.isdisjoint(("how", "help")) or "don't understand" in txt:
        return "confused"
    if (features.get("MissedPayments", 0) or 0) >= 2 or (features.get("ResponseTimeHours", 0) or 0) > 48:
        return "evasive"
    return "neutral"

def detect_persona_llm(user_text: str, features: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Classify the persona with a zero-shot model, or return None when that is not possible.

    The prompt is the customer message followed, when `features` is given, by
    the missed-payment count and response time. The top-ranked label from
    POSSIBLE_PERSONAS is returned.

    None is returned, so the caller can fall back to another method, when
    transformers is unavailable (`hf_pipeline` is None), when the message is
    blank or not a string, or when the pipeline raises. Failures are logged.

    Args:
        user_text: Customer message.
        features: Optional mapping read for "MissedPayments" and "ResponseTimeHours".

    Returns:
        A persona label, or None.
    """
    global _zero_shot
    if hf_pipeline is None:
        return None
    # Nothing to classify: avoid scoring an empty prompt.
    if not isinstance(user_text, str) or not user_text.strip():
        return None
    try:
        if _zero_shot is None:
            _zero_shot = hf_pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        context = ""
        if features:
            missed = features.get("MissedPayments", 0)
            resp = features.get("ResponseTimeHours", "N/A")
            context = f" Missed payments: {missed}; Response time: {resp} hours."
        res = _zero_shot(
            f"Customer message: {user_text}\n{context}",
            candidate_labels=POSSIBLE_PERSONAS,
            multi_label=False
        )
        return res["labels"][0]
    except Exception:
        log.warning("Zero-shot persona detection failed; returning None", exc_info=True)
        return None

def detect_persona(user_text: str, sentiment_label: str, features: Dict[str, Any] = None) -> str:
    """Detect the persona, preferring the zero-shot model over the rule-based fallback.

    The heuristic runs only when the LLM path returns nothing (None or an empty label).
    Note the argument order: message first, then sentiment label.
    """
    return (
        detect_persona_llm(user_text, features)
        or detect_persona_heuristic(user_text, sentiment_label, features)
    )


In [16]:
# ---------- Persona detection (LLM zero-shot with heuristic fallback) ----------
# NOTE(review): this cell repeats the definitions of the cell before it; running it
# rebinds the same names (and clears the zero-shot cache) with identical values.
# Candidate labels handed to the zero-shot classifier.
POSSIBLE_PERSONAS = "cooperative aggressive confused evasive neutral".split()
# Zero-shot pipeline cache; built on first use.
_zero_shot = None

# Words that mark a message as aggressive in the heuristic.
NEGATIVE_WORDS = set("terrible worst angry useless hate annoyed mad".split())
# Regexes (whole-word/phrase) that mark a message as evasive in the heuristic.
EVADE_PATTERNS = [
    rf"\b{phrase}\b"
    for phrase in ("i don't know", "not sure", "later", "skip", "maybe")
]

def detect_persona_heuristic(user_text: str, sentiment_label: str = "NEUTRAL", features: Optional[Dict[str, Any]] = None) -> str:
    """Rule-based persona guess from the message, sentiment label and account features.

    Rules are tried in order and the first match wins:
      1. sentiment_label == "POSITIVE"                         -> "cooperative"
      2. a NEGATIVE_WORDS term or "!" in the message            -> "aggressive"
      3. any EVADE_PATTERNS regex matches                       -> "evasive"
      4. "?", the words "how"/"help", or "don't understand"     -> "confused"
      5. MissedPayments >= 2 or ResponseTimeHours > 48          -> "evasive"
      6. anything else                                           -> "neutral"

    Keywords are compared against whole words, so "made" is not read as "mad",
    "whatever" as "hate", or "show" as "how".

    Args:
        user_text: Customer message; None is treated as empty.
        sentiment_label: Upper-case sentiment label.
        features: Optional mapping read for "MissedPayments" and "ResponseTimeHours".

    Returns:
        One of "cooperative", "aggressive", "evasive", "confused", "neutral".
    """
    # Fold the typographic apostrophe so "don’t" matches the ASCII phrases below.
    txt = (user_text or "").lower().replace("\u2019", "'").strip()
    features = features or {}
    words = set(re.findall(r"[a-z]+", txt))

    if sentiment_label == "POSITIVE":
        return "cooperative"
    if not words.isdisjoint(NEGATIVE_WORDS) or "!" in txt:
        return "aggressive"
    if any(re.search(p, txt) for p in EVADE_PATTERNS):
        return "evasive"
    if "?" in txt or not words.isdisjoint(("how", "help")) or "don't understand" in txt:
        return "confused"
    if (features.get("MissedPayments", 0) or 0) >= 2 or (features.get("ResponseTimeHours", 0) or 0) > 48:
        return "evasive"
    return "neutral"

def detect_persona_llm(user_text: str, features: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Classify the persona with a zero-shot model, or return None when that is not possible.

    The prompt is the customer message followed, when `features` is given, by
    the missed-payment count and response time. The top-ranked label from
    POSSIBLE_PERSONAS is returned.

    None is returned, so the caller can fall back to another method, when
    transformers is unavailable (`hf_pipeline` is None), when the message is
    blank or not a string, or when the pipeline raises. Failures are logged.

    Args:
        user_text: Customer message.
        features: Optional mapping read for "MissedPayments" and "ResponseTimeHours".

    Returns:
        A persona label, or None.
    """
    global _zero_shot
    if hf_pipeline is None:
        return None
    # Nothing to classify: avoid scoring an empty prompt.
    if not isinstance(user_text, str) or not user_text.strip():
        return None
    try:
        if _zero_shot is None:
            _zero_shot = hf_pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        context = ""
        if features:
            missed = features.get("MissedPayments", 0)
            resp = features.get("ResponseTimeHours", "N/A")
            context = f" Missed payments: {missed}; Response time: {resp} hours."
        res = _zero_shot(
            f"Customer message: {user_text}\n{context}",
            candidate_labels=POSSIBLE_PERSONAS,
            multi_label=False
        )
        return res["labels"][0]
    except Exception:
        log.warning("Zero-shot persona detection failed; returning None", exc_info=True)
        return None

def detect_persona(user_text: str, sentiment_label: str, features: Dict[str, Any] = None) -> str:
    """Detect the persona, preferring the zero-shot model over the rule-based fallback.

    The heuristic runs only when the LLM path returns nothing (None or an empty label).
    Note the argument order: message first, then sentiment label.
    """
    return (
        detect_persona_llm(user_text, features)
        or detect_persona_heuristic(user_text, sentiment_label, features)
    )
