In [None]:
import json
import os
import re

import pandas as pd
from groq import Groq

# Load the explainability payload produced upstream for a single claim.
# An explicit encoding keeps the read independent of the platform default.
with open(r"D:\CTS Claims project\fraud_explanation.json", "r", encoding="utf-8") as f:
    explainable_output = json.load(f)

# Identifiers are optional in the payload; the score and label are required,
# so a missing key raises KeyError here instead of failing later in a prompt.
claim_id = explainable_output.get("claim_id", "N/A")
provider_id = explainable_output.get("provider_id", "N/A")
fraud_score = explainable_output["fraud_score"]
predicted_label = explainable_output["predicted_label"]
feature_contributions = explainable_output.get("feature_contributions", {})
metrics = explainable_output.get("metrics", {})

# Confusion-matrix counts and cost figures; each falls back to 0 when absent.
tp = metrics.get("tp", 0)
fp = metrics.get("fp", 0)
fn = metrics.get("fn", 0)
tn = metrics.get("tn", 0)
avg_claim_value = metrics.get("avg_claim_value", 0)
personnel_cost = metrics.get("personnel_cost", 0)
infra_cost = metrics.get("infra_cost", 0)
compliance_cost = metrics.get("compliance_cost", 0)

# Credentials must never live in source control: read the key from the
# environment and fail fast with an actionable message when it is missing.
# NOTE(review): the key that was previously hard-coded on this line should be
# treated as leaked and rotated.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; export it before running."
    )
client = Groq(api_key=_groq_api_key)
def _ask_llm(prompt):
    """Send *prompt* as a single user message and return the stripped reply text."""
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    # NOTE(review): the reply content may be None for an empty completion
    # (confirm against the SDK); treat that as empty text rather than
    # crashing with an opaque AttributeError on .strip().
    content = response.choices[0].message.content
    return (content or "").strip()


def _parse_llm_json(raw_text):
    """Decode the JSON payload embedded in an LLM reply into a list of records.

    Markdown code fences are removed, then the outermost ``[...]`` or
    ``{...}`` span is decoded. A single object is wrapped in a list so the
    result is always record-shaped.

    Raises:
        ValueError: if the remaining text is not valid JSON. The underlying
            ``json.JSONDecodeError`` is chained as the cause.
    """
    cleaned_json = re.sub(r"```json|```", "", raw_text).strip()
    json_match = re.search(r"(\[.*\]|\{.*\})", cleaned_json, re.DOTALL)
    if json_match:
        cleaned_json = json_match.group(1)

    try:
        payload = json.loads(cleaned_json)
    except json.JSONDecodeError as err:
        raise ValueError(
            f"Failed to parse JSON from LLM. Cleaned text:\n{cleaned_json}"
        ) from err

    return [payload] if isinstance(payload, dict) else payload


def explain_claim_with_llm():
    """Ask the LLM for a plain-English explanation and a metrics table.

    Reads the module-level claim fields (``claim_id``, ``provider_id``,
    ``fraud_score``, ``predicted_label``, ``feature_contributions`` and the
    metric/cost values) and sends two prompts through the module-level
    ``client``: one requesting a short explanation, one requesting the same
    data back as a JSON array.

    Side effects:
        Writes the parsed table to ``metrics_table.csv`` and
        ``metrics_table.json`` under ``D:\\CTS Claims project`` and prints a
        confirmation line.

    Returns:
        tuple: ``(explanation, metrics_data)`` where ``explanation`` is the
        LLM's text reply and ``metrics_data`` is the decoded JSON table.

    Raises:
        ValueError: if the table reply cannot be decoded as JSON.
    """
    contrib_text = (
        "\n".join(f"- {feat}: {weight:.4f}" for feat, weight in feature_contributions.items())
        if feature_contributions
        else "No feature-level explanation available."
    )

    explanation_prompt = f"""
    You are an expert fraud investigator. Explain this prediction in plain English for a business user.

    Claim Details:
    - Claim ID: {claim_id}
    - Provider ID: {provider_id}
    - Fraud Score: {fraud_score:.4f}
    - Prediction: {predicted_label}

    Why:
    {contrib_text}

    Metrics:
    - TP: {tp}, FP: {fp}, FN: {fn}, TN: {tn}
    - Avg Claim Value: ${avg_claim_value:,.2f}
    - Costs: Personnel ${personnel_cost}, Infra ${infra_cost}, Compliance ${compliance_cost}

    Task:
    Explain in 3–4 sentences why this claim was classified this way, using very simple language.
    """
    table_prompt = f"""
    Return ONLY a JSON array (no text, no markdown) with the following fields:
    - Claim ID
    - Provider ID
    - Fraud Score
    - Prediction
    - TP, FP,FN, TN these values should be not in the confusion matrix values, it should be a real time value (eg: 55 )
    - Avg Claim Value
    - Personnel Cost
    - Infra Cost
    - Compliance Cost

    Use this data:
    Claim ID: {claim_id}
    Provider ID: {provider_id}
    Fraud Score: {fraud_score:.4f}
    Prediction: {predicted_label}
    TP: {tp}, FP: {fp}, FN: {fn}, TN: {tn}
    Avg Claim Value: {avg_claim_value}
    Personnel Cost: {personnel_cost}
    Infra Cost: {infra_cost}
    Compliance Cost: {compliance_cost}
    """
    # Same request order as before: explanation first, then the table.
    explanation = _ask_llm(explanation_prompt)
    metrics_data = _parse_llm_json(_ask_llm(table_prompt))

    df = pd.DataFrame(metrics_data)
    csv_file = r"D:\CTS Claims project\metrics_table.csv"
    json_file = r"D:\CTS Claims project\metrics_table.json"

    df.to_csv(csv_file, index=False)
    df.to_json(json_file, orient="records", indent=4)

    print(f"✅ Metrics saved as:\n- {csv_file}\n- {json_file}")

    return explanation, metrics_data

# Run the two-prompt pipeline once, then echo each result under its banner.
explanation, metrics_data = explain_claim_with_llm()

for _heading, _body in (
    ("\n=== HUMAN-FRIENDLY EXPLANATION ===", explanation),
    ("\n=== METRICS JSON (from LLM) ===", json.dumps(metrics_data, indent=4)),
):
    print(_heading)
    print(_body)


✅ Metrics saved as:
- D:\CTS Claims project\metrics_table.csv
- D:\CTS Claims project\metrics_table.json

=== HUMAN-FRIENDLY EXPLANATION ===
This claim was classified as "not fraudulent" because several factors suggested it was a normal claim. The claim amount was relatively low (under $1000) and the patient's length of stay was short (5 days or less), which are common characteristics of legitimate claims. Additionally, the procedure and diagnosis codes used were consistent with standard practices, further supporting the claim's legitimacy. Overall, the combination of these factors led to a low fraud score, indicating that the claim is likely genuine.

=== METRICS JSON (from LLM) ===
[
    {
        "Claim ID": "CLM0000000001",
        "Provider ID": "PRV001253",
        "Fraud Score": 0.1915,
        "Prediction": "NON-FRAUD",
        "TP": 55,
        "FP": 30,
        "FN": 25,
        "TN": 40,
        "Avg Claim Value": -0.46298859948090226,
        "Personnel Cost": 5000,
       