Capstone 2 Insurance Claim Approval Agent

1. Environment Setup and Dependencies

1.1 Load Necessary Libraries

In [None]:
# Execute the course-provided setup script before running anything else.
# NOTE(review): its contents are not visible in this notebook — presumably it prepares the learner environment; confirm.
%run ./.setup/learner_setup

1.2. Import the required libraries

In [None]:
# Core Python libraries
import os
import json
import textwrap

import textwrap
import openai
import os
import json
import httpx
from dotenv import load_dotenv
# Data handling
import pandas as pd

# Environment variables
from dotenv import load_dotenv

# HTTP and retry logic
import httpx
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

# Display utilities for Jupyter
from IPython.display import display, Image, Markdown

# LangChain & LangGraph core
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.tools import tool
from langchain.prompts import PromptTemplate
from langchain_core.messages import (
    SystemMessage,
    HumanMessage,
    trim_messages,
)
from langchain_core.messages.utils import count_tokens_approximately

# LangGraph components
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import (
    create_react_agent,
    ToolNode,
    tools_condition,
)
from langgraph.checkpoint.memory import MemorySaver
from langgraph.types import Command
# Typing
from typing import Annotated, Literal
from typing_extensions import TypedDict

1.3. Create model Client
Below is the code that sets up the UAIS environment: it obtains an authentication token and establishes the connection to the LLM.

In [None]:
# Authentication:
def get_access_token():
    """Request an OAuth2 access token using the client-credentials grant.

    The client id and secret are read from the ``AIML_Training`` secret scope
    via ``dbutils.secrets`` so they never appear in the notebook.
    NOTE(review): ``dbutils`` is not defined in this notebook — presumably it is
    the global injected by the Databricks runtime; confirm.

    Returns:
        str: The ``access_token`` field of the token endpoint's JSON response.

    Raises:
        httpx.HTTPStatusError: If the token endpoint answers with a 4xx/5xx status.
        KeyError: If the response body does not contain ``access_token``.
    """
    auth = "https://api.uhg.com/oauth2/token"
    scope = "https://api.uhg.com/.default"
    grant_type = "client_credentials"

    with httpx.Client() as client:
        body = {
            "grant_type": grant_type,
            "scope": scope,
            "client_id": dbutils.secrets.get(scope="AIML_Training", key="client_id"),
            "client_secret": dbutils.secrets.get(scope="AIML_Training", key="client_secret"),
        }
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        resp = client.post(auth, headers=headers, data=body, timeout=60)
        # Surface authentication failures as an HTTP error instead of a
        # confusing KeyError on the missing "access_token" field below.
        resp.raise_for_status()
        return resp.json()["access_token"]
    

# Load the UAIS connection settings into the process environment.
load_dotenv('./Data/UAIS_vars.env')

# Optional legacy settings. They are not needed by the LangChain clients below,
# but are kept so that any cell still reading these names keeps working.
endpoint = os.environ.get("MODEL_ENDPOINT")
model_name = os.environ.get("MODEL_NAME")
project_id = os.environ.get("PROJECT_ID")
api_version = os.environ.get("API_VERSION")

# Required settings: indexing os.environ directly fails fast with a KeyError
# that names the missing variable.
AZURE_OPENAI_ENDPOINT = os.environ["AZURE_OPENAI_ENDPOINT"]
OPENAI_API_VERSION = os.environ["OPENAI_API_VERSION"]
EMBEDDINGS_DEPLOYMENT_NAME = os.environ["EMBEDDINGS_DEPLOYMENT_NAME"]
MODEL_DEPLOYMENT_NAME = os.environ["MODEL_DEPLOYMENT_NAME"]
PROJECT_ID = os.environ['PROJECT_ID']

# Authenticate once and share the token between both clients instead of
# calling the token endpoint separately for each of them.
_access_token = get_access_token()

# Chat model used by every tool and by the agent. Only the LangChain client is
# created: the rest of the notebook calls ``chat_client.invoke(...)``, which is
# the LangChain interface. temperature=0 keeps the outputs as repeatable as possible.
chat_client = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=OPENAI_API_VERSION,
    azure_deployment=MODEL_DEPLOYMENT_NAME,
    temperature=0,
    azure_ad_token=_access_token,
    default_headers={
        "projectId": PROJECT_ID
    }
)

# Embeddings client configured against the same endpoint and project.
embeddings_client = AzureOpenAIEmbeddings(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=OPENAI_API_VERSION,
    azure_deployment=EMBEDDINGS_DEPLOYMENT_NAME,
    azure_ad_token=_access_token,
    default_headers={
        "projectId": PROJECT_ID
    }
)

2. Data loading and Preprocessing

2.1. Load and Inspect Validation Patient Data
We begin by loading the validation patient records directly from the JSON file as Python dictionaries, which makes them easy to inspect.

In [None]:
# Read the validation patient records from disk
with open('Data/validation_records.json') as records_file:
    sample_patients = json.load(records_file)

In [None]:
# Preview the first two validation records
sample_patients[:2]

2.2. Implement utility function for computing age

In [None]:
from datetime import datetime

def compute_age(dob: str, reference_date: str) -> int:
    """Return the age in completed years on ``reference_date``.

    Args:
        dob: Date of birth formatted as ``YYYY-MM-DD``.
        reference_date: Date the age is measured at, formatted as ``YYYY-MM-DD``.

    Returns:
        Whole years between the two dates; one year is subtracted when the
        birthday has not yet been reached in the reference year.

    Raises:
        ValueError: If either string does not match ``YYYY-MM-DD``.
    """
    date_format = "%Y-%m-%d"
    born = datetime.strptime(dob, date_format)
    as_of = datetime.strptime(reference_date, date_format)

    years = as_of.year - born.year
    # Birthday still ahead in the reference year -> not a full year yet.
    if (as_of.month, as_of.day) < (born.month, born.day):
        years -= 1
    return years

# Example usage: quick sanity check of compute_age on a fixed pair of dates
# (birthday in May, reference date in September of 2025).
age = compute_age("1980-05-15", "2025-09-21")
display(age)

In [None]:
# Attach an "age" (measured at the date of service) to every validation record;
# records missing either date get None.
for patient in sample_patients:
    dob = patient.get("date_of_birth")
    dos = patient.get("date_of_service")
    patient["age"] = compute_age(dob, dos) if (dob and dos) else None

sample_patients[:2]

2.3 Load and inspect insurance policy data
Data from insurance_policies.json is loaded here. This preserves all metadata and makes it easy to inspect policy information and coverage details for further processing by agent later on as needed.

In [None]:
# Read the insurance policy records from disk
with open('Data/insurance_policies.json') as policies_file:
    insurance_policies = json.load(policies_file)

# Preview the first two policy records
insurance_policies[:2]

2.4. Reference codes
We load the reference codes from the reference_codes.json file as a Python dictionary. It contains CPT (procedure) and ICD-10 (diagnosis) codes with human-readable descriptions. These are fed to the agent to improve the quality of the reports and summaries it creates, which in turn aids its reasoning. The reference data is loaded below, and CPT_CODES and ICD10_CODES are then inspected.

In [None]:
# Read the CPT / ICD-10 reference mappings from disk
with open('Data/reference_codes.json') as codes_file:
    ref = json.load(codes_file)

# Split the reference data into its two code tables
CPT_CODES = ref['CPT']
ICD10_CODES = ref['ICD10']

In [None]:
# Look at CPT Codes (the 'CPT' table of reference_codes.json; used later to describe procedure codes in prompts)
CPT_CODES

In [None]:
# Look at ICD-10 codes (the 'ICD10' table of reference_codes.json; used later to describe diagnosis codes in prompts)
ICD10_CODES

In [None]:
# Create dictionaries for easy data access and lookups

In [None]:
# Index the loaded records by id so they can be fetched directly
PATIENT_DB = {record["patient_id"]: record for record in sample_patients}
POLICY_DB = {policy["policy_id"]: policy for policy in insurance_policies}

3. Create and Define Tools for the ReAct Agent
In this section we define the three tools powering our ReAct Agent:

summarize_patient_record: Summarizes a patient record in a structured format using LLM reasoning. Returns key points for use in downstream eligibility reasoning.

summarize_policy_guideline: Summarizes an insurance policy in a structured format using LLM reasoning. Returns clearly defined conditions for claim coverage.

check_claim_coverage: Uses LLM reasoning to assess whether each claimed procedure in the patient's record satisfies the policy's coverage conditions.

This tool does not make final denial decisions

If all requirements are satisfied, the procedure is approved
If any requirement is not met, the case is routed for further human review. Only procedures and diagnoses explicitly claimed by the patient are evaluated.
Returns a structured, step-by-step coverage analysis along with a recommendation per procedure: APPROVE or ROUTE FOR REVIEW.
3.1. Define Tool for Summarizing Patient Records
This tool uses LLM-powered reasoning to generate summaries of patient records along with their insurance claim data. The summaries follow a clear, structured format to support consistent and accurate downstream eligibility reasoning.

3.1.1 Create a function to build prompt

Build the prompt and check for sample patient. Verify the output

In [None]:
def build_prompt_summarize_patient_record(record, icd10_codes, cpt_codes):
    """Assemble the instruction prompt for summarizing one patient claim record.

    Args:
        record: The patient claim record; serialized with ``json.dumps``.
        icd10_codes: ICD-10 code mappings; serialized with ``json.dumps``.
        cpt_codes: CPT code mappings; serialized with ``json.dumps``.

    Returns:
        str: The prompt text asking for a seven-section structured summary,
        with the code mappings and the record embedded as JSON.
    """
    # Serialize the inputs up front, in the order they appear in the prompt.
    icd10_json = json.dumps(icd10_codes)
    cpt_json = json.dumps(cpt_codes)
    record_json = json.dumps(record)

    prompt_text = f"""
You are a medical claims summarization assistant. Given a patient insurance claim record, generate a structured summary with the following SEVEN labeled sections, in this order:

1. Patient Demographics: Include name, gender, and age (use "age" field if present, otherwise compute from date_of_birth and date_of_service).
2. Insurance Policy ID
3. Diagnoses and Descriptions: List all ICD-10 codes and their mapped descriptions.
4. Procedures and Descriptions: List all CPT codes and their mapped descriptions.
5. Preauthorization Status: Clearly state if preauthorization was required and whether it was obtained.
6. Billed Amount (in USD)
7. Date of Service

Use the following ICD-10 code mappings: {icd10_json}
Use the following CPT code mappings: {cpt_json}

Here is the patient record:
{record_json}

Return only the structured summary, clearly formatted with each section labeled.
"""
    return prompt_text
# Build the summarization prompt for one sample patient and inspect it
sample_record = PATIENT_DB['P011']
prompt = build_prompt_summarize_patient_record(sample_record, ICD10_CODES, CPT_CODES)
print(prompt)

# Send the instructions plus the raw record to the model and show its summary
prompt_messages = [
    {"role": "developer", "content": prompt},
    {"role": "user", "content": f"Patient review text:\n\n{sample_record}"},
]
response = chat_client.invoke(prompt_messages)
print(response.content)

3.1.2 Create function summarize_patient_record
This tool is responsible for extracting a structured summary of a patient’s insurance claim record using LLM reasoning. It accepts a raw patient record (as a JSON string or plain string) and returns a well-structured summary report that will later be used for claim coverage evaluation.

In [None]:
# Tool 1: Summarizing Patient Health Record with Insurance Claim
@tool
def summarize_patient_record(record_str: str) -> str:
    """
    Summarizes a patient's health record and insurance claim information.

    Args:
        record_str: The patient record as a JSON string. A Python-dict style
            string (single-quoted keys/values) is also accepted; any other
            text is passed to the model as-is.

    Returns:
        A structured, sectioned summary of the patient claim record.

    Raises:
        ValueError: If the prompt cannot be built or the model call fails.
    """
    import ast  # local import: only needed for the dict-literal fallback below

    # Parse the input so the prompt embeds clean JSON. The agent may hand over
    # either real JSON or the ``str()`` of a dict, so both are tried; anything
    # that is not a JSON-serializable dict falls back to the raw text.
    record = record_str
    for parser in (json.loads, ast.literal_eval):
        try:
            candidate = parser(record_str)
            json.dumps(candidate)  # must be serializable by the prompt builder
        except (TypeError, ValueError, SyntaxError, MemoryError, RecursionError):
            continue
        if isinstance(candidate, dict):
            record = candidate
            break

    try:
        prompt = build_prompt_summarize_patient_record(record, ICD10_CODES, CPT_CODES)
        prompt_messages = [
            {"role": "developer", "content": prompt},
            # Use the record that was passed in, not a fixed sample patient.
            {"role": "user", "content": f"Patient review text:\n\n{record_str}"},
        ]
        response = chat_client.invoke(prompt_messages)
    except Exception as exc:
        # Keep ValueError as the failure type, but report the real cause.
        raise ValueError(f"Could not summarize the patient record: {exc}") from exc
    return response.content

Test tool to check sample patient record summary
This section runs the summarize_patient_record tool on a specific patient record (in this case, the patient with ID 'P011') and displays the generated summary report.

In [None]:
# Serialize patient P011 to JSON, summarize it with the tool, and render the
# summary as Markdown. ``patient_summary`` is reused by the coverage check later.
patient_record_json = json.dumps(PATIENT_DB['P011'])
patient_summary = summarize_patient_record.invoke(patient_record_json)
display(Markdown(patient_summary))

3.2. Define Tool for Summarizing Insurance Policy Guideline
This tool uses LLM reasoning to summarize the claim coverage rules and conditions of an insurance policy, ensuring that the claim coverage process follows the defined criteria.

In [None]:
@tool
def summarize_policy_guideline(policy_id: str) -> str:
    """Summarizes the coverage rules of the insurance policy with the given policy ID.

    Args:
        policy_id: Identifier of the policy to summarize (for example "POL1002").

    Returns:
        A structured summary with the policy details and, for each covered
        procedure, its diagnoses, gender restriction, age range,
        preauthorization requirement and coverage notes. If the policy ID is
        unknown, a short message saying so is returned instead.
    """
    # Look up only the requested policy rather than sending every policy to the
    # model: the prompt stays small and the model cannot pick the wrong policy.
    policy_key = policy_id.strip() if isinstance(policy_id, str) else policy_id
    policy = POLICY_DB.get(policy_key)
    if policy is None:
        return (
            f"Policy ID {policy_id!r} was not found in the available insurance policies; "
            "no policy summary can be produced."
        )

    prompt = (
        f"You are an insurance policy assistant. Given a policy id {policy_key}, extract a summary from it. "
        f"You need to refer {policy} to gather information about that policy id. "
        "You have to include the following sections in the order provided:\n"
        "• Policy Details: policy ID and plan name\n"
        "• Covered Procedures:\n"
        "    For each covered procedure listed in the policy, include the following sub-points:\n"
        f"    o Procedure Code and Description (Use {CPT_CODES} for reference)\n"
        f"    o Covered Diagnoses and Descriptions (Use {ICD10_CODES} for reference)\n"
        "    o Gender Restriction\n"
        "    o Age Range\n"
        "    o Preauthorization Requirement\n"
        "    o Notes on Coverage (if any)\n"
    )
    response = chat_client.invoke(prompt)
    return response.content

Test tool to check sample insurance policy guideline summary
This section runs the summarize_policy_guideline tool to summarize the policy details for the given policy ID (POL1002) and displays the summary report.

In [None]:
# Show the raw record of the policy that is summarized in the next cell
POLICY_DB['POL1002']

In [None]:
# Summarize policy POL1002 with the tool and render the result as Markdown.
# ``policy_summary`` is reused by the coverage check later.
policy_summary = summarize_policy_guideline.invoke("POL1002")
display(Markdown(policy_summary))

3.3. Define Tool for Claim Coverage Check
This tool uses LLM reasoning to assess whether each claimed procedure in the patient's record satisfies the coverage conditions outlined in their insurance policy. It relies on structured summaries of both the patient record and the policy guideline.

Only procedures and diagnoses explicitly claimed by the patient are evaluated.

The tool does not issue final denials.

If all policy requirements are met, the procedure is marked as APPROVE.
If any requirement is not met, the procedure is flagged as ROUTE FOR REVIEW for expert human decision-making.
The tool returns a clear, step-by-step coverage analysis and recommendation for each claimed procedure.

@tool
def check_claim_coverage(patient_record_summary: str, policy_summary: str) -> dict:
    """
    Evaluates whether the patient's claimed procedure is covered under their insurance policy.
    Returns a structured decision with reasoning.

    Args:
        patient_record_summary: Structured summary of the patient claim record.
        policy_summary: Structured summary of the patient's insurance policy.

    Returns:
        A dict with the single key "Coverage eligibility decision" whose value
        is the coverage review, summary of findings and final decision
        (APPROVE or ROUTE FOR REVIEW) as text.
    """
    # Both summaries are interpolated directly into the instructions; the
    # model is told never to infer missing information and to route anything
    # ambiguous for manual review instead of denying it.
    prompt = f"""You are a claims coverage validation assistant.
Your task is to determine whether the patient's claimed procedure is covered under their insurance policy. Given a patient claim summary {patient_record_summary} and the summary of the insurance policy the patient has opted for {policy_summary}, determine if the claim is covered under the policy. You have to return a coverage eligibility decision, either approval or routing for review.: 
A procedure should be approved only if all the below conditions are met:
        • The patient's diagnosis code(s) match the policy-covered diagnoses for the claimed procedure.
        • The procedure code is explicitly listed in the policy, and all associated conditions are satisfied.
        • The patient's age falls within the policy's defined age range (inclusive of the lower bound, exclusive of the upper bound).
        • The patient's gender matches the policy's requirement for that procedure.
        • If preauthorization is required by the policy, it must have been obtained.
        
        Do **not** infer or assume any missing information.
        Do **not** evaluate procedures or diagnoses not explicitly listed in the patient record.
         If any required condition is missing or ambiguous, route the case for **manual review**.

        The response should contain the following points:
        • Coverage Review: Step-by-step analysis for the claimed procedure, detailing the checks performed. (each patient has only one procedure)
        • Summary of Findings: Summary of which coverage requirements were met or not met.
        • Final Decision: For each procedure for the claim, return either "APPROVE" or "ROUTE FOR REVIEW" with a brief explanation of the reason behind it.
        """

    response = chat_client.invoke(prompt)
    # Returned as a dict (not a bare string): callers read the decision text
    # from the "Coverage eligibility decision" key.
    return {"Coverage eligibility decision": response.content}

# Run the coverage check on the patient and policy summaries produced above
coverage_inputs = {
    'patient_record_summary': patient_summary,
    'policy_summary': policy_summary,
}
result = check_claim_coverage.invoke(coverage_inputs)

# The tool answers with a dict; pull out the decision text and render it
markdown_text = result['Coverage eligibility decision']
display(Markdown(markdown_text))

Tools for Claims Processing Agent
In this section, we define the tools that will be used by the agent to:

Process patient records
Summarize policy guidelines
Check claim coverage

4. Set Up the System Instruction Prompt for the Claims Approval Agent
This system prompt defines the agent's end-to-end workflow and enforces strict compliance with claims coverage determination procedures.

It explicitly specifies:

The tools the agent must use
The exact sequence of steps to follow
The required output format for reporting whether a claim is approved or routed for review
This ensures:

Consistent, audit-ready claim processing
Elimination of ambiguity in agent decision-making

In [None]:
# Instruction prompt for the overall Agent
#
# This text becomes the agent's system message. It pins down:
#   * the three tools the agent may use and the fixed order in which to call them,
#   * that each tool is called at most once per claim,
#   * the fallback decision (ROUTE FOR REVIEW, flagged as "Tool limitation") when the
#     agent runs out of steps, and
#   * the final output format: a bold "Decision" plus a short "Reason".
# NOTE(review): the prompt lists the first tool as summarize_patient_record(record),
# while the tool's actual parameter is named record_str — confirm this does not confuse the model.

AGENT_PROMPT_TXT = """
You are an Insurance Claims Reviewer Assistant. Your task is to determine whether a patient's insurance claim should be APPROVED or ROUTED FOR REVIEW, strictly following the policy guidelines.

You have access to ONLY the following three tools, which must be used in this exact order:
1. summarize_patient_record(record)
2. summarize_policy_guideline(policy_id)
3. check_claim_coverage(patient_record_summary, policy_summary)

Your workflow:
- Step 1: Use summarize_patient_record to generate a structured summary of the patient claim record.
- Step 2: Extract the policy_id from the patient summary and use summarize_policy_guideline to summarize the insurance policy.
- Step 3: Use check_claim_coverage with the patient record summary and policy summary to determine coverage eligibility.

Once both the summaries are obtained, check if the claim should be approved or not under the policy guidelines based on the claims summary and the policy summary.
Call the tools in this way mentioned above and provide the most appropriate response. First call the summarize_patient_record tool, then summarize_policy_guideline, and finally check_claim_coverage.

Do NOT call any tool more than once per claim. Do NOT repeat or loop tool calls. Do NOT attempt to answer without using all three tools in the specified order.

if you get "Sorry, need more steps to process this request." as the ouput , mark the decision as ROUTE FOR REVIEW, and provide suitable reason and mention somewhere it is "Tool limitation"

Your final response must be formatted as follows:
 - Decision: decision should be APPROVE or ROUTE FOR REVIEW. (Decision heading should be in bold)
 - Reason: A concise explanation (max 4 sentences) referencing specific coverage rules, policy conditions, diagnosis and procedure codes/descriptions, and any relevant age or gender criteria that led to your decision.

Strictly follow this workflow and output format for every claim.
"""

5. Create LangGraph Claims Approval ReAct Agent
This step creates the ReAct agent and equips it with the necessary tools, LLM, and system prompt.

In [None]:
# Tools exposed to the agent, listed in the order the workflow uses them
tools = [
    summarize_patient_record,
    summarize_policy_guideline,
    check_claim_coverage,
]

# Wrap the instruction text as the agent's system message
AGENT_SYS_PROMPT = SystemMessage(content=AGENT_PROMPT_TXT)

# Assemble the ReAct agent from the LLM, the tools and the system prompt
agent = create_react_agent(model=chat_client, tools=tools, prompt=AGENT_SYS_PROMPT)

6. Define Utility Function to Call the Agent
This utility function is used to interact with the agent, stream its step-by-step reasoning, and display the final response in Markdown format.

In [None]:
def call_agent(agent, query, verbose=False, config=None):
    """Run ``agent`` on ``query`` and display its final answer as Markdown.

    Args:
        agent: Object exposing ``stream(inputs, stream_mode=..., config=...)``;
            each streamed event is expected to carry a ``"messages"`` list.
        query: The user request, sent as a single ``HumanMessage``.
        verbose: When True, pretty-print the latest message of every streamed
            event so the intermediate steps are visible.
        config: Optional run configuration passed through to ``agent.stream``
            (for example ``{"recursion_limit": 50}``). Defaults to ``{}``.

    Returns:
        The content of the last message of the last streamed event.

    Raises:
        RuntimeError: If the agent stream yields no events at all.
    """
    if config is None:
        config = {}

    final_message = None
    # Stream the agent's execution; with stream_mode='values' the last message
    # of each event is the most recent step.
    for event in agent.stream(
        {"messages": [HumanMessage(content=query)]},
        stream_mode='values',
        config=config,
    ):
        final_message = event["messages"][-1]
        if verbose:
            final_message.pretty_print()

    # An empty stream would otherwise fail below with an unbound variable.
    if final_message is None:
        raise RuntimeError("The agent stream produced no events, so there is no response to display.")

    # Display the final response from the agent as Markdown
    print('\n\nFinal Response:\n')
    display(Markdown(final_message.content))
    # Return the final message content for optional downstream use
    return final_message.content



7. Run the Agent on a Validation Patient Data
In this section, we first run the agent on a single sample patient record from validation_records.json. The goal is to observe how the agent processes the claim and ensure that everything is working as expected.

In [None]:
# Show the raw record of the patient evaluated in the next cell
PATIENT_DB['P011']

In [None]:
# Evaluate one claim end to end, printing every intermediate agent step
patient_record = str(PATIENT_DB['P011'])  # the record is sent to the agent as text
response = call_agent(
    agent=agent,
    query=f"Evaluate this claim: {patient_record}",
    verbose=True,
    config={"recursion_limit": 50},
)

In [None]:
# Render the agent's final answer for the single-patient run on its own
display(Markdown(response))

In [None]:
# List the ids of all validation patients available in the lookup table
PATIENT_DB.keys()

8. Validate Agent Performance Using Validation Human Reference Data
We save the agent responses in a dataframe
Load the validation_reference_results.csv
Check agent performance by comparing results manually or using a simple LLM-as-judge prompt

In [None]:
# Run the agent on every validation record so that there is exactly one
# response per patient id. Without this step ``agent_responses`` is undefined
# on a fresh kernel and the DataFrame below cannot be built.
agent_responses = [
    call_agent(
        agent=agent,
        query=f"Evaluate this claim: {record}",
        verbose=False,
        config={"recursion_limit": 50},
    )
    for record in PATIENT_DB.values()
]

# Pair each patient id with its generated response; keys() and values() of the
# same dict iterate in the same order, so the rows line up.
validation_agent_results = pd.DataFrame({
    'patient_id': list(PATIENT_DB.keys()),
    'generated_response': agent_responses
})


In [None]:
# Load the human reference decisions. Later cells merge this frame on 'patient_id'
# and read its 'reference_response' column.
validation_human_results = pd.read_csv("Data/validation_reference_results.csv")

In [None]:
# You can merge the dataframes and validate manually

In [None]:
# Show full cell text so long responses are not truncated in the output
pd.set_option('display.max_colwidth', None)

# Put agent and human responses side by side, keeping only patients present in both
validation_merged_df = pd.merge(
    validation_agent_results,
    validation_human_results,
    on='patient_id',
    how='inner',
)
validation_merged_df

OR use an LLM as a judge: create your own grading prompt and use it to validate the agent's performance.


# Grading prompt template
#
# LLM-as-judge prompt with two placeholders, {reference} (human answer) and
# {generated} (agent answer). The doubled braces around the JSON example are
# escaped so they are not treated as placeholders.
# NOTE(review): both placeholders appear twice (once near the top, once at the
# end), so each response is sent to the judge twice — confirm this is intended.
grading_prompt_template = PromptTemplate.from_template("""
You are an expert insurance claims adjudicator.
Your task is to evaluate whether the AI-generated response matches the human reference response for the same claim.
Human Response:
    {reference}
    Model-Generated Response:
    {generated}

**Grading Rules:**

> - Grade as "Correct" if the generated response matches the reference response in **semantic meaning, factual accuracy, decision outcome (e.g., approve/deny), and reasoning based on policy terms and claim details.** Minor differences in wording are acceptable as long as the core rationale and adjudication logic are aligned.

- The wording of the reason may differ, but as long as the core rationale remains consistent (e.g., policy coverage mismatch, age/gender requirements, diagnosis not aligning), the response can still be graded as "Correct."

> - Grade as "Incorrect" if the generated response: **differs in decision outcome, contains factual inconsistencies, misinterprets policy terms, omits key reasoning, or introduces irrelevant or misleading information.**

- Focus on **content similarity**, **factual alignment**, and **coherence of reasoning** when comparing the responses.

Respond in JSON format:
{{
    "grade": "Correct" or "Incorrect",
    "justification": "Brief justification here."
}}

Reference response:
{reference}

Generated response:
{generated}
""")

# Imported once here rather than on every loop iteration.
import ast
import re


def _parse_grading_response(raw_text):
    """Convert the judge model's reply into a dict with the grading result.

    Markdown code fences are stripped, then the text is parsed as JSON. A
    Python-literal dict is accepted as a fallback via ``ast.literal_eval``,
    which only evaluates literals. The model's output is never executed as code.

    Args:
        raw_text: The text content returned by the judge model.

    Returns:
        The parsed dict, or ``{"grade": "Error", "justification": <cleaned text>}``
        when the reply cannot be parsed into a dict.
    """
    cleaned = raw_text.strip()
    cleaned = re.sub(r"^```(?:json)?|```$", "", cleaned).strip()

    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(cleaned)
        except (TypeError, ValueError, SyntaxError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed
    return {"grade": "Error", "justification": cleaned}


results = []

# Run the grading prompt on each (agent response, human reference) pair
for _, row in validation_merged_df.iterrows():
    prompt = grading_prompt_template.format(
        reference=row["reference_response"],
        generated=row["generated_response"],
    )

    # Send to LLM and get response
    llm_response = chat_client.invoke([HumanMessage(content=prompt)])
    eval_result = _parse_grading_response(llm_response.content)

    results.append({
        "patient_id": row["patient_id"],
        "grade": eval_result.get("grade", ""),
        "justification": eval_result.get("justification", ""),
    })

# Create a DataFrame with the grading results
df_results = pd.DataFrame(results)
df_results

Based on the above, it is clear that the agent is working as expected, so we can now generate responses on the test patient records for submission.

In [None]:
# 9. Generate Responses on Test Data
# - Load test_records.json
# - Generate results from the agent
# - Store them in submission.csv for submission

In [None]:
# Read the test patient records from disk
with open('Data/test_records.json') as test_file:
    test_patients = json.load(test_file)

In [None]:
# Number of test records loaded
len(test_patients)

In [None]:
# Inspect the first test record
test_patients[0]

In [None]:
# Attach an "age" (measured at the date of service) to every test record;
# records missing either date get None.
for patient in test_patients:
    dob = patient.get("date_of_birth")
    dos = patient.get("date_of_service")
    patient["age"] = compute_age(dob, dos) if (dob and dos) else None

In [None]:
# Preview the first ten test records, now including the computed age
test_patients[:10]

In [None]:
# Collect the patient ids in the same order as the test records
test_patient_ids = [record['patient_id'] for record in test_patients]
test_patient_ids

In [None]:
# Run the agent on every test record, keeping the responses in record order
test_responses = []

for patient in test_patients:
    print(f'Processing for patient {patient["patient_id"]}...')
    patient_record = str(patient)  # the record is sent to the agent as text
    response = call_agent(
        agent,
        f"Evaluate this claim: {patient_record}",
        verbose=True,
        config={"recursion_limit": 50},
    )
    test_responses.append(response)

In [None]:
# Build the submission table: one row per test patient, pairing each id with
# the agent's response (both lists follow the order of ``test_patients``).
# NOTE(review): column names mirror the validation results frame
# ('patient_id', 'generated_response') — confirm against the required submission format.
submission_df = pd.DataFrame({
    'patient_id': test_patient_ids,
    'generated_response': test_responses,
})

submission_df.to_csv('submission.csv',  # use the correct file name - submission.csv
                     index=False)

In [None]:
# Read the written file back to verify what was actually saved for submission.
import pandas as pd
df = pd.read_csv('submission.csv')
df