In [0]:
%run ./.setup/learner_setup

## Load Necessary Dependencies

In [0]:
import os
from dotenv import load_dotenv
import httpx
import json
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage, HumanMessage
from IPython.display import display, Markdown
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)
from datetime import datetime
import csv

## Setup Authentication and LLM Client

In [0]:
def get_access_token():
    """
    Requests an OAuth2 access token using the client-credentials grant.

    The client id and client secret are read at call time through
    `dbutils.secrets.get` (scope "AIML_Training"), so neither value is
    written into the notebook.

    Returns:
        The value of the "access_token" field of the token endpoint's JSON response.

    Raises:
        httpx.HTTPStatusError: If the token endpoint answers with a non-success status.
        httpx.HTTPError: On transport failures (for example the 60 second timeout).
        KeyError: If a successful response carries no "access_token" field.
    """
    auth = "https://api.uhg.com/oauth2/token"
    scope = "https://api.uhg.com/.default"
    grant_type = "client_credentials"

    with httpx.Client() as client:
        body = {
            "grant_type": grant_type,
            "scope": scope,
            "client_id": dbutils.secrets.get(scope="AIML_Training", key="client_id"),
            "client_secret": dbutils.secrets.get(scope="AIML_Training", key="client_secret"),
        }
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        resp = client.post(auth, headers=headers, data=body, timeout=60)
        # Fail loudly on a rejected request instead of surfacing it later as a
        # confusing KeyError on the missing "access_token" field.
        resp.raise_for_status()
        return resp.json()["access_token"]



# Chat model client used both by the agent and inside each tool.
# temperature=0 is requested for every call made through this client.
# NOTE(review): the token is fetched once, when this cell runs, and passed as a
# fixed value; presumably it expires, so a long session may need this cell re-run — confirm.
chat_client = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=MODEL_DEPLOYMENT_NAME,
    api_version=OPENAI_API_VERSION,
    azure_ad_token=get_access_token(),
    default_headers={"projectId": PROJECT_ID},
    temperature=0,
)


# Embeddings client, configured like the chat client but pointing at the
# embeddings deployment. It requests its own access token when this cell runs.
embeddings_client = AzureOpenAIEmbeddings(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=EMBEDDINGS_DEPLOYMENT_NAME,
    api_version=OPENAI_API_VERSION,
    azure_ad_token=get_access_token(),
    default_headers={"projectId": PROJECT_ID},
)

## Load Necessary Datasets

In [0]:
# Loading datasets for reference codes, validation records, test records, and insurance policies
# JSON text is UTF-8 by specification; the encoding is passed explicitly so that
# loading does not depend on the platform's default locale encoding.
with open('./Data/reference_codes.json', 'r', encoding='utf-8') as f:
    reference_codes = json.load(f)
with open('./Data/validation_records.json', 'r', encoding='utf-8') as f:
    validation_records = json.load(f)
with open('./Data/test_records.json', 'r', encoding='utf-8') as f:
    test_records = json.load(f)
with open('./Data/insurance_policies.json', 'r', encoding='utf-8') as f:
    insurance_policies = json.load(f)


## Helper Function for Computing Age

In [0]:
# Helper for computing a patient's age (a plain function called directly by the notebook, not an agent tool)
def get_patient_age(patient_record: dict) -> dict:
    """
    Adds an "age" field to a patient record and returns that same record.

    The age is the number of whole years between "date_of_birth" and
    "date_of_service", both read from the record as "YYYY-MM-DD" strings.
    The record is modified in place; no copy is made.

    Raises:
        ValueError: If either date is missing or does not match "YYYY-MM-DD".
    """
    date_format = "%Y-%m-%d"
    born = datetime.strptime(patient_record.get("date_of_birth", ""), date_format)
    seen = datetime.strptime(patient_record.get("date_of_service", ""), date_format)

    # One year is taken off when the service date falls before the birthday
    # within the service year (the bool subtracts as 0 or 1).
    birthday_still_ahead = (seen.month, seen.day) < (born.month, born.day)
    patient_record["age"] = seen.year - born.year - birthday_still_ahead
    return patient_record
    
# # Test Case
# test_patient = validation_records[0]
# test_patient_age = get_patient_age(test_patient)
# print(test_patient_age)

## Create Tools for the Agent

In [0]:
# Tool for summarizing a patient's health record with insurance claim

@tool
def summarize_patient_record(record_str: str) -> str:
    """
    Extracts a structured summary of a patient's insurance claim record using LLM reasoning.
    """
    # NOTE: the docstring above is kept short on purpose; the @tool decorator
    # exposes it to the agent as this tool's description, so rewording it changes
    # what the model is told about the tool.
    #
    # record_str is interpolated verbatim into the prompt. Presumably it is a
    # serialized patient record that already carries the computed "age" field;
    # verify against the caller.
    # The full ICD-10 and CPT tables from `reference_codes` are embedded as well so
    # the model can map codes to their descriptions.
    # Use the LLM to generate the summary
    prompt = f"""You are a Healthcare Insurance Claim Approval Agent. Given the following patient record, summarize the patient's insurance claim record in a clearly formatted seven labeled sections in order: Patient demographics (name, gender, and age), Insurance Policy ID, Diagnoses and Descriptions (ICD-10 codes and their mapped descriptions), Procedures and Descriptions (CPT codes and their mapped descriptions), Preauthorization Status (if preauthorization was required and obtained), Billed Amount (in USD), and Date of Service.

    Given the patient record: "{record_str}"

    Given the ICD-10 codes and their mapped descriptions: "{json.dumps(reference_codes['ICD10'])}"

    Given the CPT codes and their mapped descriptions: "{json.dumps(reference_codes['CPT'])}"

    Return the summary as a well-structured summary report separated into seven sections, clearly seperated and labeled, so that downstream tools can easily reason over the content in JSON format. Do not include the '''json''' watermark.
    """

    # The model's reply is returned as-is (`response.content`); the prompt asks for
    # JSON, but nothing here parses or validates it.
    response = chat_client.invoke(prompt)
    return response.content

# Test Case
# summary_record = summarize_patient_record(json.dumps(test_patient_age))
# print(summary_record)

# Tool for summarizing an insurance policy corresponding to a given policy id

@tool
def summarize_policy_guideline(policy_id: str) -> str:
    """
    Extracts a structured summary of an insurance policy's guidelines using LLM reasoning.
    """
    # NOTE: the docstring above is kept short on purpose; the @tool decorator
    # exposes it to the agent as this tool's description, so rewording it changes
    # what the model is told about the tool.
    #
    # The whole `insurance_policies` dataset is serialized into the prompt and the
    # model is asked to summarize the policy matching `policy_id`; no lookup is done
    # in Python.
    # NOTE(review): selecting the matching policy before building the prompt would
    # shrink it, but the dataset's schema is not visible in this notebook — confirm
    # before changing.
    # Use the LLM to generate the summary
    prompt = f"""You are a Healthcare Insurance Claim Approval Agent. Given the following insurance policy id, summarize the insurance policy's coverage rules in a clearly formatted labeled sections in order: Policy details (policy id and plan name) and Covered Procedures, with each covered procedure listed in the policy, including sub-points: Procedure Code and Description (using CPT code mappings), Covered Diagnoses and Descriptions (using ICD-10 code mappings), Gender Restriction, Age Range, Preauthorization Requirement, and Notes on Coverage (if any).
    
    Given the insurance policy id: "{policy_id}"

    Given the insurance policy dataset: "{json.dumps(insurance_policies)}"

    Given the ICD-10 codes and their mapped descriptions: "{json.dumps(reference_codes['ICD10'])}"

    Given the CPT codes and their mapped descriptions: "{json.dumps(reference_codes['CPT'])}"

    Return the summary as a well-structured summary report separated into the sections, clearly seperated and labeled, so that downstream tools can easily reason over the content in JSON format. Do not include the '''json''' watermark.
    """
    
    # The model's reply is returned as-is (`response.content`); the prompt asks for
    # JSON, but nothing here parses or validates it.
    response = chat_client.invoke(prompt)
    return response.content

# Test Case
# summary_policy = summarize_policy_guideline(test_patient_age["insurance_policy_id"])
# print(summary_policy)

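# Tool for determining whether the procedures claimed by a patient are covered under their insurance policy
# Note: once decorated with @tool this is a LangChain tool object, so it cannot be called positionally
# like a plain function; call it with check_claim_coverage.invoke({"record_summary": ..., "policy_summary": ...})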
@tool
def check_claim_coverage(record_summary: str, policy_summary: str) -> str:
    """
    Checks whether the procedures claimed by a patient are covered under their insurance policy, using summaries of a patient's records and insurance policy guidelines from LLM reasoning.
    """
    # NOTE: the docstring above is kept as one line on purpose; the @tool decorator
    # exposes it to the agent as this tool's description, so rewording it changes
    # what the model is told about the tool.
    #
    # Both summaries are interpolated verbatim into the prompt. The coverage rules
    # (diagnosis match, listed procedure, age range with inclusive lower and exclusive
    # upper bound, gender, preauthorization) exist only as prompt text; the decision
    # is made by the model, not by code in this function.
    # Use the LLM to determine whether the procedures are covered
    prompt = f"""You are Healthcare Insurance Claim Approval Agent. Given the following patient record summary and insurance policy summary, determine whether the procedures claimed by the patient are covered under their insurance policy. The criteria for coverage are as follows:

    - The patient's diagnosis code(s) match the policy-covered diagnoses for the claimed procedure
    - The procedure code is explicitly listed in the policy, and all associated conditions are satisfied
    - The patient's age falls within the policy's defined age range (inclusive of the lower bound, exclusive of the upper bound)
    - The patient's gender matches the policy's requirement for that procedure
    - If preauthorization is required by the policy, it must have been obtained

    Only procedures explicitly listed in the patient record should be evaluated.

    Given the patient record summary: "{record_summary}"

    Given the insurance policy summary: "{policy_summary}"

    Return the output into three sections, clearly seperated and labeled in JSON format:

    - Coverage Review: Step-by-step analysis for the claimed procedure, detailing the checks performed. (each patient has only one procedure)
    - Summary of Findings: Summary of which coverage requirements were met or not met.
    - Final Decisions: For each procedure for the claim, return either "APPROVE" or "ROUTE FOR REVIEW" with a brief explanation of the reasoning behind it.

     Do not include the '''json''' watermark.
    """

    # The model's reply is returned as-is (`response.content`); the prompt asks for
    # JSON, but nothing here parses or validates it.
    response = chat_client.invoke(prompt)
    return response.content

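# Test Case
# A @tool-decorated function is a LangChain tool object, so a tool with several arguments
# is called through .invoke() with a dict of arguments rather than positionally:
# response = check_claim_coverage.invoke({"record_summary": summary_record, "policy_summary": summary_policy})
# print(response)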


## Define Tools to be Used by the Agent

In [0]:
# Tools handed to the agent, listed in the order the agent prompt asks for them.
# Each one is a function decorated with @tool in the cell above.
tools = [
    summarize_patient_record,
    summarize_policy_guideline,
    check_claim_coverage,
]

## Define Agent Instructions Prompt

In [0]:
# Instruction prompt for the Agent
# System instructions for the agent: the fixed tool order, the required output
# format, and one worked example per decision.
# The examples are written carefully because the model tends to imitate them:
# both use the same "- Decision:" / "- Reason:" bullets, and the typo ("coerage")
# and malformed amount ("$3500.000") of the earlier version are corrected so they
# are not copied into generated responses.
AGENT_PROMPT = """
    You are a Healthcare Insurance Claim Approval Agent.
    You must use exactly three tools in the following order:
    1. summarize_patient_record(record_str)
    2. summarize_policy_guideline(policy_id)
    3. check_claim_coverage(record_summary, policy_summary)

    For each patient record, follow the order:
    - Summarize the patient record.
    - Summarize the corresponding insurance policy.
    - Check claim coverage using both summaries.

    Given a patient record, your final output must follow this format:
    Decision: either APPROVE or ROUTE FOR REVIEW
    Reason: Concise reasoning referencing specific coverage rules and policy conditions.

    Here is a sample for approval:
    - Decision: APPROVE
    - Reason: The claim for the complete blood count (CPT code 85025) is approved because this procedure is covered under the policy for the diagnosis of urinary tract infection (N39.0), which applies to the patient's diagnosis. The patient's age (43) and gender (Female) meet the policy requirements, and preauthorization was obtained. Additionally, the claim amount of $3500.00 would be paid in full, as the bill amount is less than the permissible amount of $5947.37. All coverage requirements are satisfied.

    Here is a sample for manual review routing:
    - Decision: ROUTE FOR REVIEW
    - Reason: The claim for the electrocardiogram (CPT code 93000) cannot be automatically approved because the patient's age of 69 exceeds the policy's allowed age range of 11 to 63. Although the diagnosis of low back pain (M54.5) and the procedure code match the policy's covered conditions, the age requirement is not met. Therefore, the claim needs to be routed for further manual review.

    Do not make up information. Do not use data anywhere else besides from the patient record and policy.
    """

## Create ReAct Agent

In [0]:
# Create the agent using tools, LLM, and the system instruction prompt
# Build the ReAct agent: the chat model drives it, `tools` is what it may call,
# and AGENT_PROMPT is supplied as its instruction prompt.
healthcare_agent = create_react_agent(chat_client, tools, prompt=AGENT_PROMPT)

## Utility Function to Call Agent

In [0]:
# Utility function to call the agent
def call_agent(agent, query, verbose=False):
    """
    Runs the agent on one query, renders its final answer, and returns it.

    Args:
        agent: Object exposing `stream(inputs, stream_mode=...)`; each streamed
            event is read as a mapping whose "messages" entry is a list.
        query: Text sent to the agent as a single human message.
        verbose: When True, pretty-print the latest message of every streamed event.

    Returns:
        The `content` of the last message of the last streamed event.

    Raises:
        RuntimeError: If the agent's stream yields no events at all.
    """
    final_message = None

    # Stream the agent's execution for the given record
    for event in agent.stream(
        {"messages": [HumanMessage(content=query)]},
        stream_mode='values'
    ):
        # Remember the newest message so it is still available after the loop
        final_message = event["messages"][-1]

        # If verbose is enabled, print each intermediate message
        if verbose:
            final_message.pretty_print()

    # An empty stream previously failed with an unbound-variable error on `event`;
    # report the real problem instead.
    if final_message is None:
        raise RuntimeError("Agent stream produced no events; there is no response to return.")

    # Display the final response from the agent as Markdown
    display(Markdown(final_message.content))

    # Return the final message content for downstream use
    return final_message.content

## Testing Agent on validation_records

In [0]:
# Adding age field for validation_records.json
# get_patient_age adds "age" to each record in place and returns the same dict
validation_records_age = [get_patient_age(record) for record in validation_records]

# Testing all validation_records
for record in validation_records_age:
    # Send the record as JSON so every value travels with its field name.
    # str(record.values()) produced "dict_values([...])", which dropped the keys
    # and left the model to guess which value was which.
    result = call_agent(healthcare_agent, json.dumps(record), verbose=False)
    print()

## Running Agent on test_records

In [0]:
# Adding age field for test_records.json
# get_patient_age adds "age" to each record in place and returns the same dict
test_records_age = [get_patient_age(record) for record in test_records]

# Testing all test_records
test_results = [] # List to store agent responses, in the same order as test_records_age
for record in test_records_age:
    # Send the record as JSON so every value travels with its field name.
    # str(record.values()) produced "dict_values([...])", which dropped the keys
    # and left the model to guess which value was which.
    result = call_agent(healthcare_agent, json.dumps(record), verbose=False)
    test_results.append(result)
    print()

## Format Responses into CSV format

In [0]:
# Creating headers for submission.csv
# Responses are matched to records purely by position, so a partial or repeated
# run of the previous cell must not be written out silently.
if len(test_results) != len(test_records_age):
    raise ValueError(
        f"Expected one response per test record, got {len(test_results)} responses "
        f"for {len(test_records_age)} records; re-run the test_records cell."
    )

# Header row for submission.csv
data = [["patient_id", "generated_response"]]

# Adding data to submission.csv: one row per (record, response) pair
data.extend(
    [record["patient_id"], response]
    for record, response in zip(test_records_age, test_results)
)

# Writing data to submission.csv
# newline="" lets the csv module control line endings; the explicit encoding keeps
# non-ASCII characters in model output from depending on the platform default.
with open("submission.csv", 'w', newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerows(data)