In [89]:
# Set up the credentials in the environment to run the chat bot - Windows (PowerShell)
# setx AWS_ACCESS_KEY_ID YOUR_ACCESS_KEY_ID 
# setx AWS_SECRET_ACCESS_KEY YOUR_SECRET_ACCESS_KEY 
# setx AWS_DEFAULT_REGION us-east-1

In [None]:
# pip install -U "langchain-core"
# pip install -qU langchain-aws
# pip install langchain langchain-text-splitters langchain-community bs4

In [63]:
from langchain.tools import tool, ToolRuntime
from langchain_aws import ChatBedrock
from langchain.agents import create_agent
from elasticsearch import Elasticsearch
from langgraph.checkpoint.memory import InMemorySaver  
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.runnables import RunnableConfig
import json

In [61]:
from langchain_aws import ChatBedrock

model = ChatBedrock(model="us.anthropic.claude-sonnet-4-5-20250929-v1:0")

In [4]:
from langchain_aws import BedrockEmbeddings

embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")

In [11]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [12]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")

Total characters: 43047


In [13]:
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 63 sub-documents.


In [15]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [16]:
document_ids = vector_store.add_documents(documents=all_splits)

print(document_ids[:3])

['bf3eb605-70ad-47f8-a4c3-d12be2fc14ff', '8f35abba-f1bc-49e7-b386-ce4bef9845c9', '274d0396-a830-4428-a505-2267690135ae']


In [19]:
from langchain.agents import create_agent


tools = [retrieve_context]
# If desired, specify custom instructions
prompt = (
    "You have access to a tool that retrieves context from a blog post. "
    "Use the tool to help answer user queries."
)
agent = create_agent(model, tools, system_prompt=prompt)

In [20]:
query = (
    "What is the standard method for Task Decomposition?\n\n"
    "Once you get the answer, look up common extensions of that method."
)

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


What is the standard method for Task Decomposition?

Once you get the answer, look up common extensions of that method.

Certainly! I'll retrieve the information about the standard method for Task Decomposition and then look up common extensions of that method. Let me use the retrieval tool to get this information for you.
Tool Calls:
  retrieve_context (toolu_bdrk_01RQZ3bJbTsACcAcDLVgP6BJ)
 Call ID: toolu_bdrk_01RQZ3bJbTsACcAcDLVgP6BJ
  Args:
    query: What is the standard method for Task Decomposition? What are common extensions of that method?
Name: retrieve_context

Source: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 2578}
Content: Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.
Another quite distinct approach, LLM+P (Liu et al. 2023), invol

In [22]:
from elasticsearch import Elasticsearch

In [25]:
es = Elasticsearch(
        hosts=['https://sqml-dhmp.es.us-west-2.aws.found.io:9243'],
        basic_auth=('elastic', 'f7VU4FcOvsnrYr4poj9H4LG0')
    )
# Test the connection by getting cluster info
info = es.info()
print("Connected to Elasticsearch!")
print(f"Cluster name: {info['cluster_name']}")

Connected to Elasticsearch!
Cluster name: 19c8ca233239405e8c2ec66b872b1e05


In [27]:
index_info = es.indices.get(index='tiamd_prod_clinical_notes')
print(f"\nInformation for index '{index_info}':")


Information for index '{'tiamd_prod_clinical_notes': {'aliases': {}, 'mappings': {'properties': {'dateOfService': {'type': 'date', 'format': 'MM-DD-YYYY'}, 'dateOfServiceEpoch': {'type': 'long'}, 'ingestionDate': {'type': 'date', 'format': 'epoch_millis'}, 'ingestionDateTime': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'ingestionTS': {'type': 'long'}, 'location': {'type': 'keyword'}, 'locationname': {'type': 'keyword'}, 'mrn': {'type': 'keyword'}, 'noteId': {'type': 'keyword'}, 'noteType': {'type': 'keyword'}, 'pateintName': {'type': 'keyword'}, 'pateintname': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'patientID': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'patientId': {'type': 'keyword'}, 'patientMRN': {'type': 'keyword'}, 'patientName': {'type': 'keyword'}, 'patientname': {'type': 'keyword'}, 'rawdata': {'type': 'keyword'}, 'serviceDate': {'type': 'text', 'fields'

In [30]:
response = es.get(index='tiamd_prod_clinical_notes', id=98)

In [39]:
raw_text = response["_source"]["rawdata"]

In [87]:
print(raw_text)

MRN: Mrn6790 Erin Rajeev, MD Erin Rajeev, MD Physician Neurology Consults The patient can view the shared note after they get an active MyChart account This note has been shared with the patient Signed Date of Service: 11/05/2025 11:58 AM Consult Orders Signed 43912 Wood Ave, Sui 102 field Tkk, IP 4465 Phone #: 124-955-2345 | Fax #: 248-928-9088 Pavithra, MD MPH Tay, MD Rama, MD Isma DO Alen, DO Neurology consult Name: Ann Mathew DOB: 8/23/1955 Age: 70 y.o. Sex: female MRN: Mrn6790 CSN: 1000520864654 Admit date: 11/05/2025 Attending: Erin Rajeev, MD Date of consult:11/05/2025 CCD: Reason for Consult: Subjective HPI 70 y.o. female with a PMH of schizophrenia, CKD presents to the emergency room brought in with her sister for acute mental status change. Sister states yesterday and today patient has refused to eat and drink, has had very little conversation and response at home. Denies any fever chills nausea vomiting or diarrhea. States she was under the weather last week with a cough and

In [45]:
from langchain_core.documents import Document

# Put size inside the body parameter
response = es.search(
    index='tiamd_prod_clinical_notes', 
    body={
        "query": {"match_all": {}},
        "size": 100
    }
)

docs = []
for hit in response['hits']['hits']:
    doc = Document(
        page_content=hit["_source"]["rawdata"],
        metadata={
            "id": hit["_id"],
            "index": hit["_index"],
            "noteId": hit["_source"].get("noteId"),
            "patientName": hit["_source"].get("patientName"),
            "dateOfService": hit["_source"].get("dateOfService"),
        }
    )
    docs.append(doc)

In [48]:
docs[0:5]

[Document(metadata={'id': '98', 'index': 'tiamd_prod_clinical_notes', 'noteId': '98', 'patientName': None, 'dateOfService': None}, page_content='MRN: Mrn6790 Erin Rajeev, MD Erin Rajeev, MD Physician Neurology Consults The patient can view the shared note after they get an active MyChart account This note has been shared with the patient Signed Date of Service: 11/05/2025 11:58 AM Consult Orders Signed 43912 Wood Ave, Sui 102 field Tkk, IP 4465 Phone #: 124-955-2345 | Fax #: 248-928-9088 Pavithra, MD MPH Tay, MD Rama, MD Isma DO Alen, DO Neurology consult Name: Ann Mathew DOB: 8/23/1955 Age: 70 y.o. Sex: female MRN: Mrn6790 CSN: 1000520864654 Admit date: 11/05/2025 Attending: Erin Rajeev, MD Date of consult:11/05/2025 CCD: Reason for Consult: Subjective HPI 70 y.o. female with a PMH of schizophrenia, CKD presents to the emergency room brought in with her sister for acute mental status change. Sister states yesterday and today patient has refused to eat and drink, has had very little co

In [49]:
all_splits = text_splitter.split_documents(docs)

In [51]:
all_splits[0:5]

[Document(metadata={'id': '98', 'index': 'tiamd_prod_clinical_notes', 'noteId': '98', 'patientName': None, 'dateOfService': None, 'start_index': 0}, page_content='MRN: Mrn6790 Erin Rajeev, MD Erin Rajeev, MD Physician Neurology Consults The patient can view the shared note after they get an active MyChart account This note has been shared with the patient Signed Date of Service: 11/05/2025 11:58 AM Consult Orders Signed 43912 Wood Ave, Sui 102 field Tkk, IP 4465 Phone #: 124-955-2345 | Fax #: 248-928-9088 Pavithra, MD MPH Tay, MD Rama, MD Isma DO Alen, DO Neurology consult Name: Ann Mathew DOB: 8/23/1955 Age: 70 y.o. Sex: female MRN: Mrn6790 CSN: 1000520864654 Admit date: 11/05/2025 Attending: Erin Rajeev, MD Date of consult:11/05/2025 CCD: Reason for Consult: Subjective HPI 70 y.o. female with a PMH of schizophrenia, CKD presents to the emergency room brought in with her sister for acute mental status change. Sister states yesterday and today patient has refused to eat and drink, has 

In [52]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

document_ids = vector_store.add_documents(documents=all_splits)

print(document_ids[:3])

['c94c936a-26f7-4ede-aea3-2bfb26c716c0', '11beb047-dc60-4406-9ef3-5e96490bb903', 'd447bb77-32ea-45f8-8293-58e00fb6e04e']


In [67]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=7)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

@tool(description="Retrieve the mappings for a specified Elasticsearch index")
def get_index_mappings(index: str, runtime: ToolRuntime = None) -> dict:
    mappings = es.indices.get_mapping(index=index)
    return mappings

@tool(description="Execute an Elasticsearch DSL query and return the results")
def run_es_query(index: str, dsl_query: str, runtime: ToolRuntime = None) -> dict:
    query_dict = json.loads(dsl_query)

    # Replace 'body' with individual parameters
    query_params = {}
    if "query" in query_dict:
        query_params["query"] = query_dict["query"]
    if "aggs" in query_dict:
        query_params["aggs"] = query_dict["aggs"]
    if "size" in query_dict:
        query_params["size"] = query_dict["size"]

    response = es.search(index=index, **query_params)
    return {"dsl": query_dict, "results": response}


In [76]:
from langchain.agents import create_agent


tools = [retrieve_context, get_index_mappings, run_es_query]
# If desired, specify custom instructions
prompt = (
    """ 
You are a clinical data assistant.

You have THREE tools:

1. retrieve_context
   - Use this ONLY to understand, summarize, or explain clinical notes
   - Example: patient history, consultation summary, medications

2. get_index_mappings
   - Use this BEFORE writing any Elasticsearch query

3. run_es_query
   - Use this ONLY for counts, filters, or aggregations

---------------------------------
## TOOL SELECTION RULES

If the question is about a specific patient or clinical text
  - Use retrieve_context
  - Do NOT use Elasticsearch aggregations

If the question asks for numbers, counts, totals, or statistics
  - Do NOT use retrieve_context
  - First call get_index_mappings
  - Then call run_es_query

---------------------------------
## ELASTICSEARCH RULES

• Query ONLY this index: tiamd_prod_processed_notes_json
• Use cardinality for "how many" or "unique"
• Use keyword fields for aggregations
• Use size = 0 for aggregation-only queries
• Do NOT guess field names

---------------------------------
Choose the correct tool. Using the wrong tool is an error.


"""
)
agent = create_agent(model, tools, system_prompt=prompt, checkpointer=InMemorySaver())

In [81]:
result = agent.invoke(
    {"messages": [{"role": "user", "content": "Number of patients under doctor madhavan?"}]},
    {"configurable": {"thread_id": "1"}}
)

In [83]:
print(result['messages'][-1].content)

**Number of patients under Dr. Madhavan: 10**

This is based on finding 27 clinical notes where Dr. Madhavan appears as an attending physician, consulting specialist, or note author, representing 10 unique patients (identified by unique MRNs).


In [85]:
result = agent.invoke(
    {"messages": [{"role": "user", "content": "List the patients in the data"}]},
    {"configurable": {"thread_id": "1"}}
)

In [86]:
print(result['messages'][-1].content)

## **All Patients in the Dataset (12 unique patients):**

1. **Frank P Barazsu**
   - MRN: 101972538
   - Age: 74 years
   - Sex: Male
   - Number of Notes: 4

2. **Debra Garrison**
   - MRN: 101548585
   - Age: 70 years
   - Sex: Female
   - Number of Notes: 3

3. **Michele Hedy Schlaud**
   - MRN: 101587502
   - Age: 74 years
   - Sex: Female
   - Number of Notes: 3

4. **Wilma Nicholson**
   - MRN: 101697051
   - Age: 88 years
   - Sex: Female
   - Number of Notes: 3

5. **William C Robinson**
   - MRN: 101793660
   - Age: 65 years
   - Sex: Male
   - Number of Notes: 3

6. **Darleen M Sutherland**
   - MRN: 101891023
   - Age: 87 years
   - Sex: Female
   - Number of Notes: 3

7. **Judith Masterson**
   - MRN: 102025939
   - Age: 66 years
   - Sex: Female
   - Number of Notes: 3

8. **Marvin D Silver**
   - MRN: 20319274
   - Age: 88 years
   - Sex: Male
   - Number of Notes: 3

9. **Doris L Pippen**
   - MRN: 101700652
   - Age: 79 years
   - Sex: Female
   - Number of Notes: 2

1

In [None]:
result = agent.invoke(
    {"messages": [{"role": "user", "content": "What are the medications for the patient Doris L Pippen"}]},
    {"configurable": {"thread_id": "1"}}
)

In [80]:
print(result['messages'][-1].content)

Based on the clinical notes for **Doris L Pippen** (MRN: 101700652, DOB: 12/3/1945), here are her medications:

## **Medications Prior to Admission:**
- **amLODIPine (NORVASC)** 2.5 mg tablet - once daily
- **losartan-hydroCHLOROthiazide (HYZAAR)** 100-12.5 mg tablet - once daily
- **rosuvastatin (CRESTOR)** 20 mg tablet - once daily

## **Current Outpatient Medications** (from another note):
- **amLODIPine (NORVASC)** 5 mg tablet
- **atorvastatin (LIPITOR)** 40 mg tablet - once daily for hypercholesterolemia
- **clopidogrel (PLAVIX)** 75 mg tablet - once daily
- **cyanocobalamin** 1,000 mcg tablet (Vitamin B-12)
- **donepezil (ARICEPT)** 10 mg tablet - nightly
- **ipratropium (ATROVENT)** 0.06% nasal spray - 2 sprays twice daily
- **latanoprost (XALATAN)** 0.005% ophthalmic solution
- **loratadine (CLARITIN)** 10 mg tablet - as needed
- **losartan (COZAAR)** 100 mg tablet - once daily for hypertension
- **pantoprazole (PROTONIX)** 40 mg

The medication records show some variations bet

In [74]:
result = agent.invoke(
    {"messages": [{"role": "user", "content": "What was my last questions"}]},
    {"configurable": {"thread_id": "1"}}
)

In [75]:
result

{'messages': [HumanMessage(content='Number of patients in this dataset?', additional_kwargs={}, response_metadata={}, id='4c15a4d2-e292-4864-baac-2395a8a01aa9'),
  AIMessage(content="I need to find the number of unique patients in the dataset. This is a count/aggregation question, so I'll first get the index mappings to identify the correct field for patient identification, then run an Elasticsearch query.", additional_kwargs={'usage': {'prompt_tokens': 1315, 'completion_tokens': 112, 'cache_read_input_tokens': 0, 'cache_write_input_tokens': 0, 'total_tokens': 1427}, 'stop_reason': 'tool_use', 'thinking': {}, 'model_id': 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', 'model_name': 'us.anthropic.claude-sonnet-4-5-20250929-v1:0'}, response_metadata={'usage': {'prompt_tokens': 1315, 'completion_tokens': 112, 'cache_read_input_tokens': 0, 'cache_write_input_tokens': 0, 'total_tokens': 1427}, 'stop_reason': 'tool_use', 'thinking': {}, 'model_id': 'us.anthropic.claude-sonnet-4-5-20250929-v1

In [56]:
query = "What information is available in these clinical notes?"

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


What information is available in these clinical notes?

To answer your question about what information is available in the clinical notes, I'll need to use the retrieval tool to get an overview of the content. Let me do that for you.
Tool Calls:
  retrieve_context (toolu_bdrk_012A9VHjuWkFZgeTF95bNUZn)
 Call ID: toolu_bdrk_012A9VHjuWkFZgeTF95bNUZn
  Args:
    query: What information is available in these clinical notes? Give an overview of the content.
Name: retrieve_context

Source: {'id': '104', 'index': 'tiamd_prod_clinical_notes', 'noteId': '104', 'patientName': None, 'dateOfService': None, 'start_index': 3187}
Content: &nbsp; ALT 16 10/03/2025 &nbsp; AST 34 10/03/2025 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Lab Results Component Value Date &nbsp; CHOL 189 10/05/2025 &nbsp; TRIG 61 10/05/2025 &nbsp; HDL 57 10/05/2025 &nbsp; LDLCALC 120 10/05/2025 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; Lab Results Component Value Date &nbsp; VITAMINB12 1,414 (H) 07/08/2025 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 

In [57]:
query = "Number of patients in this dataset?"

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


Number of patients in this dataset?

To answer your question about the number of patients in the dataset, I'll need to retrieve some context information. Let me use the retrieval tool to get this information for you.
Tool Calls:
  retrieve_context (toolu_bdrk_013d63UArvRPTc8vSdgetxQz)
 Call ID: toolu_bdrk_013d63UArvRPTc8vSdgetxQz
  Args:
    query: Number of patients in the dataset
Name: retrieve_context

Source: {'id': '109', 'index': 'tiamd_prod_clinical_notes', 'noteId': '109', 'patientName': None, 'dateOfService': None, 'start_index': 4790}
Content: ROS is negative except for symptoms mentioned in HPI &nbsp; Past Medical History: No date: Abdominal pain No date: Acid reflux No date: Altered level of consciousness Comment: in past; pt does have short term memory problems per daughter No date: Aneurysm (CMS/HCC V24) No date: Anxiety No date: Arthritis No date: Cerebral aneurysm No date: Colitis No date: Constipation No date: CVA (cerebral vascular accident) (CMS/HCC V24, CMS/HCC V28

In [58]:
query = "Number of patients under doctor madhavan?"

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


Number of patients under doctor madhavan?

To answer this question, we need to retrieve information about patients under Dr. Madhavan's care. Let's use the retrieve_context function to get this information.
Tool Calls:
  retrieve_context (toolu_bdrk_014P7pvY89iTxSjDWriZWw4F)
 Call ID: toolu_bdrk_014P7pvY89iTxSjDWriZWw4F
  Args:
    query: Number of patients under doctor Madhavan
Name: retrieve_context

Source: {'id': '102', 'index': 'tiamd_prod_clinical_notes', 'noteId': '102', 'patientName': None, 'dateOfService': None, 'start_index': 1}
Content: Barazsu, Frank P MRN:&nbsp;101972538 &nbsp; Ramesh Madhavan, MD Physician Neurology &nbsp; Consults&nbsp;&nbsp;&nbsp; Signed &nbsp; Date of Service:&nbsp;10/5/2025 &nbsp;7:44 AM &nbsp; &nbsp; &nbsp; Signed &nbsp; 43902 Woodward Ave, Suite 100 Bloomfield Twp, MI 48302 Phone #: 248-955-9949 | Fax #: 248-928-2274 Pratik Bhattacharya, MD MPH Taylor Graham, MD Ramesh Madhavan, MD Ismail Rahal DO Alex Tobar, DO &nbsp; Neurology consult &nbsp; Name

In [60]:
query = "Give the soap notes for the patient Frank P Barazsu"

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


Give the soap notes for the patient Frank P Barazsu

To provide you with the SOAP notes for the patient Frank P Barazsu, I'll need to retrieve that information from the clinical notes. Let me use the available tool to fetch this data for you.
Tool Calls:
  retrieve_context (toolu_bdrk_01K9qTYRKBHwt7T4jbGN643f)
 Call ID: toolu_bdrk_01K9qTYRKBHwt7T4jbGN643f
  Args:
    query: SOAP notes for Frank P Barazsu
Name: retrieve_context

Source: {'id': '102', 'index': 'tiamd_prod_clinical_notes', 'noteId': '102', 'patientName': None, 'dateOfService': None, 'start_index': 1}
Content: Barazsu, Frank P MRN:&nbsp;101972538 &nbsp; Ramesh Madhavan, MD Physician Neurology &nbsp; Consults&nbsp;&nbsp;&nbsp; Signed &nbsp; Date of Service:&nbsp;10/5/2025 &nbsp;7:44 AM &nbsp; &nbsp; &nbsp; Signed &nbsp; 43902 Woodward Ave, Suite 100 Bloomfield Twp, MI 48302 Phone #: 248-955-9949 | Fax #: 248-928-2274 Pratik Bhattacharya, MD MPH Taylor Graham, MD Ramesh Madhavan, MD Ismail Rahal DO Alex Tobar, DO &nbsp; Neu