# Components

In [47]:
import os
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_chroma import Chroma

# Embedding model handed to Chroma as its embedding function.
gpt4all_embd = GPT4AllEmbeddings()

# Chroma collection "mmb" that the retrieve step adds document chunks to.
# No persist_directory is passed here; add e.g.
# persist_directory="./chroma_langchain_db" to save the data locally.
vector_store = Chroma(embedding_function=gpt4all_embd, collection_name="mmb")

In [48]:
from openai import OpenAI

# OpenAI-compatible client pointed at the Llama API endpoint.
#
# The key is read with os.environ[...] rather than .get(): passing api_key=None
# makes the OpenAI client fall back to the OPENAI_API_KEY environment variable,
# which would then be sent to this third-party base_url. Raising KeyError when
# LLAMA_API_KEY is unset avoids that silent fallback.
client = OpenAI(
    api_key=os.environ["LLAMA_API_KEY"],
    base_url="https://api.llama-api.com/",
)

def ask_gpt(prompt, model="llama3.2-3b", max_tokens=512) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model: Model name passed to the chat-completions endpoint. Defaults to
            "llama3.2-3b"; pass e.g. "llama3.3-70b" to switch models without
            editing this function.
        max_tokens: Upper bound on generated tokens passed to the endpoint.

    Returns:
        The content of the first choice's message, or an empty string when the
        API returns no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    # message.content may be None; normalise so the annotated ``str`` return
    # type holds for callers.
    return response.choices[0].message.content or ""

In [49]:
# Prompt skeleton for the LLM call. It is filled with str.format and expects
# exactly three named fields: {record}, {task} and {guideline}.
template = '''
You are an assistant specialized in answering surgical tasks.
Use the retrieved context below to provide a concise, clinically accurate answer to the question.
Clearly reference relevant guidelines or sources from the context where possible.
If you cannot answer based on the provided information, say you don't know.
Keep the answer clear and concise, but provide enough detail to be medically useful.
Record: {record}
Task: {task}
Guideline: {guideline}
Answer:
'''

# Task descriptions; one of these is supplied as the "task" entry of the
# graph input.
task1, task2, task3, task4 = (
    "Identify laboratory results outside of normal reference ranges",
    "Identify unavailable preoperative tests",
    "Surgical recommendation",
    "Prepare sample operative notes.",
)

# Sample patient record supplied as the "record" entry of the graph input.
# The text (including its trailing spaces) is sent to the model verbatim.
record = '''
Patient Name: Mr. John Smith  
Age: 65 years  
Gender: Male  
Medical History:
- Type 2 Diabetes Mellitus  
- Hypertension  
- Known multivessel coronary artery disease with involvement of the left anterior descending artery

Presenting Clinical Data:  
- Laboratory Results: 
  - Hemoglobin: 9.0 g/dL
- Preoperative Evaluation:
  - Incomplete assessment
'''

In [50]:
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain.document_loaders import PyPDFLoader
from tqdm import tqdm

# Define state for application
class State(TypedDict):
    """Keys of the state dictionary shared by the graph steps."""

    sauce: str  # path of the PDF that retrieve() opens with PyPDFLoader
    task: str  # task text; used as the similarity-search query and in the prompt
    record: str  # patient record text substituted into the prompt
    guideline: List[Document]  # chunks returned by the similarity search in retrieve()
    answer: str  # model reply produced by generate()

# Define application steps
def retrieve(state: State):
    """Index the PDF named by ``state["sauce"]`` and fetch chunks for the task.

    The PDF is loaded, split into 1000-character chunks with a 200-character
    overlap, added to ``vector_store``, and then searched using
    ``state["task"]`` as the query.

    Args:
        state: Graph state; reads the "sauce" (PDF path) and "task" keys.

    Returns:
        A partial state update ``{"guideline": [...]}`` holding the documents
        returned by the similarity search.
    """
    source = state["sauce"]

    # Load and chunk contents of the pdf
    docs = PyPDFLoader(source).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    all_splits = text_splitter.split_documents(docs)

    # Index chunks under deterministic per-source ids. Without explicit ids,
    # every invocation adds the same chunks again under new ids, and the
    # duplicates crowd the top-k search results. Stable ids let the store
    # overwrite existing entries instead.
    if all_splits:  # nothing to add for a PDF that yields no chunks
        chunk_ids = [f"{source}:{position}" for position in range(len(all_splits))]
        vector_store.add_documents(documents=all_splits, ids=chunk_ids)

    retrieved_docs = vector_store.similarity_search(state["task"])
    return {"guideline": retrieved_docs}

def generate(state: State):
    """Build the prompt from the state and ask the model for an answer.

    Args:
        state: Graph state; reads the "record", "task" and "guideline" keys.

    Returns:
        A partial state update ``{"answer": <model reply>}``.
    """
    # Join the text of the retrieved chunks. Formatting the Document list
    # itself would place its repr in the prompt instead of readable text.
    docs_content = "\n\n".join(doc.page_content for doc in state["guideline"])
    prompt = template.format(
        record=state["record"],
        task=state["task"],
        guideline=docs_content,
    )
    return {"answer": ask_gpt(prompt)}

# Assemble the two-step pipeline (retrieve, then generate), make "retrieve"
# the entry point, and compile it into a runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [51]:
# Run the pipeline for task 1 against task1.pdf and show the model's answer.
params = dict(sauce="task1.pdf", task=task1, record=record)
response = graph.invoke(params)
print(response["answer"])

Based on the provided context, I don't know which laboratory results are outside of normal reference ranges for Mr. John Smith, as the context does not provide the normal reference ranges for the patient's laboratory results.
