# Components

In [None]:
import os
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_chroma import Chroma

# Embedding model handed to the vector store as its embedding function.
gpt4all_embd = GPT4AllEmbeddings()

# Chroma collection "mmb"; the same object is later used both to add the
# chunked documents and to run similarity searches.
vector_store = Chroma(
    collection_name="mmb",
    embedding_function=gpt4all_embd,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

In [5]:
from openai import OpenAI

# OpenAI-compatible client pointed at the Llama API endpoint.
client = OpenAI(
    # The key is read explicitly from LLAMA_API_KEY. Do not drop this argument:
    # the OpenAI client's built-in fallback looks at OPENAI_API_KEY instead.
    api_key=os.environ.get("LLAMA_API_KEY"),
    base_url="https://api.llama-api.com/"
)

def ask_gpt(prompt) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    The request is issued through the module-level ``client`` with the
    ``llama3.2-3b`` model and the completion capped at 512 tokens.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="llama3.2-3b",  # alternative model: "llama3.3-70b"
        messages=[user_message],
        max_tokens=512,
    )
    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message.content

# Prompt skeleton; filled with str.format using the fields {record},
# {context} and {task}.
template = '''
You are an assistant specialized in answering surgical tasks.
Use the retrieved context below to provide a concise, clinically accurate answer to the task.
Record: {record}
Context: {context}
Task: {task}
Answer:
'''

# Task descriptions. One of them is passed as the "task" entry when the
# graph is invoked.
task1 = "Identify laboratory results outside of normal reference ranges"

task2 = "Identify unavailable preoperative tests"

task3 = "Surgical recommendation"

task4 = "Prepare sample operative notes."

# Sample patient record; passed as the "record" entry when the graph is
# invoked and inserted into the prompt's {record} field.
record = '''
Patient Name: Mr. John Smith  
Age: 65 years  
Gender: Male  
Medical History:
- Type 2 Diabetes Mellitus  
- Hypertension  
- Known multivessel coronary artery disease with involvement of the left anterior descending artery

Presenting Clinical Data:  
- Laboratory Results: 
  - Hemoglobin: 9.0 g/dL
- Preoperative Evaluation:
  - Incomplete assessment
'''

In [None]:
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain.document_loaders import PyPDFLoader

def chunking(sauce: str, chunk_size: int, chunk_overlap: int) -> "List[Document]":
    """Load a PDF and split its pages into overlapping chunks.

    Args:
        sauce: Path of the PDF file, handed to ``PyPDFLoader``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``split_documents`` on the loaded pages, i.e. a list of
        document chunks (not a string).
    """
    print("Begin chunking...")
    docs = PyPDFLoader(sauce).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    splits = text_splitter.split_documents(docs)
    # Announce completion only once the split has actually been performed.
    print("Done!")
    return splits

# (file name, chunk size, chunk overlap) for each reference PDF to index.
pdf_sources = [
    ("labval.pdf", 200, 50),
    ("aortic.pdf", 1000, 200),
    ("coronary.pdf", 1000, 200),
    ("valvular.pdf", 1000, 200),
]

all_splits = []
for pdf_name, pdf_chunk_size, pdf_chunk_overlap in pdf_sources:
    # os.path.join yields the same "ExternalDoc\\<name>" path on Windows while
    # also producing a valid path on systems that use "/" as the separator.
    pdf_path = os.path.join("ExternalDoc", pdf_name)
    all_splits.extend(chunking(pdf_path, pdf_chunk_size, pdf_chunk_overlap))

print("Begin vector storing...")
_ = vector_store.add_documents(documents=all_splits)
print("Done vector storing!")

Begin chunking...
Done!
Begin chunking...
Done!
Begin chunking...
Done!
Begin chunking...
Done!


In [None]:
# Define state for application
class State(TypedDict):
    """State dictionary shared by the graph steps."""
    task: str  # task text; also used as the similarity-search query
    record: str  # patient record text inserted into the prompt
    context: List[Document]  # documents returned by the retrieve step
    answer: str  # model reply returned by the generate step

# Define application steps
def retrieve(state: State):
    """Look up documents similar to the task text.

    Returns a partial state update holding the matches under ``"context"``.
    """
    print("Begin retriving!")
    query = state["task"]
    matches = vector_store.similarity_search(query)
    print("Done retrieving!")
    return dict(context=matches)

def generate(state: State):
    """Build the prompt from the state, query the model, and return its answer.

    The retrieved documents' ``page_content`` values are joined with blank
    lines and inserted into ``template`` together with the record and task.
    The final prompt is also written to ``output/prompt.txt``.

    Returns:
        A partial state update holding the model reply under ``"answer"``.
    """
    print("Begin generating!")

    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    prompt = template.format(record=state["record"], task=state["task"], context=docs_content)

    # open(..., "w") does not create missing parent directories, so make sure
    # "output" exists instead of failing with FileNotFoundError.
    os.makedirs("output", exist_ok=True)
    with open("output/prompt.txt", "w", encoding="utf-8") as file:
        file.write(prompt)

    response = ask_gpt(prompt)

    print("Done generating!")

    return {"answer": response}

# Compile application and test
# Wire the steps: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
params = {"task": task1, "record": record}

# Both the generate step (prompt.txt) and this cell (response.txt) write into
# "output"; create it before invoking the graph so neither write fails with
# FileNotFoundError on a fresh checkout.
os.makedirs("output", exist_ok=True)

response = graph.invoke(params)

with open("output/response.txt", "w", encoding="utf-8") as file:
    file.write(response["answer"])

Begin retriving!
page_content='Circulation
e18 January 18, 2022 Circulation. 2022;145:e18–e114. DOI: 10.1161/CIR.0000000000001038
Circulation is available at www.ahajournals.org/journal/circ
*Writing committee members are required to recuse themselves from voting on sections to which their specific relationships with industry may apply; see Appendix 1 
for detailed information. †ACC/AHA Representative. ‡ACC/AHA Joint Committee on Clinical Practice Guidelines Liaison. §ACC/AHA Task Force on Data Standards 
Representative. ‖SCAI Representative.
ACC/AHA Joint Committee on Clinical Practice Guidelines Members, see page e80.
The American Heart Association requests that this document be cited as follows: Lawton JS, Tamis-Holland JE, Bangalore S, Bates ER, Beckie TM, Bischoff JM, Bittl 
JA, Cohen MG, DiMaio JM, Don CW, Fremes SE, Gaudino MF, Goldberger ZD, Grant MC, Jaswal JB, Kurlansky PA, Mehran R, Metkus TS Jr, Nnacheta LC, Rao SV, 
Sabik JF, Sellke FW, Sharma G, Yong CM, Zwischenberger BA

KeyboardInterrupt: 