# Components

In [13]:
import os
from langchain_ollama import OllamaEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
import faiss

embeddings = OllamaEmbeddings(model="mxbai-embed-large")

d = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(d)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [14]:
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

sys = """
Please answer the question based on the provided information and references.
There are 4 options for the answer, strictly choose one of them and say nothing else.
Example output: "A"
"""

def ask_gpt(prompt: str, model="gpt-4.1-nano") -> str:
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system", 
                "content": sys
            },
            {
                "role": "user",
                "content": prompt,
            }
        ]
    )
    return response.choices[0].message.content

In [15]:
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

def load_high(datapath):
    all_content = ""  # Initialize an empty string to hold all the content
    with open(datapath, 'r', encoding='utf-8') as file:
        for line in file:
            all_content += line.strip() + "\n"  # Append each line to the string, add newline character if needed
    return Document(page_content=all_content, metadata={"source": datapath})

# Path to the saved vector store
faiss_index_path = "/home/ngjabach/Documents/NgJaBach/Medical-Graph-RAG/faiss_index"
if os.path.exists(faiss_index_path):
    vector_store = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)
else:
    documents = []
    data_path = "./books_medqa"
    files = [file for file in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, file))]
    for i, file_name in enumerate(files):
        print(f"Processing Book {i + 1}: {file_name}")
        file_path = os.path.join(data_path, file_name)
        documents.append(load_high(file_path))
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=400)
    all_splits = text_splitter.split_documents(documents)
    print("Begin vector storing...")
    _ = vector_store.add_documents(documents=all_splits)
    print("Done vector storing!")

    vector_store.save_local(faiss_index_path)

retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

Processing Book 1: Physiology_Levy.txt
Processing Book 2: Biochemistry_Lippincott.txt
Processing Book 3: Pathoma_Husain.txt
Processing Book 4: Cell_Biology_Alberts.txt
Processing Book 5: Anatomy_Gray.txt
Processing Book 6: InternalMed_Harrison.txt
Processing Book 7: Gynecology_Novak.txt
Processing Book 8: Pharmacology_Katzung.txt
Processing Book 9: First_Aid_Step1.txt
Processing Book 10: Histology_Ross.txt
Processing Book 11: Psichiatry_DSM-5.txt
Processing Book 12: Pathology_Robbins.txt
Processing Book 13: Neurology_Adams.txt
Processing Book 14: Surgery_Schwartz.txt
Processing Book 15: Immunology_Janeway.txt
Processing Book 16: First_Aid_Step2.txt
Processing Book 17: Obstentrics_Williams.txt
Processing Book 18: Pediatrics_Nelson.txt
Begin vector storing...
Done vector storing!


In [16]:
testing = """
---Start of Context---
{Context}
---End of Context---

Question: A junior orthopaedic surgery resident is completing a carpal tunnel repair with the department chairman as the attending physician. During the case, the resident inadvertently cuts a flexor tendon. The tendon is repaired without complication. The attending tells the resident that the patient will do fine, and there is no need to report this minor complication that will not harm the patient, as he does not want to make the patient worry unnecessarily. He tells the resident to leave this complication out of the operative report. 
Which of the following is the correct next action for the resident to take?
A. Disclose the error to the patient and put it in the operative report
B. Tell the attending that he cannot fail to disclose this mistake
C. Report the physician to the ethics committee
D. Refuse to dictate the operative report
"""

In [17]:
class State(TypedDict):
    context: List[Document]
    question: str

# Define application steps
def retrieve(state: State):
    print("Begin retriving!")
    docs = retriever.invoke(state["question"])
    print("Done retrieving!")
    return {"context": docs}

def generate(state: State):
    print("Begin generating!")
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = testing.format(context = docs_content)
    response = ask_gpt(messages)
    print("Done generating!")
    return {"answer": response}

print("Compiling...")
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()
print("Done compiling!")

Compiling...
Done compiling!


In [18]:
response = graph.invoke({"question": testing})
print(response["answer"])

Begin retriving!
Done retrieving!
Begin generating!


KeyError: 'Context'