In [5]:
import gc  # Garbage collection to free up RAM
import os

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)

# 1. Loading PDF
loader = PyPDFLoader("suzetrigine_study.pdf")
docs = loader.load()

# 2. Splitting into smaller chunks (less RAM usage per chunk)
# RecursiveCharacterTextSplitter tries paragraph breaks first and then falls
# back to line breaks, spaces and single characters until a chunk fits within
# chunk_size. CharacterTextSplitter cuts on one separator only ("\n\n" by
# default), so text without blank lines -- common in PDF extraction -- can
# come back as oversized chunks that dilute retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = text_splitter.split_documents(docs)

# 3. Using an In-Memory search (instead of ChromaDB files)
# Nothing is persisted to disk; the index is rebuilt on every run.
# NOTE(review): "phi3" is a general-purpose chat model. A dedicated embedding
# model (e.g. "nomic-embed-text") may retrieve more relevant chunks -- confirm
# which models are pulled locally before switching.
embeddings = OllamaEmbeddings(model="phi3")
vectorstore = DocArrayInMemorySearch.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever()

# 4. Chat model that writes the final answer
model = ChatOllama(model="llama3")

# 5. Prompt layout: the retrieved context first, then the user's question
template = (
    "Answer based ONLY on the context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# 6. Building the Chain
def format_docs(docs):
    """Return the page_content of each item in docs, joined by blank lines."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# Map the incoming question onto the two variables the prompt template uses:
# "context" is the retriever's result passed through format_docs, and
# "question" is the input itself, unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": lambda question: question,
}
rag_chain = prompt_inputs | prompt | model | StrOutputParser()

# 7. Run one query, then trigger a garbage-collection pass
# NOTE(review): docs, splits, vectorstore and the chain stay bound to
# module-level names, so gc.collect() presumably reclaims only unreferenced
# temporaries, not the index itself -- confirm if lower RAM use is the goal.
question = "What dose of Suzetrigine was administered?"
try:
    print("--- Local AI Analyst ---")
    response = rag_chain.invoke(question)
    print(f"Answer: {response}")
finally:
    gc.collect()  # runs whether or not the query raised

--- Local AI Analyst ---
Answer: There is no mention of the dose of Suzetrigine being administered in this context. The information provided only discusses the results and outcomes of the studies, without mentioning the dosage of the medication.
