Ingesting PDF

In [1]:
!pip install langchain




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader

In [3]:
local_path = ["1.pdf", "2.pdf", "3.pdf", "4.pdf", "5.pdf"]
all_docs = []

if local_path:
    for path in local_path:
        loader = UnstructuredPDFLoader(file_path=path)
        data = loader.load()
        all_docs.extend(data)

    print(f"Total documents loaded: {len(all_docs)}")
else:
    print("PDF file not uploaded")

Total documents loaded: 5


In [4]:
#all_docs[0].page_content

Vector Embeddings

In [5]:
!ollama pull mxbai-embed-large

[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ´ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â § [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‡ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â � [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 819c2adf5ce6: 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 669 MB                         [K
pulling c71d239df917: 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ

In [6]:
!ollama list

NAME                        ID              SIZE      MODIFIED               
mxbai-embed-large:latest    468836162de7    669 MB    Less than a second ago    
llama3.2:latest             a80c4f17acd5    2.0 GB    2 weeks ago               


In [7]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [8]:
#Split and chunk
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 7500, chunk_overlap = 100)
chunks = text_splitter.split_documents(all_docs)

In [9]:
#Add to vector database
vector_db = Chroma.from_documents(
    documents = chunks,
    embedding = OllamaEmbeddings(model = "mxbai-embed-large", show_progress = True),
    collection_name = "local-rag"
)

  embedding = OllamaEmbeddings(model = "mxbai-embed-large", show_progress = True),
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████| 110/110 [04:13<00:00,  2.31s/it]


Retrieval

In [10]:
!pip install -U langchain-ollama




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [12]:
#LLM from Ollama
local_model = "llama3.2"
llm = ChatOllama(model=local_model)

In [32]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI assistant who only answers based on the given context. You are supposed to identify the validity of an insurance policy given these conditions, give answer in no more than 100 characters: 
    Original question: {question}"""
)

In [54]:
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(),
    llm,
    prompt=QUERY_PROMPT
)

#RAG prompt
template = """
You are an insurance assistant. Based only on the context below, answer the user's question in **exactly one short sentence**, and respond strictly in this JSON format:

{{
  "answer": "<your short answer here>"
}}

Context:
{context}

Question: {question}
"""


prompt = ChatPromptTemplate.from_template(template)

In [55]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [56]:
retriever

MultiQueryRetriever(retriever=VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000002564D8416A0>, search_kwargs={}), llm_chain=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI assistant who only answers based on the given context. You are supposed to identify the validity of an insurance policy given these conditions, give answer in no more than 100 characters: \n    Original question: {question}')
| ChatOllama(model='llama3.2')
| LineListOutputParser())

In [73]:
prompt = "46M, knee surgery, Pune, 3-month policy"

In [86]:
back_prompt = "Output only one line, which starts with yes or no, which is the verdict. Explain your reasoning"

In [87]:
chain.invoke(prompt + back_prompt)

OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.13s/it]


'Yes. The policy mentions a rider for knee surgery, and M has undergone knee surgery within the coverage period of 3 months, so it is likely that the policy will be paid out.'