In [1]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Folder (relative to the current working directory) that holds the PDF files.
DATA_PATH = "pdf"

# Build the directory loader first, then pull in whatever it returns.
loader = PyPDFDirectoryLoader(path=DATA_PATH)
docs = loader.load()

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: the chunk size and the overlap passed to the splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents into chunks for indexing.
chunks = text_splitter.split_documents(docs)

In [3]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

# Embed the chunks with the Ollama embedding model and index them in Chroma.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = Chroma.from_documents(documents=chunks, embedding=embeddings)

# Alternative embedding backend, kept disabled:
# from langchain_huggingface import HuggingFaceEmbeddings
# db=Chroma.from_documents(chunks, HuggingFaceEmbeddings(model="NovaSearch/stella_en_1.5B_v5"))

In [4]:
from langchain_ollama import OllamaLLM

# Name of the Ollama model that generates the answers.
LLM_MODEL = "qwen3:14b"
llm = OllamaLLM(model=LLM_MODEL)

In [5]:
from langchain_core.prompts import ChatPromptTemplate
# Prompt template with two placeholders: {context} and {input}.
# NOTE(review): the template asks for step-by-step thinking and a detailed
# answer, yet also asks for a concise answer and ends with "/no_think"
# (presumably the model's switch to disable thinking) -- confirm which
# behaviour is intended before editing the wording.
PROMPT_TEMPLATE = """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer.
Answer in a concise manner.
Do not use any formatting in your response.
<context>
{context}
</context>
Question: {input}
/no_think"""

prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

In [6]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt into a chain that answers from supplied documents.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [7]:
from langchain.chains import create_retrieval_chain

# Expose the vector store as a retriever.
retriever = db.as_retriever()

# Wire the retriever to the document chain to form the retrieval chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [8]:
import re

# Ask the retrieval chain a question; the generated text is under "answer".
response = retrieval_chain.invoke({"input": "What are the job openings?"})

# Strip a leading <think>...</think> block from the model output.
# The previous exact-prefix match ("<think>\n\n</think>\n\n") only handled an
# empty block with that precise whitespace; the pattern below also removes a
# block that contains reasoning text or different surrounding whitespace, and
# leaves the answer untouched when no such block is present.
answer = re.sub(
    r"^\s*<think>.*?</think>\s*",
    "",
    response["answer"],
    count=1,
    flags=re.DOTALL,  # let "." span newlines inside the think block
)

# Bare last expression so the notebook displays the cleaned answer.
answer

'The job openings are for a Data Scientist, a UX Designer, and a Software Developer.'