In [None]:
import os, certifi
from dotenv import load_dotenv


In [None]:
import sys

# Show which Python executable backs this kernel, so it is easy to confirm
# the notebook runs in the environment where the dependencies are installed.
interpreter_path = sys.executable
print(interpreter_path)

In [None]:
# Load environment variables
# load_dotenv() copies the entries of a local .env file into os.environ.
load_dotenv()

# LangSmith tracing: the LANGCHAIN_* values are already in os.environ once
# load_dotenv() has run, so they only need to be checked, not re-assigned.
# Writing os.getenv(...) back into os.environ raises TypeError when the
# variable is missing, because os.environ only accepts str values.
if os.getenv("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
else:
    # Without an API key tracing cannot work; switch it off explicitly so the
    # rest of the notebook still runs.
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")

if not os.getenv("LANGCHAIN_PROJECT"):
    print("LANGCHAIN_PROJECT is not set; no project name was configured for tracing.")

# Point both the ssl module and requests at certifi's CA bundle.
ca_bundle_path = certifi.where()
os.environ["SSL_CERT_FILE"] = ca_bundle_path
os.environ["REQUESTS_CA_BUNDLE"] = ca_bundle_path

# Location of the PDF to index; None when FILE_PATH is not configured.
FILE_PATH = os.environ.get("FILE_PATH")
FILE_PATH

In [None]:
# ---- 1. Load LLM + Embeddings (Ollama) ----
from langchain_community.llms import Ollama
from langchain_ollama import OllamaEmbeddings

In [None]:
# Two Ollama-served models: one generates text, the other produces embeddings.
# NOTE(review): recent langchain-community releases appear to deprecate this
# Ollama class in favour of langchain_ollama.OllamaLLM; confirm against the
# installed version before migrating.
generation_model_name = "gemma3:4b"
embedding_model_name = "nomic-embed-text"

llm = Ollama(model=generation_model_name)   # Text generation
embeddings_model = OllamaEmbeddings(model=embedding_model_name)

In [None]:
# ---- 2. Load PDF ----
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Fail fast when FILE_PATH was never configured: otherwise None is handed to
# the PDF loader and the failure only surfaces later, without naming the
# setting that is missing.
if not FILE_PATH:
    raise ValueError(
        "FILE_PATH is not set; define it in .env or the environment before loading the PDF."
    )
file_path = FILE_PATH

In [None]:
# Build the PDF loader for the configured file and read it into `docs`,
# the list that the preview and splitting cells work from.
loader = PyPDFLoader(file_path=file_path)
docs = loader.load()

In [None]:
# Sanity check: show the start of the first loaded document and its metadata.
first_document = docs[0]
print("Sample text from PDF:")
preview_text = first_document.page_content[:200]   # Preview first 200 chars
print(preview_text)
print(first_document.metadata)

In [None]:
# ---- 3. Split text into chunks ----
# Chunks are capped at chunk_size, and neighbouring chunks share
# chunk_overlap of content so text cut at a boundary is not lost.
chunking_options = {
    "chunk_size": 1000,
    "chunk_overlap": 50,
}
text_splitter = RecursiveCharacterTextSplitter(**chunking_options)
final_documents = text_splitter.split_documents(docs)


In [None]:
# ---- 4. Create Vectorstore ----
# Import FAISS from langchain_community (already used elsewhere in this
# notebook). The old `langchain.vectorstores` path is a deprecated re-export
# that emits a deprecation warning and is absent from newer langchain releases.
from langchain_community.vectorstores import FAISS

# Embed every chunk with the Ollama embedding model and index the vectors.
vectorstore = FAISS.from_documents(final_documents, embeddings_model)


In [None]:
# ---- 5. Create Retriever ----
# Expose the vector store as a retriever; "k" is the number of chunks
# requested per query.
top_k = 3
retriever = vectorstore.as_retriever(search_kwargs={"k": top_k})

In [None]:
# ---- 6. Create Prompt Chain ----
from langchain.chains import RetrievalQA

In [None]:
# Assemble the question-answering chain from the LLM and the retriever.
# Note: despite its name, `prompt` holds the RetrievalQA chain object; the
# name is kept because the query cell calls `prompt.invoke(...)`.
qa_chain_options = {
    "llm": llm,
    "retriever": retriever,
    # "stuff" = direct injection, try "map_reduce" if PDF is big
    "chain_type": "stuff",
    # Also return the retrieved chunks alongside the answer.
    "return_source_documents": True,
}
prompt = RetrievalQA.from_chain_type(**qa_chain_options)

In [None]:
# ---- 7. Ask Questions ----
# Other questions to try against the same document:
#   "What was Shivaji's role in promoting the navy?"
#   "Who is Shivaji?"
#   "who is Shivaji great grandfather?"
query = "Shivaji's birthday"

# The chain takes its question under the "query" key.
chain_input = {"query": query}
answer = prompt.invoke(chain_input)


In [None]:
# Print the generated answer, which the chain stores under the "result" key.
print("\nAnswer:")
answer_text = answer["result"]
print(answer_text)

# Optional: uncomment to list the retrieved chunks returned with the answer.
# print("\nSources:")
# for i, doc in enumerate(answer["source_documents"], 1):
#     print(f"Source {i}: {doc.metadata} -> {doc.page_content[:100]}...")