In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# Path to the PDF to index. Put the file next to this notebook, or set a path
# that resolves from the kernel's working directory.
file_path = "your_document.pdf"

# Fail fast: a later cell reads `chunks`, so a missing file has to stop the run
# here rather than resurface downstream as an unrelated NameError.
# `isfile` (not `exists`) also rejects a directory passed by mistake.
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"Error: The file '{file_path}' was not found.")

# Load the PDF into LangChain Document objects.
loader = PyPDFLoader(file_path)
documents = loader.load()

# Split the loaded documents into overlapping chunks for embedding.
# chunk_size / chunk_overlap are the splitter's length units (characters with
# the default length function); the overlap keeps context across boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)

print(f"Loaded {len(documents)} document(s) and split into {len(chunks)} chunks.")

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding function backed by the Ollama "llama2" model; it turns each chunk
# into a vector for similarity search.
embeddings_model = OllamaEmbeddings(model="llama2")

# Embed the chunks produced by the splitting cell and store them in a Chroma
# collection persisted under ./rag_db.
# NOTE(review): re-running this cell against an existing ./rag_db may add the
# same chunks a second time — confirm, and clear the directory if duplicate
# sources show up in answers.
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings_model,
    persist_directory="./rag_db",
)

print("Vector store created and saved to './rag_db'")

In [None]:
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

# Retriever over the Chroma vector store, using its default search settings.
retriever = vector_store.as_retriever()

# Text-generation model (Ollama "llama2") that writes the final answer.
llm = Ollama(model="llama2")

# Wire retrieval and generation together. "stuff" is the library's default
# chain type, spelled out here for readability. return_source_documents=True
# makes the chain output include the retrieved chunks alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

print("RAG chain initialized with Ollama.")

In [None]:
# Question to ask about the indexed document — replace the placeholder text.
query = "Enter your query here! E.g Compile the most important talking points in the document"

# Run the RAG chain; the question is passed under the "query" key.
response = rag_chain.invoke({"query": query})

# Show the generated answer first.
print("\n--- Answer ---")
answer = response["result"]
print(answer)

# Then list each retrieved chunk the chain returned as a source.
print("\n--- Sources ---")
source_documents = response["source_documents"]
for doc in source_documents:
    origin = doc.metadata.get('source', 'Unknown')
    snippet = doc.page_content[:200]  # first 200 characters only
    print(f"Source: {origin}")
    print(f"Content: {snippet}...")