In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# Path to the PDF that will be indexed.
# Make sure the PDF is in the notebook's working directory, or provide the correct path.
file_path = "your_document.pdf"

# Fail fast when the file is missing. Only printing a message would let the
# cell finish "successfully" without defining `documents` / `chunks`, and the
# following cells would then die with an unrelated NameError.
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"Error: The file '{file_path}' was not found.")

# Load the PDF into LangChain Document objects.
loader = PyPDFLoader(file_path)
documents = loader.load()

# Split the loaded documents into chunks of at most 1000 characters, with a
# 100-character overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = text_splitter.split_documents(documents)

print(f"Loaded {len(documents)} document(s) and split into {len(chunks)} chunks.")

  from .autonotebook import tqdm as notebook_tqdm


Loaded 44 document(s) and split into 129 chunks.


In [2]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding function backed by the local Ollama "llama2" model.
# NOTE(review): llama2 is a text-generation model; a dedicated embedding model
# may retrieve better -- confirm before changing, since it requires a re-index.
embeddings_model = OllamaEmbeddings(model="llama2")

# Deterministic id per chunk (source path, page, running index). Without
# explicit ids every re-run of this cell appends a fresh copy of all chunks to
# the persisted collection in ./rag_db, so the retriever starts returning
# duplicates. With stable ids, re-running is expected to overwrite the same
# entries instead -- this assumes the Chroma wrapper upserts by id; TODO confirm
# against the installed langchain_community version.
chunk_ids = [
    f"{chunk.metadata.get('source', 'unknown')}:{chunk.metadata.get('page', 'na')}:{index}"
    for index, chunk in enumerate(chunks)
]

# Embed the chunks and store them in a Chroma collection persisted on disk.
vector_store = Chroma.from_documents(
    chunks,
    embeddings_model,
    ids=chunk_ids,
    persist_directory="./rag_db"
)

print("Vector store created and saved to './rag_db'")

  embeddings_model = OllamaEmbeddings(model="llama2")


Vector store created and saved to './rag_db'


In [3]:
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

# Expose the vector store through the retriever interface (default settings).
retriever = vector_store.as_retriever()

# Local Ollama "llama2" model used to generate the answers.
llm = Ollama(model="llama2")

# Retrieval-augmented QA chain; the retrieved source documents are returned
# alongside the generated answer so they can be inspected.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm, retriever=retriever, return_source_documents=True
)

print("RAG chain initialized with Ollama.")

RAG chain initialized with Ollama.


  llm = Ollama(model="llama2")


In [6]:
query = "how old is the eiffel tower"

# Send the question through the RAG chain.
response = rag_chain.invoke({"query": query})

# Show the generated answer first ...
answer_text = response["result"]
print("\n--- Answer ---")
print(answer_text)

# ... then each retrieved source with the first 200 characters of its content.
print("\n--- Sources ---")
for doc in response["source_documents"]:
    source_name = doc.metadata.get('source', 'Unknown')
    snippet = doc.page_content[:200]
    print(f"Source: {source_name}")
    print(f"Content: {snippet}...")


--- Answer ---
I'm not able to provide an answer to the question "how old is the Eiffel Tower" as it is not mentioned in any of the provided context pieces. The context only provides information on AI, its impact on jobs and society, and various initiatives related to AI training and education. Therefore, I don't know the answer to this question.

--- Sources ---
Source: your_document.pdf
Content: trends-report-2023)
39  Microsoft Research Paper – Sparks of Artificial General Intelligence (https://www.microsoft.com/en-
us/research/publication/sparks-of-artificial-general-intelligence-early-expe...
Source: your_document.pdf
Content: without the physical actor present and potentially long after they have died. The full impact of AI on jobs of 
the future is yet to be determined, but it is possible that labour strikes will be exper...
Source: your_document.pdf
Content: INTRODUCTION TO AI
World Travel & Tourism Council
< Contents  | 43
press/2023/over-50-believe-ai-will-future-proof-their