In [14]:
from pathlib import Path
import os
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

load_dotenv()
# os.environ["OPENAI_API_KEY"]

True

In [21]:
def load_document(file_path: Path | str) -> str:
    with open(file_path, 'r') as file:
        text = file.read()
    return text

In [3]:
file_path = "the_fellowship_of_the_ring.txt"
document_text = load_document(file_path)

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
chunks = text_splitter.split_text(document_text)
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 1454


In [23]:
chunks[69]

'the copies and abstracts. But many copies contain the true account (as an alternative), derived no doubt from notes by Frodo or Samwise, both of whom learned the truth, though they seem to have been unwilling to delete anything actually written by the old hobbit himself.'

In [9]:
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_texts(chunks, embeddings)

In [24]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [25]:
query = "How old is Bilbo Bagins?"

In [26]:
retrieved_docs = retriever.invoke(query)

print("\nRetrieved Documents:")
for i, doc in enumerate(retrieved_docs):
    print(f"\n--- Document {i+1} ---")
    print(doc.page_content + "..." if len(doc.page_content) > 200 else doc.page_content)


Retrieved Documents:

--- Document 1 ---
The eldest of these, and Bilbo's favourite, was young Frodo Baggins. When Bilbo was ninety-nine, he adopted Frodo as his heir, and brought him to live at Bag End; and the hopes of the Sackville-Bagginses were finally dashed. Bilbo and Frodo happened to have the same birthday, September 22nd. 'You had better come and live here, Frodo my lad,' said Bilbo one day; 'and then we can celebrate our birthday-parties comfortably together.' At that time Frodo was still in his _tweens,_ as the hobbits called the irresponsible twenties between childhood and coming of age at thirty-three....

--- Document 2 ---
Twelve more years passed. Each year the Bagginses had given very lively combined birthday-parties at Bag End; but now it was understood that something quite exceptional was being planned for that autumn. Bilbo was going to be _eleventy-one,_ 111, a rather curious number and a very respectable age for a hobbit (the Old Took himself had only reached 13

In [27]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# 5. Create custom prompt template
template = """
Use the following context to answer the question. If you don't know the answer based on the context, just say you don't know.

Context: {context}

Question: {question}

Answer:
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

# 6. Create QA chain
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt}
)


result = qa_chain.run(query)

In [28]:
result

'Bilbo Baggins is 111 years old.'