In [1]:
from langchain_community.document_loaders import DirectoryLoader

from dotenv import load_dotenv

# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI credentials used by the
# later cells come from - confirm.
load_dotenv()

# Pick up every .txt file found at any depth below the data/ directory.
loader = DirectoryLoader(path="data", glob="**/*.txt")
documents = loader.load()

In [2]:
# Sanity check of the load step: first the repr of everything that was loaded ...
print(documents)
# ... then the raw text of the first document only.
print(documents[0].page_content)

[Document(page_content='Q: What makes our pizza unique? A: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.\n\nQ: Do we offer gluten-free pizza options? A: Yes, we offer a delicious gluten-free crust option for our guests with dietary restrictions or preferences.\n\nQ: Can customers create their own pizza? A: Absolutely! Customers can choose from a variety of fresh toppings to create their own unique pizza masterpiece.\n\nQ: What are our most popular pizzas? A: Our most popular pizzas include the Classic Margherita, Pepperoni Supreme, and the Gourmet Vegetarian. Each offers a unique blend of flavors that cater to a variety of tastes.\n\nQ: Do we offer vegan pizza options? A: Yes, we have vegan pizza options which include dairy-free cheese and a variety of fresh vegetable toppings.\n\nQ: How long does it take t

In [3]:
from langchain.text_splitter import CharacterTextSplitter

# Split on blank lines (a literal "\n\n", not a regex) and measure chunk size
# in characters via len(). The size limit is not a hard cap: a piece that
# contains no separator can exceed it (the recorded run logs a 262-character
# chunk against the 250 limit).
text_splitter = CharacterTextSplitter(
    chunk_size=250,
    chunk_overlap=10,
    separator="\n\n",
    is_separator_regex=False,
    length_function=len,
)

In [4]:
# Cut the loaded documents into smaller Document chunks; the bare name on the
# last line makes the notebook display the result.
chunks = text_splitter.split_documents(documents=documents)
chunks

Created a chunk of size 262, which is longer than the specified 250


[Document(page_content='Q: What makes our pizza unique? A: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.', metadata={'source': 'data/text.txt'}),
 Document(page_content='Q: Do we offer gluten-free pizza options? A: Yes, we offer a delicious gluten-free crust option for our guests with dietary restrictions or preferences.', metadata={'source': 'data/text.txt'}),
 Document(page_content='Q: Can customers create their own pizza? A: Absolutely! Customers can choose from a variety of fresh toppings to create their own unique pizza masterpiece.', metadata={'source': 'data/text.txt'}),
 Document(page_content='Q: What are our most popular pizzas? A: Our most popular pizzas include the Classic Margherita, Pepperoni Supreme, and the Gourmet Vegetarian. Each offers a unique blend of flavors that cater to a variety of t

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embed each chunk with OpenAI embeddings and store the vectors in Chroma.
index = Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(),
)

In [6]:
# Expose the vector store through the retriever interface with default settings.
# NOTE(review): the search type and number of results are left at the library
# defaults - confirm they suit this corpus.
retriever = index.as_retriever()

In [7]:
# Smoke-test retrieval on its own before wiring it into the chain.
# `invoke` is the Runnable entry point, the same one used for `rag_chain`;
# it replaces the deprecated `get_relevant_documents` and returns the same
# list of matching Document objects.
retriever.invoke("How long does it take to prepare a pizza")

[Document(page_content='Q: How long does it take to prepare a pizza? A: On average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.', metadata={'source': 'data/text.txt'}),
 Document(page_content='Q: What makes our pizza unique? A: Our pizzas are made with a secret family recipe for the dough, hand-tossed, and topped with fresh, locally-sourced ingredients. We use a traditional wood-fired oven to give our pizzas a distinctive smoky flavor and crisp crust.', metadata={'source': 'data/text.txt'}),
 Document(page_content='Q: Can customers create their own pizza? A: Absolutely! Customers can choose from a variety of fresh toppings to create their own unique pizza masterpiece.', metadata={'source': 'data/text.txt'}),
 Document(page_content='Q: What are our most popular pizzas? A: Our most popular pizzas include the Classic Margherita, Pepperoni Supreme, and the Gourmet Vegetarian. Each offers a unique

In [8]:
from operator import itemgetter

# Prompt skeleton with two placeholders: {context} receives the retrieved
# passages and {question} the user's question. The string starts with a
# newline and ends with one.
template = (
    "\n"
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Answer the following question:\n"
    "Question: {question}\n"
)

In [9]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Chat model that will answer the question.
model = ChatOpenAI(model_name="gpt-3.5-turbo")

# Turn the raw template string into a chat prompt with {context} and
# {question} as its input variables.
prompt = ChatPromptTemplate.from_template(template=template)

In [10]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Only ``page_content`` is kept, so the prompt receives the plain passages
    separated by blank lines instead of the repr of a list of ``Document``
    objects (which would also carry metadata such as the source path).

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The passages joined with a blank line between them; an empty string
        when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval-augmented pipeline:
#   1. take the "question" entry of the input mapping,
#   2. look up matching chunks and flatten them into plain text ("context"),
#   3. fill the prompt, call the chat model and return its reply as a string.
rag_chain = (
    {
        "context": itemgetter("question") | retriever | _format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [11]:
# Run the whole pipeline on one sample question; the returned answer string
# is displayed as the cell's output.
rag_chain.invoke(
    dict(question="How long does it take to prepare a pizza")
)

'On average, it takes about 15-20 minutes from the time you order until your pizza is ready. This time ensures that each pizza is cooked to perfection.'