In [None]:
from langchain_community.document_loaders import DirectoryLoader

from dotenv import load_dotenv

# Pull settings from the environment file before anything else runs
# (presumably this is where the OpenAI API key used further down comes from — confirm).
load_dotenv()

# Recursively pick up every .txt file underneath the local "data" directory.
loader = DirectoryLoader(path='data', glob="**/*.txt")

# Read all matched files into memory as a list of documents.
documents = loader.load()

In [None]:
# Sanity-check the load step: show everything that was loaded, then the raw
# text of the first document.
print(documents)

# Fail with an explicit message instead of a bare IndexError on `documents[0]`
# when the glob matched no files (e.g. wrong working directory).
if not documents:
    raise ValueError(
        "No documents were loaded: check that the 'data' directory exists "
        "and contains .txt files."
    )

print(documents[0].page_content)

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter configuration for turning the loaded documents into small chunks.
text_splitter = CharacterTextSplitter(
    # Size limits, measured with the built-in `len` (i.e. in characters).
    chunk_size=250,
    chunk_overlap=10,
    length_function=len,
    # Break on blank lines; the separator is a literal string, not a regex.
    separator="\n\n",
    is_separator_regex=False,
)

In [None]:
# Apply the splitter configured above to every loaded document.
chunks = text_splitter.split_documents(documents)
# Bare expression so the notebook displays the resulting chunks for inspection.
chunks

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embed each chunk with OpenAI embeddings and store the vectors in a Chroma
# vector store that the retriever below will query.
index = Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(),
)

In [None]:
# Wrap the vector store in a retriever (default settings) so it can be
# queried directly and composed into the RAG chain further down.
retriever = index.as_retriever()

In [None]:
# Smoke-test retrieval on its own before wiring it into the chain.
# `invoke` is the Runnable entry point for retrievers; it replaces the
# deprecated `get_relevant_documents` and returns the same list of documents.
retriever.invoke("How long does it take to prepare a pizza")

In [None]:
from operator import itemgetter

# Prompt text for the RAG chain. It has two placeholders that the chain must
# supply: {context} (the retrieved material) and {question} (the user's query).
template = """
Answer the question based only on the following context:
{context}

Answer the following question:
Question: {question}
"""

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Build the chat prompt from the template string defined in the previous cell.
prompt = ChatPromptTemplate.from_template(template)
# Chat model that will answer the filled-in prompt.
model = ChatOpenAI(model_name="gpt-3.5-turbo")

In [None]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the retriever's raw return value (a list of document
    objects) is what gets substituted into the prompt's ``{context}`` slot,
    instead of just the passage text.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' ``page_content`` values separated by blank lines
        (an empty string when nothing was retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline. Input is a dict with a "question" key; output is a plain string.
rag_chain = (
    {
        # question -> retriever -> plain-text context for the prompt
        "context": itemgetter("question") | retriever | format_docs,
        # pass the original question through unchanged
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [None]:
# Run the full chain end to end; the input dict must carry the "question" key
# that both branches of the chain read via itemgetter.
rag_chain.invoke({"question": "How long does it take to prepare a pizza"})