In [1]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders.directory import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
import os

# Read environment variables from the ".env" file located in the "app" folder
# of the current working directory.
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
app_dir = os.path.join(os.getcwd(), "app")
load_dotenv(os.path.join(app_dir, ".env"))

# Load the files under ./data that match the "**/*.txt" glob.
# The path is relative, so it depends on the notebook's working directory.
loader = DirectoryLoader("./data", glob="**/*.txt")
docs = loader.load()

# Split the loaded documents into small chunks with a little overlap between
# neighbours; chunk length is measured with len().
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=120,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_documents(docs)

# Embedding function and chat model, both built with their default arguments.
embedding_function = OpenAIEmbeddings()
model = ChatOpenAI()

# Build a Chroma vector store from the chunks and expose it as a retriever
# that the later cells query.
db = Chroma.from_documents(chunks, embedding_function)
retriever = db.as_retriever()

In [2]:
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
import re

# Example user question used by the multi-query cells below.
query = "Who owns the restaurant?"


# Prompt that asks the model for five rephrasings of the user's question.
# Each rephrasing is requested inside << >> markers, which is the delimiter
# split_and_clean_text() splits on.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from a vector
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search.
    Provide these alternative question like this:
    <<question1>>
    <<question2>>
    Only provide the query, no numbering.
    Original question: {question}""",
)


def split_and_clean_text(input_text):
    """Extract the individual items from a ``<<...>>``-delimited model reply.

    The text is split on every ``<<`` and ``>>`` marker. Each resulting
    fragment is stripped of surrounding whitespace, and fragments that are
    empty after stripping (e.g. the newlines between two items) are dropped.

    Args:
        input_text: Raw text returned by the model.

    Returns:
        A list of non-empty, whitespace-trimmed strings in their original
        order. Text without any markers yields a single-item list; an empty
        or whitespace-only input yields an empty list.
    """
    fragments = re.split(r"<<|>>", input_text)
    # Strip before filtering so that items such as "<< question >>" or a
    # fragment ending in "\n" are returned clean instead of carrying the
    # padding into the retriever query.
    return [cleaned for cleaned in map(str.strip, fragments) if cleaned]

In [3]:
# Multi-query chain: fill QUERY_PROMPT, call the chat model, take the reply as
# plain text, and split it into the individual rewritten questions.
# The `model` created in the setup cell is reused here; building a second,
# identical ChatOpenAI() client in this cell was redundant.
multiquery_chain = (
    QUERY_PROMPT | model | StrOutputParser() | RunnableLambda(split_and_clean_text)
)

In [4]:
# Ask the model for alternative phrasings of `query`; the chain's last step
# returns them as a list of strings.
list_of_questions = multiquery_chain.invoke(query)

In [5]:
# Display the rewritten questions produced by the multi-query chain.
list_of_questions

['Who is the proprietor of the restaurant?',
 'What individual or entity possesses the restaurant?',
 'To whom does the restaurant belong?',
 'Which person or company has ownership of the restaurant?',
 "Who is responsible for the restaurant's ownership?"]

In [6]:
# Query the retriever once per rewritten question, giving one list of results
# per question (a list of lists).
# NOTE(review): this rebinds `docs`, which the setup cell used for the raw
# loaded documents — a distinct name would avoid the shadowing.
docs = [retriever.invoke(q) for q in list_of_questions]

In [7]:
# Display the per-question retrieval results (nested list, duplicates included).
docs

[[Document(page_content='Creating Chef Amico’s Restaurant', metadata={'source': 'data\\founder.txt'}),
  Document(page_content='One evening, as the sun cast a golden glow over the city, a renowned food critic, Elena Rossi, stepped into Chef Amico.', metadata={'source': 'data\\restaurant.txt'}),
  Document(page_content="into Chef Amico. Her mission was to uncover the secret behind the restaurant's growing fame. She was greeted by Amico", metadata={'source': 'data\\restaurant.txt'}),
  Document(page_content='and relish life’s simple pleasures. His restaurant was a haven where strangers became friends over plates of arancini', metadata={'source': 'data\\founder.txt'})],
 [Document(page_content='Creating Chef Amico’s Restaurant', metadata={'source': 'data\\founder.txt'}),
  Document(page_content='Philosophy of Hospitality', metadata={'source': 'data\\founder.txt'}),
  Document(page_content='One evening, as the sun cast a golden glow over the city, a renowned food critic, Elena Rossi, stepp

In [8]:
def flatten_and_unique_documents(documents):
    """Merge several result lists into one list without repeated content.

    Args:
        documents: An iterable of iterables whose items expose a
            ``page_content`` attribute.

    Returns:
        A flat list holding the first item seen for each distinct
        ``page_content`` value, in order of first appearance.
    """
    seen_texts = set()
    deduped = []
    for group in documents:
        for candidate in group:
            text = candidate.page_content
            # Only the content is compared; later items with the same text
            # are skipped.
            if text in seen_texts:
                continue
            seen_texts.add(text)
            deduped.append(candidate)
    return deduped

In [9]:
# Collapse the per-question results into a single list with duplicate
# page contents removed, and display it.
flatten_and_unique_documents(documents=docs)

[Document(page_content='Creating Chef Amico’s Restaurant', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='One evening, as the sun cast a golden glow over the city, a renowned food critic, Elena Rossi, stepped into Chef Amico.', metadata={'source': 'data\\restaurant.txt'}),
 Document(page_content="into Chef Amico. Her mission was to uncover the secret behind the restaurant's growing fame. She was greeted by Amico", metadata={'source': 'data\\restaurant.txt'}),
 Document(page_content='and relish life’s simple pleasures. His restaurant was a haven where strangers became friends over plates of arancini', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='Philosophy of Hospitality', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='the restaurant quickly gained fame for its authentic flavors and Amico’s innovative twists on traditional recipes.', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='culmination of his travels

In [18]:
# Prompt that asks the model for five hypothetical answers to the user's
# question instead of rephrasings of it. The answers are requested inside
# << >> markers, the same delimiter split_and_clean_text() splits on.
HYDE_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five hypothetical answers to the user's query. These answers should offer diverse perspectives or interpretations, aiding in a comprehensive understanding of the query. Present the hypothetical answers as follows:

    <<Answer considering a specific perspective>>
    <<Answer from a different angle>>
    <<Answer exploring an alternative possibility>>
    <<Answer providing a contrasting viewpoint>>
    <<Answer that includes a unique insight>>

    Note: Present only the hypothetical answers, without numbering (or "-", "1.", "*") and so on, to provide a range of potential interpretations or solutions related to the query.
    Original question: {question}""",
)

In [19]:
# Same pipeline shape as the multi-query chain, but driven by HYDE_PROMPT:
# prompt -> chat model -> plain text -> list of << >>-delimited items.
hyde_chain = (
    HYDE_PROMPT | model | StrOutputParser() | RunnableLambda(split_and_clean_text)
)

In [20]:
# Generate hypothetical answers for the question and display them.
# NOTE(review): despite its name, `list_of_questions` holds answers here; the
# name is reused from the multi-query cells.
list_of_questions = hyde_chain.invoke("Who is the owner of the restaurant")
list_of_questions

['The owner of the restaurant is usually the person or group who holds the legal rights and responsibilities for the business.',
 'From a different angle, the owner of the restaurant could be a passionate chef who started the business to share their culinary creations with the world.',
 'An alternative possibility is that the restaurant owner is a corporation or investment group that saw potential in the food industry and decided to open a restaurant.',
 'Contrasting viewpoint: The owner of the restaurant might actually be a silent partner who provides financial backing but has no involvement in the day-to-day operations.',
 'A unique insight could be that the owner of the restaurant is a local community member who wanted to create a gathering place for people to enjoy good food and company.']

In [21]:
# Use each hypothetical answer as a retrieval query, then merge the results
# into one list with duplicate page contents removed and display it.
docs = [retriever.invoke(q) for q in list_of_questions]
flatten_and_unique_documents(documents=docs)

[Document(page_content='Creating Chef Amico’s Restaurant', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='One evening, as the sun cast a golden glow over the city, a renowned food critic, Elena Rossi, stepped into Chef Amico.', metadata={'source': 'data\\restaurant.txt'}),
 Document(page_content='Philosophy of Hospitality', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='young chefs, shares his knowledge at culinary workshops, and supports local farmers and producers.', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='of his passion for cooking.', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='with the rhythmic chopping of fresh herbs and the sizzling of rich tomato sauces, laid the foundation of his passion', metadata={'source': 'data\\founder.txt'}),
 Document(page_content='the restaurant quickly gained fame for its authentic flavors and Amico’s innovative twists on traditional recipes.', metadata={'source':