In [6]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
import os

app_dir = os.path.join(os.getcwd(), "app")
load_dotenv(os.path.join(app_dir, ".env"))

loader = DirectoryLoader("./data", glob="**/*.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=120,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
chunks = text_splitter.split_documents(docs)

embedding_function = OpenAIEmbeddings()
model = ChatOpenAI()

db = Chroma.from_documents(docs, embedding_function)
retriever = db.as_retriever()

In [7]:
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
import re

query = "Who owns the restaurant?"


QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from a vector
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search.
    Provide these alternative question like this:
    <<question1>>
    <<question2>>
    Only provide the query, no numbering.
    Original question: {question}""",
)


def split_and_clean_text(input_text):
    return [item for item in re.split(r"<<|>>", input_text) if item.strip()]

In [8]:
model = ChatOpenAI()
rephrase_chain = (
    QUERY_PROMPT | model | StrOutputParser() | RunnableLambda(split_and_clean_text)
)

In [9]:
list_of_questions = rephrase_chain.invoke("Who is the owner of the restaurant")

In [10]:
docs = [retriever.get_relevant_documents(q) for q in list_of_questions]

In [11]:
def flatten_and_unique_documents(documents):
    flattened_docs = [doc for sublist in documents for doc in sublist]

    unique_docs = []
    unique_contents = set()
    for doc in flattened_docs:
        if doc.page_content not in unique_contents:
            unique_docs.append(doc)
            unique_contents.add(doc.page_content)

    return unique_docs

In [12]:
flatten_and_unique_documents(documents=docs)

[Document(page_content="In the charming streets of Palermo, tucked away in a quaint alley, stood Chef Amico, a restaurant that was more than a mere eatery—it was a slice of Sicilian heaven. Founded by Amico, a chef whose name was synonymous with passion and creativity, the restaurant was a mosaic of his life’s journey through the flavors of Italy.\n\nChef Amico’s doors opened to a world where the aromas of garlic and olive oil were as welcoming as a warm embrace. The walls, adorned with photos of Amico’s travels and family recipes, spoke of a rich culinary heritage. The chatter and laughter of patrons filled the air, creating a symphony as delightful as the dishes served.\n\nOne evening, as the sun cast a golden glow over the city, a renowned food critic, Elena Rossi, stepped into Chef Amico. Her mission was to uncover the secret behind the restaurant's growing fame. She was greeted by Amico himself, whose eyes sparkled with the joy of a man who loved his work.\n\nElena was led to a ta

In [15]:
HYDE_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five hypothetical answers to the user's query. These answers should offer diverse perspectives or interpretations, aiding in a comprehensive understanding of the query. Present the hypothetical answers as follows:

    Hypothetical Answer 1: <<Answer considering a specific perspective>>
    Hypothetical Answer 2: <<Answer from a different angle>>
    Hypothetical Answer 3: <<Answer exploring an alternative possibility>>
    Hypothetical Answer 4: <<Answer providing a contrasting viewpoint>>
    Hypothetical Answer 5: <<Answer that includes a unique insight>>

    Note: Present only the hypothetical answers, without numbering, to provide a range of potential interpretations or solutions related to the query.
    Original question: {question}""",
)

In [16]:
hyde_chain = (
    HYDE_PROMPT | model | StrOutputParser() | RunnableLambda(split_and_clean_text)
)

In [17]:
list_of_questions = hyde_chain.invoke("Who is the owner of the restaurant")
list_of_questions

['- The owner of the restaurant is a successful entrepreneur who started the business from scratch and built it into a popular dining establishment in the community.\n- The owner of the restaurant could be a passionate chef who wanted to share their culinary creations with the world, leading them to open their own eatery.\n- It is possible that the restaurant is a family-owned business, with multiple family members sharing ownership and responsibilities in running the establishment.\n- Contrary to popular belief, the owner of the restaurant might be a silent partner who invested in the business but is not actively involved in its day-to-day operations.\n- A unique insight could be that the owner of the restaurant is a well-known celebrity or public figure who decided to venture into the food industry as a side business or passion project.']

In [18]:
docs = [retriever.get_relevant_documents(q) for q in list_of_questions]
flatten_and_unique_documents(documents=docs)

[Document(page_content="In the charming streets of Palermo, tucked away in a quaint alley, stood Chef Amico, a restaurant that was more than a mere eatery—it was a slice of Sicilian heaven. Founded by Amico, a chef whose name was synonymous with passion and creativity, the restaurant was a mosaic of his life’s journey through the flavors of Italy.\n\nChef Amico’s doors opened to a world where the aromas of garlic and olive oil were as welcoming as a warm embrace. The walls, adorned with photos of Amico’s travels and family recipes, spoke of a rich culinary heritage. The chatter and laughter of patrons filled the air, creating a symphony as delightful as the dishes served.\n\nOne evening, as the sun cast a golden glow over the city, a renowned food critic, Elena Rossi, stepped into Chef Amico. Her mission was to uncover the secret behind the restaurant's growing fame. She was greeted by Amico himself, whose eyes sparkled with the joy of a man who loved his work.\n\nElena was led to a ta