In [1]:
from dotenv import load_dotenv
import os

from langchain_community.vectorstores import FAISS
import faiss
from langchain.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_community.docstore.in_memory import InMemoryDocstore
from ingestor.ordine_ingestor import OrderIngestor
from ingestor.menu_chunk_ingestor import PiattoChunkIngestor
from ingestor.menu_ingestor import MenuIngestor
from ingestor.technique_ingestor import TechniqueIngestor
from ingestor.licence_ingestor import LicenceIngestor
from ingestor.config import EmbedderConf, LLMConf
from ingestor.embedder import ChunkEmbedder


from src.agents.llm import fetch_llm

env=load_dotenv("config.env")

In [2]:
embedder_conf = EmbedderConf(
    type="ollama",
    model="mxbai-embed-large",
)

embedder = ChunkEmbedder(embedder_conf)

In [3]:
file_path = "HackapizzaDataset/Misc/Manuale di Cucina.pdf"

dir = "HackapizzaDataset/Menu"

tech_ingestor = TechniqueIngestor()
order_ingestor = OrderIngestor()
license_ingestor = LicenceIngestor()
menu_ingestor = MenuIngestor(llm=None)
piatti_ingestor = PiattoChunkIngestor()

chunks_licenses = license_ingestor.chunks_from_doc(file_path)

chunks_techniques = tech_ingestor.chunks_from_doc(file_path)

chunks_ordini = order_ingestor.chunks_from_doc(file_path)

chunks_menu = menu_ingestor.chunks_from_docs(base_dir=dir)

chunks_piatti = piatti_ingestor.chunk_from_docs(dir)

chunks = chunks_ordini + chunks_menu + chunks_techniques + chunks_licenses + chunks_piatti

In [4]:
index = faiss.IndexFlatL2(len(embedder.embeddings.embed_query("hello world")))

vector_store = FAISS(
    embedding_function=embedder.embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

doc_chunks = [
    Document(
        page_content=chunk.text, 
        metadata={
            "id":chunk.id,
            "filename": chunk.filename,
            "embeddings_model": chunk.embeddings_model
            }
    ) for chunk in chunks
]

vector_store.add_documents(doc_chunks)

['eb653fdb-3579-4a05-a700-95144fda0a44',
 'df275bc0-0b84-40b6-82d7-ded51f600301',
 '94019235-e54c-4672-a115-e1c0a212d57b',
 '05f1a890-1806-47b6-81a4-9cec684c0242',
 'ff180289-f5b3-451b-b594-51a3f8549d1e',
 '7ad3a27f-db06-4b51-af23-66867821110c',
 '6dbd9ee3-e95a-48f8-8609-58be5f5dd0fb',
 'c93e936b-92c1-45b6-9d80-04b5e5eb07bc',
 '67fabf51-e57e-473c-a352-fc11f6b8e3fc',
 '5d998f80-f1c4-42de-9e71-3d83518cc8be',
 'e5e0e7f6-c88d-4274-8094-165c8f9b81e0',
 '0c59870b-9719-46de-83b2-24b2d2f76ec4',
 '4e72b818-85f8-477b-a732-70f44850c695',
 '358fbd2c-dbc3-4ad8-af18-ad95eb5af71c',
 '074a9766-6ca4-48c3-b460-fd69b4d72d8d',
 '28b0d92e-0b8c-4c5a-a8f0-8ef3118902dc',
 '22c33425-1300-47c7-8452-ed9075614581',
 '3541980b-95cc-49a3-ba1f-2224e35994a4',
 '6df36ee1-354a-4ec6-9f67-25aeedcd15c7',
 '5c6572e2-3bbb-4c4c-a2d8-7c3bc83a2970',
 'b6942d33-a515-4251-af82-4e6c9949e46b',
 '82e238a7-9bc3-4f39-a036-d7e07110b15c',
 '594d4573-8587-48e7-b72a-c0910c060394',
 'ff96a579-21cc-49c6-a37a-e5615cd09eef',
 '1b9b57b1-4a0f-

In [18]:
vector_store.save_local("faiss_index")

new_vector_store = FAISS.load_local(
    "faiss_index", 
    embedder.embeddings, 
    allow_dangerous_deserialization=True
)

In [13]:
# llm_conf = LLMConf(
#     type="ibm",
#     model="mistralai/mistral-large",
#     api_key=os.getenv("API_KEY"),
#     endpoint=os.getenv("ENDPOINT"),
#     deployment=os.getenv("PROJECT_ID")
# )

llm_conf = LLMConf(
    type="ollama",
    model="llama3.2:latest", 
    temperature=0.0, 
)

llm = fetch_llm(llm_conf)

In [14]:
template = """
    You are an AI Assistant specialized in answering questions about Intergalactic Cousine and Restaurants. 

    Your task is to answer the user's question given the provided context.

    Answer to the question as precisely as possible. 
    Do not make things up.
    Do not mention anything not provided in the context.

    USER_QUESTION: {question}
    CONTEXT: {context}
"""

prompt = PromptTemplate.from_template(template)

prompt.input_variables = ["question", "context"]

In [15]:
query = "Quali sono gli alimenti preferiti dai Naturalisti?"

In [16]:
query_2 = "Quali sono i piatti preparati con la Sferificazione con Campi Magnetici Entropici e non contengono Spezie Melange?"

In [17]:
retriever = vector_store.as_retriever(search_kwargs = {"k": 5})

try:
    context_docs = retriever.invoke(query_2)
except Exception as e:
    context_docs = []

context = []
for doc in context_docs:
    context += f"\n {doc.page_content}"

llm.invoke(
    input=prompt.format_prompt(question=query_2, context=context)
)

AIMessage(content='This appears to be a text file containing a large amount of text data, likely from a book or article. The text is in English and seems to be a mix of prose and dialogue.\n\nHere are some observations about the text:\n\n* The text has a total of 1,046 lines.\n* The average line length is around 50-60 words.\n* There are many instances of repeated phrases and sentences, which may indicate that the text was generated using a template or formula.\n* The text contains a mix of formal and informal language, including proper nouns, abbreviations, and colloquial expressions.\n* There are several instances of formatting errors, such as missing quotation marks or incorrect punctuation.\n\nOverall, the text appears to be a large block of unedited text that may require significant editing and proofreading before it is ready for publication.', additional_kwargs={}, response_metadata={'model': 'llama3.2:latest', 'created_at': '2025-01-19T02:52:19.740029Z', 'done': True, 'done_reas