### 1: Imports and Configuration

In [4]:
import os
from dotenv import load_dotenv
import nest_asyncio
import pickle
import chromadb
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import FunctionCallingAgentWorker, AgentRunner

# Load variables from a local .env file (if one exists) into the process
# environment so the os.getenv() calls below can see them.
load_dotenv()

# API keys. os.getenv returns None when a variable is not set, so either of
# these may be None.
# NOTE(review): OPENAI_API_KEY is not referenced by any cell visible here (the
# LLM is a local Ollama model) -- confirm whether it is still needed.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Intended for the LlamaParse path of load_dictionary_data (use_pickle=False).
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")

# Configuration: all paths are relative to the notebook's working directory.
CHROMA_DB_PATH = "local_db_ollama/"  # on-disk ChromaDB location
COOKBOOK_PATH = "data/175_choice_recipes_mainly_furnished_by_members_of_the_chicago_womens_club-1887.pdf"
DICTIONARY_PATH = "data/dictionary-of-food.pdf"
# Pickle cache of the parsed dictionary documents (note: no file extension).
DICTIONARY_PICKLE_PATH = 'raw_dictionary_documents'

# Patch asyncio so nested event loops are allowed; presumably required because
# the notebook kernel already runs an event loop -- TODO confirm.
nest_asyncio.apply()

### 2: Initialize Models

In [5]:
def initialize_models():
    """
    Build the chat model and the embedding model and make them the defaults.

    Both models are constructed first and only then registered on the global
    ``Settings`` object (``Settings.llm`` and ``Settings.embed_model``), so
    this function has a module-wide side effect in addition to its return
    value.

    Returns:
        tuple: ``(llm, embed_model)`` where
            - llm (Ollama): the "llama3.1" Ollama model.
            - embed_model (HuggingFaceEmbedding): the "BAAI/bge-small-en-v1.5"
              embedding model.
    """
    language_model = Ollama(model="llama3.1")
    embedding_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # Register as global defaults (llm first, then the embedder).
    Settings.llm, Settings.embed_model = language_model, embedding_model

    return language_model, embedding_model

# Build the models once at cell level. initialize_models() also registers them
# on the global Settings object; `llm` and `embed_model` are kept as notebook
# variables for any later cell that wants them directly.
llm, embed_model = initialize_models()

### 3: Load Data

In [13]:
# Cell 3: helpers that load the dictionary and cookbook documents
def load_dictionary_data(
    pickle_path, pdf_path, use_pickle=True, llama_cloud_api_key=None
):
    """
    Load dictionary data from a pickle file or generate it using LlamaParse.

    With ``use_pickle=True`` the documents are read back from ``pickle_path``.
    Otherwise the PDF at ``pdf_path`` is parsed with LlamaParse (markdown
    output) and the result is written to ``pickle_path`` so later runs can use
    the cached copy.

    Args:
        pickle_path (str): The file path to the pickle file (read when
            use_pickle is True, written when it is False).
        pdf_path (str): The file path to the PDF file.
        use_pickle (bool, optional): Whether to use the pickled file. Defaults to True.
        llama_cloud_api_key (str, optional): The API key for LlamaParse. Required if use_pickle is False.

    Returns:
        list: A list of dictionary documents.

    Raises:
        ValueError: If use_pickle is False and llama_cloud_api_key is not provided.
        FileNotFoundError: If the specified file does not exist.
        ImportError: If use_pickle is False and the ``llama_parse`` package is
            not installed.
    """
    if use_pickle:
        if not os.path.exists(pickle_path):
            raise FileNotFoundError(f"The pickle file {pickle_path} does not exist.")
        # SECURITY: unpickling can execute arbitrary code. Only load pickle
        # files that this notebook (or another trusted source) produced.
        with open(pickle_path, "rb") as fp:
            return pickle.load(fp)

    if not llama_cloud_api_key:
        raise ValueError(
            "LlamaParse API key is required when not using pickle file."
        )

    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"The PDF file {pdf_path} does not exist.")

    # LlamaParse is not imported at the top of the notebook, so this branch
    # used to fail with NameError. Import it lazily here: the package is only
    # needed when the PDF is actually re-parsed, and the pickle path keeps
    # working without it installed.
    from llama_parse import LlamaParse

    parser = LlamaParse(api_key=llama_cloud_api_key, result_type="markdown")
    dictionary_docs = parser.load_data(pdf_path)

    # Cache the parsed documents so future runs can use use_pickle=True.
    with open(pickle_path, "wb") as fp:
        pickle.dump(dictionary_docs, fp)

    return dictionary_docs


def load_cookbook_data(path):
    """
    Read a single cookbook PDF into a list of documents.

    Args:
        path (str): Location of the PDF file to read.

    Returns:
        list: The documents produced by ``SimpleDirectoryReader.load_data()``
        for that one file.
    """
    # The reader expects a list of files, so wrap the single path.
    reader = SimpleDirectoryReader(input_files=[path])
    return reader.load_data()



# Load both corpora. The dictionary is read from its pickle cache by default.
# The LlamaParse key is forwarded so that switching use_pickle to False
# re-parses the PDF instead of failing with "API key is required" (the key was
# previously read from the environment but never passed on).
cookbook_docs = load_cookbook_data(COOKBOOK_PATH)
dictionary_docs = load_dictionary_data(
    DICTIONARY_PICKLE_PATH,
    DICTIONARY_PATH,
    use_pickle=True,
    llama_cloud_api_key=LLAMA_CLOUD_API_KEY,
)

### 4: Create Vector Store and Indices

In [14]:
def create_vector_store(db_path, collection_name):
    """
    Open (or create) a persistent Chroma collection and wrap it for LlamaIndex.

    Args:
        db_path (str): Directory of the on-disk ChromaDB database.
        collection_name (str): Collection to fetch, creating it if it does not
            exist yet.

    Returns:
        StorageContext: A storage context whose vector store is backed by the
        requested Chroma collection.
    """
    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_or_create_collection(collection_name)
    return StorageContext.from_defaults(
        vector_store=ChromaVectorStore(chroma_collection=collection)
    )

def create_index(documents, storage_context):
    """
    Build a ``VectorStoreIndex`` over the given documents.

    Args:
        documents (list): Documents to index.
        storage_context (StorageContext): Storage context handed to the index
            builder (it determines which vector store is used).

    Returns:
        VectorStoreIndex: The index returned by
        ``VectorStoreIndex.from_documents``.
    """
    build_index = VectorStoreIndex.from_documents
    return build_index(documents, storage_context=storage_context)

# Give each corpus its own Chroma collection. Previously both indices were
# built on one storage context (collection "dictionary_food"), so cookbook and
# dictionary vectors landed in the same collection and both query engines
# retrieved from the same mixed pool.
# NOTE(review): a database directory populated by the earlier shared-collection
# version may still hold cookbook vectors in "dictionary_food"; consider
# deleting CHROMA_DB_PATH once before re-running.
cookbook_storage_context = create_vector_store(CHROMA_DB_PATH, "cookbook_recipes")
dictionary_storage_context = create_vector_store(CHROMA_DB_PATH, "dictionary_food")

# Backward-compatible alias: `storage_context` used to be the only context and
# pointed at the "dictionary_food" collection.
storage_context = dictionary_storage_context

cookbook_index = create_index(cookbook_docs, cookbook_storage_context)
dictionary_index = create_index(dictionary_docs, dictionary_storage_context)

### 5: Create Query Engines

In [15]:
def create_query_engine(index, similarity_top_k=3):
    """
    Turn an index into a query engine with a fixed retrieval depth.

    Args:
        index (VectorStoreIndex): Index that supplies ``as_query_engine``.
        similarity_top_k (int, optional): Value forwarded to the engine as
            ``similarity_top_k``. Defaults to 3.

    Returns:
        QueryEngine: Whatever ``index.as_query_engine(...)`` returns.
    """
    engine_options = {"similarity_top_k": similarity_top_k}
    return index.as_query_engine(**engine_options)

# One query engine per index, both with the default similarity_top_k of 3.
# These names are consumed by the agent-construction cell below.
cookbook_query_engine = create_query_engine(cookbook_index)
dictionary_query_engine = create_query_engine(dictionary_index)

### 6: Create Agent

In [16]:
def create_query_engine_tools(cookbook_engine, dictionary_engine):
    """
    Wrap the two query engines as named tools an agent can choose between.

    Args:
        cookbook_engine (QueryEngine): Engine answering from the cookbook.
        dictionary_engine (QueryEngine): Engine answering from the dictionary.

    Returns:
        list: Two ``QueryEngineTool`` objects, cookbook first ("Food_Cookbook")
        and dictionary second ("Food_Dictionary").
    """
    # (engine, tool name, tool description) -- order is preserved in the result.
    tool_specs = (
        (
            cookbook_engine,
            "Food_Cookbook",
            "Provides a collection of 175 choice recipes from the Chicago Women's Club in 1887.",
        ),
        (
            dictionary_engine,
            "Food_Dictionary",
            "Provides definitions of words related to English cuisine and food industry terms.",
        ),
    )
    return [
        QueryEngineTool(
            query_engine=engine,
            metadata=ToolMetadata(name=tool_name, description=tool_description),
        )
        for engine, tool_name, tool_description in tool_specs
    ]

def create_agent(query_engine_tools):
    """
    Assemble an agent runner that can call the supplied query engine tools.

    The worker is created quietly (``verbose=False``) and with parallel tool
    calls disabled (``allow_parallel_tool_calls=False``).

    Args:
        query_engine_tools (list): ``QueryEngineTool`` objects the agent may use.

    Returns:
        AgentRunner: Runner wrapping the function-calling agent worker.
    """
    worker_options = {
        "verbose": False,
        "allow_parallel_tool_calls": False,
    }
    worker = FunctionCallingAgentWorker.from_tools(query_engine_tools, **worker_options)
    return AgentRunner(worker)

# Wire the two query engines into tools (cookbook first, dictionary second)
# and build the agent that the test cells below talk to.
query_engine_tools = create_query_engine_tools(cookbook_query_engine, dictionary_query_engine)
agent = create_agent(query_engine_tools)

### 7: Test the Agent

In [17]:
def query_agent(agent, query):
    """
    Send one chat message to the agent and hand back its reply.

    Args:
        agent (AgentRunner): Agent exposing a ``chat`` method.
        query (str): The message to send.

    Returns:
        The value returned by ``agent.chat(query)``, passed through unchanged
        (the example cells print it as text).
    """
    reply = agent.chat(query)
    return reply

In [18]:
# Example usage: a definition-style question.
# The query text is repeated verbatim in the printed label, so keep the two
# strings in sync when editing.
response1 = query_agent(agent, "What is aloo?")
print("Query: What is aloo?")
print("Response:", response1)

Query: What is aloo?
Response: The output from the ipython tool indicates that "Aloo" refers to potatoes in South Asian cuisine, particularly in India and other surrounding countries.


In [19]:
# Example usage: a recipe-style question.
# NOTE(review): the query actually sent ends with "Give me steps.", but the
# printed label below omits that suffix -- the label is not the exact query.
response2 = query_agent(agent, "How to prepare Marrow Dumpling Soup? Give me steps.",)
print("\nQuery: How to prepare Marrow Dumpling Soup?")
print("Response:", response2)


Query: How to prepare Marrow Dumpling Soup?
Response: The output from the ipython tool provides a detailed recipe for preparing Marrow Dumpling Soup. The steps are:

1. Prepare small round dumplings from a mixture of grated French breakfast-roll crust, crumbled inside, milk-soaked breadcrumbs, chopped fine beef marrow, egg yolks, salt, and pepper.
2. Boil three pints of stock in a large pot.
3. Carefully drop the dumplings into the boiling liquid and let them cook for 20-30 minutes or until they're cooked through.
4. Prepare the vegetables by cutting an equal quantity of carrots, turnips, and onions into small balls or squares.
5. Boil these ingredients in water with a little salt until tender.
6. Carefully remove the pieces of tail from the pot, strain the liquor, and skim off all the fat.
7. Put the cooked vegetables back into the stewpan along with a pound of beef marrow, a teaspoonful of salt, and the prepared dumplings.
8. Let everything simmer together for a while.
9. Add a glas