# Setup

In [1]:
from dotenv import load_dotenv
env = load_dotenv()

In [2]:
from langgraph.graph import StateGraph, END
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain_openai import ChatOpenAI
from IPython.display import Image
import IPython



# Tools

In [3]:
from typing import TypedDict, Annotated, Dict, Any
import operator

class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], operator.add]

In [75]:
# Download PDF tool
from langchain.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.pydantic_v1 import BaseModel, Field
from bs4 import BeautifulSoup
import tiktoken
import requests

# Search for relevant menu
search_tool = TavilySearchResults()


# Scrape menu data
class ScrapeInput(BaseModel):
    url: str = Field(description="the URL of the menu page")

@tool("scrape-pdf", args_schema=ScrapeInput, return_direct=True)
def scrape_pdf(url: str):
    """Scrape a webpage that may include links to a restaurants current menu and return the links"""
    pdf_links = []
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        links = soup.find_all('a')
        pdf_links = [link.get('href') for link in links if link.get('href').endswith('.pdf')]
    except Exception as e:
        print(f"failed to scrape {url} ERROR: {e}")
    return pdf_links


# Encode & Upsert menu data
enc = tiktoken.encoding_for_model("gpt-4o")

class tokenizerInput(BaseModel):
    pdf_url: str = Field(description="The url of the PDF file that must be tokenized")

# Tokenize the pdf 
@tool("tokenize-pdf", args_schema=tokenizerInput, return_direct=True)
def tokenize_pdf():
    pass

In [76]:
tools = [search_tool, tokenize_pdf]

# RAG Model

In [4]:
class Model:
    # Define the model
    def __init__(self, model, tools, system=""):
        self.system = system
        graph = StateGraph(AgentState)
        graph.add_node("llm", self.openai_inference)
        graph.add_node("action", self.take_action)
        graph.add_conditional_edges(
            "llm",
            self.exists_action,
            {True: "action", False: END}
        )
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile()
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

    # Check if llm requires action 
    def exists_action(self, state: AgentState):
        result = state['messages'][-1]
        return len(result.tool_calls) > 0
    # Run a tool ordered by the model
    def take_action(self, state: AgentState):
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            result = self.tools[t['name']].invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}
    
    # Invokes the current message chain
    def openai_inference(self, state: AgentState):
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

In [79]:
prompt = """You are tasked with finding and downloading a PDF of a restaurant's menu. 
Use the search tool to find the url of the menu, if you cannot find the pdf url directly use the scrape_pdf tool to scrape webpages for the url.
DO NOT attempt to download blank menus. Save the file using the PDF download tool, name the PDF after the restaurant.
Once you have downloaded the PDF there are no more tasks to be done. 
"""

llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
)

model = Model(llm, tools, system=prompt)

In [84]:
messages = ["CRISP Rochester"]
result = model.graph.invoke({"messages": messages})

Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'CRISP Rochester menu PDF'}, 'id': 'call_ObSUzGd8rUFO4mzKCvz5FQve', 'type': 'tool_call'}
Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'OG Dumpling House Rochester NY menu PDF'}, 'id': 'call_lLKtpD8X6PpSeiKTTxoDOYm5', 'type': 'tool_call'}
Back to the model!
Calling: {'name': 'download-pdf', 'args': {'url': 'https://crisprochester.com/wp-content/uploads/2023/05/Lunch-and-Dinner.pdf', 'filename': 'CRISP_Rochester'}, 'id': 'call_Dy9uSMHh3yanhb3i5re5ICMa', 'type': 'tool_call'}
failed to download CRISP_Rochester ERROR: HTTP Error 404: Not Found
Calling: {'name': 'download-pdf', 'args': {'url': 'https://ogdumplinghouseny.com/images/menu/menu24.6.12.pdf', 'filename': 'OG_Dumpling_House_Rochester_NY'}, 'id': 'call_b7IzX4qp3WXWiqgoC8dWmzuK', 'type': 'tool_call'}
Back to the model!


In [85]:
result

{'messages': ['CRISP Rochester',
  'OG Dumpling House, Rochester NY',
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_ObSUzGd8rUFO4mzKCvz5FQve', 'function': {'arguments': '{"query": "CRISP Rochester menu PDF"}', 'name': 'tavily_search_results_json'}, 'type': 'function'}, {'id': 'call_lLKtpD8X6PpSeiKTTxoDOYm5', 'function': {'arguments': '{"query": "OG Dumpling House Rochester NY menu PDF"}', 'name': 'tavily_search_results_json'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 63, 'prompt_tokens': 265, 'total_tokens': 328}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_d33f7b429e', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-c5ddb629-5a6f-48ad-bf05-413ebdabfa2f-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'CRISP Rochester menu PDF'}, 'id': 'call_ObSUzGd8rUFO4mzKCvz5FQve', 'type': 'tool_call'}, {'name': 'tavily_search_results_json', 'args': {'query': 'OG Dumpling House Rochester

In [17]:
import pygraphviz
Image(model.graph.get_graph().draw_png())

ModuleNotFoundError: No module named 'pygraphviz'

# Datastore

In [None]:
# Tokenize the pdf 
import tiktoken
enc = tiktoken.encoding_for_model("gpt-4o")


In [None]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone()

pc.create_index(
    name="quickstart",
    dimension=5120, # Lamma 2 embedding dim
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)