In [2]:
from dotenv import load_dotenv
env = load_dotenv()

In [38]:
from langgraph.graph import StateGraph, END
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain_openai import ChatOpenAI
from IPython.display import Image
import IPython

In [37]:
# Tools 

In [41]:
from typing import TypedDict, Annotated, Dict, Any
import operator

class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], operator.add]
    # output: str

In [58]:
# Download PDF tool
from langchain.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.pydantic_v1 import BaseModel, Field
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen

search_tool = TavilySearchResults()

class ScrapeInput(BaseModel):
    url: str = Field(description="the URL of the menu page")

@tool("scrape-pdf", args_schema=ScrapeInput, return_direct=True)
def scrape_pdf(url: str):
    """Scrape a webpage that may include links to a restaurants current menu and return the links"""
    pdf_links = []
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        links = soup.find_all('a')
        pdf_links = [link.get('href') for link in links if link.get('href').endswith('.pdf')]
    except:
        print(f"failed to scrape {url}")
    return pdf_links

class DowloadInput(BaseModel):
    url: str = Field(description="the URL of the menu")
    filename: str = Field(description="the name of the restaurants")

@tool("download-pdf", args_schema=DowloadInput, return_direct=True)
def download_pdf(url: str, filename: str):
    """Download a pdf file from a given url and filename"""
    try:
        response = urlopen(download_url)
        file = open("pdf/"+filename+".pdf", 'wb')
        file.write(response.read())
        file.close()
    except:
        print(f"failed to download {filename}")

In [59]:
tools = [search_tool, scrape_pdf, download_pdf]

In [60]:
# RAG Model

In [61]:
class Model:
    # Define the model
    def __init__(self, model, tools, system=""):
        self.system = system
        graph = StateGraph(AgentState)
        graph.add_node("llm", self.classify)
        graph.add_node("action", self.take_action)
        #graph.add_node("output-parser", self.classify)
        graph.add_conditional_edges(
            "llm",
            self.exists_action,
            {True: "action", False: END}
        )
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile()
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

    # Check if llm requires action 
    def exists_action(self, state: AgentState):
        result = state['messages'][-1]
        return len(result.tool_calls) > 0
    # Run a tool ordered by the model
    def take_action(self, state: AgentState):
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            result = self.tools[t['name']].invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}
    # Perform inference on the gathered context
    def classify(self, state: AgentState):
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    # def parse_output_to_string(self, state: AgentState):
    #     message = state['messages'][-1]
    #     json_output = JsonOutputParser(pydantic_object=Restaurants).invoke(message)
    #     return {'json_output': json_output}

In [62]:
prompt = """You are tasked with finding and downloading a PDF of a restaurant's menu. 
Use the search tool to find the url of the menu, if you cannot find the pdf url directly use the scrape_pdf tool to scrape webpages for the url.
DO NOT attempt to download blank menus. Save the file using the PDF download tool, name the PDF after the restaurant.
Once you have downloaded the PDF there are no more tasks to be done. 
"""

llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
)

model = Model(llm, tools, system=prompt)

In [63]:
messages = ["Mondo, Middletown Connecticut"]
result = model.graph.invoke({"messages": messages})

Calling: {'name': 'tavily_search_results_json', 'args': {'query': 'Mondo restaurant menu Middletown Connecticut'}, 'id': 'call_YS7XGjhQjhqTMa6sg6nJAlH9', 'type': 'tool_call'}
Back to the model!
Calling: {'name': 'scrape-pdf', 'args': {'url': 'https://www.mondomiddletown.com/menu'}, 'id': 'call_WQgdEsamGS35uW1JoQukiWCP', 'type': 'tool_call'}
Back to the model!
Calling: {'name': 'download-pdf', 'args': {'url': 'https://www.mondomiddletown.com/s/Mondo_Menu.pdf', 'filename': 'Mondo'}, 'id': 'call_3BjdjIrICRAgRX4GvI8a28mw', 'type': 'tool_call'}
failed to download Mondo
Calling: {'name': 'download-pdf', 'args': {'url': 'https://www.mondomiddletown.com/s/Mondo_lunch-kjp4.pdf', 'filename': 'Mondo_Lunch'}, 'id': 'call_JJeJNHm2YgE5un4ESeMbyqae', 'type': 'tool_call'}
failed to download Mondo_Lunch
Calling: {'name': 'download-pdf', 'args': {'url': 'https://www.mondomiddletown.com/s/Mondo_HappyHourMenu.pdf', 'filename': 'Mondo_HappyHour'}, 'id': 'call_FEkBzFj8xhuCZljoklRu0BJ6', 'type': 'tool_call'}

In [64]:
result

{'messages': ['Mondo, Middletown Connecticut',
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_YS7XGjhQjhqTMa6sg6nJAlH9', 'function': {'arguments': '{"query":"Mondo restaurant menu Middletown Connecticut"}', 'name': 'tavily_search_results_json'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 258, 'total_tokens': 283}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_d33f7b429e', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-26ecd029-bad8-42b3-9b7d-7b1e3ce55d05-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'Mondo restaurant menu Middletown Connecticut'}, 'id': 'call_YS7XGjhQjhqTMa6sg6nJAlH9', 'type': 'tool_call'}], usage_metadata={'input_tokens': 258, 'output_tokens': 25, 'total_tokens': 283}),
  ToolMessage(content='[{\'url\': \'https://www.mondomiddletown.com/menu\', \'content\': \'10 Main Street, Middletown, CT 06457 • (860) 343-3300 . Photography by: RC Zaj

In [17]:
import pygraphviz
Image(model.graph.get_graph().draw_png())

ModuleNotFoundError: No module named 'pygraphviz'

In [None]:
# Datastore

In [None]:
# Tokenize the pdf 
import tiktoken
enc = tiktoken.encoding_for_model("gpt-4o")




In [None]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone()

pc.create_index(
    name="quickstart",
    dimension=5120, # Lamma 2 embedding dim
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)