In [2]:
# Import things that are needed generically
from pydantic import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.prebuilt import ToolNode
# from langchain_openai.chat_models.base import _convert_message_to_dict
from langchain_core.messages import SystemMessage
from typing import Optional

class SearchInput(BaseModel):
    # Argument schema for the `search_online` tool: a single required string.
    # (Documented with comments rather than a class docstring so the generated
    # schema is left exactly as it was.)
    query: str = Field(
        ...,  # explicit "required" marker; same as omitting the default
        description="a detailed sentence for efficient search",
    )

class WeatherInput(BaseModel):
    # Argument schema for the `get_current_weather` tool.
    # (Documented with comments rather than a class docstring so the schema's
    # own description is not altered.)
    location: str = Field(description="The city and state, e.g. San Francisco, CA")
    # `unit` previously had no description, unlike every other tool argument,
    # so the model received no guidance on what to put here.
    # NOTE(review): `enum` is forwarded to the generated schema as metadata;
    # since the annotation is plain `str`, other values are presumably still
    # accepted at validation time -- confirm if strict checking is wanted.
    unit: str = Field(
        description="The temperature unit to use, either celsius or fahrenheit",
        enum=["celsius", "fahrenheit"],
    )

@tool("search_online", args_schema=SearchInput)
def search(query: str) -> str:
    """An online search tool to retrieve the most accurate and up-to-date, latest information available."""
    # The docstring above doubles as the tool description given to the model,
    # so it stays a single sentence; implementation notes live in comments.
    #
    # query:   the search sentence to send to Tavily.
    # returns: the hits as a numbered list ("1. ...", "2. ...") separated by
    #          blank lines, or the raw error text if the search itself failed.

    # Named `searcher` (not `tool`) so the imported `tool` decorator is not
    # shadowed inside this function.
    searcher = TavilySearchResults(max_results=2)
    results = searcher.invoke(query)

    # When the request fails, the Tavily tool can hand back an error string
    # instead of a list of hits. Iterating that string and indexing each
    # character with ['content'] would raise TypeError, so pass it through.
    if isinstance(results, str):
        return results

    return "\n\n".join(
        f"{rank}. {hit['content']}" for rank, hit in enumerate(results, start=1)
    )

@tool("get_current_weather", args_schema=WeatherInput)
def get_weather(location: str, unit: str):
    """This function retrieves the latest weather information for a specified location. """
    # Placeholder implementation: no weather service is queried. The reading
    # is always the fixed value 22, labelled with whatever unit was requested.
    report = f"Now the weather in {location} is 22 {unit}"
    return report


In [3]:
from langgraph.prebuilt import ToolExecutor

# Register both tools with a single ToolExecutor instance.
# NOTE(review): the indented echo of this statement in the cell output looks
# like the tail of a warning raised on this line (possibly a deprecation
# notice for ToolExecutor) -- TODO confirm, and consider whether the
# already-imported ToolNode should replace it.
tool_executor = ToolExecutor([search, get_weather])

  tool_executor = ToolExecutor([search, get_weather])


In [5]:


import multiprocessing
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.chat_models import ChatLlamaCpp
from llamacpp_function_calling.chat_format import hermes_pro_function_calling

# Location of the local GGUF model file, relative to the working directory.
local_model = "../models/Hermes-2-Pro-Llama-3-8B-Q8.gguf"

model = ChatLlamaCpp(
    temperature = 0.5,
    model_path =local_model,
    n_ctx = 10000,  # context window size, in tokens
    n_gpu_layers = 20,  # number of layers offloaded to the GPU
    n_batch = 850,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    # Leave five cores free for the rest of the system, but never ask for
    # fewer than one thread: on machines with five or fewer cores the bare
    # subtraction would produce zero or a negative thread count.
    n_threads = max(1, multiprocessing.cpu_count() - 5),
    # callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]),# Callbacks support token-wise streaming
    max_tokens = 512,  # upper bound on tokens generated per response
    verbose= True,
    # NOTE(review): this handler comes from the `llamacpp_function_calling`
    # import; the recorded run of this cell failed with ModuleNotFoundError
    # for that module -- confirm it is installed / on the path.
    chat_handler=hermes_pro_function_calling
)



ModuleNotFoundError: No module named 'llamacpp_function_calling'