Skip to content

Commit

Permalink
feat(tool): Add URLReaderTool (#2577)
Browse files Browse the repository at this point in the history
This pull request adds the URLReaderTool to the list of tools in the
GPT4Brain class. The URLReaderTool allows for reading the content of a
URL.
  • Loading branch information
StanGirard committed May 10, 2024
1 parent 62f9b5b commit 105a2b8
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
4 changes: 2 additions & 2 deletions backend/modules/brain/integrations/GPT4/Brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.outputs import GetChatHistoryOutput
from modules.chat.service.chat_service import ChatService
from modules.tools import ImageGeneratorTool, WebSearchTool
from modules.tools import ImageGeneratorTool, URLReaderTool, WebSearchTool


class AgentState(TypedDict):
Expand All @@ -37,7 +37,7 @@ class GPT4Brain(KnowledgeBrainQA):
KnowledgeBrainQA (_type_): A brain that stores the knowledge internally
"""

tools: List[BaseTool] = [WebSearchTool(), ImageGeneratorTool()]
tools: List[BaseTool] = [WebSearchTool(), ImageGeneratorTool(), URLReaderTool()]
tool_executor: ToolExecutor = ToolExecutor(tools)
model_function: ChatOpenAI = None

Expand Down
1 change: 1 addition & 0 deletions backend/modules/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .image_generator import ImageGeneratorTool
from .web_search import WebSearchTool
from .url_reader import URLReaderTool

53 changes: 53 additions & 0 deletions backend/modules/tools/url_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Tool that loads a single URL and returns the combined text content of the page
import os
from typing import Dict, Optional, Type

from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import BaseModel as BaseModelV1
from langchain.pydantic_v1 import Field as FieldV1
from langchain_community.document_loaders import PlaywrightURLLoader
from langchain_core.tools import BaseTool
from logger import get_logger
from pydantic import BaseModel

logger = get_logger(__name__)


# Argument schema for the URL reader tool: a single required string field
# holding the address of the page to read.
class URLReaderInput(BaseModelV1):
    url: str = FieldV1(
        ...,
        title="url",
        description="url to read",
    )


class URLReaderTool(BaseTool):
    """Tool that loads one URL with Playwright and returns its text content.

    ``header`` and ``footer`` elements are stripped from the page before the
    text is extracted. Both the sync and async entry points return a dict of
    the form ``{"content": <text>}``.
    """

    name = "url-reader"
    description = "useful for when you need to read the content of a url."
    # URLReaderInput derives from the pydantic-v1 base, so annotate with it.
    args_schema: Type[BaseModelV1] = URLReaderInput
    # NOTE(review): this key is not read anywhere in this tool; it is kept
    # only so existing attribute access keeps working. Annotated explicitly so
    # the field type does not have to be inferred from the value, which is
    # None when the environment variable is unset.
    api_key: Optional[str] = os.getenv("BRAVE_SEARCH_API_KEY")

    @staticmethod
    def _build_loader(url: str) -> PlaywrightURLLoader:
        """Create the loader for ``url`` with header/footer elements removed."""
        return PlaywrightURLLoader(urls=[url], remove_selectors=["header", "footer"])

    @staticmethod
    def _to_result(pages) -> Dict:
        """Concatenate the ``page_content`` of every loaded document."""
        return {"content": "".join(page.page_content for page in pages)}

    def _run(
        self, url: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> Dict:
        """Run the tool synchronously.

        Args:
            url: Address of the page to read.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            ``{"content": text}`` where ``text`` is the concatenated content
            of all documents produced by the loader (empty if none).
        """
        pages = self._build_loader(url).load()
        return self._to_result(pages)

    async def _arun(
        self, url: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> Dict:
        """Run the tool asynchronously.

        Args:
            url: Address of the page to read.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            ``{"content": text}``, same shape as the synchronous variant.
        """
        # Use the loader's async API: the synchronous ``load()`` drives
        # Playwright's sync API, which cannot be used from inside a running
        # event loop and would also block it.
        pages = await self._build_loader(url).aload()
        return self._to_result(pages)

0 comments on commit 105a2b8

Please sign in to comment.