Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(tool): Add URLReaderTool #2577

Merged
merged 1 commit into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backend/modules/brain/integrations/GPT4/Brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from modules.chat.dto.chats import ChatQuestion
from modules.chat.dto.outputs import GetChatHistoryOutput
from modules.chat.service.chat_service import ChatService
from modules.tools import ImageGeneratorTool, WebSearchTool
from modules.tools import ImageGeneratorTool, URLReaderTool, WebSearchTool


class AgentState(TypedDict):
Expand All @@ -37,7 +37,7 @@ class GPT4Brain(KnowledgeBrainQA):
KnowledgeBrainQA (_type_): A brain that stores the knowledge internally
"""

tools: List[BaseTool] = [WebSearchTool(), ImageGeneratorTool()]
tools: List[BaseTool] = [WebSearchTool(), ImageGeneratorTool(), URLReaderTool()]
tool_executor: ToolExecutor = ToolExecutor(tools)
model_function: ChatOpenAI = None

Expand Down
1 change: 1 addition & 0 deletions backend/modules/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .image_generator import ImageGeneratorTool
from .web_search import WebSearchTool
from .url_reader import URLReaderTool

53 changes: 53 additions & 0 deletions backend/modules/tools/url_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Load a single URL and combine the text content of the loaded pages
import os
from typing import Dict, Optional, Type

from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import BaseModel as BaseModelV1
from langchain.pydantic_v1 import Field as FieldV1
from langchain_community.document_loaders import PlaywrightURLLoader
from langchain_core.tools import BaseTool
from logger import get_logger
from pydantic import BaseModel

logger = get_logger(__name__)


class URLReaderInput(BaseModelV1):
    # Argument schema for the URL reader tool: one required string field.
    # Documented with comments rather than a docstring so the schema that
    # pydantic generates for this model stays exactly as it was.
    url: str = FieldV1(
        ...,  # Ellipsis marks the field as required
        description="url to read",
        title="url",
    )


class URLReaderTool(BaseTool):
    """Tool that loads a URL with Playwright and returns its text content.

    The URL is fetched through ``PlaywrightURLLoader`` with ``header`` and
    ``footer`` elements removed, and the ``page_content`` of every loaded
    document is concatenated into a single string.
    """

    name = "url-reader"
    description = "useful for when you need to read the content of a url."
    # URLReaderInput derives from the pydantic-v1 BaseModel, so the annotation
    # uses that same base class.
    args_schema: Type[BaseModelV1] = URLReaderInput
    # NOTE(review): this value is not read anywhere in this class; it is kept
    # only so the tool's attributes stay backward-compatible. Confirm whether
    # it can be dropped.
    api_key = os.getenv("BRAVE_SEARCH_API_KEY")

    @staticmethod
    def _build_loader(url: str) -> PlaywrightURLLoader:
        """Return a loader for *url* that strips header and footer elements."""
        return PlaywrightURLLoader(urls=[url], remove_selectors=["header", "footer"])

    @staticmethod
    def _combine(documents) -> Dict:
        """Join the ``page_content`` of all *documents* into the tool result."""
        return {"content": "".join(doc.page_content for doc in documents)}

    def _run(
        self, url: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> Dict:
        """Run the tool synchronously.

        Args:
            url: The URL to read.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict with a single ``"content"`` key holding the combined text
            of the loaded pages (an empty string if nothing was loaded).
        """
        return self._combine(self._build_loader(url).load())

    async def _arun(
        self, url: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> Dict:
        """Run the tool asynchronously.

        Args:
            url: The URL to read.
            run_manager: Optional async callback manager; not used by this tool.

        Returns:
            A dict with a single ``"content"`` key holding the combined text
            of the loaded pages (an empty string if nothing was loaded).
        """
        # Use the loader's async entry point: the synchronous ``load()`` would
        # block the event loop and drives Playwright's sync API, which is not
        # meant to be used from inside a running asyncio loop.
        documents = await self._build_loader(url).aload()
        return self._combine(documents)