-
-
Notifications
You must be signed in to change notification settings - Fork 3.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(tool): Add URLReaderTool (#2577)
This pull request adds the URLReaderTool to the list of tools in the GPT4Brain class. The URLReaderTool allows for reading the content of a URL.
- Loading branch information
1 parent
62f9b5b
commit 105a2b8
Showing
3 changed files
with
56 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from .image_generator import ImageGeneratorTool | ||
from .web_search import WebSearchTool | ||
from .url_reader import URLReaderTool | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# URL reader tool: load a page with Playwright and combine its text content | ||
import os | ||
from typing import Dict, Optional, Type | ||
|
||
from langchain.callbacks.manager import ( | ||
AsyncCallbackManagerForToolRun, | ||
CallbackManagerForToolRun, | ||
) | ||
from langchain.pydantic_v1 import BaseModel as BaseModelV1 | ||
from langchain.pydantic_v1 import Field as FieldV1 | ||
from langchain_community.document_loaders import PlaywrightURLLoader | ||
from langchain_core.tools import BaseTool | ||
from logger import get_logger | ||
from pydantic import BaseModel | ||
|
||
logger = get_logger(__name__) | ||
|
||
|
||
# Argument schema for the URL reader tool: a single required string, `url`.
class URLReaderInput(BaseModelV1):
    # `...` marks the field as required (no default value).
    url: str = FieldV1(
        ...,
        title="url",
        description="url to read",
    )
|
||
|
||
class URLReaderTool(BaseTool):
    """Tool that loads a URL with Playwright and returns its text content.

    The page is loaded through ``PlaywrightURLLoader`` with the ``header``
    and ``footer`` selectors removed. The ``page_content`` of every document
    the loader returns is concatenated into one string.
    """

    name = "url-reader"
    description = "useful for when you need to read the content of a url."
    # NOTE(review): the annotation names pydantic's ``BaseModel`` while
    # ``URLReaderInput`` derives from ``langchain.pydantic_v1.BaseModel``;
    # kept as-is to avoid changing the declared field type -- confirm.
    args_schema: Type[BaseModel] = URLReaderInput
    # NOTE(review): not read anywhere in this class; presumably carried over
    # from another tool. Kept so the attribute stays available -- confirm
    # before removing.
    api_key = os.getenv("BRAVE_SEARCH_API_KEY")

    def _run(
        self, url: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> Dict[str, str]:
        """Run the tool synchronously.

        Args:
            url: Address of the page to read.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict with a single ``"content"`` key holding the concatenated
            page text (an empty string when the loader returns no documents).
        """
        loader = PlaywrightURLLoader(urls=[url], remove_selectors=["header", "footer"])
        pages = loader.load()

        return {"content": "".join(page.page_content for page in pages)}

    async def _arun(
        self, url: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> Dict[str, str]:
        """Run the tool asynchronously.

        Args:
            url: Address of the page to read.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict with a single ``"content"`` key holding the concatenated
            page text (an empty string when the loader returns no documents).
        """
        loader = PlaywrightURLLoader(urls=[url], remove_selectors=["header", "footer"])
        # Use the loader's async entry point: the synchronous ``load()`` would
        # block the event loop, and Playwright's sync API refuses to run
        # inside a running asyncio loop.
        pages = await loader.aload()

        return {"content": "".join(page.page_content for page in pages)}