Skip to content

Commit

Permalink
Merge branch 'master' into setup_optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
lengweiping1983 committed Apr 24, 2023
2 parents 252d8d7 + 31abb42 commit c536de9
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 82 deletions.
3 changes: 3 additions & 0 deletions autogpt/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from autogpt.processing.text import summarize_text
from autogpt.prompts.generator import PromptGenerator
from autogpt.speech import say_text
from autogpt.url_utils.validators import validate_url

CFG = Config()
AGENT_MANAGER = AgentManager()
Expand Down Expand Up @@ -141,6 +142,7 @@ def execute_command(
@command(
"get_text_summary", "Get text summary", '"url": "<url>", "question": "<question>"'
)
@validate_url
def get_text_summary(url: str, question: str) -> str:
"""Return the results of a Google search
Expand All @@ -157,6 +159,7 @@ def get_text_summary(url: str, question: str) -> str:


@command("get_hyperlinks", "Get text summary", '"url": "<url>"')
@validate_url
def get_hyperlinks(url: str) -> Union[str, List[str]]:
"""Return the results of a Google search
Expand Down
2 changes: 2 additions & 0 deletions autogpt/commands/git_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from autogpt.commands.command import command
from autogpt.config import Config
from autogpt.url_utils.validators import validate_url

CFG = Config()

Expand All @@ -14,6 +15,7 @@
CFG.github_username and CFG.github_api_key,
"Configure github_username and github_api_key.",
)
@validate_url
def clone_repository(repository_url: str, clone_path: str) -> str:
"""Clone a GitHub repository locally.
Expand Down
82 changes: 3 additions & 79 deletions autogpt/commands/web_requests.py
Original file line number Diff line number Diff line change
@@ -1,87 +1,21 @@
"""Browse a webpage and summarize it using the LLM model"""
from __future__ import annotations

from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from requests import Response
from requests.compat import urljoin

from autogpt.config import Config
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
from autogpt.url_utils.validators import validate_url

CFG = Config()

session = requests.Session()
session.headers.update({"User-Agent": CFG.user_agent})


def is_valid_url(url: str) -> bool:
"""Check if the URL is valid
Args:
url (str): The URL to check
Returns:
bool: True if the URL is valid, False otherwise
"""
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except ValueError:
return False


def sanitize_url(url: str) -> str:
"""Sanitize the URL
Args:
url (str): The URL to sanitize
Returns:
str: The sanitized URL
"""
return urljoin(url, urlparse(url).path)


def check_local_file_access(url: str) -> bool:
"""Check if the URL is a local file
Args:
url (str): The URL to check
Returns:
bool: True if the URL is a local file, False otherwise
"""
local_prefixes = [
"file:///",
"file://localhost/",
"file://localhost",
"http://localhost",
"http://localhost/",
"https://localhost",
"https://localhost/",
"http://2130706433",
"http://2130706433/",
"https://2130706433",
"https://2130706433/",
"http://127.0.0.1/",
"http://127.0.0.1",
"https://127.0.0.1/",
"https://127.0.0.1",
"https://0.0.0.0/",
"https://0.0.0.0",
"http://0.0.0.0/",
"http://0.0.0.0",
"http://0000",
"http://0000/",
"https://0000",
"https://0000/",
]
return any(url.startswith(prefix) for prefix in local_prefixes)


@validate_url
def get_response(
url: str, timeout: int = 10
) -> tuple[None, str] | tuple[Response, None]:
Expand All @@ -99,17 +33,7 @@ def get_response(
requests.exceptions.RequestException: If the HTTP request fails
"""
try:
# Restrict access to local files
if check_local_file_access(url):
raise ValueError("Access to local files is restricted")

# Most basic check if the URL is valid:
if not url.startswith("http://") and not url.startswith("https://"):
raise ValueError("Invalid URL format")

sanitized_url = sanitize_url(url)

response = session.get(sanitized_url, timeout=timeout)
response = session.get(url, timeout=timeout)

# Check if the response contains an HTTP error
if response.status_code >= 400:
Expand Down
2 changes: 2 additions & 0 deletions autogpt/commands/web_selenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from autogpt.commands.command import command
from autogpt.config import Config
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
from autogpt.url_utils.validators import validate_url

FILE_DIR = Path(__file__).parent.parent
CFG = Config()
Expand All @@ -31,6 +32,7 @@
"Browse Website",
'"url": "<url>", "question": "<what_you_want_to_find_on_website>"',
)
@validate_url
def browse_website(url: str, question: str) -> tuple[str, WebDriver]:
"""Browse a website and return the answer and links to the user
Expand Down
Empty file added autogpt/url_utils/__init__.py
Empty file.
101 changes: 101 additions & 0 deletions autogpt/url_utils/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import functools
from typing import Any, Callable
from urllib.parse import urljoin, urlparse

from requests.compat import urljoin


def validate_url(func: Callable[..., Any]) -> Any:
    """Decorator that validates the ``url`` argument of a command function.

    The wrapped function is invoked only after the URL passes a basic scheme
    check, a urllib parse check, and a local-file access check. The URL is
    sanitized (query/fragment stripped) before being forwarded.

    Args:
        func (Callable): A command function whose first argument is a URL

    Returns:
        Callable: The wrapping function
    """

    @functools.wraps(func)
    def wrapper(url: str, *args, **kwargs) -> Any:
        """Check if the URL is valid using a basic check, urllib check, and local file check

        Args:
            url (str): The URL to check

        Returns:
            Any: the result of the wrapped function

        Raises:
            ValueError: if the url fails any of the validation tests
        """
        # Most basic check if the URL is valid:
        if not url.startswith("http://") and not url.startswith("https://"):
            raise ValueError("Invalid URL format")
        if not is_valid_url(url):
            raise ValueError("Missing Scheme or Network location")
        # Restrict access to local files
        if check_local_file_access(url):
            raise ValueError("Access to local files is restricted")

        return func(sanitize_url(url), *args, **kwargs)

    return wrapper


def is_valid_url(url: str) -> bool:
    """Check if the URL is valid

    Args:
        url (str): The URL to check

    Returns:
        bool: True if the URL is valid, False otherwise
    """
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse raises on some malformed inputs (e.g. bad IPv6 literals)
        return False
    # A usable URL must carry both a scheme and a network location
    return bool(parts.scheme) and bool(parts.netloc)


def sanitize_url(url: str) -> str:
    """Sanitize the URL

    Args:
        url (str): The URL to sanitize

    Returns:
        str: The sanitized URL
    """
    # Re-join the URL against its own path component, which drops the
    # query string and fragment while keeping scheme and host intact.
    path_only = urlparse(url).path
    return urljoin(url, path_only)


def check_local_file_access(url: str) -> bool:
    """Check if the URL is a local file

    Args:
        url (str): The URL to check

    Returns:
        bool: True if the URL is a local file, False otherwise
    """
    # str.startswith accepts a tuple, testing all prefixes in one call.
    # Covers file:// URLs plus localhost under its common aliases
    # (127.0.0.1, 0.0.0.0, and decimal/zero-padded forms).
    local_prefixes = (
        "file:///",
        "file://localhost/",
        "file://localhost",
        "http://localhost",
        "http://localhost/",
        "https://localhost",
        "https://localhost/",
        "http://2130706433",
        "http://2130706433/",
        "https://2130706433",
        "https://2130706433/",
        "http://127.0.0.1/",
        "http://127.0.0.1",
        "https://127.0.0.1/",
        "https://127.0.0.1",
        "https://0.0.0.0/",
        "https://0.0.0.0",
        "http://0.0.0.0/",
        "http://0.0.0.0",
        "http://0000",
        "http://0000/",
        "https://0000",
        "https://0000/",
    )
    return url.startswith(local_prefixes)
12 changes: 9 additions & 3 deletions tests/unit/test_browse_scrape_text.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by CodiumAI

import pytest
import requests

from autogpt.commands.web_requests import scrape_text
Expand Down Expand Up @@ -58,17 +59,22 @@ def test_scrape_text_with_valid_url(self, mocker):
url = "http://www.example.com"
assert scrape_text(url) == expected_text

# Tests that the function returns an error message when an invalid or unreachable
# Tests that an error is raised when an invalid url is provided.
def test_invalid_url(self):
url = "invalidurl.com"
pytest.raises(ValueError, scrape_text, url)

# Tests that the function returns an error message when an unreachable
# url is provided.
def test_invalid_url(self, mocker):
def test_unreachable_url(self, mocker):
# Mock the requests.get() method to raise an exception
mocker.patch(
"requests.Session.get", side_effect=requests.exceptions.RequestException
)

# Call the function with an invalid URL and assert that it returns an error
# message
url = "http://www.invalidurl.com"
url = "http://thiswebsitedoesnotexist.net/"
error_message = scrape_text(url)
assert "Error:" in error_message

Expand Down
28 changes: 28 additions & 0 deletions tests/unit/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,34 @@ def test_prompt_user_manual_mode(self):
self.assertEqual(ai_config.ai_role, "an AI designed to browse bake a cake.")
self.assertEqual(ai_config.ai_goals, ["Purchase ingredients", "Bake a cake"])

@requires_api_key("OPENAI_API_KEY")
def test_prompt_user_manual_mode_default(self):
user_inputs = [
"--manual",
"",
"",
"",
"",
]
with patch("builtins.input", side_effect=user_inputs):
ai_config = prompt_user()

default_ai_name = "Entrepreneur-GPT"
default_ai_role = (
"an AI designed to autonomously develop and run businesses with the"
" sole goal of increasing your net worth."
)
default_ai_goals = [
"Increase net worth",
"Grow Twitter Account",
"Develop and manage multiple businesses autonomously",
]

self.assertIsInstance(ai_config, AIConfig)
self.assertEqual(ai_config.ai_name, default_ai_name)
self.assertEqual(ai_config.ai_role, default_ai_role)
self.assertEqual(ai_config.ai_goals, default_ai_goals)


if __name__ == "__main__":
unittest.main()

0 comments on commit c536de9

Please sign in to comment.