
Commit

Merge branch 'master' into fix/default-config
Pwuts committed Apr 19, 2023
2 parents 2db4a5d + ecf2ba1 commit 8581ee2
Showing 31 changed files with 426 additions and 3,650 deletions.
14 changes: 11 additions & 3 deletions .env.template
@@ -108,15 +108,23 @@ OPENAI_API_KEY=your-openai-api-key

### OPEN AI
## IMAGE_PROVIDER - Image provider (Example: dalle)
## IMAGE_SIZE - Image size (Example: 256)
## DALLE: 256, 512, 1024
# IMAGE_PROVIDER=dalle
# IMAGE_SIZE=256

### HUGGINGFACE
## STABLE DIFFUSION
## (Default URL: https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4)
## Set in image_gen.py)
## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4)
## HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token)
# HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4
# HUGGINGFACE_API_TOKEN=your-huggingface-api-token

### STABLE DIFFUSION WEBUI
## SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password)
## SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860)
# SD_WEBUI_AUTH=
# SD_WEBUI_URL=http://127.0.0.1:7860

################################################################################
### AUDIO TO TEXT PROVIDER
################################################################################
1 change: 0 additions & 1 deletion .gitignore
@@ -10,7 +10,6 @@ auto_gpt_workspace/*
*.mpeg
.env
azure.yaml
outputs/*
ai_settings.yaml
last_run_ai_settings.yaml
.vscode
4 changes: 2 additions & 2 deletions autogpt/agent/agent.py
@@ -3,8 +3,8 @@
from autogpt.app import execute_command, get_command
from autogpt.chat import chat_with_ai, create_chat_message
from autogpt.config import Config
from autogpt.json_fixes.master_json_fix_method import fix_json_using_multiple_techniques
from autogpt.json_validation.validate_json import validate_json
from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques
from autogpt.json_utils.utilities import validate_json
from autogpt.logs import logger, print_assistant_thoughts
from autogpt.speech import say_text
from autogpt.spinner import Spinner
8 changes: 4 additions & 4 deletions autogpt/app.py
@@ -3,8 +3,8 @@
from typing import Dict, List, NoReturn, Union

from autogpt.agent.agent_manager import AgentManager
from autogpt.commands.analyze_code import analyze_code
from autogpt.commands.audio_text import read_audio_from_file
from autogpt.commands.evaluate_code import evaluate_code
from autogpt.commands.execute_code import (
execute_python_file,
execute_shell,
@@ -27,7 +27,7 @@
from autogpt.commands.web_selenium import browse_website
from autogpt.commands.write_tests import write_tests
from autogpt.config import Config
from autogpt.json_fixes.parsing import fix_and_parse_json
from autogpt.json_utils.json_fix_llm import fix_and_parse_json
from autogpt.memory import get_memory
from autogpt.processing.text import summarize_text
from autogpt.speech import say_text
@@ -181,8 +181,8 @@ def execute_command(command_name: str, arguments):
# TODO: Change these to take in a file rather than pasted code, if
# non-file is given, return instructions "Input should be a python
# filepath, write your code to file and try again"
elif command_name == "evaluate_code":
return evaluate_code(arguments["code"])
elif command_name == "analyze_code":
return analyze_code(arguments["code"])
elif command_name == "improve_code":
return improve_code(arguments["suggestions"], arguments["code"])
elif command_name == "write_tests":
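As a brief illustration of the rename (a sketch only; the sample code string is an arbitrary assumption, and actually running it requires a configured OpenAI key), the dispatcher now routes the "analyze_code" command name to the renamed helper:

from autogpt.app import execute_command

# "evaluate_code" is no longer a recognized command name after this change;
# the same request must now be issued as "analyze_code".
sample_code = "def add(a, b):\n    return a + b"  # arbitrary illustrative snippet
suggestions = execute_command("analyze_code", {"code": sample_code})
print(suggestions)  # analyze_code returns a list[str] of improvement suggestions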
autogpt/commands/evaluate_code.py → autogpt/commands/analyze_code.py
@@ -4,7 +4,7 @@
from autogpt.llm_utils import call_ai_function


def evaluate_code(code: str) -> list[str]:
def analyze_code(code: str) -> list[str]:
"""
A function that takes in a string and returns a response from create chat
completion api call.
81 changes: 73 additions & 8 deletions autogpt/commands/image_gen.py
@@ -14,11 +14,12 @@
CFG = Config()


def generate_image(prompt: str) -> str:
def generate_image(prompt: str, size: int = 256) -> str:
"""Generate an image from a prompt.
Args:
prompt (str): The prompt to use
size (int, optional): The size of the image. Defaults to 256. (Not supported by HuggingFace)
Returns:
str: The filename of the image
@@ -27,11 +28,14 @@ def generate_image(prompt: str) -> str:

# DALL-E
if CFG.image_provider == "dalle":
return generate_image_with_dalle(prompt, filename)
elif CFG.image_provider == "sd":
return generate_image_with_dalle(prompt, filename, size)
# HuggingFace
elif CFG.image_provider == "huggingface":
return generate_image_with_hf(prompt, filename)
else:
return "No Image Provider Set"
# SD WebUI
elif CFG.image_provider == "sdwebui":
return generate_image_with_sd_webui(prompt, filename, size)
return "No Image Provider Set"


def generate_image_with_hf(prompt: str, filename: str) -> str:
@@ -45,13 +49,16 @@ def generate_image_with_hf(prompt: str, filename: str) -> str:
str: The filename of the image
"""
API_URL = (
"https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
f"https://api-inference.huggingface.co/models/{CFG.huggingface_image_model}"
)
if CFG.huggingface_api_token is None:
raise ValueError(
"You need to set your Hugging Face API token in the config file."
)
headers = {"Authorization": f"Bearer {CFG.huggingface_api_token}"}
headers = {
"Authorization": f"Bearer {CFG.huggingface_api_token}",
"X-Use-Cache": "false",
}

response = requests.post(
API_URL,
@@ -81,10 +88,18 @@ def generate_image_with_dalle(prompt: str, filename: str) -> str:
"""
openai.api_key = CFG.openai_api_key

# Check for supported image sizes
if size not in [256, 512, 1024]:
closest = min([256, 512, 1024], key=lambda x: abs(x - size))
print(
f"DALL-E only supports image sizes of 256x256, 512x512, or 1024x1024. Setting to {closest}, was {size}."
)
size = closest

response = openai.Image.create(
prompt=prompt,
n=1,
size="256x256",
size=f"{size}x{size}",
response_format="b64_json",
)

@@ -96,3 +111,53 @@ def generate_image_with_dalle(prompt: str, filename: str) -> str:
png.write(image_data)

return f"Saved to disk:{filename}"


def generate_image_with_sd_webui(
prompt: str,
filename: str,
size: int = 512,
negative_prompt: str = "",
extra: dict = {},
) -> str:
"""Generate an image with Stable Diffusion webui.
Args:
prompt (str): The prompt to use
filename (str): The filename to save the image to
size (int, optional): The size of the image. Defaults to 256.
negative_prompt (str, optional): The negative prompt to use. Defaults to "".
extra (dict, optional): Extra parameters to pass to the API. Defaults to {}.
Returns:
str: The filename of the image
"""
# Create a session and set the basic auth if needed
s = requests.Session()
if CFG.sd_webui_auth:
username, password = CFG.sd_webui_auth.split(":")
s.auth = (username, password or "")

# Generate the images
response = requests.post(
f"{CFG.sd_webui_url}/sdapi/v1/txt2img",
json={
"prompt": prompt,
"negative_prompt": negative_prompt,
"sampler_index": "DDIM",
"steps": 20,
"cfg_scale": 7.0,
"width": size,
"height": size,
"n_iter": 1,
**extra,
},
)

print(f"Image Generated for prompt:{prompt}")

# Save the image to disk
response = response.json()
b64 = b64decode(response["images"][0].split(",", 1)[0])
image = Image.open(io.BytesIO(b64))
image.save(path_in_workspace(filename))

return f"Saved to disk:{filename}"
6 changes: 6 additions & 0 deletions autogpt/config/config.py
@@ -85,10 +85,16 @@ def __init__(self) -> None:
self.milvus_collection = os.getenv("MILVUS_COLLECTION", "autogpt")

self.image_provider = os.getenv("IMAGE_PROVIDER")
self.image_size = int(os.getenv("IMAGE_SIZE", 256))
self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
self.huggingface_image_model = os.getenv(
"HUGGINGFACE_IMAGE_MODEL", "CompVis/stable-diffusion-v1-4"
)
self.huggingface_audio_to_text_model = os.getenv(
"HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
)
self.sd_webui_url = os.getenv("SD_WEBUI_URL", "http://localhost:7860")
self.sd_webui_auth = os.getenv("SD_WEBUI_AUTH")

# Selenium browser settings
self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
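To illustrate the defaults added here, a standalone sketch that mirrors the getenv calls above; it assumes none of the new variables are set in the environment:

import os

# Fallbacks applied by config.py when the variables are absent.
image_size = int(os.getenv("IMAGE_SIZE", 256))                     # -> 256
hf_image_model = os.getenv(
    "HUGGINGFACE_IMAGE_MODEL", "CompVis/stable-diffusion-v1-4"
)                                                                  # -> CompVis/stable-diffusion-v1-4
sd_webui_url = os.getenv("SD_WEBUI_URL", "http://localhost:7860")  # -> localhost default
sd_webui_auth = os.getenv("SD_WEBUI_AUTH")                         # -> None, so no basic auth is sent
print(image_size, hf_image_model, sd_webui_url, sd_webui_auth)

Note that config.py falls back to http://localhost:7860 while .env.template suggests http://127.0.0.1:7860; both point at a locally running webui instance.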
53 changes: 0 additions & 53 deletions autogpt/json_fixes/auto_fix.py

This file was deleted.

37 changes: 0 additions & 37 deletions autogpt/json_fixes/bracket_termination.py

This file was deleted.

33 changes: 0 additions & 33 deletions autogpt/json_fixes/escaping.py

This file was deleted.

33 changes: 0 additions & 33 deletions autogpt/json_fixes/master_json_fix_method.py

This file was deleted.
