diff --git a/.dockerignore b/.dockerignore index 9ac551095c7..fef46549abe 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,6 +2,7 @@ *.template *.yaml *.yml +!prompt_settings.yaml *.md *.png diff --git a/.env.template b/.env.template index d4d99baa2cf..06745245793 100644 --- a/.env.template +++ b/.env.template @@ -1,10 +1,16 @@ +# For further descriptions of these settings see docs/configuration/options.md or go to docs.agpt.co + ################################################################################ ### AUTO-GPT - GENERAL SETTINGS ################################################################################ +## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) +OPENAI_API_KEY=your-openai-api-key + ## EXECUTE_LOCAL_COMMANDS - Allow local command execution (Default: False) -## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True) # EXECUTE_LOCAL_COMMANDS=False + +## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True) # RESTRICT_TO_WORKSPACE=True ## USER_AGENT - Define the user-agent used by the requests library to browse website (string) @@ -13,211 +19,186 @@ ## AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml) # AI_SETTINGS_FILE=ai_settings.yaml +## PLUGINS_CONFIG_FILE - The path to the plugins_config.yaml file (Default plugins_config.yaml) +# PLUGINS_CONFIG_FILE=plugins_config.yaml + ## PROMPT_SETTINGS_FILE - Specifies which Prompt Settings file to use (defaults to prompt_settings.yaml) # PROMPT_SETTINGS_FILE=prompt_settings.yaml +## OPENAI_API_BASE_URL - Custom url for the OpenAI API, useful for connecting to custom backends. No effect if USE_AZURE is true, leave blank to keep the default url +# the following is an example: +# OPENAI_API_BASE_URL=http://localhost:443/v1 + ## AUTHORISE COMMAND KEY - Key to authorise commands # AUTHORISE_COMMAND_KEY=y + ## EXIT_KEY - Key to exit AUTO-GPT # EXIT_KEY=n -## PLAIN_OUTPUT - Enabeling plain output will disable spinner (Default: False) -## Note: Spinner is used to indicate that Auto-GPT is working on something in the background +## PLAIN_OUTPUT - Plain output, which disables the spinner (Default: False) # PLAIN_OUTPUT=False -## DISABLED_COMMAND_CATEGORIES - The list of categories of commands that are disabled. 
Each of the below are an option: -## autogpt.commands.analyze_code -## autogpt.commands.audio_text -## autogpt.commands.execute_code -## autogpt.commands.file_operations -## autogpt.commands.git_operations -## autogpt.commands.google_search -## autogpt.commands.image_gen -## autogpt.commands.improve_code -## autogpt.commands.web_selenium -## autogpt.commands.write_tests -## autogpt.app -## autogpt.commands.task_statuses -## For example, to disable coding related features, uncomment the next line -# DISABLED_COMMAND_CATEGORIES=autogpt.commands.analyze_code,autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests - -## DENY_COMMANDS - The list of commands that are not allowed to be executed by Auto-GPT (Default: None) -# the following are examples: -# DENY_COMMANDS=cd,nano,vim,vi,emacs,rm,sudo,top,ping,ssh,scp - -## ALLOW_COMMANDS - ONLY those commands will be allowed to be executed by Auto-GPT -# the following are examples: -# ALLOW_COMMANDS=ls,git,cat,grep,find,echo,ps,curl,wget - +## DISABLED_COMMAND_CATEGORIES - The list of categories of commands that are disabled (Default: None) +# DISABLED_COMMAND_CATEGORIES= ################################################################################ ### LLM PROVIDER ################################################################################ -### OPENAI -## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) - - -## NOTE: https://platform.openai.com/docs/api-reference/completions -# The temperature setting in language models like GPT controls the balance between predictable and random responses. -# Lower temperature makes the responses more focused and deterministic, while higher temperature makes them more -# creative and varied. The temperature range typically goes from 0 to 2 in OpenAI's implementation. -## ## TEMPERATURE - Sets temperature in OpenAI (Default: 0) -## -### +# TEMPERATURE=0 + +## OPENAI_ORGANIZATION - Your OpenAI Organization key (Default: None) +# OPENAI_ORGANIZATION= ## USE_AZURE - Use Azure OpenAI or not (Default: False) -OPENAI_API_KEY=your-openai-api-key -# TEMPERATURE=0 # USE_AZURE=False -# OPENAI_ORGANIZATION=your-openai-organization-key-if-applicable - -### AZURE -# moved to `azure.yaml.template` ################################################################################ ### LLM MODELS ################################################################################ -## SMART_LLM_MODEL - Smart language model (Default: gpt-4) +## SMART_LLM_MODEL - Smart language model (Default: gpt-3.5-turbo) +# SMART_LLM_MODEL=gpt-3.5-turbo + ## FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo) -# SMART_LLM_MODEL=gpt-4 # FAST_LLM_MODEL=gpt-3.5-turbo -### LLM MODEL SETTINGS -## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000) -## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000) -## When using --gpt3only this needs to be set to 4000. 
-# FAST_TOKEN_LIMIT=4000 -# SMART_TOKEN_LIMIT=8000 - -### EMBEDDINGS -## EMBEDDING_MODEL - Model to use for creating embeddings +## EMBEDDING_MODEL - Model to use for creating embeddings # EMBEDDING_MODEL=text-embedding-ada-002 +################################################################################ +### SHELL EXECUTION +################################################################################ + +## SHELL_COMMAND_CONTROL - Whether to use "allowlist" or "denylist" to determine what shell commands can be executed (Default: denylist) +# SHELL_COMMAND_CONTROL=denylist + +## ONLY if SHELL_COMMAND_CONTROL is set to denylist: +## SHELL_DENYLIST - List of shell commands that ARE NOT allowed to be executed by Auto-GPT (Default: sudo,su) +# SHELL_DENYLIST=sudo,su + +## ONLY if SHELL_COMMAND_CONTROL is set to allowlist: +## SHELL_ALLOWLIST - List of shell commands that ARE allowed to be executed by Auto-GPT (Default: None) +# SHELL_ALLOWLIST= + ################################################################################ ### MEMORY ################################################################################ -### MEMORY_BACKEND - Memory backend type -## json_file - Default -## redis - Redis (if configured) -## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt) +### General + +## MEMORY_BACKEND - Memory backend type # MEMORY_BACKEND=json_file -# MEMORY_INDEX=auto-gpt-memory -### REDIS +## MEMORY_INDEX - Value used in the Memory backend for scoping, naming, or indexing (Default: auto-gpt) +# MEMORY_INDEX=auto-gpt + +### Redis + ## REDIS_HOST - Redis host (Default: localhost, use "redis" for docker-compose) -## REDIS_PORT - Redis port (Default: 6379) -## REDIS_PASSWORD - Redis password (Default: "") -## WIPE_REDIS_ON_START - Wipes data / index on start (Default: True) # REDIS_HOST=localhost + +## REDIS_PORT - Redis port (Default: 6379) # REDIS_PORT=6379 + +## REDIS_PASSWORD - Redis password (Default: "") # REDIS_PASSWORD= + +## WIPE_REDIS_ON_START - Wipes data / index on start (Default: True) # WIPE_REDIS_ON_START=True ################################################################################ ### IMAGE GENERATION PROVIDER ################################################################################ -### COMMON SETTINGS -## IMAGE_PROVIDER - Image provider - dalle, huggingface, or sdwebui -## IMAGE_SIZE - Image size (Example: 256) -## Image sizes for dalle: 256, 512, 1024 +### Common + +## IMAGE_PROVIDER - Image provider (Default: dalle) # IMAGE_PROVIDER=dalle + +## IMAGE_SIZE - Image size (Default: 256) # IMAGE_SIZE=256 -### HUGGINGFACE +### Huggingface (IMAGE_PROVIDER=huggingface) + ## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4) -## HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token) # HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4 -# HUGGINGFACE_API_TOKEN=your-huggingface-api-token -### STABLE DIFFUSION WEBUI -## SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password) -## SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860) +## HUGGINGFACE_API_TOKEN - HuggingFace API token (Default: None) +# HUGGINGFACE_API_TOKEN= + +### Stable Diffusion (IMAGE_PROVIDER=sdwebui) + +## SD_WEBUI_AUTH - Stable Diffusion Web UI username:password pair (Default: None) # SD_WEBUI_AUTH= -# SD_WEBUI_URL=http://127.0.0.1:7860 + +## SD_WEBUI_URL - Stable Diffusion Web UI API URL (Default: http://localhost:7860) +# 
SD_WEBUI_URL=http://localhost:7860 ################################################################################ ### AUDIO TO TEXT PROVIDER ################################################################################ -### HUGGINGFACE -# HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h +## AUDIO_TO_TEXT_PROVIDER - Audio-to-text provider (Default: huggingface) +# AUDIO_TO_TEXT_PROVIDER=huggingface + +## HUGGINGFACE_AUDIO_TO_TEXT_MODEL - The model for HuggingFace to use (Default: facebook/wav2vec2-base-960h) +# HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h ################################################################################ -### GIT Provider for repository actions +### GITHUB ################################################################################ -### GITHUB -## GITHUB_API_KEY - Github API key / PAT (Example: github_pat_123) -## GITHUB_USERNAME - Github username -# GITHUB_API_KEY=github_pat_123 -# GITHUB_USERNAME=your-github-username +## GITHUB_API_KEY - GitHub API key / PAT (Default: None) +# GITHUB_API_KEY= + +## GITHUB_USERNAME - GitHub username (Default: None) +# GITHUB_USERNAME= ################################################################################ ### WEB BROWSING ################################################################################ -### BROWSER ## HEADLESS_BROWSER - Whether to run the browser in headless mode (default: True) -## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome). -## Note: set this to either 'chrome', 'firefox', 'safari' or 'edge' depending on your current browser # HEADLESS_BROWSER=True + +## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome) # USE_WEB_BROWSER=chrome -## BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunks to summarize (in number of tokens, excluding the response. 75 % of FAST_TOKEN_LIMIT is usually wise ) + +## BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunks to summarize (Default: 3000) # BROWSE_CHUNK_MAX_LENGTH=3000 -## BROWSE_SPACY_LANGUAGE_MODEL is used to split sentences. Install additional languages via pip, and set the model name here. Example Chinese: python -m spacy download zh_core_web_sm + +## BROWSE_SPACY_LANGUAGE_MODEL - [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. 
(Default: en_core_web_sm) # BROWSE_SPACY_LANGUAGE_MODEL=en_core_web_sm -### GOOGLE -## GOOGLE_API_KEY - Google API key (Example: my-google-api-key) -## CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id) -# GOOGLE_API_KEY=your-google-api-key -# CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id +## GOOGLE_API_KEY - Google API key (Default: None) +# GOOGLE_API_KEY= + +## GOOGLE_CUSTOM_SEARCH_ENGINE_ID - Google custom search engine ID (Default: None) +# GOOGLE_CUSTOM_SEARCH_ENGINE_ID= ################################################################################ -### TTS PROVIDER +### TEXT TO SPEECH PROVIDER ################################################################################ -### MAC OS -## USE_MAC_OS_TTS - Use Mac OS TTS or not (Default: False) -# USE_MAC_OS_TTS=False - -### STREAMELEMENTS -## USE_BRIAN_TTS - Use Brian TTS or not (Default: False) -# USE_BRIAN_TTS=False +## TEXT_TO_SPEECH_PROVIDER - Which Text to Speech provider to use (Default: gtts) +# TEXT_TO_SPEECH_PROVIDER=gtts -### ELEVENLABS -## ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key) -## ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1) -## ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2) -# ELEVENLABS_API_KEY=your-elevenlabs-api-key -# ELEVENLABS_VOICE_1_ID=your-voice-id-1 -# ELEVENLABS_VOICE_2_ID=your-voice-id-2 +### Only if TEXT_TO_SPEECH_PROVIDER=streamelements +## STREAMELEMENTS_VOICE - Voice to use for StreamElements (Default: Brian) +# STREAMELEMENTS_VOICE=Brian -################################################################################ -### TWITTER API -################################################################################ +### Only if TEXT_TO_SPEECH_PROVIDER=elevenlabs +## ELEVENLABS_API_KEY - Eleven Labs API key (Default: None) +# ELEVENLABS_API_KEY= -# TW_CONSUMER_KEY= -# TW_CONSUMER_SECRET= -# TW_ACCESS_TOKEN= -# TW_ACCESS_TOKEN_SECRET= +## ELEVENLABS_VOICE_ID - Eleven Labs voice ID (Example: None) +# ELEVENLABS_VOICE_ID= ################################################################################ -### ALLOWLISTED PLUGINS +### CHAT MESSAGES ################################################################################ -#ALLOWLISTED_PLUGINS - Sets the listed plugins that are allowed (Example: plugin1,plugin2,plugin3) -#DENYLISTED_PLUGINS - Sets the listed plugins that are not allowed (Example: plugin1,plugin2,plugin3) -ALLOWLISTED_PLUGINS= -DENYLISTED_PLUGINS= - -################################################################################ -### CHAT PLUGIN SETTINGS -################################################################################ -# CHAT_MESSAGES_ENABLED - Enable chat messages (Default: False) +## CHAT_MESSAGES_ENABLED - Enable chat messages (Default: False) # CHAT_MESSAGES_ENABLED=False diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f159c646d68..efb67868819 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -41,7 +41,7 @@ By following these guidelines, your PRs are more likely to be merged quickly aft black . isort . 
mypy - autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports autogpt tests --in-place + autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring autogpt tests --in-place ``` diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 272fca17bdc..e40abf2f625 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -1,31 +1,73 @@ -name: Run Benchmarks +name: Benchmarks on: + schedule: + - cron: '0 8 * * *' workflow_dispatch: jobs: - build: + Benchmark: + name: ${{ matrix.config.task-name }} runs-on: ubuntu-latest - - env: - python-version: '3.10' + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + config: + - python-version: "3.10" + task: "tests/challenges" + task-name: "Mandatory Tasks" + - python-version: "3.10" + task: "--beat-challenges -ra tests/challenges" + task-name: "Challenging Tasks" steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python ${{ env.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ env.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: benchmark - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - python benchmark/benchmark_entrepreneur_gpt_with_undecisive_user.py + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: master + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.config.python-version }} + + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Set up Python dependency cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest with coverage + run: | + rm -rf tests/Auto-GPT-test-cassettes + pytest -n auto --record-mode=all ${{ matrix.config.task }} + env: + CI: true + PROXY: ${{ secrets.PROXY }} + AGENT_MODE: ${{ secrets.AGENT_MODE }} + AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + PLAIN_OUTPUT: True + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-logs-${{ matrix.config.task-name }} + path: logs/ + + - name: Upload cassettes as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: cassettes-${{ matrix.config.task-name }} + path: tests/Auto-GPT-test-cassettes/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a9656fc161a..3e21d1d701f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,20 +5,20 @@ on: branches: [ master, ci-test* ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - - 'tests/integration/challenges/current_score.json' + - 'tests/challenges/current_score.json' pull_request: - branches: [ stable, master ] + branches: [ stable, master, release-* ] pull_request_target: - branches: [ master, ci-test* ] + branches: [ master, release-*, ci-test* ] concurrency: - group: ${{ format('ci-{0}', github.head_ref && format('pr-{0}', github.event.pull_request.number) || github.sha) }} - cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') && github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target') }} + 
group: ${{ format('ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }} + cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') }} jobs: lint: - # eliminate duplicate runs on master - if: github.event_name == 'push' || github.ref_name != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) + # eliminate duplicate runs + if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) runs-on: ubuntu-latest env: @@ -37,6 +37,16 @@ jobs: with: python-version: ${{ env.min-python-version }} + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Set up Python dependency cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} + - name: Install dependencies run: | python -m pip install --upgrade pip @@ -59,12 +69,12 @@ jobs: - name: Check for unused imports and pass statements run: | - cmd="autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports autogpt tests" + cmd="autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring autogpt tests" $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1) test: - # eliminate duplicate runs on master - if: github.event_name == 'push' || github.ref_name != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) + # eliminate duplicate runs + if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) permissions: # Gives the action the necessary permissions for publishing new @@ -81,7 +91,7 @@ jobs: python-version: ["3.10"] steps: - - name: Check out repository + - name: Checkout repository uses: actions/checkout@v3 with: fetch-depth: 0 @@ -89,8 +99,12 @@ jobs: repository: ${{ github.event.pull_request.head.repo.full_name }} submodules: true - - id: checkout_cassettes - name: Check out cassettes + - name: Configure git user Auto-GPT-Bot + run: | + git config --global user.name "Auto-GPT-Bot" + git config --global user.email "github-bot@agpt.co" + + - name: Checkout cassettes if: ${{ startsWith(github.event_name, 'pull_request') }} run: | cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}" @@ -102,21 +116,14 @@ jobs: git checkout $cassette_branch - if git merge --no-commit --no-ff ${{ github.event.pull_request.base.ref }}; then - echo "Using cassettes from mirror branch, synced to upstream branch '${{ github.event.pull_request.base.ref }}'" - else - echo "Could not merge upstream changes to cassettes. Using cassettes from ${{ github.event.pull_request.base.ref }}." - git merge --abort - git checkout ${{ github.event.pull_request.base.ref }} - - # Delete branch to prevent conflict when re-creating it - git branch -D $cassette_branch - fi - echo "cassette_branch=$(git branch --show-current)" >> $GITHUB_OUTPUT + # Pick non-conflicting cassette updates from the base branch + git merge --no-commit --strategy-option=ours origin/${{ github.event.pull_request.base.ref }} + echo "Using cassettes from mirror branch '$cassette_branch'," \ + "synced to upstream branch '${{ github.event.pull_request.base.ref }}'." 
else - echo "Branch '$cassette_branch' does not exist in cassette submodule."\ - "Using cassettes from ${{ github.event.pull_request.base.ref }}." - echo "cassette_branch=${{ github.event.pull_request.base.ref }}" >> $GITHUB_OUTPUT + git checkout -b $cassette_branch + echo "Branch '$cassette_branch' does not exist in cassette submodule." \ + "Using cassettes from '${{ github.event.pull_request.base.ref }}'." fi - name: Set up Python ${{ matrix.python-version }} @@ -124,30 +131,41 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Set up Python dependency cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} + + - name: Install Python dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt - - name: Run pytest tests with coverage + - name: Run pytest with coverage run: | - pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term - python tests/integration/challenges/utils/build_current_score.py + pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \ + tests/unit tests/integration tests/challenges + python tests/challenges/utils/build_current_score.py env: CI: true PROXY: ${{ secrets.PROXY }} - AGENT_MODE: ${{ vars.AGENT_MODE }} - AGENT_TYPE: ${{ vars.AGENT_TYPE }} + AGENT_MODE: ${{ secrets.AGENT_MODE }} + AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + PLAIN_OUTPUT: True - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 - id: setup_git_auth name: Set up git token authentication + # Cassettes may be pushed even when tests fail + if: success() || failure() run: | - git config --global user.name "Auto-GPT-Bot" - git config --global user.email "github-bot@agpt.co" - config_key="http.${{ github.server_url }}/.extraheader" base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64 -w0) @@ -163,63 +181,44 @@ jobs: - name: Push updated challenge scores if: github.event_name == 'push' run: | - score_file="tests/integration/challenges/current_score.json" + score_file="tests/challenges/current_score.json" if ! git diff --quiet $score_file; then git add $score_file git commit -m "Update challenge scores" - git push origin HEAD:${{ github.ref }} + git push origin HEAD:${{ github.ref_name }} else echo "The challenge scores didn't change." fi - id: push_cassettes name: Push updated cassettes + # For pull requests, push updated cassettes even when tests fail + if: github.event_name == 'push' || success() || failure() run: | if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then is_pull_request=true cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}" - cassette_source_branch="${{ steps.checkout_cassettes.outputs.cassette_branch }}" - base_branch="${{ github.event.pull_request.base.ref }}" else - current_branch=$(echo ${{ github.ref }} | sed -e "s/refs\/heads\///g") - cassette_branch=$current_branch + cassette_branch="${{ github.ref_name }}" fi cd tests/Auto-GPT-test-cassettes - git fetch origin $cassette_source_branch:$cassette_source_branch - # Commit & push changes to cassettes if any - if ! git diff --quiet $cassette_source_branch --; then - if [ "$cassette_branch" != "$cassette_source_branch" ]; then - git checkout -b $cassette_branch - fi + if ! 
git diff --quiet; then git add . git commit -m "Auto-update cassettes" - - if [ $is_pull_request ]; then - git push --force origin HEAD:$cassette_branch - else - git push origin HEAD:$cassette_branch - fi - - cd ../.. - if [ $is_pull_request ]; then - git fetch origin $base_branch - cassette_diff=$(git diff origin/$base_branch) - else + git push origin HEAD:$cassette_branch + if [ ! $is_pull_request ]; then + cd ../.. git add tests/Auto-GPT-test-cassettes git commit -m "Update cassette submodule" - git push origin HEAD:$current_branch + git push origin HEAD:$cassette_branch fi - else - echo "No cassette changes to commit" - fi - - if [ -n "$cassette_diff" ]; then echo "updated=true" >> $GITHUB_OUTPUT else echo "updated=false" >> $GITHUB_OUTPUT + echo "No cassette changes to commit" fi - name: Post Set up git token auth @@ -228,7 +227,7 @@ jobs: git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}' git submodule foreach git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}' - - name: Apply or remove behaviour change label and comment on PR + - name: Apply "behaviour change" label and comment on PR if: ${{ startsWith(github.event_name, 'pull_request') }} run: | PR_NUMBER=${{ github.event.pull_request.number }} @@ -245,10 +244,11 @@ jobs: echo $TOKEN | gh auth login --with-token gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." - else - echo "Removing label..." - curl -X DELETE \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/behaviour%20change fi + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-logs + path: logs/ diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml index a61b707d8e7..3da88891ed7 100644 --- a/.github/workflows/docker-ci.yml +++ b/.github/workflows/docker-ci.yml @@ -5,9 +5,9 @@ on: branches: [ master ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - - 'tests/integration/challenges/current_score.json' + - 'tests/challenges/current_score.json' pull_request: - branches: [ master, stable ] + branches: [ master, release-*, stable ] concurrency: group: ${{ format('docker-ci-{0}', github.head_ref && format('pr-{0}', github.event.pull_request.number) || github.sha) }} @@ -102,21 +102,25 @@ jobs: - id: test name: Run tests env: + PLAIN_OUTPUT: True CI: true OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | set +e test_output=$( docker run --env CI --env OPENAI_API_KEY --entrypoint python ${{ env.IMAGE_NAME }} -m \ - pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term 2>&1 + pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing \ + tests/unit tests/integration 2>&1 ) test_failure=$? 
- + echo "$test_output" - + cat << $EOF >> $GITHUB_STEP_SUMMARY # Tests $([ $test_failure = 0 ] && echo '✅' || echo '❌') \`\`\` $test_output \`\`\` $EOF + + exit $test_failure diff --git a/.github/workflows/pr-label.yml b/.github/workflows/pr-label.yml index 0bab56385b3..ebeb7305ed3 100644 --- a/.github/workflows/pr-label.yml +++ b/.github/workflows/pr-label.yml @@ -3,10 +3,10 @@ name: "Pull Request auto-label" on: # So that PRs touching the same files as the push are updated push: - branches: [ master ] + branches: [ master, release-* ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - - 'tests/integration/challenges/current_score.json' + - 'tests/challenges/current_score.json' # So that the `dirtyLabel` is removed if conflicts are resolve # We recommend `pull_request_target` so that github secrets are available. # In `pull_request` we wouldn't be able to change labels of fork PRs @@ -48,11 +48,10 @@ jobs: s_label: 'size/s' s_max_size: 10 m_label: 'size/m' - m_max_size: 50 + m_max_size: 100 l_label: 'size/l' - l_max_size: 200 + l_max_size: 500 xl_label: 'size/xl' message_if_xl: > - This PR exceeds the recommended size of 200 lines. + This PR exceeds the recommended size of 500 lines. Please make sure you are NOT addressing multiple issues with one PR. - Note this PR might be rejected due to its size diff --git a/.gitignore b/.gitignore index 971c3368919..29a0285a86b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,7 @@ ## Original ignores autogpt/keys.py -autogpt/*json -autogpt/node_modules/ -autogpt/__pycache__/keys.cpython-310.pyc -autogpt/auto_gpt_workspace -package-lock.json -*.pyc -auto_gpt_workspace/* +autogpt/*.json +**/auto_gpt_workspace/* *.mpeg .env azure.yaml @@ -37,6 +32,7 @@ build/ develop-eggs/ dist/ plugins/ +plugins_config.yaml downloads/ eggs/ .eggs/ @@ -163,4 +159,4 @@ vicuna-* openai/ # news -CURRENT_BULLETIN.md \ No newline at end of file +CURRENT_BULLETIN.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53928603176..0aaad2578c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,7 @@ repos: hooks: - id: autoflake name: autoflake - entry: autoflake --in-place --remove-all-unused-imports --recursive --ignore-init-module-imports autogpt tests + entry: autoflake --in-place --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring autogpt tests language: python types: [ python ] - id: pytest-check diff --git a/BULLETIN.md b/BULLETIN.md index 4c858b733c1..ba1de5a1107 100644 --- a/BULLETIN.md +++ b/BULLETIN.md @@ -3,45 +3,25 @@ Check out *https://agpt.co*, the official news & updates site for Auto-GPT! The documentation also has a place here, at *https://docs.agpt.co* # For contributors 👷🏼 -Since releasing v0.3.0, we are working on re-architecting the Auto-GPT core to make -it more extensible and to make room for structural performance-oriented R&D. -In the meantime, we have less time to process incoming pull requests and issues, -so we focus on high-value contributions: - * significant bugfixes - * *major* improvements to existing functionality and/or docs (so no single-typo fixes) - * contributions that help us with re-architecture and other roadmapped items -We have to be somewhat selective in order to keep making progress, but this does not -mean you can't contribute. 
Check out the contribution guide on our wiki: -https://github.com/Significant-Gravitas/Auto-GPT/wiki/Contributing - -# 🚀 v0.4.0 Release 🚀 -Two weeks and 76 pull requests have passed since v0.3.1, and we are happy to announce -the release of v0.4.0! - -Highlights and notable changes since v0.3.0: +Since releasing v0.3.0, we have been working on re-architecting the Auto-GPT core to make it more extensible and make room for structural performance-oriented R&D. -## ⚠️ Command `send_tweet` is REMOVED -Twitter functionality (and more) is now covered by plugins. - -## ⚠️ Memory backend deprecation 💾 -The Milvus, Pinecone and Weaviate memory backends were rendered incompatible -by work on the memory system, and have been removed in `master`. The Redis -memory store was also temporarily removed; we will merge a new implementation ASAP. -Whether built-in support for the others will be added back in the future is subject to -discussion, feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280 +Check out the contribution guide on our wiki: +https://github.com/Significant-Gravitas/Auto-GPT/wiki/Contributing -## Document support in `read_file` 📄 -Auto-GPT can now read text from document files, with support added for PDF, DOCX, CSV, -HTML, TeX and more! +# 🚀 v0.4.1 Release 🚀 +Two weeks and 50+ pull requests have passed since v0.4.0, and we are happy to announce the release of v0.4.1! -## Managing Auto-GPT's access to commands ❌🔧 -You can now disable set of built-in commands through the *DISABLED_COMMAND_CATEGORIES* -variable in .env. Specific shell commands can also be disabled using *DENY_COMMANDS*, -or selectively enabled using *ALLOW_COMMANDS*. +Highlights and notable changes since v0.4.0: +- The .env.template is more readable and better explains the purpose of each environment variable. +- More dependable search +  - The CUSTOM_SEARCH_ENGINE_ID variable has been renamed to GOOGLE_CUSTOM_SEARCH_ENGINE_ID; make sure you update it. +- Better read_file +- More reliable Python code execution +- Lots of JSON error fixes +- Directory-based plugins ## Further fixes and changes 🛠️ -Other highlights include improvements to self-feedback mode and continuous mode, -documentation, docker and devcontainer setups, and much more. Most of the improvements -that were made are not yet visible to users, but will pay off in the long term. -Take a look at the Release Notes on Github for the full changelog! +Under the hood, we've done a bunch of work improving architectures and streamlining code. Most of that won't be user-visible. + +## Take a look at the Release Notes on GitHub for the full changelog! 
https://github.com/Significant-Gravitas/Auto-GPT/releases diff --git a/Dockerfile b/Dockerfile index 3bcfddeadf5..a31c78abffa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,11 +6,13 @@ FROM python:3.10-slim AS autogpt-base # Install browsers RUN apt-get update && apt-get install -y \ - chromium-driver firefox-esr \ - ca-certificates + chromium-driver firefox-esr ca-certificates \ + && apt-get clean && rm -rf /var/lib/apt/lists/* # Install utilities -RUN apt-get install -y curl jq wget git +RUN apt-get update && apt-get install -y \ + curl jq wget git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* # Set environment variables ENV PIP_NO_CACHE_DIR=yes \ @@ -38,6 +40,7 @@ WORKDIR /app ONBUILD COPY autogpt/ ./autogpt ONBUILD COPY scripts/ ./scripts ONBUILD COPY plugins/ ./plugins +ONBUILD COPY prompt_settings.yaml ./prompt_settings.yaml ONBUILD RUN mkdir ./data FROM autogpt-${BUILD_TYPE} AS auto-gpt diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 3dc4d390092..1f31be165cc 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -1,17 +1,17 @@ +import json import signal import sys from datetime import datetime from colorama import Fore, Style -from autogpt.app import execute_command, get_command from autogpt.commands.command import CommandRegistry from autogpt.config import Config from autogpt.config.ai_config import AIConfig -from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques -from autogpt.json_utils.utilities import LLM_DEFAULT_RESPONSE_FORMAT, validate_json +from autogpt.json_utils.utilities import extract_json_from_response, validate_json from autogpt.llm.base import ChatSequence from autogpt.llm.chat import chat_with_ai, create_chat_completion +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS from autogpt.llm.utils import count_string_tokens from autogpt.log_cycle.log_cycle import ( FULL_MESSAGE_HISTORY_FILE_NAME, @@ -44,7 +44,7 @@ class Agent: triggering_prompt: The last sentence the AI will see before answering. 
For Auto-GPT, this prompt is: - Determine which next command to use, and respond using the format specified + Determine exactly one command to use, and respond using the format specified above: The triggering prompt is not part of the system prompt because between the system prompt and the triggering @@ -64,28 +64,34 @@ def __init__( memory: VectorMemory, next_action_count: int, command_registry: CommandRegistry, - config: AIConfig, + ai_config: AIConfig, system_prompt: str, triggering_prompt: str, workspace_directory: str, + config: Config, ): - cfg = Config() self.ai_name = ai_name self.memory = memory self.history = MessageHistory(self) self.next_action_count = next_action_count self.command_registry = command_registry self.config = config + self.ai_config = ai_config self.system_prompt = system_prompt self.triggering_prompt = triggering_prompt - self.workspace = Workspace(workspace_directory, cfg.restrict_to_workspace) + self.workspace = Workspace(workspace_directory, config.restrict_to_workspace) self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S") self.cycle_count = 0 self.log_cycle_handler = LogCycleHandler() + self.fast_token_limit = OPEN_AI_CHAT_MODELS.get( + config.fast_llm_model + ).max_tokens def start_interaction_loop(self): + # Avoid circular imports + from autogpt.app import execute_command, get_command + # Interaction Loop - cfg = Config() self.cycle_count = 0 command_name = None arguments = None @@ -110,48 +116,55 @@ def signal_handler(signum, frame): self.cycle_count += 1 self.log_cycle_handler.log_count_within_cycle = 0 self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, [m.raw() for m in self.history], FULL_MESSAGE_HISTORY_FILE_NAME, ) if ( - cfg.continuous_mode - and cfg.continuous_limit > 0 - and self.cycle_count > cfg.continuous_limit + self.config.continuous_mode + and self.config.continuous_limit > 0 + and self.cycle_count > self.config.continuous_limit ): logger.typewriter_log( - "Continuous Limit Reached: ", Fore.YELLOW, f"{cfg.continuous_limit}" + "Continuous Limit Reached: ", + Fore.YELLOW, + f"{self.config.continuous_limit}", ) break # Send message to AI, get response - with Spinner("Thinking... ", plain_output=cfg.plain_output): + with Spinner("Thinking... 
", plain_output=self.config.plain_output): assistant_reply = chat_with_ai( - cfg, + self.config, self, self.system_prompt, self.triggering_prompt, - cfg.fast_token_limit, - cfg.fast_llm_model, + self.fast_token_limit, + self.config.fast_llm_model, ) - assistant_reply_json = fix_json_using_multiple_techniques(assistant_reply) - for plugin in cfg.plugins: + try: + assistant_reply_json = extract_json_from_response(assistant_reply) + validate_json(assistant_reply_json) + except json.JSONDecodeError as e: + logger.error(f"Exception while validating assistant reply JSON: {e}") + assistant_reply_json = {} + + for plugin in self.config.plugins: if not plugin.can_handle_post_planning(): continue assistant_reply_json = plugin.post_planning(assistant_reply_json) # Print Assistant thoughts if assistant_reply_json != {}: - validate_json(assistant_reply_json, LLM_DEFAULT_RESPONSE_FORMAT) # Get command name and arguments try: print_assistant_thoughts( - self.ai_name, assistant_reply_json, cfg.speak_mode + self.ai_name, assistant_reply_json, self.config.speak_mode ) command_name, arguments = get_command(assistant_reply_json) - if cfg.speak_mode: + if self.config.speak_mode: say_text(f"I want to execute {command_name}") arguments = self._resolve_pathlike_command_args(arguments) @@ -159,13 +172,15 @@ def signal_handler(signum, frame): except Exception as e: logger.error("Error: \n", str(e)) self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, assistant_reply_json, NEXT_ACTION_FILE_NAME, ) + # First log new-line so user can differentiate sections better in console + logger.typewriter_log("\n") logger.typewriter_log( "NEXT ACTION: ", Fore.CYAN, @@ -173,7 +188,7 @@ def signal_handler(signum, frame): f"ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}", ) - if not cfg.continuous_mode and self.next_action_count == 0: + if not self.config.continuous_mode and self.next_action_count == 0: # ### GET USER AUTHORIZATION TO EXECUTE COMMAND ### # Get key press: Prompt the user to press enter to continue or escape # to exit @@ -184,13 +199,13 @@ def signal_handler(signum, frame): f"{self.ai_name}..." 
) while True: - if cfg.chat_messages_enabled: + if self.config.chat_messages_enabled: console_input = clean_input("Waiting for your response...") else: console_input = clean_input( Fore.MAGENTA + "Input:" + Style.RESET_ALL ) - if console_input.lower().strip() == cfg.authorise_key: + if console_input.lower().strip() == self.config.authorise_key: user_input = "GENERATE NEXT COMMAND JSON" break elif console_input.lower().strip() == "s": @@ -201,7 +216,7 @@ def signal_handler(signum, frame): ) thoughts = assistant_reply_json.get("thoughts", {}) self_feedback_resp = self.get_self_feedback( - thoughts, cfg.fast_llm_model + thoughts, self.config.fast_llm_model ) logger.typewriter_log( f"SELF FEEDBACK: {self_feedback_resp}", @@ -214,7 +229,9 @@ def signal_handler(signum, frame): elif console_input.lower().strip() == "": logger.warn("Invalid input format.") continue - elif console_input.lower().startswith(f"{cfg.authorise_key} -"): + elif console_input.lower().startswith( + f"{self.config.authorise_key} -" + ): try: self.next_action_count = abs( int(console_input.split(" ")[1]) @@ -227,14 +244,14 @@ def signal_handler(signum, frame): ) continue break - elif console_input.lower() == cfg.exit_key: + elif console_input.lower() == self.config.exit_key: user_input = "EXIT" break else: user_input = console_input command_name = "human_feedback" self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, user_input, @@ -252,6 +269,8 @@ def signal_handler(signum, frame): logger.info("Exiting...") break else: + # First log new-line so user can differentiate sections better in console + logger.typewriter_log("\n") # Print authorized commands left value logger.typewriter_log( f"{Fore.CYAN}AUTHORISED COMMANDS LEFT: {Style.RESET_ALL}{self.next_action_count}" @@ -265,32 +284,30 @@ def signal_handler(signum, frame): elif command_name == "self_feedback": result = f"Self feedback: {user_input}" else: - for plugin in cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_pre_command(): continue command_name, arguments = plugin.pre_command( command_name, arguments ) command_result = execute_command( - self.command_registry, - command_name, - arguments, - self.config.prompt_generator, - config=cfg, + command_name=command_name, + arguments=arguments, + agent=self, ) result = f"Command {command_name} returned: " f"{command_result}" result_tlength = count_string_tokens( - str(command_result), cfg.fast_llm_model + str(command_result), self.config.fast_llm_model ) memory_tlength = count_string_tokens( - str(self.history.summary_message()), cfg.fast_llm_model + str(self.history.summary_message()), self.config.fast_llm_model ) - if result_tlength + memory_tlength + 600 > cfg.fast_token_limit: + if result_tlength + memory_tlength + 600 > self.fast_token_limit: result = f"Failure: command {command_name} returned too much output. \ Do not execute this command again with the same arguments." - for plugin in cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_post_command(): continue result = plugin.post_command(command_name, result) @@ -331,7 +348,7 @@ def get_self_feedback(self, thoughts: dict, llm_model: str) -> str: Returns: str: A feedback response generated using the provided thoughts dictionary. """ - ai_role = self.config.ai_role + ai_role = self.ai_config.ai_role feedback_prompt = f"Below is a message from me, an AI Agent, assuming the role of {ai_role}. 
whilst keeping knowledge of my slight limitations as an AI Agent Please evaluate my thought process, reasoning, and plan, and provide a concise paragraph outlining potential improvements. Consider adding or removing ideas that do not align with my role and explaining why, prioritizing thoughts based on their significance, or simply refining my overall thought process." reasoning = thoughts.get("reasoning", "") @@ -343,7 +360,7 @@ def get_self_feedback(self, thoughts: dict, llm_model: str) -> str: prompt.add("user", feedback_prompt + feedback_thoughts) self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, prompt.raw(), @@ -353,7 +370,7 @@ def get_self_feedback(self, thoughts: dict, llm_model: str) -> str: feedback = create_chat_completion(prompt) self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, feedback, diff --git a/autogpt/app.py b/autogpt/app.py index 0804b482715..780b74a019a 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -2,12 +2,11 @@ import json from typing import Dict, List, Union +from autogpt.agent.agent import Agent from autogpt.agent.agent_manager import AgentManager -from autogpt.commands.command import CommandRegistry, command +from autogpt.commands.command import command from autogpt.commands.web_requests import scrape_links, scrape_text -from autogpt.config import Config from autogpt.processing.text import summarize_text -from autogpt.prompts.generator import PromptGenerator from autogpt.speech import say_text from autogpt.url_utils.validators import validate_url @@ -85,27 +84,26 @@ def map_command_synonyms(command_name: str): def execute_command( - command_registry: CommandRegistry, command_name: str, - arguments, - prompt: PromptGenerator, - config: Config, + arguments: dict[str, str], + agent: Agent, ): """Execute the command and return the result Args: command_name (str): The name of the command to execute arguments (dict): The arguments for the command + agent (Agent): The agent that is executing the command Returns: str: The result of the command """ try: - cmd = command_registry.commands.get(command_name) + cmd = agent.command_registry.commands.get(command_name) # If the command is found, call it with the provided arguments if cmd: - return cmd(**arguments, config=config) + return cmd(**arguments, agent=agent) # TODO: Remove commands below after they are moved to the command registry. 
command_name = map_command_synonyms(command_name.lower()) @@ -113,7 +111,7 @@ def execute_command( # TODO: Change these to take in a file rather than pasted code, if # non-file is given, return instructions "Input should be a python # filepath, write your code to file and try again - for command in prompt.commands: + for command in agent.ai_config.prompt_generator.commands: if ( command_name == command["label"].lower() or command_name == command["name"].lower() @@ -132,7 +130,7 @@ def execute_command( "get_text_summary", "Get text summary", '"url": "", "question": ""' ) @validate_url -def get_text_summary(url: str, question: str, config: Config) -> str: +def get_text_summary(url: str, question: str, agent: Agent) -> str: """Get the text summary of a webpage Args: @@ -142,7 +140,7 @@ def get_text_summary(url: str, question: str, config: Config) -> str: Returns: str: The summary of the text """ - text = scrape_text(url) + text = scrape_text(url, agent) summary, _ = summarize_text(text, question=question) return f""" "Result" : {summary}""" @@ -150,7 +148,7 @@ def get_text_summary(url: str, question: str, config: Config) -> str: @command("get_hyperlinks", "Get hyperlinks", '"url": ""') @validate_url -def get_hyperlinks(url: str, config: Config) -> Union[str, List[str]]: +def get_hyperlinks(url: str, agent: Agent) -> Union[str, List[str]]: """Get all hyperlinks on a webpage Args: @@ -159,7 +157,7 @@ def get_hyperlinks(url: str, config: Config) -> Union[str, List[str]]: Returns: str or list: The hyperlinks on the page """ - return scrape_links(url, config) + return scrape_links(url, agent) @command( @@ -167,7 +165,7 @@ def get_hyperlinks(url: str, config: Config) -> Union[str, List[str]]: "Start GPT Agent", '"name": "", "task": "", "prompt": ""', ) -def start_agent(name: str, task: str, prompt: str, config: Config, model=None) -> str: +def start_agent(name: str, task: str, prompt: str, agent: Agent, model=None) -> str: """Start an agent with a given name, task, and prompt Args: @@ -188,11 +186,11 @@ def start_agent(name: str, task: str, prompt: str, config: Config, model=None) - agent_intro = f"{voice_name} here, Reporting for duty!" # Create agent - if config.speak_mode: + if agent.config.speak_mode: say_text(agent_intro, 1) key, ack = agent_manager.create_agent(task, first_message, model) - if config.speak_mode: + if agent.config.speak_mode: say_text(f"Hello {voice_name}. Your task is as follows. {task}.") # Assign task (prompt), get response @@ -202,7 +200,7 @@ def start_agent(name: str, task: str, prompt: str, config: Config, model=None) - @command("message_agent", "Message GPT Agent", '"key": "", "message": ""') -def message_agent(key: str, message: str, config: Config) -> str: +def message_agent(key: str, message: str, agent: Agent) -> str: """Message an agent with a given key and message""" # Check if the key is a valid integer if is_valid_int(key): @@ -211,13 +209,13 @@ def message_agent(key: str, message: str, config: Config) -> str: return "Invalid key, must be an integer." 
# Speak response - if config.speak_mode: + if agent.config.speak_mode: say_text(agent_response, 1) return agent_response @command("list_agents", "List GPT Agents", "() -> str") -def list_agents(config: Config) -> str: +def list_agents(agent: Agent) -> str: """List all agents Returns: @@ -229,7 +227,7 @@ def list_agents(config: Config) -> str: @command("delete_agent", "Delete GPT Agent", '"key": ""') -def delete_agent(key: str, config: Config) -> str: +def delete_agent(key: str, agent: Agent) -> str: """Delete an agent with a given key Args: diff --git a/autogpt/commands/analyze_code.py b/autogpt/commands/analyze_code.py index 4de68334d89..ca7fcb015f4 100644 --- a/autogpt/commands/analyze_code.py +++ b/autogpt/commands/analyze_code.py @@ -1,21 +1,17 @@ """Code evaluation module.""" from __future__ import annotations -from typing import TYPE_CHECKING - +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.llm.utils import call_ai_function -if TYPE_CHECKING: - from autogpt.config import Config - @command( "analyze_code", "Analyze Code", '"code": ""', ) -def analyze_code(code: str, config: Config) -> list[str]: +def analyze_code(code: str, agent: Agent) -> list[str]: """ A function that takes in a string and returns a response from create chat completion api call. @@ -33,4 +29,6 @@ def analyze_code(code: str, config: Config) -> list[str]: "Analyzes the given code and returns a list of suggestions for improvements." ) - return call_ai_function(function_string, args, description_string, config=config) + return call_ai_function( + function_string, args, description_string, config=agent.config + ) diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py index ba4fb3474d6..2991fff32c3 100644 --- a/autogpt/commands/audio_text.py +++ b/autogpt/commands/audio_text.py @@ -1,14 +1,10 @@ """Commands for converting audio to text.""" import json -from typing import TYPE_CHECKING import requests +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config - -if TYPE_CHECKING: - from autogpt.config import Config @command( @@ -19,7 +15,7 @@ and config.huggingface_api_token, "Configure huggingface_audio_to_text_model and Hugging Face api token.", ) -def read_audio_from_file(filename: str, config: Config) -> str: +def read_audio_from_file(filename: str, agent: Agent) -> str: """ Convert audio to text. @@ -31,10 +27,10 @@ def read_audio_from_file(filename: str, config: Config) -> str: """ with open(filename, "rb") as audio_file: audio = audio_file.read() - return read_audio(audio, config) + return read_audio(audio, agent.config) -def read_audio(audio: bytes, config: Config) -> str: +def read_audio(audio: bytes, agent: Agent) -> str: """ Convert audio to text. 
@@ -44,9 +40,20 @@ def read_audio(audio: bytes, config: Config) -> str: Returns: str: The text from the audio """ - model = config.huggingface_audio_to_text_model + if agent.config.audio_to_text_provider == "huggingface": + text = read_huggingface_audio(audio, agent.config) + if text: + return f"The audio says: {text}" + else: + return f"Error, couldn't convert audio to text" + + return "Error: No audio to text provider given" + + +def read_huggingface_audio(audio: bytes, agent: Agent) -> str: + model = agent.config.huggingface_audio_to_text_model api_url = f"https://api-inference.huggingface.co/models/{model}" - api_token = config.huggingface_api_token + api_token = agent.config.huggingface_api_token headers = {"Authorization": f"Bearer {api_token}"} if api_token is None: @@ -60,5 +67,5 @@ def read_audio(audio: bytes, config: Config) -> str: data=audio, ) - text = json.loads(response.content.decode("utf-8"))["text"] - return f"The audio says: {text}" + response_json = json.loads(response.content.decode("utf-8")) + return response_json.get("text") diff --git a/autogpt/commands/command.py b/autogpt/commands/command.py index 742cc8df649..ed93589fe28 100644 --- a/autogpt/commands/command.py +++ b/autogpt/commands/command.py @@ -1,6 +1,7 @@ import functools import importlib import inspect +from inspect import Parameter from typing import Any, Callable, Optional from autogpt.config import Config @@ -175,3 +176,32 @@ def wrapper(*args, **kwargs) -> Any: return wrapper return decorator + + +def ignore_unexpected_kwargs(func: Callable[..., Any]) -> Callable[..., Any]: + def filter_kwargs(kwargs: dict) -> dict: + sig = inspect.signature(func) + # Parameter.VAR_KEYWORD - a dict of keyword arguments that aren't bound to any other + if any(map(lambda p: p.kind == Parameter.VAR_KEYWORD, sig.parameters.values())): + # if **kwargs exist, return directly + return kwargs + + _params = list( + filter( + lambda p: p.kind + in {Parameter.KEYWORD_ONLY, Parameter.POSITIONAL_OR_KEYWORD}, + sig.parameters.values(), + ) + ) + + res_kwargs = { + param.name: kwargs[param.name] for param in _params if param.name in kwargs + } + return res_kwargs + + @functools.wraps(func) + def wrapper(*args, **kwargs) -> Any: + kwargs = filter_kwargs(kwargs) + return func(*args, **kwargs) + + return wrapper diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 20c5e1a27e7..109caa3aa60 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -6,13 +6,53 @@ import docker from docker.errors import ImageNotFound +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.config import Config from autogpt.logs import logger +from autogpt.setup import CFG +from autogpt.workspace.workspace import Workspace + +ALLOWLIST_CONTROL = "allowlist" +DENYLIST_CONTROL = "denylist" + + +@command( + "execute_python_code", + "Create a Python file and execute it", + '"code": "", "basename": ""', +) +def execute_python_code(code: str, basename: str, agent: Agent) -> str: + """Create and execute a Python file in a Docker container and return the STDOUT of the + executed code. 
If there is any data that needs to be captured use a print statement + + Args: + code (str): The Python code to run + basename (str): A name to be given to the Python file + + Returns: + str: The STDOUT captured from the code when it ran + """ + ai_name = agent.ai_name + directory = os.path.join(agent.config.workspace_path, ai_name, "executed_code") + os.makedirs(directory, exist_ok=True) + + if not basename.endswith(".py"): + basename = basename + ".py" + + path = os.path.join(directory, basename) + + try: + with open(path, "w+", encoding="utf-8") as f: + f.write(code) + + return execute_python_file(f.name, agent) + except Exception as e: + return f"Error: {str(e)}" @command("execute_python_file", "Execute Python File", '"filename": ""') -def execute_python_file(filename: str, config: Config) -> str: +def execute_python_file(filename: str, agent: Agent) -> str: """Execute a Python file in a Docker container and return the output Args: @@ -21,17 +61,30 @@ def execute_python_file(filename: str, config: Config) -> str: Returns: str: The output of the file """ - logger.info(f"Executing file '{filename}'") + logger.info( + f"Executing python file '{filename}' in working directory '{CFG.workspace_path}'" + ) if not filename.endswith(".py"): return "Error: Invalid file type. Only .py files are allowed." - if not os.path.isfile(filename): - return f"Error: File '{filename}' does not exist." + workspace = Workspace( + agent.config.workspace_path, agent.config.restrict_to_workspace + ) + + path = workspace.get_path(filename) + if not path.is_file(): + # Mimic the response that you get from the command line so that it's easier to identify + return ( + f"python: can't open file '{filename}': [Errno 2] No such file or directory" + ) if we_are_running_in_a_docker_container(): result = subprocess.run( - ["python", filename], capture_output=True, encoding="utf8" + ["python", str(path)], + capture_output=True, + encoding="utf8", + cwd=CFG.workspace_path, ) if result.returncode == 0: return result.stdout @@ -63,9 +116,9 @@ def execute_python_file(filename: str, config: Config) -> str: logger.info(status) container = client.containers.run( image_name, - ["python", str(Path(filename).relative_to(config.workspace_path))], + ["python", str(path.relative_to(workspace.root))], volumes={ - config.workspace_path: { + agent.config.workspace_path: { "bind": "/workspace", "mode": "ro", } @@ -104,21 +157,15 @@ def validate_command(command: str, config: Config) -> bool: Returns: bool: True if the command is allowed, False otherwise """ - tokens = command.split() - - if not tokens: + if not command: return False - if config.deny_commands and tokens[0] not in config.deny_commands: - return False - - for keyword in config.allow_commands: - if keyword in tokens: - return True - if config.allow_commands: - return False + command_name = command.split()[0] - return True + if config.shell_command_control == ALLOWLIST_CONTROL: + return command_name in config.shell_allowlist + else: + return command_name not in config.shell_denylist @command( @@ -130,7 +177,7 @@ def validate_command(command: str, config: Config) -> bool: " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config file: .env - do not attempt to bypass the restriction.", ) -def execute_shell(command_line: str, config: Config) -> str: +def execute_shell(command_line: str, agent: Agent) -> str: """Execute a shell command and return the output Args: @@ -139,14 +186,14 @@ def execute_shell(command_line: str, config: Config) -> str: Returns: str: 
The output of the command """ - if not validate_command(command_line, config): + if not validate_command(command_line, agent.config): logger.info(f"Command '{command_line}' not allowed") return "Error: This Shell Command is not allowed." current_dir = Path.cwd() # Change dir into workspace if necessary - if not current_dir.is_relative_to(config.workspace_path): - os.chdir(config.workspace_path) + if not current_dir.is_relative_to(agent.config.workspace_path): + os.chdir(agent.config.workspace_path) logger.info( f"Executing command '{command_line}' in working directory '{os.getcwd()}'" @@ -170,7 +217,7 @@ def execute_shell(command_line: str, config: Config) -> str: " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config. Do not attempt to bypass the restriction.", ) -def execute_shell_popen(command_line, config: Config) -> str: +def execute_shell_popen(command_line, agent: Agent) -> str: """Execute a shell command with Popen and returns an english description of the event and the process id @@ -180,14 +227,14 @@ def execute_shell_popen(command_line, config: Config) -> str: Returns: str: Description of the fact that the process started and its id """ - if not validate_command(command_line, config): + if not validate_command(command_line, agent.config): logger.info(f"Command '{command_line}' not allowed") return "Error: This Shell Command is not allowed." current_dir = os.getcwd() # Change dir into workspace if necessary - if config.workspace_path not in current_dir: - os.chdir(config.workspace_path) + if agent.config.workspace_path not in current_dir: + os.chdir(agent.config.workspace_path) logger.info( f"Executing command '{command_line}' in working directory '{os.getcwd()}'" diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 824db50c8bd..b851d662550 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -4,23 +4,23 @@ import hashlib import os import os.path -from typing import TYPE_CHECKING, Generator, Literal +import re +from typing import Generator, Literal import requests from colorama import Back, Fore +from confection import Config from requests.adapters import HTTPAdapter, Retry -from autogpt.commands.command import command +from autogpt.agent.agent import Agent +from autogpt.commands.command import command, ignore_unexpected_kwargs from autogpt.commands.file_operations_utils import read_textual_file +from autogpt.config import Config from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, VectorMemory from autogpt.spinner import Spinner from autogpt.utils import readable_file_size -if TYPE_CHECKING: - from autogpt.config import Config - - Operation = Literal["write", "append", "delete"] @@ -102,7 +102,7 @@ def is_duplicate_operation( def log_operation( - operation: str, filename: str, config: Config, checksum: str | None = None + operation: str, filename: str, agent: Agent, checksum: str | None = None ) -> None: """Log the file operation to the file_logger.txt @@ -115,43 +115,13 @@ def log_operation( if checksum is not None: log_entry += f" #{checksum}" logger.debug(f"Logging file operation: {log_entry}") - append_to_file(config.file_logger_path, f"{log_entry}\n", config, should_log=False) - - -def split_file( - content: str, max_length: int = 4000, overlap: int = 0 -) -> Generator[str, None, None]: - """ - Split text into chunks of a specified maximum length with a specified overlap - between chunks. 
- - :param content: The input text to be split into chunks - :param max_length: The maximum length of each chunk, - default is 4000 (about 1k token) - :param overlap: The number of overlapping characters between chunks, - default is no overlap - :return: A generator yielding chunks of text - """ - start = 0 - content_length = len(content) - - while start < content_length: - end = start + max_length - if end + overlap < content_length: - chunk = content[start : end + max(overlap - 1, 0)] - else: - chunk = content[start:content_length] - - # Account for the case where the last chunk is shorter than the overlap, so it has already been consumed - if len(chunk) <= overlap: - break - - yield chunk - start += max_length - overlap + append_to_file( + agent.config.file_logger_path, f"{log_entry}\n", agent, should_log=False + ) @command("read_file", "Read a file", '"filename": ""') -def read_file(filename: str, config: Config) -> str: +def read_file(filename: str, agent: Agent) -> str: """Read a file and return the contents Args: @@ -200,7 +170,7 @@ def ingest_file( @command("write_to_file", "Write to file", '"filename": "", "text": ""') -def write_to_file(filename: str, text: str, config: Config) -> str: +def write_to_file(filename: str, text: str, agent: Agent) -> str: """Write text to a file Args: @@ -211,24 +181,86 @@ def write_to_file(filename: str, text: str, config: Config) -> str: str: A message indicating success or failure """ checksum = text_checksum(text) - if is_duplicate_operation("write", filename, config, checksum): + if is_duplicate_operation("write", filename, agent.config, checksum): return "Error: File has already been updated." try: directory = os.path.dirname(filename) os.makedirs(directory, exist_ok=True) with open(filename, "w", encoding="utf-8") as f: f.write(text) - log_operation("write", filename, config, checksum) + log_operation("write", filename, agent, checksum) return "File written to successfully." except Exception as err: return f"Error: {err}" +@command( + "replace_in_file", + "Replace text or code in a file", + '"filename": "", ' + '"old_text": "", "new_text": "", ' + '"occurrence_index": ""', +) +def replace_in_file( + filename: str, old_text: str, new_text: str, agent: Agent, occurrence_index=None +): + """Update a file by replacing one or all occurrences of old_text with new_text using Python's built-in string + manipulation and regular expression modules for cross-platform file editing similar to sed and awk. + + Args: + filename (str): The name of the file + old_text (str): String to be replaced. \n will be stripped from the end. + new_text (str): New string. \n will be stripped from the end. + occurrence_index (int): Optional index of the occurrence to replace. If None, all occurrences will be replaced. + + Returns: + str: A message indicating whether the file was updated successfully or if there were no matches found for old_text + in the file. + + Raises: + Exception: If there was an error updating the file. 
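+
+    Example (illustrative):
+        replace_in_file("story.txt", "cats", "dogs", agent) replaces every
+        occurrence of "cats"; passing occurrence_index=0 replaces only the
+        first match.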
+ """ + try: + with open(filename, "r", encoding="utf-8") as f: + content = f.read() + + old_text = old_text.rstrip("\n") + new_text = new_text.rstrip("\n") + + if occurrence_index is None: + new_content = content.replace(old_text, new_text) + else: + matches = list(re.finditer(re.escape(old_text), content)) + if not matches: + return f"No matches found for {old_text} in {filename}" + + if int(occurrence_index) >= len(matches): + return f"Occurrence index {occurrence_index} is out of range for {old_text} in {filename}" + + match = matches[int(occurrence_index)] + start, end = match.start(), match.end() + new_content = content[:start] + new_text + content[end:] + + if content == new_content: + return f"No matches found for {old_text} in {filename}" + + with open(filename, "w", encoding="utf-8") as f: + f.write(new_content) + + with open(filename, "r", encoding="utf-8") as f: + checksum = text_checksum(f.read()) + log_operation("update", filename, agent, checksum=checksum) + + return f"File {filename} updated successfully." + except Exception as e: + return "Error: " + str(e) + + @command( "append_to_file", "Append to file", '"filename": "", "text": ""' ) def append_to_file( - filename: str, text: str, config: Config, should_log: bool = True + filename: str, text: str, agent: Agent, should_log: bool = True ) -> str: """Append text to a file @@ -249,7 +281,7 @@ def append_to_file( if should_log: with open(filename, "r", encoding="utf-8") as f: checksum = text_checksum(f.read()) - log_operation("append", filename, config, checksum=checksum) + log_operation("append", filename, agent, checksum=checksum) return "Text appended successfully." except Exception as err: @@ -257,7 +289,7 @@ def append_to_file( @command("delete_file", "Delete file", '"filename": ""') -def delete_file(filename: str, config: Config) -> str: +def delete_file(filename: str, agent: Agent) -> str: """Delete a file Args: @@ -266,18 +298,19 @@ def delete_file(filename: str, config: Config) -> str: Returns: str: A message indicating success or failure """ - if is_duplicate_operation("delete", filename, config): + if is_duplicate_operation("delete", filename, agent.config): return "Error: File has already been deleted." try: os.remove(filename) - log_operation("delete", filename, config) + log_operation("delete", filename, agent) return "File deleted successfully." 
except Exception as err: return f"Error: {err}" @command("list_files", "List Files in Directory", '"directory": ""') -def list_files(directory: str, config: Config) -> list[str]: +@ignore_unexpected_kwargs +def list_files(directory: str, agent: Agent) -> list[str]: """lists files in a directory recursively Args: @@ -293,7 +326,7 @@ def list_files(directory: str, config: Config) -> list[str]: if file.startswith("."): continue relative_path = os.path.relpath( - os.path.join(root, file), config.workspace_path + os.path.join(root, file), agent.config.workspace_path ) found_files.append(relative_path) @@ -307,7 +340,7 @@ def list_files(directory: str, config: Config) -> list[str]: lambda config: config.allow_downloads, "Error: You do not have user authorization to download files locally.", ) -def download_file(url, filename, config: Config): +def download_file(url, filename, agent: Agent): """Downloads a file Args: url (str): URL of the file to download @@ -317,7 +350,7 @@ def download_file(url, filename, config: Config): directory = os.path.dirname(filename) os.makedirs(directory, exist_ok=True) message = f"{Fore.YELLOW}Downloading file from {Back.LIGHTBLUE_EX}{url}{Back.RESET}{Fore.RESET}" - with Spinner(message, plain_output=config.plain_output) as spinner: + with Spinner(message, plain_output=agent.config.plain_output) as spinner: session = requests.Session() retry = Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504]) adapter = HTTPAdapter(max_retries=retry) diff --git a/autogpt/commands/file_operations_utils.py b/autogpt/commands/file_operations_utils.py index 7f3e418da96..b00779688e0 100644 --- a/autogpt/commands/file_operations_utils.py +++ b/autogpt/commands/file_operations_utils.py @@ -146,7 +146,9 @@ def is_file_binary_fn(file_path: str): def read_textual_file(file_path: str, logger: logs.Logger) -> str: if not os.path.isfile(file_path): - raise FileNotFoundError(f"{file_path} not found!") + raise FileNotFoundError( + f"read_file {file_path} failed: no such file or directory" + ) is_binary = is_file_binary_fn(file_path) file_extension = os.path.splitext(file_path)[1].lower() parser = extension_to_parser.get(file_extension) diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index c32a8cc30bc..e844fd4151f 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -1,15 +1,11 @@ """Git operations for autogpt""" -from typing import TYPE_CHECKING from git.repo import Repo +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config from autogpt.url_utils.validators import validate_url -if TYPE_CHECKING: - from autogpt.config import Config - @command( "clone_repository", @@ -19,7 +15,7 @@ "Configure github_username and github_api_key.", ) @validate_url -def clone_repository(url: str, clone_path: str, config: Config) -> str: +def clone_repository(url: str, clone_path: str, agent: Agent) -> str: """Clone a GitHub repository locally. Args: @@ -30,8 +26,10 @@ def clone_repository(url: str, clone_path: str, config: Config) -> str: str: The result of the clone operation. 
""" split_url = url.split("//") - auth_repo_url = f"//{config.github_username}:{config.github_api_key}@".join( - split_url + auth_repo_url = ( + f"//{agent.config.github_username}:{agent.config.github_api_key}@".join( + split_url + ) ) try: Repo.clone_from(url=auth_repo_url, to_path=clone_path) diff --git a/autogpt/commands/google_search.py b/autogpt/commands/google_search.py index c01ec0a12a7..b9d243f9788 100644 --- a/autogpt/commands/google_search.py +++ b/autogpt/commands/google_search.py @@ -2,15 +2,15 @@ from __future__ import annotations import json +import time from itertools import islice -from typing import TYPE_CHECKING from duckduckgo_search import DDGS +from autogpt.agent.agent import Agent from autogpt.commands.command import command -if TYPE_CHECKING: - from autogpt.config import Config +DUCKDUCKGO_MAX_ATTEMPTS = 3 @command( @@ -19,7 +19,7 @@ '"query": ""', lambda config: not config.google_api_key, ) -def google_search(query: str, config: Config, num_results: int = 8) -> str: +def google_search(query: str, agent: Agent, num_results: int = 8) -> str: """Return the results of a Google search Args: @@ -30,15 +30,20 @@ def google_search(query: str, config: Config, num_results: int = 8) -> str: str: The results of the search. """ search_results = [] - if not query: - return json.dumps(search_results) + attempts = 0 - results = DDGS().text(query) - if not results: - return json.dumps(search_results) + while attempts < DUCKDUCKGO_MAX_ATTEMPTS: + if not query: + return json.dumps(search_results) - for item in islice(results, num_results): - search_results.append(item) + results = DDGS().text(query) + search_results = list(islice(results, num_results)) + + if search_results: + break + + time.sleep(1) + attempts += 1 results = json.dumps(search_results, ensure_ascii=False, indent=4) return safe_google_results(results) @@ -48,11 +53,12 @@ def google_search(query: str, config: Config, num_results: int = 8) -> str: "google", "Google Search", '"query": ""', - lambda config: bool(config.google_api_key) and bool(config.custom_search_engine_id), + lambda config: bool(config.google_api_key) + and bool(config.google_custom_search_engine_id), "Configure google_api_key and custom_search_engine_id.", ) def google_official_search( - query: str, config: Config, num_results: int = 8 + query: str, agent: Agent, num_results: int = 8 ) -> str | list[str]: """Return the results of a Google search using the official Google API @@ -69,8 +75,8 @@ def google_official_search( try: # Get the Google API key and Custom Search Engine ID from the config file - api_key = config.google_api_key - custom_search_engine_id = config.custom_search_engine_id + api_key = agent.config.google_api_key + custom_search_engine_id = agent.config.google_custom_search_engine_id # Initialize the Custom Search API service service = build("customsearch", "v1", developerKey=api_key) diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index 04d8656442b..b2dc9ea4802 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -4,19 +4,15 @@ import time import uuid from base64 import b64decode -from typing import TYPE_CHECKING import openai import requests from PIL import Image +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config from autogpt.logs import logger -if TYPE_CHECKING: - from autogpt.config import Config - @command( "generate_image", @@ -25,7 +21,7 @@ lambda config: config.image_provider, "Requires a image provider to be 
set.", ) -def generate_image(prompt: str, config: Config, size: int = 256) -> str: +def generate_image(prompt: str, agent: Agent, size: int = 256) -> str: """Generate an image from a prompt. Args: @@ -35,21 +31,21 @@ def generate_image(prompt: str, config: Config, size: int = 256) -> str: Returns: str: The filename of the image """ - filename = f"{config.workspace_path}/{str(uuid.uuid4())}.jpg" + filename = f"{agent.config.workspace_path}/{str(uuid.uuid4())}.jpg" # DALL-E - if config.image_provider == "dalle": - return generate_image_with_dalle(prompt, filename, size, config) + if agent.config.image_provider == "dalle": + return generate_image_with_dalle(prompt, filename, size, agent) # HuggingFace - elif config.image_provider == "huggingface": - return generate_image_with_hf(prompt, filename, config) + elif agent.config.image_provider == "huggingface": + return generate_image_with_hf(prompt, filename, agent) # SD WebUI - elif config.image_provider == "sdwebui": - return generate_image_with_sd_webui(prompt, filename, config, size) + elif agent.config.image_provider == "sdwebui": + return generate_image_with_sd_webui(prompt, filename, agent, size) return "No Image Provider Set" -def generate_image_with_hf(prompt: str, filename: str, config: Config) -> str: +def generate_image_with_hf(prompt: str, filename: str, agent: Agent) -> str: """Generate an image with HuggingFace's API. Args: @@ -59,15 +55,13 @@ def generate_image_with_hf(prompt: str, filename: str, config: Config) -> str: Returns: str: The filename of the image """ - API_URL = ( - f"https://api-inference.huggingface.co/models/{config.huggingface_image_model}" - ) - if config.huggingface_api_token is None: + API_URL = f"https://api-inference.huggingface.co/models/{agent.config.huggingface_image_model}" + if agent.config.huggingface_api_token is None: raise ValueError( "You need to set your Hugging Face API token in the config file." ) headers = { - "Authorization": f"Bearer {config.huggingface_api_token}", + "Authorization": f"Bearer {agent.config.huggingface_api_token}", "X-Use-Cache": "false", } @@ -110,7 +104,7 @@ def generate_image_with_hf(prompt: str, filename: str, config: Config) -> str: def generate_image_with_dalle( - prompt: str, filename: str, size: int, config: Config + prompt: str, filename: str, size: int, agent: Agent ) -> str: """Generate an image with DALL-E. 
@@ -136,7 +130,7 @@ def generate_image_with_dalle( n=1, size=f"{size}x{size}", response_format="b64_json", - api_key=config.openai_api_key, + api_key=agent.config.openai_api_key, ) logger.info(f"Image Generated for prompt:{prompt}") @@ -152,7 +146,7 @@ def generate_image_with_dalle( def generate_image_with_sd_webui( prompt: str, filename: str, - config: Config, + agent: Agent, size: int = 512, negative_prompt: str = "", extra: dict = {}, @@ -169,13 +163,13 @@ def generate_image_with_sd_webui( """ # Create a session and set the basic auth if needed s = requests.Session() - if config.sd_webui_auth: - username, password = config.sd_webui_auth.split(":") + if agent.config.sd_webui_auth: + username, password = agent.config.sd_webui_auth.split(":") s.auth = (username, password or "") # Generate the images response = requests.post( - f"{config.sd_webui_url}/sdapi/v1/txt2img", + f"{agent.config.sd_webui_url}/sdapi/v1/txt2img", json={ "prompt": prompt, "negative_prompt": negative_prompt, diff --git a/autogpt/commands/improve_code.py b/autogpt/commands/improve_code.py index 60e517ef4e1..05e9b51c1c0 100644 --- a/autogpt/commands/improve_code.py +++ b/autogpt/commands/improve_code.py @@ -1,21 +1,18 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.llm.utils import call_ai_function -if TYPE_CHECKING: - from autogpt.config import Config - @command( "improve_code", "Get Improved Code", '"suggestions": "", "code": ""', ) -def improve_code(suggestions: list[str], code: str, config: Config) -> str: +def improve_code(suggestions: list[str], code: str, agent: Agent) -> str: """ A function that takes in code and suggestions and returns a response from create chat completion api call. @@ -36,4 +33,6 @@ def improve_code(suggestions: list[str], code: str, config: Config) -> str: " provided, making no other changes." 
) - return call_ai_function(function_string, args, description_string, config=config) + return call_ai_function( + function_string, args, description_string, config=agent.config + ) diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/task_statuses.py index 9f60209cb8a..283328a3661 100644 --- a/autogpt/commands/task_statuses.py +++ b/autogpt/commands/task_statuses.py @@ -1,21 +1,19 @@ """Task Statuses module.""" from __future__ import annotations -from typing import TYPE_CHECKING, NoReturn +from typing import NoReturn +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.logs import logger -if TYPE_CHECKING: - from autogpt.config import Config - @command( "task_complete", "Task Complete (Shutdown)", '"reason": ""', ) -def task_complete(reason: str, config: Config) -> NoReturn: +def task_complete(reason: str, agent: Agent) -> NoReturn: """ A function that takes in a string and exits the program diff --git a/autogpt/commands/web_requests.py b/autogpt/commands/web_requests.py index d7de8dc93ce..765c37781dd 100644 --- a/autogpt/commands/web_requests.py +++ b/autogpt/commands/web_requests.py @@ -1,20 +1,24 @@ """Browse a webpage and summarize it using the LLM model""" from __future__ import annotations +from typing import TYPE_CHECKING + import requests from bs4 import BeautifulSoup from requests import Response -from autogpt.config import Config from autogpt.processing.html import extract_hyperlinks, format_hyperlinks from autogpt.url_utils.validators import validate_url session = requests.Session() +if TYPE_CHECKING: + from autogpt.agent.agent import Agent + @validate_url def get_response( - url: str, config: Config, timeout: int = 10 + url: str, agent: Agent, timeout: int = 10 ) -> tuple[None, str] | tuple[Response, None]: """Get the response from a URL @@ -30,7 +34,7 @@ def get_response( requests.exceptions.RequestException: If the HTTP request fails """ try: - session.headers.update({"User-Agent": config.user_agent}) + session.headers.update({"User-Agent": agent.config.user_agent}) response = session.get(url, timeout=timeout) # Check if the response contains an HTTP error @@ -48,7 +52,7 @@ def get_response( return None, f"Error: {str(re)}" -def scrape_text(url: str, config: Config) -> str: +def scrape_text(url: str, agent: Agent) -> str: """Scrape text from a webpage Args: @@ -57,7 +61,7 @@ def scrape_text(url: str, config: Config) -> str: Returns: str: The scraped text """ - response, error_message = get_response(url, config) + response, error_message = get_response(url, agent) if error_message: return error_message if not response: @@ -76,7 +80,7 @@ def scrape_text(url: str, config: Config) -> str: return text -def scrape_links(url: str, config: Config) -> str | list[str]: +def scrape_links(url: str, agent: Agent) -> str | list[str]: """Scrape links from a webpage Args: @@ -85,7 +89,7 @@ def scrape_links(url: str, config: Config) -> str | list[str]: Returns: str | list[str]: The scraped links """ - response, error_message = get_response(url, config) + response, error_message = get_response(url, agent) if error_message: return error_message if not response: diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 3cc99282b03..14036c85e09 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -4,7 +4,7 @@ import logging from pathlib import Path from sys import platform -from typing import TYPE_CHECKING, Optional, Type +from typing import Optional, Type from bs4 import 
BeautifulSoup from selenium.common.exceptions import WebDriverException @@ -27,15 +27,13 @@ from webdriver_manager.firefox import GeckoDriverManager from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, get_memory from autogpt.processing.html import extract_hyperlinks, format_hyperlinks from autogpt.url_utils.validators import validate_url -if TYPE_CHECKING: - from autogpt.config import Config - BrowserOptions = ChromeOptions | EdgeOptions | FirefoxOptions | SafariOptions FILE_DIR = Path(__file__).parent.parent @@ -47,7 +45,7 @@ '"url": "", "question": ""', ) @validate_url -def browse_website(url: str, question: str, config: Config) -> str: +def browse_website(url: str, question: str, agent: Agent) -> str: """Browse a website and return the answer and links to the user Args: @@ -58,7 +56,7 @@ def browse_website(url: str, question: str, config: Config) -> str: Tuple[str, WebDriver]: The answer and links to the user and the webdriver """ try: - driver, text = scrape_text_with_selenium(url, config) + driver, text = scrape_text_with_selenium(url, agent) except WebDriverException as e: # These errors are often quite long and include lots of context. # Just grab the first line. @@ -66,7 +64,7 @@ def browse_website(url: str, question: str, config: Config) -> str: return f"Error: {msg}" add_header(driver) - summary = summarize_memorize_webpage(url, text, question, config, driver) + summary = summarize_memorize_webpage(url, text, question, agent, driver) links = scrape_links_with_selenium(driver, url) # Limit links to 5 @@ -76,7 +74,7 @@ def browse_website(url: str, question: str, config: Config) -> str: return f"Answer gathered from website: {summary}\n\nLinks: {links}" -def scrape_text_with_selenium(url: str, config: Config) -> tuple[WebDriver, str]: +def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]: """Scrape text from a website using selenium Args: @@ -94,23 +92,23 @@ def scrape_text_with_selenium(url: str, config: Config) -> tuple[WebDriver, str] "safari": SafariOptions, } - options: BrowserOptions = options_available[config.selenium_web_browser]() + options: BrowserOptions = options_available[agent.config.selenium_web_browser]() options.add_argument( "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36" ) - if config.selenium_web_browser == "firefox": - if config.selenium_headless: + if agent.config.selenium_web_browser == "firefox": + if agent.config.selenium_headless: options.headless = True options.add_argument("--disable-gpu") driver = FirefoxDriver( service=GeckoDriverService(GeckoDriverManager().install()), options=options ) - elif config.selenium_web_browser == "edge": + elif agent.config.selenium_web_browser == "edge": driver = EdgeDriver( service=EdgeDriverService(EdgeDriverManager().install()), options=options ) - elif config.selenium_web_browser == "safari": + elif agent.config.selenium_web_browser == "safari": # Requires a bit more setup on the users end # See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari driver = SafariDriver(options=options) @@ -120,7 +118,7 @@ def scrape_text_with_selenium(url: str, config: Config) -> tuple[WebDriver, str] options.add_argument("--remote-debugging-port=9222") options.add_argument("--no-sandbox") - if 
config.selenium_headless: + if agent.config.selenium_headless: options.add_argument("--headless=new") options.add_argument("--disable-gpu") @@ -205,7 +203,7 @@ def summarize_memorize_webpage( url: str, text: str, question: str, - config: Config, + agent: Agent, driver: Optional[WebDriver] = None, ) -> str: """Summarize text using the OpenAI API @@ -225,7 +223,7 @@ def summarize_memorize_webpage( text_length = len(text) logger.info(f"Text length: {text_length} characters") - memory = get_memory(config) + memory = get_memory(agent.config) new_memory = MemoryItem.from_webpage(text, url, question=question) memory.add(new_memory) diff --git a/autogpt/commands/write_tests.py b/autogpt/commands/write_tests.py index a63c265f8cd..c09930b9f3c 100644 --- a/autogpt/commands/write_tests.py +++ b/autogpt/commands/write_tests.py @@ -2,21 +2,18 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.llm.utils import call_ai_function -if TYPE_CHECKING: - from autogpt.config import Config - @command( "write_tests", "Write Tests", '"code": "", "focus": ""', ) -def write_tests(code: str, focus: list[str], config: Config) -> str: +def write_tests(code: str, focus: list[str], agent: Agent) -> str: """ A function that takes in code and focus topics and returns a response from create chat completion api call. @@ -38,4 +35,6 @@ def write_tests(code: str, focus: list[str], config: Config) -> str: " specific areas if required." ) - return call_ai_function(function_string, args, description_string, config=config) + return call_ai_function( + function_string, args, description_string, config=agent.config + ) diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 5f76bb74550..92712dd7d41 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -7,6 +7,7 @@ from auto_gpt_plugin_template import AutoGPTPluginTemplate from colorama import Fore +import autogpt from autogpt.singleton import Singleton @@ -38,27 +39,30 @@ def __init__(self) -> None: else: self.disabled_command_categories = [] - deny_commands = os.getenv("DENY_COMMANDS") - if deny_commands: - self.deny_commands = deny_commands.split(",") + self.shell_command_control = os.getenv("SHELL_COMMAND_CONTROL", "denylist") + + # DENY_COMMANDS is deprecated and included for backwards-compatibility + shell_denylist = os.getenv("SHELL_DENYLIST", os.getenv("DENY_COMMANDS")) + if shell_denylist: + self.shell_denylist = shell_denylist.split(",") else: - self.deny_commands = [] + self.shell_denylist = ["sudo", "su"] - allow_commands = os.getenv("ALLOW_COMMANDS") - if allow_commands: - self.allow_commands = allow_commands.split(",") + # ALLOW_COMMANDS is deprecated and included for backwards-compatibility + shell_allowlist = os.getenv("SHELL_ALLOWLIST", os.getenv("ALLOW_COMMANDS")) + if shell_allowlist: + self.shell_allowlist = shell_allowlist.split(",") else: - self.allow_commands = [] + self.shell_allowlist = [] self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml") self.prompt_settings_file = os.getenv( "PROMPT_SETTINGS_FILE", "prompt_settings.yaml" ) self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo") - self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4") - self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000)) - self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000)) + self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-3.5-turbo") self.embedding_model 
= os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002") + self.browse_spacy_language_model = os.getenv( "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm" ) @@ -79,27 +83,41 @@ def __init__(self) -> None: openai.api_type = self.openai_api_type openai.api_base = self.openai_api_base openai.api_version = self.openai_api_version + elif os.getenv("OPENAI_API_BASE_URL", None): + openai.api_base = os.getenv("OPENAI_API_BASE_URL") if self.openai_organization is not None: openai.organization = self.openai_organization self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY") - self.elevenlabs_voice_1_id = os.getenv("ELEVENLABS_VOICE_1_ID") - self.elevenlabs_voice_2_id = os.getenv("ELEVENLABS_VOICE_2_ID") - - self.use_mac_os_tts = False - self.use_mac_os_tts = os.getenv("USE_MAC_OS_TTS") - - self.chat_messages_enabled = os.getenv("CHAT_MESSAGES_ENABLED") == "True" + # ELEVENLABS_VOICE_1_ID is deprecated and included for backwards-compatibility + self.elevenlabs_voice_id = os.getenv( + "ELEVENLABS_VOICE_ID", os.getenv("ELEVENLABS_VOICE_1_ID") + ) + self.streamelements_voice = os.getenv("STREAMELEMENTS_VOICE", "Brian") + + # Backwards-compatibility shim for deprecated env variables + if os.getenv("USE_MAC_OS_TTS"): + default_tts_provider = "macos" + elif self.elevenlabs_api_key: + default_tts_provider = "elevenlabs" + elif os.getenv("USE_BRIAN_TTS"): + default_tts_provider = "streamelements" + else: + default_tts_provider = "gtts" - self.use_brian_tts = False - self.use_brian_tts = os.getenv("USE_BRIAN_TTS") + self.text_to_speech_provider = os.getenv( + "TEXT_TO_SPEECH_PROVIDER", default_tts_provider + ) self.github_api_key = os.getenv("GITHUB_API_KEY") self.github_username = os.getenv("GITHUB_USERNAME") self.google_api_key = os.getenv("GOOGLE_API_KEY") - self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID") + # CUSTOM_SEARCH_ENGINE_ID is deprecated and included for backwards-compatibility + self.google_custom_search_engine_id = os.getenv( + "GOOGLE_CUSTOM_SEARCH_ENGINE_ID", os.getenv("CUSTOM_SEARCH_ENGINE_ID") + ) self.image_provider = os.getenv("IMAGE_PROVIDER") self.image_size = int(os.getenv("IMAGE_SIZE", 256)) @@ -107,6 +125,7 @@ def __init__(self) -> None: self.huggingface_image_model = os.getenv( "HUGGINGFACE_IMAGE_MODEL", "CompVis/stable-diffusion-v1-4" ) + self.audio_to_text_provider = os.getenv("AUDIO_TO_TEXT_PROVIDER", "huggingface") self.huggingface_audio_to_text_model = os.getenv( "HUGGINGFACE_AUDIO_TO_TEXT_MODEL" ) @@ -138,18 +157,37 @@ def __init__(self) -> None: self.plugins: List[AutoGPTPluginTemplate] = [] self.plugins_openai = [] + # Deprecated. Kept for backwards-compatibility. Will remove in a future version. plugins_allowlist = os.getenv("ALLOWLISTED_PLUGINS") if plugins_allowlist: self.plugins_allowlist = plugins_allowlist.split(",") else: self.plugins_allowlist = [] + # Deprecated. Kept for backwards-compatibility. Will remove in a future version. 
plugins_denylist = os.getenv("DENYLISTED_PLUGINS") if plugins_denylist: self.plugins_denylist = plugins_denylist.split(",") else: self.plugins_denylist = [] + # Avoid circular imports + from autogpt.plugins import DEFAULT_PLUGINS_CONFIG_FILE + + self.plugins_config_file = os.getenv( + "PLUGINS_CONFIG_FILE", DEFAULT_PLUGINS_CONFIG_FILE + ) + self.load_plugins_config() + + self.chat_messages_enabled = os.getenv("CHAT_MESSAGES_ENABLED") == "True" + + def load_plugins_config(self) -> "autogpt.plugins.PluginsConfig": + # Avoid circular import + from autogpt.plugins.plugins_config import PluginsConfig + + self.plugins_config = PluginsConfig.load_config(global_config=self) + return self.plugins_config + def get_azure_deployment_id_for_model(self, model: str) -> str: """ Returns the relevant deployment id for the model specified. @@ -217,14 +255,6 @@ def set_smart_llm_model(self, value: str) -> None: """Set the smart LLM model value.""" self.smart_llm_model = value - def set_fast_token_limit(self, value: int) -> None: - """Set the fast token limit value.""" - self.fast_token_limit = value - - def set_smart_token_limit(self, value: int) -> None: - """Set the smart token limit value.""" - self.smart_token_limit = value - def set_embedding_model(self, value: str) -> None: """Set the model to use for creating embeddings.""" self.embedding_model = value @@ -239,7 +269,7 @@ def set_elevenlabs_api_key(self, value: str) -> None: def set_elevenlabs_voice_1_id(self, value: str) -> None: """Set the ElevenLabs Voice 1 ID value.""" - self.elevenlabs_voice_1_id = value + self.elevenlabs_voice_id = value def set_elevenlabs_voice_2_id(self, value: str) -> None: """Set the ElevenLabs Voice 2 ID value.""" @@ -251,7 +281,7 @@ def set_google_api_key(self, value: str) -> None: def set_custom_search_engine_id(self, value: str) -> None: """Set the custom search engine id value.""" - self.custom_search_engine_id = value + self.google_custom_search_engine_id = value def set_debug_mode(self, value: bool) -> None: """Set the debug mode value.""" diff --git a/autogpt/json_utils/json_fix_general.py b/autogpt/json_utils/json_fix_general.py deleted file mode 100644 index e485aca120a..00000000000 --- a/autogpt/json_utils/json_fix_general.py +++ /dev/null @@ -1,121 +0,0 @@ -"""This module contains functions to fix JSON strings using general programmatic approaches, suitable for addressing -common JSON formatting issues.""" -from __future__ import annotations - -import contextlib -import json -import re -from typing import Optional - -from autogpt.config import Config -from autogpt.json_utils.utilities import extract_char_position -from autogpt.logs import logger - -CFG = Config() - - -def fix_invalid_escape(json_to_load: str, error_message: str) -> str: - """Fix invalid escape sequences in JSON strings. - - Args: - json_to_load (str): The JSON string. - error_message (str): The error message from the JSONDecodeError - exception. - - Returns: - str: The JSON string with invalid escape sequences fixed. - """ - while error_message.startswith("Invalid \\escape"): - bad_escape_location = extract_char_position(error_message) - json_to_load = ( - json_to_load[:bad_escape_location] + json_to_load[bad_escape_location + 1 :] - ) - try: - json.loads(json_to_load) - return json_to_load - except json.JSONDecodeError as e: - logger.debug("json loads error - fix invalid escape", e) - error_message = str(e) - return json_to_load - - -def balance_braces(json_string: str) -> Optional[str]: - """ - Balance the braces in a JSON string. 
- - Args: - json_string (str): The JSON string. - - Returns: - str: The JSON string with braces balanced. - """ - - open_braces_count = json_string.count("{") - close_braces_count = json_string.count("}") - - while open_braces_count > close_braces_count: - json_string += "}" - close_braces_count += 1 - - while close_braces_count > open_braces_count: - json_string = json_string.rstrip("}") - close_braces_count -= 1 - - with contextlib.suppress(json.JSONDecodeError): - json.loads(json_string) - return json_string - - -def add_quotes_to_property_names(json_string: str) -> str: - """ - Add quotes to property names in a JSON string. - - Args: - json_string (str): The JSON string. - - Returns: - str: The JSON string with quotes added to property names. - """ - - def replace_func(match: re.Match) -> str: - return f'"{match[1]}":' - - property_name_pattern = re.compile(r"(\w+):") - corrected_json_string = property_name_pattern.sub(replace_func, json_string) - - try: - json.loads(corrected_json_string) - return corrected_json_string - except json.JSONDecodeError as e: - raise e - - -def correct_json(json_to_load: str) -> str: - """ - Correct common JSON errors. - Args: - json_to_load (str): The JSON string. - """ - - try: - logger.debug("json", json_to_load) - json.loads(json_to_load) - return json_to_load - except json.JSONDecodeError as e: - logger.debug("json loads error", e) - error_message = str(e) - if error_message.startswith("Invalid \\escape"): - json_to_load = fix_invalid_escape(json_to_load, error_message) - if error_message.startswith( - "Expecting property name enclosed in double quotes" - ): - json_to_load = add_quotes_to_property_names(json_to_load) - try: - json.loads(json_to_load) - return json_to_load - except json.JSONDecodeError as e: - logger.debug("json loads error - add quotes", e) - error_message = str(e) - if balanced_str := balance_braces(json_to_load): - return balanced_str - return json_to_load diff --git a/autogpt/json_utils/json_fix_llm.py b/autogpt/json_utils/json_fix_llm.py deleted file mode 100644 index 9e9fe5338d2..00000000000 --- a/autogpt/json_utils/json_fix_llm.py +++ /dev/null @@ -1,239 +0,0 @@ -"""This module contains functions to fix JSON strings generated by LLM models, such as ChatGPT, using the assistance -of the ChatGPT API or LLM models.""" -from __future__ import annotations - -import contextlib -import json -from typing import Any, Dict - -from colorama import Fore -from regex import regex - -from autogpt.config import Config -from autogpt.json_utils.json_fix_general import correct_json -from autogpt.llm.utils import call_ai_function -from autogpt.logs import logger -from autogpt.speech import say_text - -JSON_SCHEMA = """ -{ - "command": { - "name": "command name", - "args": { - "arg name": "value" - } - }, - "thoughts": - { - "text": "thought", - "reasoning": "reasoning", - "plan": "- short bulleted\n- list that conveys\n- long-term plan", - "criticism": "constructive self-criticism", - "speak": "thoughts summary to say to user" - } -} -""" - -CFG = Config() - - -def auto_fix_json(json_string: str, schema: str) -> str: - """Fix the given JSON string to make it parseable and fully compliant with - the provided schema using GPT-3. - - Args: - json_string (str): The JSON string to fix. - schema (str): The schema to use to fix the JSON. - Returns: - str: The fixed JSON string. 
- """ - # Try to fix the JSON using GPT: - function_string = "def fix_json(json_string: str, schema:str=None) -> str:" - args = [f"'''{json_string}'''", f"'''{schema}'''"] - description_string = ( - "This function takes a JSON string and ensures that it" - " is parseable and fully compliant with the provided schema. If an object" - " or field specified in the schema isn't contained within the correct JSON," - " it is omitted. The function also escapes any double quotes within JSON" - " string values to ensure that they are valid. If the JSON string contains" - " any None or NaN values, they are replaced with null before being parsed." - ) - - # If it doesn't already start with a "`", add one: - if not json_string.startswith("`"): - json_string = "```json\n" + json_string + "\n```" - result_string = call_ai_function( - function_string, args, description_string, model=CFG.fast_llm_model - ) - logger.debug("------------ JSON FIX ATTEMPT ---------------") - logger.debug(f"Original JSON: {json_string}") - logger.debug("-----------") - logger.debug(f"Fixed JSON: {result_string}") - logger.debug("----------- END OF FIX ATTEMPT ----------------") - - try: - json.loads(result_string) # just check the validity - return result_string - except json.JSONDecodeError: # noqa: E722 - # Get the call stack: - # import traceback - # call_stack = traceback.format_exc() - # print(f"Failed to fix JSON: '{json_string}' "+call_stack) - return "failed" - - -def fix_json_using_multiple_techniques(assistant_reply: str) -> Dict[Any, Any]: - """Fix the given JSON string to make it parseable and fully compliant with two techniques. - - Args: - json_string (str): The JSON string to fix. - - Returns: - str: The fixed JSON string. - """ - assistant_reply = assistant_reply.strip() - if assistant_reply.startswith("```json"): - assistant_reply = assistant_reply[7:] - if assistant_reply.endswith("```"): - assistant_reply = assistant_reply[:-3] - try: - return json.loads(assistant_reply) # just check the validity - except json.JSONDecodeError: # noqa: E722 - pass - - if assistant_reply.startswith("json "): - assistant_reply = assistant_reply[5:] - assistant_reply = assistant_reply.strip() - try: - return json.loads(assistant_reply) # just check the validity - except json.JSONDecodeError: # noqa: E722 - pass - - # Parse and print Assistant response - assistant_reply_json = fix_and_parse_json(assistant_reply) - logger.debug("Assistant reply JSON: %s", str(assistant_reply_json)) - if assistant_reply_json == {}: - assistant_reply_json = attempt_to_fix_json_by_finding_outermost_brackets( - assistant_reply - ) - - logger.debug("Assistant reply JSON 2: %s", str(assistant_reply_json)) - if assistant_reply_json != {}: - return assistant_reply_json - - logger.error( - "Error: The following AI output couldn't be converted to a JSON:\n", - assistant_reply, - ) - if CFG.speak_mode: - say_text("I have received an invalid JSON response from the OpenAI API.") - - return {} - - -def fix_and_parse_json( - json_to_load: str, try_to_fix_with_gpt: bool = True -) -> Dict[Any, Any]: - """Fix and parse JSON string - - Args: - json_to_load (str): The JSON string. - try_to_fix_with_gpt (bool, optional): Try to fix the JSON with GPT. - Defaults to True. - - Returns: - str or dict[Any, Any]: The parsed JSON. 
- """ - - with contextlib.suppress(json.JSONDecodeError): - json_to_load = json_to_load.replace("\t", "") - return json.loads(json_to_load) - - with contextlib.suppress(json.JSONDecodeError): - json_to_load = correct_json(json_to_load) - return json.loads(json_to_load) - # Let's do something manually: - # sometimes GPT responds with something BEFORE the braces: - # "I'm sorry, I don't understand. Please try again." - # {"text": "I'm sorry, I don't understand. Please try again.", - # "confidence": 0.0} - # So let's try to find the first brace and then parse the rest - # of the string - try: - brace_index = json_to_load.index("{") - maybe_fixed_json = json_to_load[brace_index:] - last_brace_index = maybe_fixed_json.rindex("}") - maybe_fixed_json = maybe_fixed_json[: last_brace_index + 1] - return json.loads(maybe_fixed_json) - except (json.JSONDecodeError, ValueError) as e: - return try_ai_fix(try_to_fix_with_gpt, e, json_to_load) - - -def try_ai_fix( - try_to_fix_with_gpt: bool, exception: Exception, json_to_load: str -) -> Dict[Any, Any]: - """Try to fix the JSON with the AI - - Args: - try_to_fix_with_gpt (bool): Whether to try to fix the JSON with the AI. - exception (Exception): The exception that was raised. - json_to_load (str): The JSON string to load. - - Raises: - exception: If try_to_fix_with_gpt is False. - - Returns: - str or dict[Any, Any]: The JSON string or dictionary. - """ - if not try_to_fix_with_gpt: - raise exception - if CFG.debug_mode: - logger.warn( - "Warning: Failed to parse AI output, attempting to fix." - "\n If you see this warning frequently, it's likely that" - " your prompt is confusing the AI. Try changing it up" - " slightly." - ) - # Now try to fix this up using the ai_functions - ai_fixed_json = auto_fix_json(json_to_load, JSON_SCHEMA) - - if ai_fixed_json != "failed": - return json.loads(ai_fixed_json) - # This allows the AI to react to the error message, - # which usually results in it correcting its ways. - # logger.error("Failed to fix AI output, telling the AI.") - return {} - - -def attempt_to_fix_json_by_finding_outermost_brackets(json_string: str): - if CFG.speak_mode and CFG.debug_mode: - say_text( - "I have received an invalid JSON response from the OpenAI API. " - "Trying to fix it now." - ) - logger.error("Attempting to fix JSON by finding outermost brackets\n") - - try: - json_pattern = regex.compile(r"\{(?:[^{}]|(?R))*\}") - json_match = json_pattern.search(json_string) - - if json_match: - # Extract the valid JSON object from the string - json_string = json_match.group(0) - logger.typewriter_log( - title="Apparently json was fixed.", title_color=Fore.GREEN - ) - if CFG.speak_mode and CFG.debug_mode: - say_text("Apparently json was fixed.") - else: - return {} - - except (json.JSONDecodeError, ValueError): - if CFG.debug_mode: - logger.error(f"Error: Invalid JSON: {json_string}\n") - if CFG.speak_mode: - say_text("Didn't work. 
I will have to ignore this response then.") - logger.error("Error: Invalid JSON, setting it to empty JSON now.\n") - json_string = {} - - return fix_and_parse_json(json_string) diff --git a/autogpt/json_utils/llm_response_format_1.json b/autogpt/json_utils/llm_response_format_1.json index 9aa33352511..17101dda110 100644 --- a/autogpt/json_utils/llm_response_format_1.json +++ b/autogpt/json_utils/llm_response_format_1.json @@ -5,11 +5,25 @@ "thoughts": { "type": "object", "properties": { - "text": {"type": "string"}, - "reasoning": {"type": "string"}, - "plan": {"type": "string"}, - "criticism": {"type": "string"}, - "speak": {"type": "string"} + "text": { + "type": "string", + "description": "thoughts" + }, + "reasoning": { + "type": "string" + }, + "plan": { + "type": "string", + "description": "- short bulleted\n- list that conveys\n- long-term plan" + }, + "criticism": { + "type": "string", + "description": "constructive self-criticism" + }, + "speak": { + "type": "string", + "description": "thoughts summary to say to user" + } }, "required": ["text", "reasoning", "plan", "criticism", "speak"], "additionalProperties": false diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py index 933de8e9307..0184887192f 100644 --- a/autogpt/json_utils/utilities.py +++ b/autogpt/json_utils/utilities.py @@ -1,7 +1,8 @@ """Utilities for the json_fixes package.""" +import ast import json import os.path -import re +from typing import Any from jsonschema import Draft7Validator @@ -12,37 +13,47 @@ LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1" -def extract_char_position(error_message: str) -> int: - """Extract the character position from the JSONDecodeError message. +def extract_json_from_response(response_content: str) -> dict: + # Sometimes the response includes the JSON in a code block with ``` + if response_content.startswith("```") and response_content.endswith("```"): + # Discard the first and last ```, then re-join in case the response naturally included ``` + response_content = "```".join(response_content.split("```")[1:-1]) - Args: - error_message (str): The error message from the JSONDecodeError - exception. + # response content comes from OpenAI as a Python `str(content_dict)`, literal_eval reverses this + try: + return ast.literal_eval(response_content) + except BaseException as e: + logger.error(f"Error parsing JSON response with literal_eval {e}") + # TODO: How to raise an error here without causing the program to exit? + return {} - Returns: - int: The character position. 
- """ - char_pattern = re.compile(r"\(char (\d+)\)") - if match := char_pattern.search(error_message): - return int(match[1]) - else: - raise ValueError("Character position not found in the error message.") +def llm_response_schema( + schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT, +) -> dict[str, Any]: + filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json") + with open(filename, "r") as f: + return json.load(f) -def validate_json(json_object: object, schema_name: str) -> dict | None: +def validate_json( + json_object: object, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT +) -> bool: """ :type schema_name: object :param schema_name: str :type json_object: object + + Returns: + bool: Whether the json_object is valid or not """ - scheme_file = os.path.join(os.path.dirname(__file__), f"{schema_name}.json") - with open(scheme_file, "r") as f: - schema = json.load(f) + schema = llm_response_schema(schema_name) validator = Draft7Validator(schema) if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path): - logger.error("The JSON object is invalid.") + for error in errors: + logger.error(f"JSON Validation Error: {error}") + if CFG.debug_mode: logger.error( json.dumps(json_object, indent=4) @@ -51,10 +62,11 @@ def validate_json(json_object: object, schema_name: str) -> dict | None: for error in errors: logger.error(f"Error: {error.message}") - else: - logger.debug("The JSON object is valid.") + return False + + logger.debug("The JSON object is valid.") - return json_object + return True def validate_json_string(json_string: str, schema_name: str) -> dict | None: @@ -66,7 +78,9 @@ def validate_json_string(json_string: str, schema_name: str) -> dict | None: try: json_loaded = json.loads(json_string) - return validate_json(json_loaded, schema_name) + if not validate_json(json_loaded, schema_name): + return None + return json_loaded except: return None diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index 7442579de86..acc38c44cd2 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -6,8 +6,8 @@ from openai import Model from autogpt.config import Config -from autogpt.llm.base import MessageDict -from autogpt.llm.modelsinfo import COSTS +from autogpt.llm.base import CompletionModelInfo, MessageDict +from autogpt.llm.providers.openai import OPEN_AI_MODELS from autogpt.logs import logger from autogpt.singleton import Singleton @@ -34,7 +34,7 @@ def create_chat_completion( temperature: float = None, max_tokens: int | None = None, deployment_id=None, - ) -> str: + ): """ Create a chat completion and update the cost. 
Args: @@ -83,13 +83,16 @@ def update_cost(self, prompt_tokens, completion_tokens, model: str): """ # the .model property in API responses can contain version suffixes like -v2 model = model[:-3] if model.endswith("-v2") else model + model_info = OPEN_AI_MODELS[model] self.total_prompt_tokens += prompt_tokens self.total_completion_tokens += completion_tokens - self.total_cost += ( - prompt_tokens * COSTS[model]["prompt"] - + completion_tokens * COSTS[model]["completion"] - ) / 1000 + self.total_cost += prompt_tokens * model_info.prompt_token_cost / 1000 + if issubclass(type(model_info), CompletionModelInfo): + self.total_cost += ( + completion_tokens * model_info.completion_token_cost / 1000 + ) + logger.debug(f"Total running cost: ${self.total_cost:.3f}") def set_total_budget(self, total_budget): diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py index 76bd3db1c8f..43cc0ad93d4 100644 --- a/autogpt/llm/base.py +++ b/autogpt/llm/base.py @@ -31,22 +31,27 @@ class ModelInfo: Would be lovely to eventually get this directly from APIs, but needs to be scraped from websites for now. - """ name: str + max_tokens: int prompt_token_cost: float + + +@dataclass +class CompletionModelInfo(ModelInfo): + """Struct for generic completion model information.""" + completion_token_cost: float - max_tokens: int @dataclass -class ChatModelInfo(ModelInfo): +class ChatModelInfo(CompletionModelInfo): """Struct for chat model information.""" @dataclass -class TextModelInfo(ModelInfo): +class TextModelInfo(CompletionModelInfo): """Struct for text completion model information.""" diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py index 7cb598256b7..9ed07cb2d4a 100644 --- a/autogpt/llm/chat.py +++ b/autogpt/llm/chat.py @@ -150,7 +150,7 @@ def chat_with_ai( if not plugin.can_handle_on_planning(): continue plugin_response = plugin.on_planning( - agent.config.prompt_generator, message_sequence.raw() + agent.ai_config.prompt_generator, message_sequence.raw() ) if not plugin_response or plugin_response == "": continue @@ -181,7 +181,7 @@ def chat_with_ai( logger.debug("") logger.debug("----------- END OF CONTEXT ----------------") agent.log_cycle_handler.log_cycle( - agent.config.ai_name, + agent.ai_name, agent.created_at, agent.cycle_count, message_sequence.raw(), diff --git a/autogpt/llm/modelsinfo.py b/autogpt/llm/modelsinfo.py deleted file mode 100644 index 425472dec48..00000000000 --- a/autogpt/llm/modelsinfo.py +++ /dev/null @@ -1,11 +0,0 @@ -COSTS = { - "gpt-3.5-turbo": {"prompt": 0.002, "completion": 0.002}, - "gpt-3.5-turbo-0301": {"prompt": 0.002, "completion": 0.002}, - "gpt-4-0314": {"prompt": 0.03, "completion": 0.06}, - "gpt-4": {"prompt": 0.03, "completion": 0.06}, - "gpt-4-0314": {"prompt": 0.03, "completion": 0.06}, - "gpt-4-32k": {"prompt": 0.06, "completion": 0.12}, - "gpt-4-32k-0314": {"prompt": 0.06, "completion": 0.12}, - "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, - "text-davinci-003": {"prompt": 0.02, "completion": 0.02}, -} diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index acaf06719ff..0f24b56e751 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -4,43 +4,60 @@ info.name: info for info in [ ChatModelInfo( - name="gpt-3.5-turbo", - prompt_token_cost=0.002, + name="gpt-3.5-turbo-0301", + prompt_token_cost=0.0015, completion_token_cost=0.002, max_tokens=4096, ), ChatModelInfo( - name="gpt-3.5-turbo-0301", - prompt_token_cost=0.002, + name="gpt-3.5-turbo-0613", + prompt_token_cost=0.0015, 
completion_token_cost=0.002, max_tokens=4096, ), ChatModelInfo( - name="gpt-4", + name="gpt-3.5-turbo-16k-0613", + prompt_token_cost=0.003, + completion_token_cost=0.004, + max_tokens=16384, + ), + ChatModelInfo( + name="gpt-4-0314", prompt_token_cost=0.03, completion_token_cost=0.06, max_tokens=8192, ), ChatModelInfo( - name="gpt-4-0314", + name="gpt-4-0613", prompt_token_cost=0.03, completion_token_cost=0.06, max_tokens=8192, ), ChatModelInfo( - name="gpt-4-32k", + name="gpt-4-32k-0314", prompt_token_cost=0.06, completion_token_cost=0.12, max_tokens=32768, ), ChatModelInfo( - name="gpt-4-32k-0314", + name="gpt-4-32k-0613", prompt_token_cost=0.06, completion_token_cost=0.12, max_tokens=32768, ), ] } +# Set aliases for rolling model IDs +chat_model_mapping = { + "gpt-3.5-turbo": "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613", + "gpt-4": "gpt-4-0314", + "gpt-4-32k": "gpt-4-32k-0314", +} +for alias, target in chat_model_mapping.items(): + alias_info = ChatModelInfo(**OPEN_AI_CHAT_MODELS[target].__dict__) + alias_info.name = alias + OPEN_AI_CHAT_MODELS[alias] = alias_info OPEN_AI_TEXT_MODELS = { info.name: info @@ -59,8 +76,7 @@ for info in [ EmbeddingModelInfo( name="text-embedding-ada-002", - prompt_token_cost=0.0004, - completion_token_cost=0.0, + prompt_token_cost=0.0001, max_tokens=8191, embedding_dimensions=1536, ), diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index 756c4bd55de..736745cf89f 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -17,6 +17,7 @@ from ..api_manager import ApiManager from ..base import ChatSequence, Message +from ..providers.openai import OPEN_AI_CHAT_MODELS from .token_counter import * @@ -205,6 +206,8 @@ def create_chat_completion( model = prompt.model.name if temperature is None: temperature = cfg.temperature + if max_tokens is None: + max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt.token_length logger.debug( f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}" @@ -239,7 +242,7 @@ def create_chat_completion( max_tokens=max_tokens, ) - resp = response.choices[0].message["content"] + resp = response.choices[0].message.content for plugin in cfg.plugins: if not plugin.can_handle_on_response(): continue diff --git a/autogpt/llm/utils/token_counter.py b/autogpt/llm/utils/token_counter.py index bd1dcf1b3b7..e34dbd1cd99 100644 --- a/autogpt/llm/utils/token_counter.py +++ b/autogpt/llm/utils/token_counter.py @@ -24,32 +24,28 @@ def count_message_tokens( Returns: int: The number of tokens used by the list of messages. """ - try: - encoding = tiktoken.encoding_for_model(model) - except KeyError: - logger.warn("Warning: model not found. Using cl100k_base encoding.") - encoding = tiktoken.get_encoding("cl100k_base") - if model == "gpt-3.5-turbo": - # !Note: gpt-3.5-turbo may change over time. - # Returning num tokens assuming gpt-3.5-turbo-0301.") - return count_message_tokens(messages, model="gpt-3.5-turbo-0301") - elif model == "gpt-4": - # !Note: gpt-4 may change over time. 
Returning num tokens assuming gpt-4-0314.") - return count_message_tokens(messages, model="gpt-4-0314") - elif model == "gpt-3.5-turbo-0301": + if model.startswith("gpt-3.5-turbo"): tokens_per_message = ( 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n ) tokens_per_name = -1 # if there's a name, the role is omitted - elif model == "gpt-4-0314": + encoding_model = "gpt-3.5-turbo" + elif model.startswith("gpt-4"): tokens_per_message = 3 tokens_per_name = 1 + encoding_model = "gpt-4" else: raise NotImplementedError( - f"num_tokens_from_messages() is not implemented for model {model}.\n" + f"count_message_tokens() is not implemented for model {model}.\n" " See https://github.com/openai/openai-python/blob/main/chatml.md for" " information on how messages are converted to tokens." ) + try: + encoding = tiktoken.encoding_for_model(encoding_model) + except KeyError: + logger.warn("Warning: model not found. Using cl100k_base encoding.") + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = 0 for message in messages: num_tokens += tokens_per_message diff --git a/autogpt/log_cycle/log_cycle.py b/autogpt/log_cycle/log_cycle.py index 8daed25c465..ebceb57ef79 100644 --- a/autogpt/log_cycle/log_cycle.py +++ b/autogpt/log_cycle/log_cycle.py @@ -34,7 +34,7 @@ def create_outer_directory(self, ai_name: str, created_at: str) -> str: if os.environ.get("OVERWRITE_DEBUG") == "1": outer_folder_name = "auto_gpt" else: - ai_name_short = ai_name[:15] if ai_name else DEFAULT_PREFIX + ai_name_short = self.get_agent_short_name(ai_name) outer_folder_name = f"{created_at}_{ai_name_short}" outer_folder_path = os.path.join(log_directory, "DEBUG", outer_folder_name) @@ -42,6 +42,9 @@ def create_outer_directory(self, ai_name: str, created_at: str) -> str: return outer_folder_path + def get_agent_short_name(self, ai_name): + return ai_name[:15].rstrip() if ai_name else DEFAULT_PREFIX + def create_inner_directory(self, outer_folder_path: str, cycle_count: int) -> str: nested_folder_name = str(cycle_count).zfill(3) nested_folder_path = os.path.join(outer_folder_path, nested_folder_name) diff --git a/autogpt/main.py b/autogpt/main.py index efc70aae27f..ab0a1533b00 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -189,9 +189,10 @@ def run_auto_gpt( memory=memory, next_action_count=next_action_count, command_registry=command_registry, - config=ai_config, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, workspace_directory=workspace_directory, + ai_config=ai_config, + config=cfg, ) agent.start_interaction_loop() diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index fcb96a94d13..be524125402 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -11,10 +11,12 @@ from autogpt.config import Config from autogpt.json_utils.utilities import ( LLM_DEFAULT_RESPONSE_FORMAT, + extract_json_from_response, is_string_valid_json, ) from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType -from autogpt.llm.utils import create_chat_completion +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS +from autogpt.llm.utils import count_string_tokens, create_chat_completion from autogpt.log_cycle.log_cycle import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME from autogpt.logs import logger @@ -152,13 +154,14 @@ def update_running_summary(self, new_events: list[Message]) -> Message: # Remove "thoughts" dictionary from "content" try: - content_dict = json.loads(event.content) + content_dict = 
extract_json_from_response(event.content) if "thoughts" in content_dict: del content_dict["thoughts"] event.content = json.dumps(content_dict) - except json.decoder.JSONDecodeError: + except json.JSONDecodeError as e: + logger.error(f"Error: Invalid JSON: {e}") if cfg.debug_mode: - logger.error(f"Error: Invalid JSON: {event.content}\n") + logger.error(f"{event.content}") elif event.role.lower() == "system": event.role = "your computer" @@ -167,9 +170,45 @@ def update_running_summary(self, new_events: list[Message]) -> Message: elif event.role == "user": new_events.remove(event) + # Summarize events and current summary in batch to a new running summary + + # Assume an upper-bound length for the summary prompt template (i.e. "Your task is to create a concise running summary....") used in the summarize_batch function + # TODO make this default dynamic + prompt_template_length = 100 + max_tokens = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens + summary_tlength = count_string_tokens(str(self.summary), cfg.fast_llm_model) + batch = [] + batch_tlength = 0 + + # TODO: A cap could be put on the total length of new events, dropping some earlier events to save API cost, but this needs more thought on how to do it without losing context + for event in new_events: + event_tlength = count_string_tokens(str(event), cfg.fast_llm_model) + + if ( + batch_tlength + event_tlength + > max_tokens - prompt_template_length - summary_tlength + ): + # The batch is full. Summarize it and start a new one. + self.summarize_batch(batch, cfg) + summary_tlength = count_string_tokens( + str(self.summary), cfg.fast_llm_model + ) + batch = [event] + batch_tlength = event_tlength + else: + batch.append(event) + batch_tlength += event_tlength + + if batch: + # There's an unprocessed batch. Summarize it. + self.summarize_batch(batch, cfg) + + return self.summary_message() + + def summarize_batch(self, new_events_batch, cfg): prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. -You will receive the current summary and the your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. +You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.
Summary So Far: """ @@ -178,13 +217,13 @@ def update_running_summary(self, new_events: list[Message]) -> Message: Latest Development: """ -{new_events or "Nothing new happened."} +{new_events_batch or "Nothing new happened."} """ ''' prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)]) self.agent.log_cycle_handler.log_cycle( - self.agent.config.ai_name, + self.agent.ai_name, self.agent.created_at, self.agent.cycle_count, prompt.raw(), @@ -194,11 +233,9 @@ def update_running_summary(self, new_events: list[Message]) -> Message: self.summary = create_chat_completion(prompt) self.agent.log_cycle_handler.log_cycle( - self.agent.config.ai_name, + self.agent.ai_name, self.agent.created_at, self.agent.cycle_count, self.summary, SUMMARY_FILE_NAME, ) - - return self.summary_message() diff --git a/autogpt/plugins.py b/autogpt/plugins/__init__.py similarity index 77% rename from autogpt/plugins.py rename to autogpt/plugins/__init__.py index f36ba36e0f1..4d84c9b5e87 100644 --- a/autogpt/plugins.py +++ b/autogpt/plugins/__init__.py @@ -1,8 +1,10 @@ """Handles loading of plugins.""" import importlib.util +import inspect import json import os +import sys import zipfile from pathlib import Path from typing import List @@ -14,10 +16,14 @@ from auto_gpt_plugin_template import AutoGPTPluginTemplate from openapi_python_client.config import Config as OpenAPIConfig -from autogpt.config import Config +from autogpt.config.config import Config from autogpt.logs import logger from autogpt.models.base_open_ai_plugin import BaseOpenAIPlugin +DEFAULT_PLUGINS_CONFIG_FILE = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "..", "..", "plugins_config.yaml" +) + def inspect_zip_for_modules(zip_path: str, debug: bool = False) -> list[str]: """ @@ -213,10 +219,33 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate loaded_plugins = [] # Generic plugins plugins_path_path = Path(cfg.plugins_dir) + plugins_config = cfg.plugins_config + + # Directory-based plugins + for plugin_path in [f.path for f in os.scandir(cfg.plugins_dir) if f.is_dir()]: + # Avoid going into __pycache__ or other hidden directories + if plugin_path.startswith("__"): + continue + + plugin_module_path = plugin_path.split(os.path.sep) + plugin_module_name = plugin_module_path[-1] + qualified_module_name = ".".join(plugin_module_path) + + __import__(qualified_module_name) + plugin = sys.modules[qualified_module_name] + + if not plugins_config.is_enabled(plugin_module_name): + logger.warn(f"Plugin {plugin_module_name} found but not configured") + continue - logger.debug(f"Allowlisted Plugins: {cfg.plugins_allowlist}") - logger.debug(f"Denylisted Plugins: {cfg.plugins_denylist}") + for _, class_obj in inspect.getmembers(plugin): + if ( + hasattr(class_obj, "_abc_impl") + and AutoGPTPluginTemplate in class_obj.__bases__ + ): + loaded_plugins.append(class_obj()) + # Zip-based plugins for plugin in plugins_path_path.glob("*.zip"): if moduleList := inspect_zip_for_modules(str(plugin), debug): for module in moduleList: @@ -225,6 +254,7 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate logger.debug(f"Plugin: {plugin} Module: {module}") zipped_package = zipimporter(str(plugin)) zipped_module = zipped_package.load_module(str(module.parent)) + for key in dir(zipped_module): if key.startswith("__"): continue @@ -233,9 +263,28 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate if ( "_abc_impl" in a_keys and a_module.__name__ != 
"AutoGPTPluginTemplate" - and denylist_allowlist_check(a_module.__name__, cfg) ): - loaded_plugins.append(a_module()) + plugin_name = a_module.__name__ + plugin_configured = plugins_config.get(plugin_name) is not None + plugin_enabled = plugins_config.is_enabled(plugin_name) + + if plugin_configured and plugin_enabled: + logger.debug( + f"Loading plugin {plugin_name} as it was enabled in config." + ) + loaded_plugins.append(a_module()) + elif plugin_configured and not plugin_enabled: + logger.debug( + f"Not loading plugin {plugin_name} as it was disabled in config." + ) + elif not plugin_configured: + logger.warn( + f"Not loading plugin {plugin_name} as it was not found in config. " + f"Please check your config. Starting with 0.4.1, plugins will not be loaded unless " + f"they are enabled in plugins_config.yaml. Zipped plugins should use the class " + f"name ({plugin_name}) as the key." + ) + # OpenAI plugins if cfg.plugins_openai: manifests_specs = fetch_openai_plugins_manifest_and_spec(cfg) @@ -244,40 +293,15 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate manifests_specs, cfg, debug ) for url, openai_plugin_meta in manifests_specs_clients.items(): - if denylist_allowlist_check(url, cfg): - plugin = BaseOpenAIPlugin(openai_plugin_meta) - loaded_plugins.append(plugin) + if not plugins_config.is_enabled(url): + logger.warn(f"Plugin {plugin_module_name} found but not configured") + continue + + plugin = BaseOpenAIPlugin(openai_plugin_meta) + loaded_plugins.append(plugin) if loaded_plugins: logger.info(f"\nPlugins found: {len(loaded_plugins)}\n" "--------------------") for plugin in loaded_plugins: logger.info(f"{plugin._name}: {plugin._version} - {plugin._description}") return loaded_plugins - - -def denylist_allowlist_check(plugin_name: str, cfg: Config) -> bool: - """Check if the plugin is in the allowlist or denylist. - - Args: - plugin_name (str): Name of the plugin. - cfg (Config): Config object. - - Returns: - True or False - """ - logger.debug(f"Checking if plugin {plugin_name} should be loaded") - if ( - plugin_name in cfg.plugins_denylist - or "all" in cfg.plugins_denylist - or "none" in cfg.plugins_allowlist - ): - logger.debug(f"Not loading plugin {plugin_name} as it was in the denylist.") - return False - if plugin_name in cfg.plugins_allowlist or "all" in cfg.plugins_allowlist: - logger.debug(f"Loading plugin {plugin_name} as it was in the allowlist.") - return True - ack = input( - f"WARNING: Plugin {plugin_name} found. But not in the" - f" allowlist... Load? ({cfg.authorise_key}/{cfg.exit_key}): " - ) - return ack.lower() == cfg.authorise_key diff --git a/autogpt/plugins/plugin_config.py b/autogpt/plugins/plugin_config.py new file mode 100644 index 00000000000..53a83b166c3 --- /dev/null +++ b/autogpt/plugins/plugin_config.py @@ -0,0 +1,14 @@ +from typing import Any + + +class PluginConfig: + """Class for holding configuration of a single plugin""" + + def __init__(self, name: str, enabled: bool = False, config: dict[str, Any] = None): + self.name = name + self.enabled = enabled + # Arbitray config options for this plugin. API keys or plugin-specific options live here. 
+ self.config = config or {} + + def __repr__(self): + return f"PluginConfig('{self.name}', {self.enabled}, {str(self.config)})" diff --git a/autogpt/plugins/plugins_config.py b/autogpt/plugins/plugins_config.py new file mode 100644 index 00000000000..7e04e79533d --- /dev/null +++ b/autogpt/plugins/plugins_config.py @@ -0,0 +1,81 @@ +import os +from typing import Any, Union + +import yaml + +from autogpt.config.config import Config +from autogpt.logs import logger +from autogpt.plugins.plugin_config import PluginConfig + + +class PluginsConfig: + """Class for holding configuration of all plugins""" + + def __init__(self, plugins_config: dict[str, Any]): + self.plugins = {} + for name, plugin in plugins_config.items(): + if type(plugin) == dict: + self.plugins[name] = PluginConfig( + name, + plugin.get("enabled", False), + plugin.get("config", {}), + ) + elif type(plugin) == PluginConfig: + self.plugins[name] = plugin + else: + raise ValueError(f"Invalid plugin config data type: {type(plugin)}") + + def __repr__(self): + return f"PluginsConfig({self.plugins})" + + def get(self, name: str) -> Union[PluginConfig, None]: + return self.plugins.get(name) + + def is_enabled(self, name) -> bool: + plugin_config = self.plugins.get(name) + return plugin_config and plugin_config.enabled + + @classmethod + def load_config(cls, global_config: Config) -> "PluginsConfig": + empty_config = cls({}) + + try: + config_data = cls.deserialize_config_file(global_config=global_config) + if type(config_data) != dict: + logger.error( + f"Expected plugins config to be a dict, got {type(config_data)}, continuing without plugins" + ) + return empty_config + return cls(config_data) + + except BaseException as e: + logger.error( + f"Plugin config is invalid, continuing without plugins. Error: {e}" + ) + return empty_config + + @classmethod + def deserialize_config_file(cls, global_config: Config) -> dict[str, Any]: + plugins_config_path = global_config.plugins_config_file + if not os.path.exists(plugins_config_path): + logger.warn("plugins_config.yaml does not exist, creating base config.") + cls.create_empty_plugins_config(global_config=global_config) + + with open(plugins_config_path, "r") as f: + return yaml.load(f, Loader=yaml.FullLoader) + + @staticmethod + def create_empty_plugins_config(global_config: Config): + """Create an empty plugins_config.yaml file.
Fill it with values from old env variables.""" + base_config = {} + + # Backwards-compatibility shim + for plugin_name in global_config.plugins_denylist: + base_config[plugin_name] = {"enabled": False, "config": {}} + + for plugin_name in global_config.plugins_allowlist: + base_config[plugin_name] = {"enabled": True, "config": {}} + + with open(global_config.plugins_config_file, "w+") as f: + f.write(yaml.dump(base_config)) + return base_config diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py index adf6489432b..7101acfea11 100644 --- a/autogpt/prompts/generator.py +++ b/autogpt/prompts/generator.py @@ -1,7 +1,8 @@ """ A module for generating custom prompt strings.""" -import json from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from autogpt.json_utils.utilities import llm_response_schema + if TYPE_CHECKING: from autogpt.commands.command import CommandRegistry @@ -25,16 +26,6 @@ def __init__(self) -> None: self.command_registry: CommandRegistry | None = None self.name = "Bob" self.role = "AI" - self.response_format = { - "thoughts": { - "text": "thought", - "reasoning": "reasoning", - "plan": "- short bulleted\n- list that conveys\n- long-term plan", - "criticism": "constructive self-criticism", - "speak": "thoughts summary to say to user", - }, - "command": {"name": "command name", "args": {"arg name": "value"}}, - } def add_constraint(self, constraint: str) -> None: """ @@ -144,7 +135,6 @@ def generate_prompt_string(self) -> str: Returns: str: The generated prompt string. """ - formatted_response_format = json.dumps(self.response_format, indent=4) return ( f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n" "Commands:\n" @@ -152,7 +142,6 @@ def generate_prompt_string(self) -> str: f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n" "Performance Evaluation:\n" f"{self._generate_numbered_list(self.performance_evaluation)}\n\n" - "You should only respond in JSON format as described below \nResponse" - f" Format: \n{formatted_response_format} \nEnsure the response can be" - " parsed by Python json.loads" + "Respond with only valid JSON conforming to the following schema: \n" + f"{llm_response_schema()}\n" ) diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py index eeeea3f992f..17d78bd1cbd 100644 --- a/autogpt/prompts/prompt.py +++ b/autogpt/prompts/prompt.py @@ -11,9 +11,7 @@ CFG = Config() -DEFAULT_TRIGGERING_PROMPT = ( - "Determine which next command to use, and respond using the format specified above:" -) +DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the JSON schema specified previously:" def build_default_prompt_generator() -> PromptGenerator: diff --git a/autogpt/speech/base.py b/autogpt/speech/base.py index a7570d94560..07c8d9fe7bf 100644 --- a/autogpt/speech/base.py +++ b/autogpt/speech/base.py @@ -1,5 +1,6 @@ """Base class for all voice classes.""" import abc +import re from threading import Lock from autogpt.singleton import AbstractSingleton @@ -29,6 +30,11 @@ def say(self, text: str, voice_index: int = 0) -> bool: text (str): The text to say. voice_index (int): The index of the voice to use. 
""" + text = re.sub( + r"\b(?:https?://[-\w_.]+/?\w[-\w_.]*\.(?:[-\w_.]+/?\w[-\w_.]*\.)?[a-z]+(?:/[-\w_.%]+)*\b(?!\.))", + "", + text, + ) with self._mutex: return self._speech(text, voice_index) diff --git a/autogpt/speech/eleven_labs.py b/autogpt/speech/eleven_labs.py index c1e3aff52e5..5952508dfb4 100644 --- a/autogpt/speech/eleven_labs.py +++ b/autogpt/speech/eleven_labs.py @@ -38,11 +38,11 @@ def _setup(self) -> None: "xi-api-key": cfg.elevenlabs_api_key, } self._voices = default_voices.copy() - if cfg.elevenlabs_voice_1_id in voice_options: - cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id] + if cfg.elevenlabs_voice_id in voice_options: + cfg.elevenlabs_voice_id = voice_options[cfg.elevenlabs_voice_id] if cfg.elevenlabs_voice_2_id in voice_options: cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id] - self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0) + self._use_custom_voice(cfg.elevenlabs_voice_id, 0) self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1) def _use_custom_voice(self, voice, voice_index) -> None: diff --git a/autogpt/speech/say.py b/autogpt/speech/say.py index 4cc82e198ad..06f580f0c0b 100644 --- a/autogpt/speech/say.py +++ b/autogpt/speech/say.py @@ -4,10 +4,10 @@ from autogpt.config.config import Config from autogpt.speech.base import VoiceBase -from autogpt.speech.brian import BrianSpeech from autogpt.speech.eleven_labs import ElevenLabsSpeech from autogpt.speech.gtts import GTTSVoice from autogpt.speech.macos_tts import MacOSTTS +from autogpt.speech.stream_elements_speech import StreamElementsSpeech _QUEUE_SEMAPHORE = Semaphore( 1 @@ -33,14 +33,14 @@ def speak() -> None: def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]: """Get the voice engine to use for the given configuration""" - default_voice_engine = GTTSVoice() - if config.elevenlabs_api_key: + tts_provider = config.text_to_speech_provider + if tts_provider == "elevenlabs": voice_engine = ElevenLabsSpeech() - elif config.use_mac_os_tts == "True": + elif tts_provider == "macos": voice_engine = MacOSTTS() - elif config.use_brian_tts == "True": - voice_engine = BrianSpeech() + elif tts_provider == "streamelements": + voice_engine = StreamElementsSpeech() else: voice_engine = GTTSVoice() - return default_voice_engine, voice_engine + return GTTSVoice(), voice_engine diff --git a/autogpt/speech/brian.py b/autogpt/speech/stream_elements_speech.py similarity index 77% rename from autogpt/speech/brian.py rename to autogpt/speech/stream_elements_speech.py index f63c206b220..9019cf0954f 100644 --- a/autogpt/speech/brian.py +++ b/autogpt/speech/stream_elements_speech.py @@ -7,23 +7,24 @@ from autogpt.speech.base import VoiceBase -class BrianSpeech(VoiceBase): - """Brian speech module for autogpt""" +class StreamElementsSpeech(VoiceBase): + """Streamelements speech module for autogpt""" def _setup(self) -> None: """Setup the voices, API key, etc.""" - def _speech(self, text: str, _: int = 0) -> bool: - """Speak text using Brian with the streamelements API + def _speech(self, text: str, voice: str, _: int = 0) -> bool: + """Speak text using the streamelements API Args: text (str): The text to speak + voice (str): The voice to use Returns: bool: True if the request was successful, False otherwise """ tts_url = ( - f"https://api.streamelements.com/kappa/v2/speech?voice=Brian&text={text}" + f"https://api.streamelements.com/kappa/v2/speech?voice={voice}&text={text}" ) response = requests.get(tts_url) diff --git a/autogpt/utils.py b/autogpt/utils.py index 
653841a2329..91e570a0f5a 100644 --- a/autogpt/utils.py +++ b/autogpt/utils.py @@ -5,10 +5,14 @@ import yaml from colorama import Fore, Style from git.repo import Repo +from prompt_toolkit import ANSI, PromptSession +from prompt_toolkit.history import InMemoryHistory from autogpt.config import Config from autogpt.logs import logger +session = PromptSession(history=InMemoryHistory()) + def batch(iterable, max_batch_length: int, overlap: int = 0): """Batch data from iterable into slices of length N. The last batch may be shorter.""" @@ -52,7 +56,7 @@ def clean_input(prompt: str = "", talk=False): # ask for input, default when just pressing Enter is y logger.info("Asking user via keyboard...") - answer = input(prompt) + answer = session.prompt(ANSI(prompt)) return answer except KeyboardInterrupt: logger.info("You interrupted Auto-GPT") diff --git a/docs/challenges/building_challenges.md b/docs/challenges/building_challenges.md index 09ab3bf513b..0c3d89ac350 100644 --- a/docs/challenges/building_challenges.md +++ b/docs/challenges/building_challenges.md @@ -70,7 +70,7 @@ def kubernetes_agent( ``` ## Creating your challenge -Go to `tests/integration/challenges`and create a file that is called `test_your_test_description.py` and add it to the appropriate folder. If no category exists you can create a new one. +Go to `tests/challenges` and create a file called `test_your_test_description.py` and add it to the appropriate folder. If no category exists you can create a new one. Your test could look something like this @@ -84,7 +84,7 @@ import yaml from autogpt.commands.file_operations import read_file, write_to_file from tests.integration.agent_utils import run_interaction_loop -from tests.integration.challenges.utils import run_multiple_times +from tests.challenges.utils import run_multiple_times from tests.utils import requires_api_key @@ -111,7 +111,7 @@ def test_information_retrieval_challenge_a(kubernetes_agent, monkeypatch) -> Non """ input_sequence = ["s", "s", "s", "s", "s", "EXIT"] gen = input_generator(input_sequence) - monkeypatch.setattr("builtins.input", lambda _: next(gen)) + monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen)) with contextlib.suppress(SystemExit): run_interaction_loop(kubernetes_agent, None) diff --git a/docs/challenges/information_retrieval/challenge_a.md b/docs/challenges/information_retrieval/challenge_a.md index de21066ea55..bf1b7b104bd 100644 --- a/docs/challenges/information_retrieval/challenge_a.md +++ b/docs/challenges/information_retrieval/challenge_a.md @@ -5,7 +5,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2 +pytest -s tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2 ``` ## Description diff --git a/docs/challenges/information_retrieval/challenge_b.md b/docs/challenges/information_retrieval/challenge_b.md index bf77a984f64..f4e68a151ed 100644 --- a/docs/challenges/information_retrieval/challenge_b.md +++ b/docs/challenges/information_retrieval/challenge_b.md @@ -5,7 +5,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py +pytest -s tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py ``` ## Description diff --git a/docs/challenges/memory/challenge_b.md b/docs/challenges/memory/challenge_b.md index 49c7c40f0a9..abc6da6bef8 100644 --- a/docs/challenges/memory/challenge_b.md +++
b/docs/challenges/memory/challenge_b.md @@ -4,7 +4,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/memory/test_memory_challenge_b.py --level=3 +pytest -s tests/challenges/memory/test_memory_challenge_b.py --level=3 `` ## Description @@ -41,4 +41,3 @@ Write all the task_ids into the file output.txt. The file has not been created y ## Objective The objective of this challenge is to test the agent's ability to follow instructions and maintain memory of the task IDs throughout the process. The agent successfully completed this challenge if it wrote the task ids in a file. - diff --git a/docs/challenges/memory/challenge_c.md b/docs/challenges/memory/challenge_c.md index fd02a4a56c6..e197ddbd2c6 100644 --- a/docs/challenges/memory/challenge_c.md +++ b/docs/challenges/memory/challenge_c.md @@ -4,7 +4,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/memory/test_memory_challenge_c.py --level=2 +pytest -s tests/challenges/memory/test_memory_challenge_c.py --level=2 `` ## Description diff --git a/docs/challenges/memory/challenge_d.md b/docs/challenges/memory/challenge_d.md new file mode 100644 index 00000000000..7563cce5939 --- /dev/null +++ b/docs/challenges/memory/challenge_d.md @@ -0,0 +1,75 @@ +# Memory Challenge D + +**Status**: Current level to beat: level 1 + +**Command to try**: +``` +pytest -s tests/challenges/memory/test_memory_challenge_d.py --level=1 +``` + +## Description + +The provided code is a unit test designed to validate an AI's ability to track events and beliefs of characters in a story involving moving objects, specifically marbles. This scenario is an advanced form of the classic "Sally-Anne test", a psychological test used to measure a child's social cognitive ability to understand that others' perspectives and beliefs may differ from their own. + +Here is an explanation of the challenge: + +The AI is given a series of events involving characters Sally, Anne, Bob, and Charlie, and the movements of different marbles. These events are designed as tests at increasing levels of complexity. + +For each level, the AI is expected to keep track of the events and the resulting beliefs of each character about the locations of each marble. These beliefs are affected by whether the character was inside or outside the room when events occurred, as characters inside the room are aware of the actions, while characters outside the room aren't. + +After the AI processes the events and generates the beliefs of each character, it writes these beliefs to an output file in JSON format. + +The check_beliefs function then checks the AI's beliefs against the expected beliefs for that level. The expected beliefs are predefined and represent the correct interpretation of the events for each level. + +If the AI's beliefs match the expected beliefs, it means the AI has correctly interpreted the events and the perspectives of each character. This would indicate that the AI has passed the test for that level. + +The test runs for levels up to the maximum level that the AI has successfully beaten, or up to a user-selected level. + + +## Files + +- `instructions_1.txt` + +"Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", + + +- `instructions_2.txt` + +"Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A).
Anne tells Bob to tell Sally that he lost the marble B. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speaks to Sally about the marble A as instructed by Anne.", + + +...and so on. + +- `instructions_n.txt` + +The expected beliefs of each character are given in a list: + +expected_beliefs = { + 1: { + 'Sally': { + 'marble A': 'basket S', + }, + 'Anne': { + 'marble A': 'basket A', + } + }, + 2: { + 'Sally': { + 'marble A': 'sofa', # Because Charlie told her + }, + 'Anne': { + 'marble A': 'green box', # Because she moved it there + 'marble B': 'basket A', # Because Bob put it there and she was in the room + }, + 'Bob': { + 'B': 'basket A', # Last place he put it + }, + 'Charlie': { + 'A': 'sofa', # Because Anne told him to tell Sally so + } + },... + + +## Objective + +This test essentially checks if an AI can accurately model and track the beliefs of different characters based on their knowledge of events, which is a critical aspect of understanding and generating human-like narratives. This ability would be beneficial for tasks such as writing stories, dialogue systems, and more. diff --git a/docs/configuration/options.md b/docs/configuration/options.md new file mode 100644 index 00000000000..b2cbf6bc76b --- /dev/null +++ b/docs/configuration/options.md @@ -0,0 +1,53 @@ +# Configuration + +Configuration is controlled through the `Config` object. You can set configuration variables via the `.env` file. If you don't have a `.env` file, create a copy of `.env.template` in your `Auto-GPT` folder and name it `.env`. + +## Environment Variables + +- `AI_SETTINGS_FILE`: Location of AI Settings file. Default: ai_settings.yaml +- `AUDIO_TO_TEXT_PROVIDER`: Audio To Text Provider. Only option currently is `huggingface`. Default: huggingface +- `AUTHORISE_COMMAND_KEY`: Key response accepted when authorising commands. Default: y +- `BROWSE_CHUNK_MAX_LENGTH`: When browsing websites, define the length of chunks to summarize. Default: 3000 +- `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm +- `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional. +- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.analyze_code. See the directory `autogpt/commands` in the source for all command modules. Default: None +- `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional. +- `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional. +- `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: text-embedding-ada-002 +- `EXECUTE_LOCAL_COMMANDS`: If shell commands should be executed locally. Default: False +- `EXIT_KEY`: Exit key accepted to exit. Default: n +- `FAST_LLM_MODEL`: LLM Model to use for most tasks. Default: gpt-3.5-turbo +- `GITHUB_API_KEY`: [Github API Key](https://github.com/settings/tokens). Optional. +- `GITHUB_USERNAME`: GitHub Username. Optional. +- `GOOGLE_API_KEY`: Google API key. Optional. +- `GOOGLE_CUSTOM_SEARCH_ENGINE_ID`: [Google custom search engine ID](https://programmablesearchengine.google.com/controlpanel/all). Optional. +- `HEADLESS_BROWSER`: Use a headless browser while Auto-GPT uses a web browser. Setting to `False` will allow you to see Auto-GPT operate the browser.
Default: True +- `HUGGINGFACE_API_TOKEN`: HuggingFace API token, to be used for both image generation and audio to text. Optional. +- `HUGGINGFACE_AUDIO_TO_TEXT_MODEL`: HuggingFace audio to text model. Default: CompVis/stable-diffusion-v1-4 +- `HUGGINGFACE_IMAGE_MODEL`: HuggingFace model to use for image generation. Default: CompVis/stable-diffusion-v1-4 +- `IMAGE_PROVIDER`: Image provider. Options are `dalle`, `huggingface`, and `sdwebui`. Default: dalle +- `IMAGE_SIZE`: Default size of image to generate. Default: 256 +- `MEMORY_BACKEND`: Memory back-end to use. Currently `json_file` is the only supported and enabled backend. Default: json_file +- `MEMORY_INDEX`: Value used in the Memory backend for scoping, naming, or indexing. Default: auto-gpt +- `OPENAI_API_KEY`: *REQUIRED* - Your [OpenAI API Key](https://platform.openai.com/account/api-keys). +- `OPENAI_ORGANIZATION`: Organization ID in OpenAI. Optional. +- `PLAIN_OUTPUT`: Plain output, which disables the spinner. Default: False +- `PLUGINS_CONFIG_FILE`: Path of plugins_config.yaml file. Default: plugins_config.yaml +- `PROMPT_SETTINGS_FILE`: Location of Prompt Settings file. Default: prompt_settings.yaml +- `REDIS_HOST`: Redis Host. Default: localhost +- `REDIS_PASSWORD`: Redis Password. Optional. Default: +- `REDIS_PORT`: Redis Port. Default: 6379 +- `RESTRICT_TO_WORKSPACE`: Restrict file reading and writing to the workspace directory. Default: True +- `SD_WEBUI_AUTH`: Stable Diffusion Web UI username:password pair. Optional. +- `SD_WEBUI_URL`: Stable Diffusion Web UI URL. Default: http://localhost:7860 +- `SHELL_ALLOWLIST`: List of shell commands that ARE allowed to be executed by Auto-GPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `allowlist`. Default: None +- `SHELL_COMMAND_CONTROL`: Whether to use `allowlist` or `denylist` to determine what shell commands can be executed. Default: denylist +- `SHELL_DENYLIST`: List of shell commands that ARE NOT allowed to be executed by Auto-GPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `denylist`. Default: sudo,su +- `SMART_LLM_MODEL`: LLM Model to use for "smart" tasks. Default: gpt-3.5-turbo +- `STREAMELEMENTS_VOICE`: StreamElements voice to use. Default: Brian +- `TEMPERATURE`: Value of temperature given to OpenAI. Value from 0 to 2. Lower is more deterministic, higher is more random. See https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature +- `TEXT_TO_SPEECH_PROVIDER`: Text to Speech Provider. Options are `gtts`, `macos`, `elevenlabs`, and `streamelements`. Default: gtts +- `USER_AGENT`: User-Agent given when browsing websites. Default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" +- `USE_AZURE`: Use Azure's LLM. Default: False +- `USE_WEB_BROWSER`: Which web browser to use. Options are `chrome`, `firefox`, `safari` or `edge`. Default: chrome +- `WIPE_REDIS_ON_START`: Wipes data / index on start. Default: True \ No newline at end of file diff --git a/docs/plugins.md b/docs/plugins.md index cc4a3299225..74e96f2ecef 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -2,6 +2,18 @@ ⚠️💀 **WARNING** 💀⚠️: Review the code of any plugin you use thoroughly, as plugins can execute any Python code, potentially leading to malicious activities, such as stealing your API keys. +To configure plugins, you can create or edit the `plugins_config.yaml` file in the root directory of Auto-GPT. This file allows you to enable or disable plugins as desired.
For specific configuration instructions, please refer to the documentation provided for each plugin. The file should be formatted in YAML. Here is an example for your reference: + +```yaml +plugin_a: + config: + api_key: my-api-key + enabled: false +plugin_b: + config: {} + enabled: true +``` + See our [Plugins Repo](https://github.com/Significant-Gravitas/Auto-GPT-Plugins) for more info on how to install all the amazing plugins the community has built! Alternatively, developers can use the [Auto-GPT Plugin Template](https://github.com/Significant-Gravitas/Auto-GPT-Plugin-Template) as a starting point for creating your own plugins. diff --git a/docs/setup.md b/docs/setup.md index 257e07c1157..e894ebe2c9a 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -172,7 +172,7 @@ If you need to upgrade Docker Compose to a newer version, you can follow the ins Once you have a recent version of docker-compose, run the commands below in your Auto-GPT folder. -1. Build the image. If you have pulled the image from Docker Hub, skip this step (NOTE: You *will* need to do this if you are modifying requirements.txt to add/remove depedencies like Python libs/frameworks) +1. Build the image. If you have pulled the image from Docker Hub, skip this step (NOTE: You *will* need to do this if you are modifying requirements.txt to add/remove dependencies like Python libs/frameworks) :::shell docker-compose build auto-gpt diff --git a/mkdocs.yml b/mkdocs.yml index 48fa0cb51db..50e062571c2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - Memory Challenge A: challenges/memory/challenge_a.md - Memory Challenge B: challenges/memory/challenge_b.md - Memory Challenge C: challenges/memory/challenge_c.md + - Memory Challenge D: challenges/memory/challenge_d.md - Information retrieval: - Introduction: challenges/information_retrieval/introduction.md - Information Retrieval Challenge A: challenges/information_retrieval/challenge_a.md diff --git a/mypy.ini b/mypy.ini index b977deb04f4..275cd2602b4 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,7 +2,7 @@ follow_imports = skip check_untyped_defs = True disallow_untyped_defs = True -files = tests/integration/challenges/**/*.py +files = tests/challenges/**/*.py [mypy-requests.*] ignore_missing_imports = True diff --git a/netlify.toml b/netlify.toml new file mode 100644 index 00000000000..43e79f0fd15 --- /dev/null +++ b/netlify.toml @@ -0,0 +1,6 @@ +# Netlify config for Auto-GPT docs + +[build] + publish = "public/" + command = "mkdocs build -d public" + ignore = "git diff --quiet HEAD^ HEAD docs mkdocs.yml CONTRIBUTING.md CODE_OF_CONDUCT.md LICENSE" diff --git a/prompt_settings.yaml b/prompt_settings.yaml index b8e7c0d2dce..244886b5320 100644 --- a/prompt_settings.yaml +++ b/prompt_settings.yaml @@ -14,6 +14,5 @@ performance_evaluations: [ 'Continuously review and analyze your actions to ensure you are performing to the best of your abilities.', 'Constructively self-criticize your big-picture behavior constantly.', 'Reflect on past decisions and strategies to refine your approach.', - 'Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', - 'Write all code to a file.' + 'Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.' 
] diff --git a/pyproject.toml b/pyproject.toml index d695ac084af..d795f53e397 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agpt" -version = "0.4.0" +version = "0.4.1" authors = [ { name="Torantulino", email="support@agpt.co" }, ] diff --git a/requirements.txt b/requirements.txt index 31f7706a30f..8c171f8338d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ click charset-normalizer>=3.1.0 spacy>=3.0.0,<4.0.0 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl +prompt_toolkit>=3.0.38 ##Dev coverage @@ -61,3 +62,4 @@ pytest-mock vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master pytest-recording pytest-xdist +flaky diff --git a/run.sh b/run.sh index 6246355572d..287499f8f74 100755 --- a/run.sh +++ b/run.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash function find_python_command() { if command -v python &> /dev/null @@ -26,4 +26,4 @@ if $PYTHON_CMD -c "import sys; sys.exit(sys.version_info < (3, 10))"; then read -p "Press any key to continue..." else echo "Python 3.10 or higher is required to run Auto GPT." -fi \ No newline at end of file +fi diff --git a/scripts/install_plugin_deps.py b/scripts/install_plugin_deps.py index d15c166f7e8..00d9f8a3b2d 100644 --- a/scripts/install_plugin_deps.py +++ b/scripts/install_plugin_deps.py @@ -2,6 +2,7 @@ import subprocess import sys import zipfile +from glob import glob from pathlib import Path @@ -16,6 +17,8 @@ def install_plugin_dependencies(): None """ plugins_dir = Path(os.getenv("PLUGINS_DIR", "plugins")) + + # Install zip-based plugins for plugin in plugins_dir.glob("*.zip"): with zipfile.ZipFile(str(plugin), "r") as zfile: try: @@ -30,6 +33,13 @@ def install_plugin_dependencies(): except KeyError: continue + # Install directory-based plugins + for requirements_file in glob(f"{plugins_dir}/*/requirements.txt"): + subprocess.check_call( + [sys.executable, "-m", "pip", "install", "-r", requirements_file], + stdout=subprocess.DEVNULL, + ) + if __name__ == "__main__": install_plugin_dependencies() diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index be280df43d6..427de6721cb 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit be280df43d6a23b8074d9cba10d18ed8724a54c9 +Subproject commit 427de6721cb5209a7a34359a81b71d60e80a110a diff --git a/tests/integration/challenges/__init__.py b/tests/challenges/__init__.py similarity index 100% rename from tests/integration/challenges/__init__.py rename to tests/challenges/__init__.py diff --git a/tests/integration/challenges/basic_abilities/__init__.py b/tests/challenges/basic_abilities/__init__.py similarity index 100% rename from tests/integration/challenges/basic_abilities/__init__.py rename to tests/challenges/basic_abilities/__init__.py diff --git a/tests/integration/challenges/basic_abilities/goal_oriented_tasks.md b/tests/challenges/basic_abilities/goal_oriented_tasks.md similarity index 100% rename from tests/integration/challenges/basic_abilities/goal_oriented_tasks.md rename to tests/challenges/basic_abilities/goal_oriented_tasks.md diff --git a/tests/integration/challenges/basic_abilities/test_browse_website.py b/tests/challenges/basic_abilities/test_browse_website.py similarity index 55% rename from tests/integration/challenges/basic_abilities/test_browse_website.py rename to 
tests/challenges/basic_abilities/test_browse_website.py index 09e5ab2200a..1c4eb27ea2d 100644 --- a/tests/integration/challenges/basic_abilities/test_browse_website.py +++ b/tests/challenges/basic_abilities/test_browse_website.py @@ -1,26 +1,24 @@ import pytest from autogpt.agent import Agent -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop CYCLE_COUNT = 2 -@requires_api_key("OPENAI_API_KEY") -@pytest.mark.vcr -@challenge +@challenge() def test_browse_website( browser_agent: Agent, patched_api_requestor: None, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: file_path = browser_agent.workspace.get_path("browse_website.txt") - run_interaction_loop(monkeypatch, browser_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run + ) # content = read_file(file_path, config) content = open(file_path, encoding="utf-8").read() diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py new file mode 100644 index 00000000000..39a45ec63c6 --- /dev/null +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -0,0 +1,42 @@ +from typing import List + +import pytest + +from autogpt.agent import Agent +from autogpt.commands.file_operations import read_file +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop + +CYCLE_COUNT_PER_LEVEL = [1, 1] +EXPECTED_OUTPUTS_PER_LEVEL = [ + {"hello_world.txt": ["Hello World"]}, + {"hello_world_1.txt": ["Hello World"], "hello_world_2.txt": ["Hello World"]}, +] + + +@challenge() +def test_write_file( + file_system_agents: List[Agent], + patched_api_requestor: None, + monkeypatch: pytest.MonkeyPatch, + level_to_run: int, + challenge_name: str, +) -> None: + file_system_agent = file_system_agents[level_to_run - 1] + run_interaction_loop( + monkeypatch, + file_system_agent, + CYCLE_COUNT_PER_LEVEL[level_to_run - 1], + challenge_name, + level_to_run, + ) + + expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1] + + for file_name, expected_lines in expected_outputs.items(): + file_path = get_workspace_path(file_system_agent, file_name) + content = read_file(file_path, file_system_agent) + for expected_line in expected_lines: + assert ( + expected_line in content + ), f"Expected '{expected_line}' in file {file_name}, but it was not found" diff --git a/tests/integration/challenges/challenge_decorator/__init__.py b/tests/challenges/challenge_decorator/__init__.py similarity index 100% rename from tests/integration/challenges/challenge_decorator/__init__.py rename to tests/challenges/challenge_decorator/__init__.py diff --git a/tests/integration/challenges/challenge_decorator/challenge.py b/tests/challenges/challenge_decorator/challenge.py similarity index 84% rename from tests/integration/challenges/challenge_decorator/challenge.py rename to tests/challenges/challenge_decorator/challenge.py index fd3b60cb6cb..e875ac9908d 100644 --- a/tests/integration/challenges/challenge_decorator/challenge.py +++ b/tests/challenges/challenge_decorator/challenge.py @@ -3,6 +3,7 @@ class Challenge: BEAT_CHALLENGES = False + 
DEFAULT_CHALLENGE_NAME = "default_challenge_name" def __init__( self, @@ -10,7 +11,7 @@ def __init__( category: str, max_level: int, is_new_challenge: bool, - max_level_beaten: Optional[int], + max_level_beaten: Optional[int] = None, level_to_run: Optional[int] = None, ) -> None: self.name = name diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py new file mode 100644 index 00000000000..52d796c09eb --- /dev/null +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -0,0 +1,89 @@ +import os +from functools import wraps +from typing import Any, Callable, Optional + +import pytest +from flaky import flaky # type: ignore + +from tests.challenges.challenge_decorator.challenge import Challenge +from tests.challenges.challenge_decorator.challenge_utils import create_challenge +from tests.challenges.challenge_decorator.score_utils import ( + get_scores, + update_new_score, +) +from tests.utils import requires_api_key + +MAX_LEVEL_TO_IMPROVE_ON = ( + 1 # we will attempt to beat 1 level above the current level for now. +) + +CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt in the 'challenges' channel to let us know the challenge you're struggling with." + + +def challenge( + max_runs: int = 2, min_passes: int = 1, api_key: str = "OPENAI_API_KEY" +) -> Callable[[Callable[..., Any]], Callable[..., None]]: + def decorator(func: Callable[..., Any]) -> Callable[..., None]: + @requires_api_key(api_key) + @pytest.mark.vcr + @flaky(max_runs=max_runs, min_passes=min_passes) + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> None: + run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 + original_error: Optional[Exception] = None + + while run_remaining > 0: + current_score, new_score, new_score_location = get_scores() + level_to_run = ( + kwargs["level_to_run"] if "level_to_run" in kwargs else None + ) + challenge = create_challenge( + func, current_score, Challenge.BEAT_CHALLENGES, level_to_run + ) + if challenge.level_to_run is not None: + kwargs["level_to_run"] = challenge.level_to_run + kwargs["challenge_name"] = challenge.name + try: + func(*args, **kwargs) + challenge.succeeded = True + except AssertionError as err: + original_error = AssertionError( + f"{CHALLENGE_FAILED_MESSAGE}\n{err}" + ) + challenge.succeeded = False + except Exception as err: + original_error = err + challenge.succeeded = False + else: + challenge.skipped = True + if os.environ.get("CI") == "true": + new_max_level_beaten = get_new_max_level_beaten( + challenge, Challenge.BEAT_CHALLENGES + ) + update_new_score( + new_score_location, new_score, challenge, new_max_level_beaten + ) + if challenge.level_to_run is None: + pytest.skip("This test has not been unlocked yet.") + + if not challenge.succeeded: + if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: + pytest.xfail(str(original_error)) + if original_error: + raise original_error + run_remaining -= 1 + + return wrapper + + return decorator + + +def get_new_max_level_beaten( + challenge: Challenge, beat_challenges: bool +) -> Optional[int]: + if challenge.succeeded: + return challenge.level_to_run + if challenge.skipped: + return challenge.max_level_beaten + # Challenge failed + return challenge.max_level_beaten if beat_challenges else None diff --git a/tests/integration/challenges/challenge_decorator/challenge_utils.py 
b/tests/challenges/challenge_decorator/challenge_utils.py similarity index 96% rename from tests/integration/challenges/challenge_decorator/challenge_utils.py rename to tests/challenges/challenge_decorator/challenge_utils.py index 7db7648fa4b..74f4cf5654f 100644 --- a/tests/integration/challenges/challenge_decorator/challenge_utils.py +++ b/tests/challenges/challenge_decorator/challenge_utils.py @@ -1,7 +1,7 @@ import os from typing import Any, Callable, Dict, Optional, Tuple -from tests.integration.challenges.challenge_decorator.challenge import Challenge +from tests.challenges.challenge_decorator.challenge import Challenge CHALLENGE_PREFIX = "test_" diff --git a/tests/integration/challenges/challenge_decorator/score_utils.py b/tests/challenges/challenge_decorator/score_utils.py similarity index 95% rename from tests/integration/challenges/challenge_decorator/score_utils.py rename to tests/challenges/challenge_decorator/score_utils.py index 0a3b71a8cb6..1a8be74439d 100644 --- a/tests/integration/challenges/challenge_decorator/score_utils.py +++ b/tests/challenges/challenge_decorator/score_utils.py @@ -2,7 +2,7 @@ import os from typing import Any, Dict, Optional, Tuple -from tests.integration.challenges.challenge_decorator.challenge import Challenge +from tests.challenges.challenge_decorator.challenge import Challenge CURRENT_SCORE_LOCATION = "../current_score" NEW_SCORE_LOCATION = "../new_score" diff --git a/tests/integration/challenges/conftest.py b/tests/challenges/conftest.py similarity index 83% rename from tests/integration/challenges/conftest.py rename to tests/challenges/conftest.py index 5514a1293fb..0c13af91acb 100644 --- a/tests/integration/challenges/conftest.py +++ b/tests/challenges/conftest.py @@ -5,9 +5,8 @@ from _pytest.config.argparsing import Parser from _pytest.fixtures import FixtureRequest -from tests.integration.challenges.challenge_decorator.challenge import Challenge -from tests.integration.conftest import BASE_VCR_CONFIG -from tests.vcr.vcr_filter import before_record_response +from tests.challenges.challenge_decorator.challenge import Challenge +from tests.vcr import before_record_response def before_record_response_filter_errors( @@ -21,9 +20,9 @@ def before_record_response_filter_errors( @pytest.fixture(scope="module") -def vcr_config() -> Dict[str, Any]: +def vcr_config(get_base_vcr_config: Dict[str, Any]) -> Dict[str, Any]: # this fixture is called by the pytest-recording vcr decorator. 
- return BASE_VCR_CONFIG | { + return get_base_vcr_config | { "before_record_response": before_record_response_filter_errors, } @@ -52,6 +51,11 @@ def level_to_run(request: FixtureRequest) -> int: return request.config.option.level +@pytest.fixture +def challenge_name() -> str: + return Challenge.DEFAULT_CHALLENGE_NAME + + @pytest.fixture(autouse=True) def check_beat_challenges(request: FixtureRequest) -> None: Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges") diff --git a/tests/integration/challenges/current_score.json b/tests/challenges/current_score.json similarity index 80% rename from tests/integration/challenges/current_score.json rename to tests/challenges/current_score.json index deb4a82d33e..4d747f03597 100644 --- a/tests/integration/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -5,20 +5,20 @@ "max_level_beaten": 1 }, "write_file": { - "max_level": 1, + "max_level": 2, "max_level_beaten": 1 } }, "debug_code": { "debug_code_challenge_a": { - "max_level": 1, + "max_level": 2, "max_level_beaten": 1 } }, "information_retrieval": { "information_retrieval_challenge_a": { "max_level": 3, - "max_level_beaten": 1 + "max_level_beaten": null }, "information_retrieval_challenge_b": { "max_level": 1, @@ -42,7 +42,11 @@ }, "memory_challenge_c": { "max_level": 5, - "max_level_beaten": 1 + "max_level_beaten": null + }, + "memory_challenge_d": { + "max_level": 5, + "max_level_beaten": null } } } diff --git a/tests/integration/challenges/debug_code/data/two_sum.py b/tests/challenges/debug_code/data/code.py similarity index 59% rename from tests/integration/challenges/debug_code/data/two_sum.py rename to tests/challenges/debug_code/data/code.py index 305cff4e41d..df8120bfa2e 100644 --- a/tests/integration/challenges/debug_code/data/two_sum.py +++ b/tests/challenges/debug_code/data/code.py @@ -2,18 +2,12 @@ from typing import List, Optional -def two_sum(nums: List, target: int) -> Optional[int]: +def two_sum(nums: List, target: int) -> Optional[List[int]]: seen = {} for i, num in enumerate(nums): + typo complement = target - num if complement in seen: return [seen[complement], i] seen[num] = i return None - - -# Example usage: -nums = [2, 7, 11, 15] -target = 9 -result = two_sum(nums, target) -print(result) # Output: [0, 1] diff --git a/tests/challenges/debug_code/data/test.py b/tests/challenges/debug_code/data/test.py new file mode 100644 index 00000000000..d85d1353758 --- /dev/null +++ b/tests/challenges/debug_code/data/test.py @@ -0,0 +1,31 @@ +# mypy: ignore-errors +from code import two_sum +from typing import List + + +def test_two_sum(nums: List, target: int, expected_result: List[int]) -> None: + result = two_sum(nums, target) + print(result) + assert ( + result == expected_result + ), f"AssertionError: Expected the output to be {expected_result}" + + +if __name__ == "__main__": + # test the trivial case with the first two numbers + nums = [2, 7, 11, 15] + target = 9 + expected_result = [0, 1] + test_two_sum(nums, target, expected_result) + + # test for ability to use zero and the same number twice + nums = [2, 7, 0, 15, 12, 0] + target = 0 + expected_result = [2, 5] + test_two_sum(nums, target, expected_result) + + # test for first and last index usage and negative numbers + nums = [-6, 7, 11, 4] + target = -2 + expected_result = [0, 3] + test_two_sum(nums, target, expected_result) diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py new file mode 100644 index 
00000000000..305c9693726 --- /dev/null +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -0,0 +1,56 @@ +from pathlib import Path + +import pytest +from pytest_mock import MockerFixture + +from autogpt.agent import Agent +from autogpt.commands.execute_code import execute_python_file +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import ( + copy_file_into_workspace, + get_workspace_path, + run_interaction_loop, +) + +CYCLE_COUNT = 5 +EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"] +DIRECTORY_PATH = Path(__file__).parent / "data" +CODE_FILE_PATH = "code.py" +TEST_FILE_PATH = "test.py" + + +@challenge() +def test_debug_code_challenge_a( + debug_code_agents: Agent, + monkeypatch: pytest.MonkeyPatch, + patched_api_requestor: MockerFixture, + level_to_run: int, + challenge_name: str, +) -> None: + """ + Test whether the agent can debug a simple code snippet. + + :param debug_code_agents: The agents to test (one per level). + :param monkeypatch: pytest's monkeypatch utility for modifying builtins. + :param patched_api_requestor: Sends API requests to our API CI pipeline. + :param level_to_run: The level to run. + """ + debug_code_agent = debug_code_agents[level_to_run - 1] + + copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH) + copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH) + + run_interaction_loop( + monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run + ) + + output = execute_python_file( + get_workspace_path(debug_code_agent, TEST_FILE_PATH), debug_code_agent + ) + + assert "error" not in output.lower(), f"Errors found in output: {output}!" + + for expected_value in EXPECTED_VALUES: + assert ( + expected_value in output + ), f"Expected output to contain {expected_value}, but it was not found in {output}!"
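For context on the debug-code challenge above: the shipped `data/code.py` deliberately contains a stray `typo` line inside `two_sum`, and `data/test.py` expects the outputs `[0, 1]`, `[2, 5]`, and `[0, 3]`. The following is a minimal sketch (not part of this patch) of what the corrected file is expected to look like, assuming the fix is simply removing that injected line.

```python
# Sketch only: data/code.py after the deliberate `typo` line is removed.
from typing import List, Optional


def two_sum(nums: List, target: int) -> Optional[List[int]]:
    # Track each value's index so the complement lookup is a single dict access.
    seen = {}
    for i, num in enumerate(nums):
        complement = target - num
        if complement in seen:
            # Return the indices of the two numbers that sum to `target`.
            return [seen[complement], i]
        seen[num] = i
    return None
```

With this version, all three cases exercised by `test.py` (including the zero/duplicate case and the negative-target case) pass.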
diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py similarity index 60% rename from tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py rename to tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index 6b970e8b227..56d5d4ec39d 100644 --- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -1,27 +1,24 @@ import pytest +from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop CYCLE_COUNT = 3 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]] from autogpt.agent import Agent +OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge + +@challenge() def test_information_retrieval_challenge_a( information_retrieval_agents: Agent, monkeypatch: pytest.MonkeyPatch, - patched_api_requestor: None, - config: Config, + patched_api_requestor: MockerFixture, level_to_run: int, + challenge_name: str, ) -> None: """ Test the challenge_a function in a given agent by mocking user inputs and checking the output file content. @@ -30,10 +27,16 @@ def test_information_retrieval_challenge_a( :param monkeypatch: pytest's monkeypatch utility for modifying builtins. 
""" information_retrieval_agent = information_retrieval_agents[level_to_run - 1] - run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, + information_retrieval_agent, + CYCLE_COUNT, + challenge_name, + level_to_run, + ) - file_path = str(information_retrieval_agent.workspace.get_path("output.txt")) - content = read_file(file_path, config) + file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION) + content = read_file(file_path, information_retrieval_agent) expected_revenues = EXPECTED_REVENUES[level_to_run - 1] for revenue in expected_revenues: assert ( diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py similarity index 64% rename from tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py rename to tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index feac95a0f64..580b8b0b37a 100644 --- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -1,28 +1,24 @@ import contextlib import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop CYCLE_COUNT = 3 +OUTPUT_LOCATION = "2010_nobel_prize_winners.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_information_retrieval_challenge_b( get_nobel_prize_agent: Agent, monkeypatch: pytest.MonkeyPatch, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, level_to_run: int, - config: Config, + challenge_name: str, ) -> None: """ Test the challenge_b function in a given agent by mocking user inputs and checking the output file content. @@ -31,16 +27,19 @@ def test_information_retrieval_challenge_b( :param monkeypatch: pytest's monkeypatch utility for modifying builtins. :param patched_api_requestor: APIRequestor Patch to override the openai.api_requestor module for testing. :param level_to_run: The level to run. - :param config: The config object. 
""" with contextlib.suppress(SystemExit): - run_interaction_loop(monkeypatch, get_nobel_prize_agent, CYCLE_COUNT) - - file_path = str( - get_nobel_prize_agent.workspace.get_path("2010_nobel_prize_winners.txt") - ) - content = read_file(file_path, config) + run_interaction_loop( + monkeypatch, + get_nobel_prize_agent, + CYCLE_COUNT, + challenge_name, + level_to_run, + ) + file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION) + + content = read_file(file_path, get_nobel_prize_agent) assert "Andre Geim" in content, "Expected the file to contain Andre Geim" assert ( "Konstantin Novoselov" in content diff --git a/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py similarity index 60% rename from tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py rename to tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 5fd280ac4bb..98f239cb74f 100644 --- a/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -1,26 +1,23 @@ import pytest import yaml +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop CYCLE_COUNT = 3 +OUTPUT_LOCATION = "kube.yaml" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_kubernetes_template_challenge_a( kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, - config: Config, + patched_api_requestor: MockerFixture, level_to_run: int, + challenge_name: str, ) -> None: """ Test the challenge_a function in a given agent by mocking user inputs @@ -29,13 +26,14 @@ def test_kubernetes_template_challenge_a( Args: kubernetes_agent (Agent) monkeypatch (pytest.MonkeyPatch) - config (Config) level_to_run (int) """ - run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run + ) - file_path = str(kubernetes_agent.workspace.get_path("kube.yaml")) - content = read_file(file_path, config) + file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION) + content = read_file(file_path, kubernetes_agent) for word in ["apiVersion", "kind", "metadata", "spec"]: assert word in content, f"Expected the file to contain {word}" diff --git a/tests/integration/challenges/memory/__init__.py b/tests/challenges/memory/__init__.py similarity index 100% rename from tests/integration/challenges/memory/__init__.py rename to tests/challenges/memory/__init__.py diff --git a/tests/integration/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py similarity index 68% rename from tests/integration/challenges/memory/test_memory_challenge_a.py rename to tests/challenges/memory/test_memory_challenge_a.py index 8919bf58c0e..41453b250f8 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -1,24 +1,21 @@ import pytest 
+from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop +OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge + +@challenge() def test_memory_challenge_a( memory_management_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, + challenge_name: str, ) -> None: """ The agent reads a file containing a task_id. Then, it reads a series of other files. @@ -27,17 +24,21 @@ def test_memory_challenge_a( memory_management_agent (Agent) patched_api_requestor (MockerFixture) monkeypatch (pytest.MonkeyPatch) - config (Config) level_to_run (int) """ - task_id = "2314" - create_instructions_files(memory_management_agent, level_to_run, task_id, config) + create_instructions_files(memory_management_agent, level_to_run, task_id) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) - file_path = str(memory_management_agent.workspace.get_path("output.txt")) - content = read_file(file_path, config) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) + content = read_file(file_path, memory_management_agent) assert task_id in content, f"Expected the file to contain {task_id}" @@ -45,7 +46,6 @@ def create_instructions_files( memory_management_agent: Agent, num_files: int, task_id: str, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -59,8 +59,8 @@ def create_instructions_files( for i in range(1, num_files + 1): content = generate_content(i, task_id, base_filename, num_files) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) - write_to_file(file_path, content, config) + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, memory_management_agent) def generate_content( diff --git a/tests/integration/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py similarity index 72% rename from tests/integration/challenges/memory/test_memory_challenge_b.py rename to tests/challenges/memory/test_memory_challenge_b.py index 5c28b330a3b..b381df1b475 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -1,26 +1,26 @@ import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import ( + generate_noise, + get_workspace_path, + run_interaction_loop, ) -from tests.integration.challenges.utils import generate_noise, run_interaction_loop -from 
tests.utils import requires_api_key NOISE = 1000 +OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_b( memory_management_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, + challenge_name: str, ) -> None: """ The agent reads a series of files, each containing a task_id and noise. After reading 'n' files, @@ -33,12 +33,18 @@ def test_memory_challenge_b( level_to_run (int) """ task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)] - create_instructions_files(memory_management_agent, level_to_run, task_ids, config) + create_instructions_files(memory_management_agent, level_to_run, task_ids) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) - file_path = str(memory_management_agent.workspace.get_path("output.txt")) - content = read_file(file_path, config) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) + content = read_file(file_path, memory_management_agent) for task_id in task_ids: assert task_id in content, f"Expected the file to contain {task_id}" @@ -47,7 +53,6 @@ def create_instructions_files( memory_management_agent: Agent, level: int, task_ids: list, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -62,8 +67,9 @@ def create_instructions_files( for i in range(1, level + 1): content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) - write_to_file(file_path, content, config) + file_path = get_workspace_path(memory_management_agent, file_name) + + write_to_file(file_path, content, memory_management_agent) def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str: diff --git a/tests/integration/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py similarity index 73% rename from tests/integration/challenges/memory/test_memory_challenge_c.py rename to tests/challenges/memory/test_memory_challenge_c.py index 23c0217d613..3cfeb2c014e 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -1,27 +1,26 @@ import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import ( + generate_noise, + get_workspace_path, + run_interaction_loop, ) -from tests.integration.challenges.utils import generate_noise, run_interaction_loop -from tests.utils import requires_api_key -NOISE = 1000 +NOISE = 1200 +OUTPUT_LOCATION = "output.txt" -# @pytest.mark.vcr -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_c( memory_management_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, + challenge_name: str, ) -> None: """ Instead of reading task Ids from files as with 
the previous challenges, the agent now must remember @@ -32,31 +31,37 @@ def test_memory_challenge_c( memory_management_agent (Agent) patched_api_requestor (MockerFixture) monkeypatch (pytest.MonkeyPatch) - config (Config) level_to_run (int) """ silly_phrases = [ - "The purple elephant danced on a rainbow while eating a taco.", - "The sneaky toaster stole my socks and ran away to Hawaii.", - "My pet rock sings better than Beyoncé on Tuesdays.", - "The giant hamster rode a unicycle through the crowded mall.", - "The talking tree gave me a high-five and then flew away.", - "I have a collection of invisible hats that I wear on special occasions.", - "The flying spaghetti monster stole my sandwich and left a note saying 'thanks for the snack!'", - "My imaginary friend is a dragon who loves to play video games.", - "I once saw a cloud shaped like a giant chicken eating a pizza.", - "The ninja unicorn disguised itself as a potted plant and infiltrated the office.", + "The purple elephant danced on a rainbow while eating a taco", + "The sneaky toaster stole my socks and ran away to Hawaii", + "My pet rock sings better than Beyoncé on Tuesdays", + "The giant hamster rode a unicycle through the crowded mall", + "The talking tree gave me a high-five and then flew away", + "I have a collection of invisible hats that I wear on special occasions", + "The flying spaghetti monster stole my sandwich and left a note saying 'thanks for the snack'", + "My imaginary friend is a dragon who loves to play video games", + "I once saw a cloud shaped like a giant chicken eating a pizza", + "The ninja unicorn disguised itself as a potted plant and infiltrated the office", ] level_silly_phrases = silly_phrases[:level_to_run] create_instructions_files( - memory_management_agent, level_to_run, level_silly_phrases, config=config + memory_management_agent, + level_to_run, + level_silly_phrases, ) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - - file_path = str(memory_management_agent.workspace.get_path("output.txt")) - content = read_file(file_path, config) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) + content = read_file(file_path, agent=memory_management_agent) for phrase in level_silly_phrases: assert phrase in content, f"Expected the file to contain {phrase}" @@ -65,7 +70,6 @@ def create_instructions_files( memory_management_agent: Agent, level: int, task_ids: list, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -80,8 +84,8 @@ def create_instructions_files( for i in range(1, level + 1): content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) - write_to_file(file_path, content, config) + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, memory_management_agent) def generate_content( diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py new file mode 100644 index 00000000000..ae44831b3bb --- /dev/null +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -0,0 +1,241 @@ +import json +from typing import Dict + +import pytest +from pytest_mock import MockerFixture + +from autogpt.agent import Agent +from autogpt.commands.file_operations import read_file, write_to_file +from 
tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop + +LEVEL_CURRENTLY_BEATEN = 1 +MAX_LEVEL = 5 +OUTPUT_LOCATION = "output.txt" + + +@challenge() +def test_memory_challenge_d( + memory_management_agent: Agent, + patched_api_requestor: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + level_to_run: int, + challenge_name: str, +) -> None: + """ + The agent is given a series of events and must remember the respective beliefs of the characters. + Args: + memory_management_agent (Agent) + user_selected_level (int) + """ + sally_anne_test_phrases = [ + "Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", + "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speaks to Sally about the marble A as instructed by Anne.", + "Sally gives a new marble (marble C) to Charlie who is outside with her. Charlie enters the room and exchanges marble C with marble B in Anne's basket (basket A). Anne tells Charlie to tell Sally that he put marble C into the red box. Charlie leaves the room and speak to Sally about marble C as instructed by Anne. Meanwhile, after Charlie leaves the room, Bob enters into the room and moves marble A from the green box to under the sofa, but tells Anne to tell Sally that marble A is in the green box. Anne leaves the room and speak to Sally about the marble A as instructed by Bob", + "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa Charlie leaves the room and speaks to Sally about marble D. Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.", + "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. 
Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).", + ] + level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run] + create_instructions_files( + memory_management_agent, level_to_run, level_sally_anne_test_phrases + ) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) + + content = read_file(file_path, memory_management_agent) + check_beliefs(content, level_to_run) + + +def check_beliefs(content: str, level: int) -> None: + # Define the expected beliefs for each level + expected_beliefs = { + 1: { + "Sally": { + "marble A": "basket S", + }, + "Anne": { + "marble A": "basket A", + }, + }, + 2: { + "Sally": { + "marble A": "sofa", # Because Charlie told her + "marble B": "lost", # Because Bob told her + }, + "Anne": { + "marble A": "green box", # Because she moved it there + "marble B": "basket A", # Because Bob put it there and she was in the room + }, + "Bob": { + "marble B": "basket A", # Last place he put it + }, + "Charlie": { + "marble A": "sofa", # Because Anne told him to tell Sally so + }, + }, + 3: { + "Sally": { + "marble A": "green box", # Because Anne told her + "marble C": "red box", # Because Charlie told her + }, + "Anne": { + "marble A": "sofa", # Because Bob moved it there and told her + "marble B": "basket A", # Because Charlie exchanged marble C with marble B in her basket + "marble C": "basket A", # Because Charlie exchanged marble C with marble B in her basket + }, + "Bob": { + "marble A": "sofa", # Because he moved it there + "marble B": "basket A", + # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room + "marble C": "basket A", + # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room + }, + "Charlie": { + "marble A": "sofa", # Last place he knew it was + "marble B": "basket A", # Because he exchanged marble C with marble B in Anne's basket + "marble C": "red box", # Because Anne told him to tell Sally so + }, + }, + 4: { + "Sally": { + "marble A": "green box", # Because Anne told her in the last conversation + "marble C": "red box", # Because Charlie told her + "marble D": "sofa", # Because Charlie told her + }, + "Anne": { + "marble A": "blue box", # Because Bob moved it there, and she was not in the room to see + "marble B": "basket A", # Last place she knew it was + "marble C": "basket A", # Last place she knew it was + "marble D": "sofa", # Because Bob moved it there, and she was in the room to see + }, + "Bob": { + "marble A": "blue box", # Because he moved it there + "marble B": "basket A", # Last place he knew it was + "marble C": "basket A", # Last place he knew it was + "marble D": "sofa", # Because he moved it there + }, + "Charlie": { + "marble A": "sofa", # Last place he knew it was + "marble B": "basket A", # Last place he knew it was + "marble C": "red box", # Last place he knew it was + "marble D": "sofa", # Because Bob told him to tell Sally so + }, + }, + 5: { + "Sally": { + "marble A": "green box", # Because Anne told her in the last level + "marble C": "red box", # Because Charlie told her + "marble D": "sofa", # Because Charlie told her + "marble E": "green box", # Because Anne told her + }, + "Anne": { + "marble A": "blue box", # Last place she knew it was + "marble B": "basket A", # Last place she knew it was + 
"marble C": "basket A", # Last place she knew it was + "marble D": "basket C", # Last place she knew it was + "marble E": "sofa", # Because she moved it there + }, + "Charlie": { + "marble A": "blue box", # Last place he knew it was + "marble B": "basket A", # Last place he knew it was + "marble C": "basket A", # Last place he knew it was + "marble D": "basket C", # Because he moved it there + "marble E": "red box", # Last place he knew it was + }, + "Bob": { + "marble A": "blue box", # Last place he knew it was + "marble C": "red box", # Last place he knew it was + "marble D": "sofa", # Last place he knew it was + }, + }, + } + + # Extract the beliefs from the AI's response + ai_beliefs = extract_beliefs(content) + # Check the AI's beliefs against the expected beliefs + for character, belief in expected_beliefs[level].items(): + for marble, location in belief.items(): + ai_belief = ai_beliefs.get(character, {}).get(marble, "") + assert ( + location in ai_belief + ), f"For {character}'s {marble}, expected '{location}' to be in '{ai_belief}'" + + +def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]: + """Extract the beliefs of each character from the AI's output.""" + # Parse the JSON content + content_dict = json.loads(content) + beliefs = content_dict.get("beliefs", {}) + return beliefs + + +def create_instructions_files( + memory_management_agent: Agent, + level: int, + test_phrases: list, + base_filename: str = "instructions_", +) -> None: + """ + Creates a series of instructions files for the memory challenge. + Args: + level: + memory_management_agent (Agent) + test_phrases (list) + base_filename (str, optional) + """ + for i in range(1, level + 1): + content = generate_content(i, test_phrases, base_filename, level) + file_name = f"{base_filename}{i}.txt" + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, memory_management_agent) + + +def generate_content( + index: int, test_phrases: list, base_filename: str, level: int +) -> str: + """ + Args: + index: int + test_phrases: list + base_filename: str + level: int + Returns: str + """ + phrase = test_phrases[index - 1] + + if index != level: + return f"The current event is '{phrase}'.\nRead the file {base_filename}{index + 1}.txt using the read_file command." + else: + return f""" + This event is '{phrase}' + rules: + 1. Everyone inside the room see and know what are the actions of the people inside the room. + 2. Everyone outside the room do not see and do not know the actions of the people inside the room. + 3. Only write about the marbles that are present in the level + 4. Marbles names are marble A, marble B, marble C, marble D, ... + 5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... + 6. Do not use expression like use instead. + 7. All characters believe what they're being told by other characters. + Instructions: + I) Write the following information in the file output.txt in JSON format: + 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) + The format should be as follows: + {{ + "beliefs": {{ + "": {{ + "": "", + ... + }}, + ... + }}, + }} + Example of output (only use this to understand and learn how to use the format. 
Nothing else): + {{"beliefs": {{"Sally": {{"marble A": "basket A"}}, "Bob": {{"marble B": "basket S"}}, "Anne": {{"marble A": "green box"}}, "Charlie": {{"marble B": "sofa"}}}} + II) The file output.txt has not been created yet. You need to create it. After that, use the task_complete command. + """ diff --git a/tests/integration/challenges/test_challenge_should_be_formatted_properly.py b/tests/challenges/test_challenge_should_be_formatted_properly.py similarity index 100% rename from tests/integration/challenges/test_challenge_should_be_formatted_properly.py rename to tests/challenges/test_challenge_should_be_formatted_properly.py diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py new file mode 100644 index 00000000000..0b5f54c57a1 --- /dev/null +++ b/tests/challenges/utils.py @@ -0,0 +1,76 @@ +import contextlib +import random +import shutil +from pathlib import Path +from typing import Any, Generator + +import pytest + +from autogpt.agent import Agent +from autogpt.log_cycle.log_cycle import LogCycleHandler + + +def generate_noise(noise_size: int) -> str: + random.seed(42) + return "".join( + random.choices( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + k=noise_size, + ) + ) + + +def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None: + """ + Sets up the mock input for testing. + + :param monkeypatch: pytest's monkeypatch utility for modifying builtins. + :param cycle_count: The number of cycles to mock. + """ + input_sequence = ["y"] * (cycle_count) + ["EXIT"] + + def input_generator() -> Generator[str, None, None]: + """ + Creates a generator that yields input strings from the given sequence. + """ + yield from input_sequence + + gen = input_generator() + monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen)) + + +def run_interaction_loop( + monkeypatch: pytest.MonkeyPatch, + agent: Agent, + cycle_count: int, + challenge_name: str, + level_to_run: int, +) -> None: + setup_mock_input(monkeypatch, cycle_count) + + setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run) + with contextlib.suppress(SystemExit): + agent.start_interaction_loop() + + +def setup_mock_log_cycle_agent_name( + monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int +) -> None: + def mock_get_agent_short_name(*args: Any, **kwargs: Any) -> str: + return f"{challenge_name}_level_{level_to_run}" + + monkeypatch.setattr( + LogCycleHandler, "get_agent_short_name", mock_get_agent_short_name + ) + + +def get_workspace_path(agent: Agent, file_name: str) -> str: + return str(agent.workspace.get_path(file_name)) + + +def copy_file_into_workspace( + agent: Agent, directory_path: Path, file_path: str +) -> None: + workspace_code_file_path = get_workspace_path(agent, file_path) + code_file_path = directory_path / file_path + shutil.copy(code_file_path, workspace_code_file_path) diff --git a/tests/integration/challenges/utils/build_current_score.py b/tests/challenges/utils/build_current_score.py similarity index 80% rename from tests/integration/challenges/utils/build_current_score.py rename to tests/challenges/utils/build_current_score.py index 743b1328424..b8e752424dc 100644 --- a/tests/integration/challenges/utils/build_current_score.py +++ b/tests/challenges/utils/build_current_score.py @@ -26,12 +26,8 @@ def recursive_sort_dict(data: dict) -> dict: cwd = os.getcwd() # get current working directory -new_score_filename_pattern = os.path.join( - cwd, "tests/integration/challenges/new_score_*.json" -) 
-current_score_filename = os.path.join( - cwd, "tests/integration/challenges/current_score.json" -) +new_score_filename_pattern = os.path.join(cwd, "tests/challenges/new_score_*.json") +current_score_filename = os.path.join(cwd, "tests/challenges/current_score.json") merged_data: Dict[str, Any] = {} for filename in glob.glob(new_score_filename_pattern): @@ -44,4 +40,5 @@ def recursive_sort_dict(data: dict) -> dict: sorted_data = recursive_sort_dict(merged_data) with open(current_score_filename, "w") as f_current: - json.dump(sorted_data, f_current, indent=4) + json_data = json.dumps(sorted_data, indent=4) + f_current.write(json_data + "\n") diff --git a/tests/conftest.py b/tests/conftest.py index 98bebc9e34c..2342a3b04ec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,22 +1,26 @@ import os from pathlib import Path +from tempfile import TemporaryDirectory import pytest +import yaml from pytest_mock import MockerFixture +from autogpt.agent.agent import Agent +from autogpt.commands.command import CommandRegistry +from autogpt.config.ai_config import AIConfig from autogpt.config.config import Config from autogpt.llm.api_manager import ApiManager +from autogpt.logs import TypingConsoleHandler +from autogpt.memory.vector import get_memory +from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT from autogpt.workspace import Workspace -pytest_plugins = ["tests.integration.agent_factory", "tests.integration.memory.utils"] - -PROXY = os.environ.get("PROXY") - - -@pytest.fixture() -def vcr_cassette_dir(request): - test_name = os.path.splitext(request.node.name)[0] - return os.path.join("tests/Auto-GPT-test-cassettes", test_name) +pytest_plugins = [ + "tests.integration.agent_factory", + "tests.integration.memory.utils", + "tests.vcr", +] @pytest.fixture() @@ -30,9 +34,25 @@ def workspace(workspace_root: Path) -> Workspace: return Workspace(workspace_root, restrict_to_workspace=True) +@pytest.fixture +def temp_plugins_config_file(): + """Create a plugins_config.yaml file in a temp directory so that it doesn't mess with existing ones""" + config_directory = TemporaryDirectory() + config_file = os.path.join(config_directory.name, "plugins_config.yaml") + with open(config_file, "w+") as f: + f.write(yaml.dump({})) + + yield config_file + + @pytest.fixture() -def config(mocker: MockerFixture, workspace: Workspace) -> Config: +def config( + temp_plugins_config_file: str, mocker: MockerFixture, workspace: Workspace +) -> Config: config = Config() + config.plugins_dir = "tests/unit/data/test_plugins" + config.plugins_config_file = temp_plugins_config_file + config.load_plugins_config() # Do a little setup and teardown since the config object is a singleton mocker.patch.multiple( @@ -48,3 +68,44 @@ def api_manager() -> ApiManager: if ApiManager in ApiManager._instances: del ApiManager._instances[ApiManager] return ApiManager() + + +@pytest.fixture(autouse=True) +def patch_emit(monkeypatch): + # convert plain_output to a boolean + + if bool(os.environ.get("PLAIN_OUTPUT")): + + def quick_emit(self, record: str): + print(self.format(record)) + + monkeypatch.setattr(TypingConsoleHandler, "emit", quick_emit) + + +@pytest.fixture +def agent(config: Config, workspace: Workspace) -> Agent: + ai_config = AIConfig( + ai_name="Base", + ai_role="A base AI", + ai_goals=[], + ) + + command_registry = CommandRegistry() + ai_config.command_registry = command_registry + + config.set_memory_backend("json_file") + memory_json_file = get_memory(config, init=True) + + system_prompt = 
ai_config.construct_full_prompt() + + return Agent( + ai_name=ai_config.ai_name, + memory=memory_json_file, + command_registry=command_registry, + ai_config=ai_config, + config=config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 30d9cc13b2a..fff3867ed3b 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -59,7 +59,8 @@ def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace ai_name="", memory=memory_none, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -70,49 +71,45 @@ def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace @pytest.fixture -def writer_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") - - ai_config = AIConfig( - ai_name="write_to_file-GPT", - ai_role="an AI designed to use the write_to_file command to write 'Hello World' into a file named \"hello_world.txt\" and then use the task_complete command to complete the task.", - ai_goals=[ - "Use the write_to_file command to write 'Hello World' into a file named \"hello_world.txt\".", - "Use the task_complete command to complete the task.", - "Do not use any other commands.", - ], - ) - ai_config.command_registry = command_registry - - triggering_prompt = ( - "Determine which next command to use, and respond using the" - " format specified above:" - ) - system_prompt = ai_config.construct_full_prompt() +def file_system_agents( + agent_test_config, memory_json_file: NoMemory, workspace: Workspace +): + agents = [] + command_registry = get_command_registry(agent_test_config) - agent = Agent( - ai_name="", - memory=memory_none, - command_registry=command_registry, - config=ai_config, - next_action_count=0, - system_prompt=system_prompt, - triggering_prompt=triggering_prompt, - workspace_directory=workspace.root, - ) + ai_goals = [ + "Write 'Hello World' into a file named \"hello_world.txt\".", + 'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".', + ] - return agent + for ai_goal in ai_goals: + ai_config = AIConfig( + ai_name="File System Agent", + ai_role="an AI designed to manage a file system.", + ai_goals=[ai_goal], + ) + ai_config.command_registry = command_registry + system_prompt = ai_config.construct_full_prompt() + Config().set_continuous_mode(False) + agents.append( + Agent( + ai_name="File System Agent", + memory=memory_json_file, + command_registry=command_registry, + ai_config=ai_config, + config=agent_test_config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) + ) + return agents @pytest.fixture def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") + 
command_registry = get_command_registry(agent_test_config) ai_config = AIConfig( ai_name="Follow-Instructions-GPT", @@ -127,10 +124,11 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work system_prompt = ai_config.construct_full_prompt() agent = Agent( - ai_name="", + ai_name="Follow-Instructions-GPT", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -145,19 +143,12 @@ def information_retrieval_agents( agent_test_config, memory_json_file, workspace: Workspace ): agents = [] - command_registry = CommandRegistry() - enabled_command_categories = [ - x - for x in COMMAND_CATEGORIES - if x not in agent_test_config.disabled_command_categories - ] + command_registry = get_command_registry(agent_test_config) - for command_category in enabled_command_categories: - command_registry.import_commands(command_category) ai_goals = [ - "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.", - "Write to a file called output.txt tesla's revenue in 2022.", - "Write to a file called output.txt tesla's revenue every year since its creation.", + "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.", + "Write to a file called output.txt containing tesla's revenue in 2022.", + "Write to a file called output.txt containing tesla's revenue every year since its creation.", ] for ai_goal in ai_goals: ai_config = AIConfig( @@ -173,7 +164,8 @@ def information_retrieval_agents( ai_name="Information Retrieval Agent", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -184,7 +176,9 @@ def information_retrieval_agents( @pytest.fixture -def kubernetes_agent(memory_json_file, workspace: Workspace): +def kubernetes_agent( + agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace +) -> Agent: command_registry = CommandRegistry() command_registry.import_commands("autogpt.commands.file_operations") command_registry.import_commands("autogpt.app") @@ -205,7 +199,8 @@ def kubernetes_agent(memory_json_file, workspace: Workspace): ai_name="Kubernetes-Demo", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -238,7 +233,8 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp ai_name="Get-PhysicsNobelPrize", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -249,38 +245,57 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp @pytest.fixture -def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.commands.execute_code") - command_registry.import_commands("autogpt.commands.improve_code") - command_registry.import_commands("autogpt.app") - 
command_registry.import_commands("autogpt.commands.task_statuses") - - ai_config = AIConfig( - ai_name="Debug Code Agent", - ai_role="an autonomous agent that specializes in debugging python code", - ai_goals=[ - "1-Run the code in the file named 'code.py' using the execute_code command.", - "2-Read code.py to understand why the code is not working as expected.", - "3-Modify code.py to fix the error.", - "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.", - "Do not use any other commands than execute_python_file and write_file", +def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace): + agents = [] + goals = [ + [ + "1- Run test.py using the execute_python_file command.", + "2- Read code.py using the read_file command.", + "3- Modify code.py using the write_to_file command." + "Repeat step 1, 2 and 3 until test.py runs without errors.", ], - ) - ai_config.command_registry = command_registry + [ + "1- Run test.py.", + "2- Read code.py.", + "3- Modify code.py." + "Repeat step 1, 2 and 3 until test.py runs without errors.", + ], + ["1- Make test.py run without errors."], + ] - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) - agent = Agent( - ai_name="Debug Code Agent", - memory=memory_json_file, - command_registry=command_registry, - config=ai_config, - next_action_count=0, - system_prompt=system_prompt, - triggering_prompt=DEFAULT_TRIGGERING_PROMPT, - workspace_directory=workspace.root, - ) + for goal in goals: + ai_config = AIConfig( + ai_name="Debug Code Agent", + ai_role="an autonomous agent that specializes in debugging python code", + ai_goals=goal, + ) + command_registry = get_command_registry(agent_test_config) + ai_config.command_registry = command_registry + system_prompt = ai_config.construct_full_prompt() + Config().set_continuous_mode(False) + agents.append( + Agent( + ai_name="Debug Code Agent", + memory=memory_json_file, + command_registry=command_registry, + ai_config=ai_config, + config=agent_test_config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) + ) + return agents - return agent + +def get_command_registry(agent_test_config): + command_registry = CommandRegistry() + enabled_command_categories = [ + x + for x in COMMAND_CATEGORIES + if x not in agent_test_config.disabled_command_categories + ] + for command_category in enabled_command_categories: + command_registry.import_commands(command_category) + return command_registry diff --git a/tests/integration/challenges/basic_abilities/test_write_file.py b/tests/integration/challenges/basic_abilities/test_write_file.py deleted file mode 100644 index cbbad514b6b..00000000000 --- a/tests/integration/challenges/basic_abilities/test_write_file.py +++ /dev/null @@ -1,29 +0,0 @@ -import pytest - -from autogpt.agent import Agent -from autogpt.commands.file_operations import read_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key - -CYCLE_COUNT = 3 - - -@requires_api_key("OPENAI_API_KEY") -@pytest.mark.vcr -@challenge -def test_write_file( - writer_agent: Agent, - patched_api_requestor: None, - monkeypatch: pytest.MonkeyPatch, - config: Config, - level_to_run: int, -) -> None: - file_path = 
str(writer_agent.workspace.get_path("hello_world.txt")) - run_interaction_loop(monkeypatch, writer_agent, CYCLE_COUNT) - - content = read_file(file_path, config) - assert content == "Hello World", f"Expected 'Hello World', got {content}" diff --git a/tests/integration/challenges/challenge_decorator/challenge_decorator.py b/tests/integration/challenges/challenge_decorator/challenge_decorator.py deleted file mode 100644 index fe12317eed8..00000000000 --- a/tests/integration/challenges/challenge_decorator/challenge_decorator.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -from functools import wraps -from typing import Any, Callable, Optional - -import pytest - -from tests.integration.challenges.challenge_decorator.challenge import Challenge -from tests.integration.challenges.challenge_decorator.challenge_utils import ( - create_challenge, -) -from tests.integration.challenges.challenge_decorator.score_utils import ( - get_scores, - update_new_score, -) - -MAX_LEVEL_TO_IMPROVE_ON = ( - 1 # we will attempt to beat 1 level above the current level for now. -) - - -def challenge(func: Callable[..., Any]) -> Callable[..., None]: - @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> None: - run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 - original_error = None - - while run_remaining > 0: - current_score, new_score, new_score_location = get_scores() - level_to_run = kwargs["level_to_run"] if "level_to_run" in kwargs else None - challenge = create_challenge( - func, current_score, Challenge.BEAT_CHALLENGES, level_to_run - ) - if challenge.level_to_run is not None: - kwargs["level_to_run"] = challenge.level_to_run - try: - func(*args, **kwargs) - challenge.succeeded = True - except AssertionError as err: - original_error = err - challenge.succeeded = False - else: - challenge.skipped = True - if os.environ.get("CI") == "true": - new_max_level_beaten = get_new_max_level_beaten( - challenge, Challenge.BEAT_CHALLENGES - ) - update_new_score( - new_score_location, new_score, challenge, new_max_level_beaten - ) - if challenge.level_to_run is None: - pytest.skip("This test has not been unlocked yet.") - - if not challenge.succeeded: - if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: - # xfail - pytest.xfail("Challenge failed") - if original_error: - raise original_error - raise AssertionError("Challenge failed") - run_remaining -= 1 - - return wrapper - - -def get_new_max_level_beaten( - challenge: Challenge, beat_challenges: bool -) -> Optional[int]: - if challenge.succeeded: - return challenge.level_to_run - if challenge.skipped: - return challenge.max_level_beaten - # Challenge failed - return challenge.max_level_beaten if beat_challenges else None diff --git a/tests/integration/challenges/debug_code/data/two_sum_tests.py b/tests/integration/challenges/debug_code/data/two_sum_tests.py deleted file mode 100644 index 0eb89bcbfc9..00000000000 --- a/tests/integration/challenges/debug_code/data/two_sum_tests.py +++ /dev/null @@ -1,30 +0,0 @@ -# mypy: ignore-errors -# we need a new line at the top of the file to avoid a syntax error - - -def test_two_sum(nums, target, expected_result): - # These tests are appended to the two_sum file so we can ignore this error for now - result = two_sum(nums, target) - print(result) - assert ( - result == expected_result - ), f"AssertionError: Expected the output to be {expected_result}" - - -# test the trivial case with the first two numbers -nums = [2, 7, 11, 15] -target = 9 -expected_result = [0, 1] -test_two_sum(nums, target, 
expected_result) - -# test for ability to use zero and the same number twice -nums = [2, 7, 0, 15, 12, 0] -target = 0 -expected_result = [2, 5] -test_two_sum(nums, target, expected_result) - -# test for first and last index usage and negative numbers -nums = [-6, 7, 11, 4] -target = -2 -expected_result = [0, 3] -test_two_sum(nums, target, expected_result) diff --git a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py b/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py deleted file mode 100644 index 008e562ce30..00000000000 --- a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py +++ /dev/null @@ -1,51 +0,0 @@ -from pathlib import Path - -import pytest -from pytest_mock import MockerFixture - -from autogpt.agent import Agent -from autogpt.commands.execute_code import execute_python_file -from autogpt.commands.file_operations import append_to_file, write_to_file -from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key - -CYCLE_COUNT = 5 - - -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge -def test_debug_code_challenge_a( - debug_code_agent: Agent, - monkeypatch: pytest.MonkeyPatch, - patched_api_requestor: MockerFixture, - config: Config, - level_to_run: int, -) -> None: - """ - Test whether the agent can debug a simple code snippet. - - :param debug_code_agent: The agent to test. - :param monkeypatch: pytest's monkeypatch utility for modifying builtins. - :patched_api_requestor: Sends api requests to our API CI pipeline - :config: The config object for the agent. - :level_to_run: The level to run. - """ - - file_path = str(debug_code_agent.workspace.get_path("code.py")) - - code_file_path = Path(__file__).parent / "data" / "two_sum.py" - test_file_path = Path(__file__).parent / "data" / "two_sum_tests.py" - - write_to_file(file_path, code_file_path.read_text(), config) - - run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT) - - append_to_file(file_path, test_file_path.read_text(), config) - - output = execute_python_file(file_path, config) - assert "error" not in output.lower(), f"Errors found in output: {output}!" diff --git a/tests/integration/challenges/utils.py b/tests/integration/challenges/utils.py deleted file mode 100644 index 3ffd136b66d..00000000000 --- a/tests/integration/challenges/utils.py +++ /dev/null @@ -1,44 +0,0 @@ -import contextlib -import random -from typing import Generator - -import pytest - -from autogpt.agent import Agent - - -def generate_noise(noise_size: int) -> str: - random.seed(42) - return "".join( - random.choices( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - k=noise_size, - ) - ) - - -def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None: - """ - Sets up the mock input for testing. - - :param monkeypatch: pytest's monkeypatch utility for modifying builtins. - :param cycle_count: The number of cycles to mock. - """ - input_sequence = ["y"] * (cycle_count) + ["EXIT"] - - def input_generator() -> Generator[str, None, None]: - """ - Creates a generator that yields input strings from the given sequence. 
- """ - yield from input_sequence - - gen = input_generator() - monkeypatch.setattr("builtins.input", lambda _: next(gen)) - - -def run_interaction_loop( - monkeypatch: pytest.MonkeyPatch, agent: Agent, cycle_count: int -) -> None: - setup_mock_input(monkeypatch, cycle_count) - with contextlib.suppress(SystemExit): - agent.start_interaction_loop() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py deleted file mode 100644 index 686f50be405..00000000000 --- a/tests/integration/conftest.py +++ /dev/null @@ -1,56 +0,0 @@ -import os - -import openai.api_requestor -import pytest -from pytest_mock import MockerFixture - -from tests.conftest import PROXY -from tests.vcr.vcr_filter import before_record_request, before_record_response - -BASE_VCR_CONFIG = { - "record_mode": "new_episodes", - "before_record_request": before_record_request, - "before_record_response": before_record_response, - "filter_headers": [ - "Authorization", - "X-OpenAI-Client-User-Agent", - "User-Agent", - ], - "match_on": ["method", "body"], -} - - -@pytest.fixture(scope="session") -def vcr_config(): - # this fixture is called by the pytest-recording vcr decorator. - return BASE_VCR_CONFIG - - -def patch_api_base(requestor): - new_api_base = f"{PROXY}/v1" - requestor.api_base = new_api_base - return requestor - - -@pytest.fixture -def patched_api_requestor(mocker: MockerFixture): - original_init = openai.api_requestor.APIRequestor.__init__ - original_validate_headers = openai.api_requestor.APIRequestor._validate_headers - - def patched_init(requestor, *args, **kwargs): - original_init(requestor, *args, **kwargs) - patch_api_base(requestor) - - def patched_validate_headers(self, supplied_headers): - headers = original_validate_headers(self, supplied_headers) - headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") - headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") - return headers - - if PROXY: - mocker.patch("openai.api_requestor.APIRequestor.__init__", new=patched_init) - mocker.patch.object( - openai.api_requestor.APIRequestor, - "_validate_headers", - new=patched_validate_headers, - ) diff --git a/tests/integration/test_commands.py b/tests/integration/test_commands.py deleted file mode 100644 index 1cbb3929667..00000000000 --- a/tests/integration/test_commands.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Unit tests for the commands module""" -from unittest.mock import MagicMock, patch - -import pytest - -from autogpt.app import list_agents, start_agent -from tests.utils import requires_api_key - - -@pytest.mark.vcr -@pytest.mark.integration_test -@requires_api_key("OPENAI_API_KEY") -def test_make_agent(patched_api_requestor, config) -> None: - """Test that an agent can be created""" - # Use the mock agent manager to avoid creating a real agent - with patch("openai.ChatCompletion.create") as mock: - response = MagicMock() - # del response.error - response.choices[0].messages[0].content = "Test message" - response.usage.prompt_tokens = 1 - response.usage.completion_tokens = 1 - mock.return_value = response - start_agent( - "Test Agent", "chat", "Hello, how are you?", config, "gpt-3.5-turbo" - ) - agents = list_agents(config) - assert "List of agents:\n0: chat" == agents - start_agent( - "Test Agent 2", "write", "Hello, how are you?", config, "gpt-3.5-turbo" - ) - agents = list_agents(config) - assert "List of agents:\n0: chat\n1: write" == agents diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index c75d66fa7b9..16d6c4d97d6 100644 --- 
a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -1,23 +1,25 @@ +import os import random import string import tempfile +from typing import Callable import pytest -from pytest_mock import MockerFixture import autogpt.commands.execute_code as sut # system under testing +from autogpt.agent.agent import Agent from autogpt.config import Config @pytest.fixture -def config_allow_execute(config: Config, mocker: MockerFixture): - yield mocker.patch.object(config, "execute_local_commands", True) +def random_code(random_string) -> Callable: + return f"print('Hello {random_string}!')" @pytest.fixture -def python_test_file(config: Config, random_string): +def python_test_file(config: Config, random_code: str) -> Callable: temp_file = tempfile.NamedTemporaryFile(dir=config.workspace_path, suffix=".py") - temp_file.write(str.encode(f"print('Hello {random_string}!')")) + temp_file.write(str.encode(random_code)) temp_file.flush() yield temp_file.name @@ -29,22 +31,98 @@ def random_string(): return "".join(random.choice(string.ascii_lowercase) for _ in range(10)) -def test_execute_python_file(python_test_file: str, random_string: str, config): - result: str = sut.execute_python_file(python_test_file, config) +def test_execute_python_file(python_test_file: str, random_string: str, agent: Agent): + result: str = sut.execute_python_file(python_test_file, agent=agent) assert result.replace("\r", "") == f"Hello {random_string}!\n" -def test_execute_python_file_invalid(config): +def test_execute_python_code(random_code: str, random_string: str, agent: Agent): + ai_name = agent.ai_name + + result: str = sut.execute_python_code(random_code, "test_code", agent=agent) + assert result.replace("\r", "") == f"Hello {random_string}!\n" + + # Check that the code is stored + destination = os.path.join( + agent.config.workspace_path, ai_name, "executed_code", "test_code.py" + ) + with open(destination) as f: + assert f.read() == random_code + + +def test_execute_python_code_overwrites_file( + random_code: str, random_string: str, agent: Agent +): + ai_name = agent.ai_name + destination = os.path.join( + agent.config.workspace_path, ai_name, "executed_code", "test_code.py" + ) + os.makedirs(os.path.dirname(destination), exist_ok=True) + + with open(destination, "w+") as f: + f.write("This will be overwritten") + + sut.execute_python_code(random_code, "test_code.py", agent=agent) + + # Check that the file is updated with the new code + with open(destination) as f: + assert f.read() == random_code + + +def test_execute_python_file_invalid(agent: Agent): assert all( - s in sut.execute_python_file("not_python", config).lower() + s in sut.execute_python_file("not_python", agent).lower() for s in ["error:", "invalid", ".py"] ) + + +def test_execute_python_file_not_found(agent: Agent): assert all( - s in sut.execute_python_file("notexist.py", config).lower() - for s in ["error:", "does not exist"] + s in sut.execute_python_file("notexist.py", agent).lower() + for s in [ + "python: can't open file 'notexist.py'", + "[errno 2] no such file or directory", + ] ) -def test_execute_shell(config_allow_execute, random_string, config): - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) +def test_execute_shell(random_string: str, agent: Agent): + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert f"Hello {random_string}!" 
in result + + +def test_execute_shell_local_commands_not_allowed(random_string: str, agent: Agent): + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) + assert f"Hello {random_string}!" in result + + +def test_execute_shell_denylist_should_deny(agent: Agent, random_string: str): + agent.config.shell_denylist = ["echo"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) + assert "Error:" in result and "not allowed" in result + + +def test_execute_shell_denylist_should_allow(agent: Agent, random_string: str): + agent.config.shell_denylist = ["cat"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) + assert "Hello" in result and random_string in result + assert "Error" not in result + + +def test_execute_shell_allowlist_should_deny(agent: Agent, random_string: str): + agent.config.shell_command_control = sut.ALLOWLIST_CONTROL + agent.config.shell_allowlist = ["cat"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) + assert "Error:" in result and "not allowed" in result + + +def test_execute_shell_allowlist_should_allow(agent: Agent, random_string: str): + agent.config.shell_command_control = sut.ALLOWLIST_CONTROL + agent.config.shell_allowlist = ["echo"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) + assert "Hello" in result and random_string in result + assert "Error" not in result diff --git a/tests/integration/test_image_gen.py b/tests/integration/test_image_gen.py index 0156c9e5bdc..a606d8da2b9 100644 --- a/tests/integration/test_image_gen.py +++ b/tests/integration/test_image_gen.py @@ -6,6 +6,7 @@ import pytest from PIL import Image +from autogpt.agent.agent import Agent from autogpt.commands.image_gen import generate_image, generate_image_with_sd_webui from tests.utils import requires_api_key @@ -18,10 +19,10 @@ def image_size(request): @requires_api_key("OPENAI_API_KEY") @pytest.mark.vcr -def test_dalle(config, workspace, image_size, patched_api_requestor): +def test_dalle(agent: Agent, workspace, image_size, patched_api_requestor): """Test DALL-E image generation.""" generate_and_validate( - config, + agent, workspace, image_provider="dalle", image_size=image_size, @@ -36,10 +37,10 @@ def test_dalle(config, workspace, image_size, patched_api_requestor): "image_model", ["CompVis/stable-diffusion-v1-4", "stabilityai/stable-diffusion-2-1"], ) -def test_huggingface(config, workspace, image_size, image_model): +def test_huggingface(agent: Agent, workspace, image_size, image_model): """Test HuggingFace image generation.""" generate_and_validate( - config, + agent, workspace, image_provider="huggingface", image_size=image_size, @@ -48,10 +49,10 @@ def test_huggingface(config, workspace, image_size, image_model): @pytest.mark.xfail(reason="SD WebUI call does not work.") -def test_sd_webui(config, workspace, image_size): +def test_sd_webui(agent: Agent, workspace, image_size): """Test SD WebUI image generation.""" generate_and_validate( - config, + agent, workspace, image_provider="sd_webui", image_size=image_size, @@ -59,11 +60,11 @@ def test_sd_webui(config, workspace, image_size): @pytest.mark.xfail(reason="SD WebUI call does not work.") -def test_sd_webui_negative_prompt(config, workspace, image_size): +def test_sd_webui_negative_prompt(agent: Agent, workspace, image_size): gen_image = functools.partial( generate_image_with_sd_webui, prompt="astronaut riding a horse", - config=config, + agent=agent, size=image_size, extra={"seed": 123}, ) @@ -87,7 +88,7 @@ def lst(txt): def 
generate_and_validate( - config, + agent: Agent, workspace, image_size, image_provider, @@ -95,11 +96,11 @@ def generate_and_validate( **kwargs, ): """Generate an image and validate the output.""" - config.image_provider = image_provider - config.huggingface_image_model = hugging_face_image_model + agent.config.image_provider = image_provider + agent.config.huggingface_image_model = hugging_face_image_model prompt = "astronaut riding a horse" - image_path = lst(generate_image(prompt, config, image_size, **kwargs)) + image_path = lst(generate_image(prompt, agent, image_size, **kwargs)) assert image_path.exists() with Image.open(image_path) as img: assert img.size == (image_size, image_size) @@ -120,7 +121,7 @@ def generate_and_validate( ) @pytest.mark.parametrize("delay", [10, 0]) def test_huggingface_fail_request_with_delay( - config, workspace, image_size, image_model, return_text, delay + agent: Agent, workspace, image_size, image_model, return_text, delay ): return_text = return_text.replace("[model]", image_model).replace( "[delay]", str(delay) @@ -138,13 +139,13 @@ def test_huggingface_fail_request_with_delay( mock_post.return_value.ok = False mock_post.return_value.text = return_text - config.image_provider = "huggingface" - config.huggingface_image_model = image_model + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = image_model prompt = "astronaut riding a horse" with patch("time.sleep") as mock_sleep: # Verify request fails. - result = generate_image(prompt, config, image_size) + result = generate_image(prompt, agent, image_size) assert result == "Error creating image." # Verify retry was called with delay if delay is in return_text @@ -154,8 +155,8 @@ def test_huggingface_fail_request_with_delay( mock_sleep.assert_not_called() -def test_huggingface_fail_request_with_delay(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_with_delay(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") @@ -166,10 +167,10 @@ def test_huggingface_fail_request_with_delay(mocker, config): # Mock time.sleep mock_sleep = mocker.patch("time.sleep") - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." @@ -177,8 +178,8 @@ def test_huggingface_fail_request_with_delay(mocker, config): mock_sleep.assert_called_with(0) -def test_huggingface_fail_request_no_delay(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_no_delay(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") @@ -191,10 +192,10 @@ def test_huggingface_fail_request_no_delay(mocker, config): # Mock time.sleep mock_sleep = mocker.patch("time.sleep") - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." 
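Illustrative sketch (not part of the patch): the recurring shape of the migration in this file is that tests which previously took a `config` fixture now take the `agent` fixture, set provider options on `agent.config`, and pass the agent itself to `generate_image`. Assuming the same `agent` fixture and pytest-mock `mocker` used by the surrounding tests, a hypothetical error-path test would look like:

    from autogpt.commands.image_gen import generate_image

    def test_image_gen_error_path_sketch(mocker, agent):  # hypothetical test name
        # Provider settings now live on agent.config instead of a standalone Config
        agent.config.image_provider = "huggingface"
        agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
        agent.config.huggingface_api_token = "1"
        mock_post = mocker.patch("requests.post")  # keep the test offline
        mock_post.return_value.status_code = 200   # 200 but no image bytes returned
        result = generate_image("astronaut riding a horse", agent, 512)
        assert result == "Error creating image."   # failures are reported as a string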
@@ -202,8 +203,8 @@ def test_huggingface_fail_request_no_delay(mocker, config): mock_sleep.assert_not_called() -def test_huggingface_fail_request_bad_json(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_bad_json(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") @@ -214,10 +215,10 @@ def test_huggingface_fail_request_bad_json(mocker, config): # Mock time.sleep mock_sleep = mocker.patch("time.sleep") - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." @@ -225,28 +226,28 @@ def test_huggingface_fail_request_bad_json(mocker, config): mock_sleep.assert_not_called() -def test_huggingface_fail_request_bad_image(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_bad_image(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") mock_post.return_value.status_code = 200 - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." -def test_huggingface_fail_missing_api_token(mocker, config): - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" +def test_huggingface_fail_missing_api_token(mocker, agent: Agent): + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" # Mock requests.post to raise ValueError mock_post = mocker.patch("requests.post", side_effect=ValueError) # Verify request raises an error. 
with pytest.raises(ValueError): - generate_image("astronaut riding a horse", config, 512) + generate_image("astronaut riding a horse", agent, 512) diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py deleted file mode 100644 index d5f62cca5a6..00000000000 --- a/tests/integration/test_plugins.py +++ /dev/null @@ -1,71 +0,0 @@ -import pytest - -from autogpt.config import Config -from autogpt.plugins import scan_plugins - -PLUGINS_TEST_DIR = "tests/unit/data/test_plugins" -PLUGIN_TEST_OPENAI = "https://weathergpt.vercel.app/" - - -@pytest.fixture -def mock_config_denylist_allowlist_check(): - class MockConfig: - """Mock config object for testing the denylist_allowlist_check function""" - - plugins_denylist = ["BadPlugin"] - plugins_allowlist = ["GoodPlugin"] - authorise_key = "y" - exit_key = "n" - - return MockConfig() - - -@pytest.fixture -def config_with_plugins(): - """Mock config object for testing the scan_plugins function""" - # Test that the function returns the correct number of plugins - cfg = Config() - cfg.plugins_dir = PLUGINS_TEST_DIR - cfg.plugins_openai = ["https://weathergpt.vercel.app/"] - return cfg - - -@pytest.fixture -def mock_config_openai_plugin(): - """Mock config object for testing the scan_plugins function""" - - class MockConfig: - """Mock config object for testing the scan_plugins function""" - - plugins_dir = PLUGINS_TEST_DIR - plugins_openai = [PLUGIN_TEST_OPENAI] - plugins_denylist = ["AutoGPTPVicuna"] - plugins_allowlist = [PLUGIN_TEST_OPENAI] - - return MockConfig() - - -def test_scan_plugins_openai(mock_config_openai_plugin): - # Test that the function returns the correct number of plugins - result = scan_plugins(mock_config_openai_plugin, debug=True) - assert len(result) == 1 - - -@pytest.fixture -def mock_config_generic_plugin(): - """Mock config object for testing the scan_plugins function""" - - # Test that the function returns the correct number of plugins - class MockConfig: - plugins_dir = PLUGINS_TEST_DIR - plugins_openai = [] - plugins_denylist = [] - plugins_allowlist = ["AutoGPTPVicuna"] - - return MockConfig() - - -def test_scan_plugins_generic(mock_config_generic_plugin): - # Test that the function returns the correct number of plugins - result = scan_plugins(mock_config_generic_plugin, debug=True) - assert len(result) == 1 diff --git a/tests/integration/test_setup.py b/tests/integration/test_setup.py index 4e2a505d9a5..5217d72abef 100644 --- a/tests/integration/test_setup.py +++ b/tests/integration/test_setup.py @@ -11,7 +11,7 @@ @requires_api_key("OPENAI_API_KEY") def test_generate_aiconfig_automatic_default(patched_api_requestor): user_inputs = [""] - with patch("builtins.input", side_effect=user_inputs): + with patch("autogpt.utils.session.prompt", side_effect=user_inputs): ai_config = prompt_user() assert isinstance(ai_config, AIConfig) @@ -44,7 +44,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor): "", "", ] - with patch("builtins.input", side_effect=user_inputs): + with patch("autogpt.utils.session.prompt", side_effect=user_inputs): ai_config = prompt_user() assert isinstance(ai_config, AIConfig) @@ -65,7 +65,7 @@ def test_prompt_user_manual_mode(patched_api_requestor): "", "", ] - with patch("builtins.input", side_effect=user_inputs): + with patch("autogpt.utils.session.prompt", side_effect=user_inputs): ai_config = prompt_user() assert isinstance(ai_config, AIConfig) diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py index 
2a03a3c047e..baf3653ca7a 100644 --- a/tests/integration/test_web_selenium.py +++ b/tests/integration/test_web_selenium.py @@ -1,14 +1,18 @@ +import pytest from pytest_mock import MockerFixture +from autogpt.agent.agent import Agent from autogpt.commands.web_selenium import browse_website -from autogpt.config import Config +from tests.utils import requires_api_key -def test_browse_website(config: Config, patched_api_requestor: MockerFixture): +@pytest.mark.vcr +@requires_api_key("OPENAI_API_KEY") +def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture): url = "https://barrel-roll.com" question = "How to execute a barrel roll" - response = browse_website(url, question, config) + response = browse_website(url, question, agent) assert "Error" in response # Sanity check that the response is not too long assert len(response) < 200 diff --git a/tests/test_analyze_code.py b/tests/test_analyze_code.py deleted file mode 100644 index 98ab8b724dd..00000000000 --- a/tests/test_analyze_code.py +++ /dev/null @@ -1,74 +0,0 @@ -# Date: 2023-5-13 -# Author: Generated by GoCodeo. -import pytest - -from autogpt.commands.analyze_code import analyze_code -from autogpt.config import Config - - -@pytest.fixture -def mock_call_ai_function(mocker): - return mocker.patch("autogpt.commands.analyze_code.call_ai_function") - - -class TestAnalyzeCode: - def test_positive_analyze_code(self, mock_call_ai_function): - # Positive Test - mock_call_ai_function.return_value = ["Suggestion 1", "Suggestion 2"] - code = "def example_function():\n pass" - config = Config() - result = analyze_code(code, config) - assert result == ["Suggestion 1", "Suggestion 2"] - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) - - def test_negative_analyze_code( - self, - mock_call_ai_function, - config: Config, - ): - # Negative Test - mock_call_ai_function.return_value = [] - code = "def example_function():\n pass" - result = analyze_code(code, config) - assert result == [] - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) - - def test_error_analyze_code(self, mock_call_ai_function, config: Config): - # Error Test - mock_call_ai_function.side_effect = Exception("Error occurred") - code = "def example_function():\n pass" - with pytest.raises(Exception): - result = analyze_code(code, config) - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) - - def test_edge_analyze_code_empty_code( - self, - mock_call_ai_function, - config: Config, - ): - # Edge Test - mock_call_ai_function.return_value = ["Suggestion 1", "Suggestion 2"] - code = "" - result = analyze_code(code, config) - assert result == ["Suggestion 1", "Suggestion 2"] - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) diff --git a/tests/test_audio_text_read_audio.py b/tests/test_audio_text_read_audio.py deleted file mode 100644 index 4385da321cc..00000000000 --- a/tests/test_audio_text_read_audio.py +++ /dev/null @@ -1,56 +0,0 @@ -# Date: 2023-5-13 -# 
Author: Generated by GoCodeo. -import json -from unittest.mock import MagicMock, patch - -import pytest - -from autogpt.commands.audio_text import read_audio - - -class TestReadAudio: - @patch("requests.post") - def test_positive_read_audio(self, mock_post, config): - # Positive Test - audio_data = b"test_audio_data" - mock_response = MagicMock() - mock_response.content.decode.return_value = json.dumps( - {"text": "Hello, world!"} - ) - mock_post.return_value = mock_response - - config.huggingface_api_token = "testing-token" - result = read_audio(audio_data, config) - assert result == "The audio says: Hello, world!" - mock_post.assert_called_once_with( - f"https://api-inference.huggingface.co/models/{config.huggingface_audio_to_text_model}", - headers={"Authorization": f"Bearer {config.huggingface_api_token}"}, - data=audio_data, - ) - - @patch("requests.post") - def test_negative_read_audio(self, mock_post, config): - # Negative Test - audio_data = b"test_audio_data" - mock_response = MagicMock() - mock_response.content.decode.return_value = json.dumps({"text": ""}) - mock_post.return_value = mock_response - config.huggingface_api_token = "testing-token" - result = read_audio(audio_data, config) - assert result == "The audio says: " - mock_post.assert_called_once_with( - f"https://api-inference.huggingface.co/models/{config.huggingface_audio_to_text_model}", - headers={"Authorization": f"Bearer {config.huggingface_api_token}"}, - data=audio_data, - ) - - def test_error_read_audio(self, config): - # Error Test - config.huggingface_api_token = None - with pytest.raises(ValueError): - read_audio(b"test_audio_data", config) - - def test_edge_read_audio_empty_audio(self, config): - # Edge Test - with pytest.raises(ValueError): - read_audio(b"", config) diff --git a/tests/test_audio_text_read_audio_from_file.py b/tests/test_audio_text_read_audio_from_file.py deleted file mode 100644 index c8d66a06085..00000000000 --- a/tests/test_audio_text_read_audio_from_file.py +++ /dev/null @@ -1,55 +0,0 @@ -# Date: 2023-5-13 -# Author: Generated by GoCodeo. - - -from unittest.mock import mock_open, patch - -import pytest - -from autogpt.commands.audio_text import read_audio_from_file -from autogpt.config import Config - - -@pytest.fixture -def mock_read_audio(mocker): - return mocker.patch("autogpt.commands.audio_text.read_audio") - - -class TestReadAudioFromFile: - def test_positive_read_audio_from_file(self, mock_read_audio): - # Positive test - mock_read_audio.return_value = "This is a sample text." - mock_file_data = b"Audio data" - m = mock_open(read_data=mock_file_data) - - with patch("builtins.open", m): - result = read_audio_from_file("test_audio.wav", Config()) - assert result == "This is a sample text." - m.assert_called_once_with("test_audio.wav", "rb") - - def test_negative_read_audio_from_file(self, mock_read_audio): - # Negative test - mock_read_audio.return_value = "This is a sample text." - mock_file_data = b"Audio data" - m = mock_open(read_data=mock_file_data) - - with patch("builtins.open", m): - result = read_audio_from_file("test_audio.wav", Config()) - assert result != "Incorrect text." 
- m.assert_called_once_with("test_audio.wav", "rb") - - def test_error_read_audio_from_file(self): - # Error test - with pytest.raises(FileNotFoundError): - read_audio_from_file("non_existent_file.wav", Config()) - - def test_edge_empty_audio_file(self, mock_read_audio): - # Edge test - mock_read_audio.return_value = "" - mock_file_data = b"" - m = mock_open(read_data=mock_file_data) - - with patch("builtins.open", m): - result = read_audio_from_file("empty_audio.wav", Config()) - assert result == "" - m.assert_called_once_with("empty_audio.wav", "rb") diff --git a/tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py b/tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py new file mode 100644 index 00000000000..f915553ccd1 --- /dev/null +++ b/tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py @@ -0,0 +1,274 @@ +"""This is the Test plugin for Auto-GPT.""" +from typing import Any, Dict, List, Optional, Tuple, TypeVar + +from auto_gpt_plugin_template import AutoGPTPluginTemplate + +PromptGenerator = TypeVar("PromptGenerator") + + +class AutoGPTGuanaco(AutoGPTPluginTemplate): + """ + This is plugin for Auto-GPT. + """ + + def __init__(self): + super().__init__() + self._name = "Auto-GPT-Guanaco" + self._version = "0.1.0" + self._description = "This is a Guanaco local model plugin." + + def can_handle_on_response(self) -> bool: + """This method is called to check that the plugin can + handle the on_response method. + + Returns: + bool: True if the plugin can handle the on_response method.""" + return False + + def on_response(self, response: str, *args, **kwargs) -> str: + """This method is called when a response is received from the model.""" + if len(response): + print("OMG OMG It's Alive!") + else: + print("Is it alive?") + + def can_handle_post_prompt(self) -> bool: + """This method is called to check that the plugin can + handle the post_prompt method. + + Returns: + bool: True if the plugin can handle the post_prompt method.""" + return False + + def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator: + """This method is called just after the generate_prompt is called, + but actually before the prompt is generated. + + Args: + prompt (PromptGenerator): The prompt generator. + + Returns: + PromptGenerator: The prompt generator. + """ + + def can_handle_on_planning(self) -> bool: + """This method is called to check that the plugin can + handle the on_planning method. + + Returns: + bool: True if the plugin can handle the on_planning method.""" + return False + + def on_planning( + self, prompt: PromptGenerator, messages: List[str] + ) -> Optional[str]: + """This method is called before the planning chat completeion is done. + + Args: + prompt (PromptGenerator): The prompt generator. + messages (List[str]): The list of messages. + """ + + def can_handle_post_planning(self) -> bool: + """This method is called to check that the plugin can + handle the post_planning method. + + Returns: + bool: True if the plugin can handle the post_planning method.""" + return False + + def post_planning(self, response: str) -> str: + """This method is called after the planning chat completeion is done. + + Args: + response (str): The response. + + Returns: + str: The resulting response. + """ + + def can_handle_pre_instruction(self) -> bool: + """This method is called to check that the plugin can + handle the pre_instruction method. 
+ + Returns: + bool: True if the plugin can handle the pre_instruction method.""" + return False + + def pre_instruction(self, messages: List[str]) -> List[str]: + """This method is called before the instruction chat is done. + + Args: + messages (List[str]): The list of context messages. + + Returns: + List[str]: The resulting list of messages. + """ + + def can_handle_on_instruction(self) -> bool: + """This method is called to check that the plugin can + handle the on_instruction method. + + Returns: + bool: True if the plugin can handle the on_instruction method.""" + return False + + def on_instruction(self, messages: List[str]) -> Optional[str]: + """This method is called when the instruction chat is done. + + Args: + messages (List[str]): The list of context messages. + + Returns: + Optional[str]: The resulting message. + """ + + def can_handle_post_instruction(self) -> bool: + """This method is called to check that the plugin can + handle the post_instruction method. + + Returns: + bool: True if the plugin can handle the post_instruction method.""" + return False + + def post_instruction(self, response: str) -> str: + """This method is called after the instruction chat is done. + + Args: + response (str): The response. + + Returns: + str: The resulting response. + """ + + def can_handle_pre_command(self) -> bool: + """This method is called to check that the plugin can + handle the pre_command method. + + Returns: + bool: True if the plugin can handle the pre_command method.""" + return False + + def pre_command( + self, command_name: str, arguments: Dict[str, Any] + ) -> Tuple[str, Dict[str, Any]]: + """This method is called before the command is executed. + + Args: + command_name (str): The command name. + arguments (Dict[str, Any]): The arguments. + + Returns: + Tuple[str, Dict[str, Any]]: The command name and the arguments. + """ + + def can_handle_post_command(self) -> bool: + """This method is called to check that the plugin can + handle the post_command method. + + Returns: + bool: True if the plugin can handle the post_command method.""" + return False + + def post_command(self, command_name: str, response: str) -> str: + """This method is called after the command is executed. + + Args: + command_name (str): The command name. + response (str): The response. + + Returns: + str: The resulting response. + """ + + def can_handle_chat_completion( + self, + messages: list[Dict[Any, Any]], + model: str, + temperature: float, + max_tokens: int, + ) -> bool: + """This method is called to check that the plugin can + handle the chat_completion method. + + Args: + messages (Dict[Any, Any]): The messages. + model (str): The model name. + temperature (float): The temperature. + max_tokens (int): The max tokens. + + Returns: + bool: True if the plugin can handle the chat_completion method.""" + return False + + def handle_chat_completion( + self, + messages: list[Dict[Any, Any]], + model: str, + temperature: float, + max_tokens: int, + ) -> str: + """This method is called when the chat completion is done. + + Args: + messages (Dict[Any, Any]): The messages. + model (str): The model name. + temperature (float): The temperature. + max_tokens (int): The max tokens. + + Returns: + str: The resulting response. + """ + + def can_handle_text_embedding(self, text: str) -> bool: + """This method is called to check that the plugin can + handle the text_embedding method. + Args: + text (str): The text to be convert to embedding. 
+ Returns: + bool: True if the plugin can handle the text_embedding method.""" + return False + + def handle_text_embedding(self, text: str) -> list: + """This method is called when the chat completion is done. + Args: + text (str): The text to be convert to embedding. + Returns: + list: The text embedding. + """ + + def can_handle_user_input(self, user_input: str) -> bool: + """This method is called to check that the plugin can + handle the user_input method. + + Args: + user_input (str): The user input. + + Returns: + bool: True if the plugin can handle the user_input method.""" + return False + + def user_input(self, user_input: str) -> str: + """This method is called to request user input to the user. + + Args: + user_input (str): The question or prompt to ask the user. + + Returns: + str: The user input. + """ + + def can_handle_report(self) -> bool: + """This method is called to check that the plugin can + handle the report method. + + Returns: + bool: True if the plugin can handle the report method.""" + return False + + def report(self, message: str) -> None: + """This method is called to report a message to the user. + + Args: + message (str): The message to report. + """ diff --git a/tests/test_agent.py b/tests/unit/test_agent.py similarity index 66% rename from tests/test_agent.py rename to tests/unit/test_agent.py index 4f05e36b126..3fb896bad2e 100644 --- a/tests/test_agent.py +++ b/tests/unit/test_agent.py @@ -4,28 +4,30 @@ from autogpt.agent import Agent from autogpt.config import AIConfig +from autogpt.config.config import Config @pytest.fixture -def agent(): +def agent(config: Config): ai_name = "Test AI" memory = MagicMock() next_action_count = 0 command_registry = MagicMock() - config = AIConfig() + ai_config = AIConfig(ai_name=ai_name) system_prompt = "System prompt" triggering_prompt = "Triggering prompt" workspace_directory = "workspace_directory" agent = Agent( - ai_name, - memory, - next_action_count, - command_registry, - config, - system_prompt, - triggering_prompt, - workspace_directory, + ai_name=ai_name, + memory=memory, + next_action_count=next_action_count, + command_registry=command_registry, + ai_config=ai_config, + config=config, + system_prompt=system_prompt, + triggering_prompt=triggering_prompt, + workspace_directory=workspace_directory, ) return agent @@ -36,7 +38,7 @@ def test_agent_initialization(agent: Agent): assert agent.history.messages == [] assert agent.next_action_count == 0 assert agent.command_registry == agent.command_registry - assert agent.config == agent.config + assert agent.ai_config == agent.ai_config assert agent.system_prompt == "System prompt" assert agent.triggering_prompt == "Triggering prompt" diff --git a/tests/test_agent_manager.py b/tests/unit/test_agent_manager.py similarity index 100% rename from tests/test_agent_manager.py rename to tests/unit/test_agent_manager.py diff --git a/tests/test_ai_config.py b/tests/unit/test_ai_config.py similarity index 100% rename from tests/test_ai_config.py rename to tests/unit/test_ai_config.py diff --git a/tests/test_api_manager.py b/tests/unit/test_api_manager.py similarity index 70% rename from tests/test_api_manager.py rename to tests/unit/test_api_manager.py index 9585fba7408..e259f56adcf 100644 --- a/tests/test_api_manager.py +++ b/tests/unit/test_api_manager.py @@ -1,8 +1,9 @@ from unittest.mock import MagicMock, patch import pytest +from pytest_mock import MockerFixture -from autogpt.llm.api_manager import COSTS, ApiManager +from autogpt.llm.api_manager import OPEN_AI_MODELS, 
ApiManager api_manager = ApiManager() @@ -14,16 +15,17 @@ def reset_api_manager(): @pytest.fixture(autouse=True) -def mock_costs(): - with patch.dict( - COSTS, - { - "gpt-3.5-turbo": {"prompt": 0.002, "completion": 0.002}, - "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0}, - }, - clear=True, - ): - yield +def mock_costs(mocker: MockerFixture): + mocker.patch.multiple( + OPEN_AI_MODELS["gpt-3.5-turbo"], + prompt_token_cost=0.0013, + completion_token_cost=0.0025, + ) + mocker.patch.multiple( + OPEN_AI_MODELS["text-embedding-ada-002"], + prompt_token_cost=0.0004, + ) + yield class TestApiManager: @@ -87,15 +89,15 @@ def test_create_chat_completion_valid_inputs(): assert api_manager.get_total_prompt_tokens() == 10 assert api_manager.get_total_completion_tokens() == 20 - assert api_manager.get_total_cost() == (10 * 0.002 + 20 * 0.002) / 1000 + assert api_manager.get_total_cost() == (10 * 0.0013 + 20 * 0.0025) / 1000 def test_getter_methods(self): """Test the getter methods for total tokens, cost, and budget.""" - api_manager.update_cost(60, 120, "gpt-3.5-turbo") + api_manager.update_cost(600, 1200, "gpt-3.5-turbo") api_manager.set_total_budget(10.0) - assert api_manager.get_total_prompt_tokens() == 60 - assert api_manager.get_total_completion_tokens() == 120 - assert api_manager.get_total_cost() == (60 * 0.002 + 120 * 0.002) / 1000 + assert api_manager.get_total_prompt_tokens() == 600 + assert api_manager.get_total_completion_tokens() == 1200 + assert api_manager.get_total_cost() == (600 * 0.0013 + 1200 * 0.0025) / 1000 assert api_manager.get_total_budget() == 10.0 @staticmethod @@ -107,7 +109,7 @@ def test_set_total_budget(): assert api_manager.get_total_budget() == total_budget @staticmethod - def test_update_cost(): + def test_update_cost_completion_model(): """Test if updating the cost works correctly.""" prompt_tokens = 50 completion_tokens = 100 @@ -115,9 +117,24 @@ def test_update_cost(): api_manager.update_cost(prompt_tokens, completion_tokens, model) - assert api_manager.get_total_prompt_tokens() == 50 - assert api_manager.get_total_completion_tokens() == 100 - assert api_manager.get_total_cost() == (50 * 0.002 + 100 * 0.002) / 1000 + assert api_manager.get_total_prompt_tokens() == prompt_tokens + assert api_manager.get_total_completion_tokens() == completion_tokens + assert ( + api_manager.get_total_cost() + == (prompt_tokens * 0.0013 + completion_tokens * 0.0025) / 1000 + ) + + @staticmethod + def test_update_cost_embedding_model(): + """Test if updating the cost works correctly.""" + prompt_tokens = 1337 + model = "text-embedding-ada-002" + + api_manager.update_cost(prompt_tokens, 0, model) + + assert api_manager.get_total_prompt_tokens() == prompt_tokens + assert api_manager.get_total_completion_tokens() == 0 + assert api_manager.get_total_cost() == (prompt_tokens * 0.0004) / 1000 @staticmethod def test_get_models(): diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 2d1e8f90320..5975e086e81 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -3,6 +3,7 @@ # Dependencies: # pip install pytest-mock +from autogpt.agent.agent import Agent from autogpt.commands.web_requests import scrape_links """ @@ -42,14 +43,14 @@ class TestScrapeLinks: provided with a valid url that returns a webpage with hyperlinks. 
""" - def test_valid_url_with_hyperlinks(self, config): + def test_valid_url_with_hyperlinks(self, agent: Agent): url = "https://www.google.com" - result = scrape_links(url, config=config) + result = scrape_links(url, agent=agent) assert len(result) > 0 assert isinstance(result, list) assert isinstance(result[0], str) - def test_valid_url(self, mocker, config): + def test_valid_url(self, mocker, agent: Agent): """Test that the function returns correctly formatted hyperlinks when given a valid url.""" # Mock the requests.get() function to return a response with sample HTML containing hyperlinks mock_response = mocker.Mock() @@ -60,12 +61,12 @@ def test_valid_url(self, mocker, config): mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL - result = scrape_links("https://www.example.com", config) + result = scrape_links("https://www.example.com", agent) # Assert that the function returns correctly formatted hyperlinks assert result == ["Google (https://www.google.com)"] - def test_invalid_url(self, mocker, config): + def test_invalid_url(self, mocker, agent: Agent): """Test that the function returns "error" when given an invalid url.""" # Mock the requests.get() function to return an HTTP error response mock_response = mocker.Mock() @@ -73,12 +74,12 @@ def test_invalid_url(self, mocker, config): mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with an invalid URL - result = scrape_links("https://www.invalidurl.com", config) + result = scrape_links("https://www.invalidurl.com", agent) # Assert that the function returns "error" assert "Error:" in result - def test_no_hyperlinks(self, mocker, config): + def test_no_hyperlinks(self, mocker, agent: Agent): """Test that the function returns an empty list when the html contains no hyperlinks.""" # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks mock_response = mocker.Mock() @@ -87,12 +88,12 @@ def test_no_hyperlinks(self, mocker, config): mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL containing no hyperlinks - result = scrape_links("https://www.example.com", config) + result = scrape_links("https://www.example.com", agent) # Assert that the function returns an empty list assert result == [] - def test_scrape_links_with_few_hyperlinks(self, mocker, config): + def test_scrape_links_with_few_hyperlinks(self, mocker, agent: Agent): """Test that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks.""" mock_response = mocker.Mock() mock_response.status_code = 200 @@ -108,7 +109,7 @@ def test_scrape_links_with_few_hyperlinks(self, mocker, config): mocker.patch("requests.Session.get", return_value=mock_response) # Call the function being tested - result = scrape_links("https://www.example.com", config) + result = scrape_links("https://www.example.com", agent) # Assert that the function returns a list of formatted hyperlinks assert isinstance(result, list) diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 3d916bc73dd..23a80c545ff 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -3,6 +3,7 @@ import pytest import requests +from autogpt.agent.agent import Agent from autogpt.commands.web_requests import scrape_text """ @@ -42,7 +43,7 @@ class TestScrapeText: - def test_scrape_text_with_valid_url(self, mocker, config): + def 
test_scrape_text_with_valid_url(self, mocker, agent: Agent): """Tests that scrape_text() returns the expected text when given a valid URL.""" # Mock the requests.get() method to return a response with expected text expected_text = "This is some sample text" @@ -57,14 +58,14 @@ def test_scrape_text_with_valid_url(self, mocker, config): # Call the function with a valid URL and assert that it returns the # expected text url = "http://www.example.com" - assert scrape_text(url, config) == expected_text + assert scrape_text(url, agent) == expected_text - def test_invalid_url(self, config): + def test_invalid_url(self, agent: Agent): """Tests that an error is raised when an invalid url is provided.""" url = "invalidurl.com" - pytest.raises(ValueError, scrape_text, url, config) + pytest.raises(ValueError, scrape_text, url, agent) - def test_unreachable_url(self, mocker, config): + def test_unreachable_url(self, mocker, agent: Agent): """Test that scrape_text returns an error message when an invalid or unreachable url is provided.""" # Mock the requests.get() method to raise an exception mocker.patch( @@ -74,10 +75,10 @@ def test_unreachable_url(self, mocker, config): # Call the function with an invalid URL and assert that it returns an error # message url = "http://thiswebsitedoesnotexist.net/" - error_message = scrape_text(url, config) + error_message = scrape_text(url, agent) assert "Error:" in error_message - def test_no_text(self, mocker, config): + def test_no_text(self, mocker, agent: Agent): """Test that scrape_text returns an empty string when the html page contains no text to be scraped.""" # Mock the requests.get() method to return a response with no text mock_response = mocker.Mock() @@ -87,20 +88,20 @@ def test_no_text(self, mocker, config): # Call the function with a valid URL and assert that it returns an empty string url = "http://www.example.com" - assert scrape_text(url, config) == "" + assert scrape_text(url, agent) == "" - def test_http_error(self, mocker, config): + def test_http_error(self, mocker, agent: Agent): """Test that scrape_text returns an error message when the response status code is an http error (>=400).""" # Mock the requests.get() method to return a response with a 404 status code mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404)) # Call the function with a URL - result = scrape_text("https://www.example.com", config) + result = scrape_text("https://www.example.com", agent) # Check that the function returns an error message assert result == "Error: HTTP 404 error" - def test_scrape_text_with_html_tags(self, mocker, config): + def test_scrape_text_with_html_tags(self, mocker, agent: Agent): """Test that scrape_text() properly handles HTML tags.""" # Create a mock response object with HTML containing tags html = "

<html><body><p>This is <b>bold</b> text.</p></body></html>
" @@ -110,7 +111,7 @@ def test_scrape_text_with_html_tags(self, mocker, config): mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL - result = scrape_text("https://www.example.com", config) + result = scrape_text("https://www.example.com", agent) # Check that the function properly handles HTML tags assert result == "This is bold text." diff --git a/tests/test_commands.py b/tests/unit/test_commands.py similarity index 100% rename from tests/test_commands.py rename to tests/unit/test_commands.py diff --git a/tests/test_config.py b/tests/unit/test_config.py similarity index 86% rename from tests/test_config.py rename to tests/unit/test_config.py index eb6946c91d2..9a95cef1a83 100644 --- a/tests/test_config.py +++ b/tests/unit/test_config.py @@ -20,9 +20,7 @@ def test_initial_values(config: Config): assert config.continuous_mode == False assert config.speak_mode == False assert config.fast_llm_model == "gpt-3.5-turbo" - assert config.smart_llm_model == "gpt-4" - assert config.fast_token_limit == 4000 - assert config.smart_token_limit == 8000 + assert config.smart_llm_model == "gpt-3.5-turbo" def test_set_continuous_mode(config: Config): @@ -81,34 +79,6 @@ def test_set_smart_llm_model(config: Config): config.set_smart_llm_model(smart_llm_model) -def test_set_fast_token_limit(config: Config): - """ - Test if the set_fast_token_limit() method updates the fast_token_limit attribute. - """ - # Store token limit to reset it after the test - fast_token_limit = config.fast_token_limit - - config.set_fast_token_limit(5000) - assert config.fast_token_limit == 5000 - - # Reset token limit - config.set_fast_token_limit(fast_token_limit) - - -def test_set_smart_token_limit(config: Config): - """ - Test if the set_smart_token_limit() method updates the smart_token_limit attribute. - """ - # Store token limit to reset it after the test - smart_token_limit = config.smart_token_limit - - config.set_smart_token_limit(9000) - assert config.smart_token_limit == 9000 - - # Reset token limit - config.set_smart_token_limit(smart_token_limit) - - def test_set_debug_mode(config: Config): """ Test if the set_debug_mode() method updates the debug_mode attribute. 
diff --git a/tests/unit/test_execute_command.py b/tests/unit/test_execute_command.py new file mode 100644 index 00000000000..fb3f043a902 --- /dev/null +++ b/tests/unit/test_execute_command.py @@ -0,0 +1,24 @@ +from autogpt.agent import Agent +from autogpt.app import execute_command + + +def check_plan(): + return "hi" + + +def test_execute_command_plugin(agent: Agent): + """Test that executing a command that came from a plugin works as expected""" + agent.ai_config.prompt_generator.add_command( + "check_plan", + "Read the plan.md with the next goals to achieve", + {}, + check_plan, + ) + command_name = "check_plan" + arguments = {} + command_result = execute_command( + command_name=command_name, + arguments=arguments, + agent=agent, + ) + assert command_result == "hi" diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index 35c77a15c35..27af9373749 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -12,7 +12,7 @@ from pytest_mock import MockerFixture import autogpt.commands.file_operations as file_ops -from autogpt.config import Config +from autogpt.agent.agent import Agent from autogpt.memory.vector.memory_item import MemoryItem from autogpt.memory.vector.utils import Embedding from autogpt.utils import readable_file_size @@ -42,7 +42,7 @@ def mock_MemoryItem_from_text(mocker: MockerFixture, mock_embedding: Embedding): @pytest.fixture() -def test_file_path(config, workspace: Workspace): +def test_file_path(workspace: Workspace): return workspace.get_path("test_file.txt") @@ -55,22 +55,22 @@ def test_file(test_file_path: Path): @pytest.fixture() -def test_file_with_content_path(test_file: TextIOWrapper, file_content, config): +def test_file_with_content_path(test_file: TextIOWrapper, file_content, agent: Agent): test_file.write(file_content) test_file.close() file_ops.log_operation( - "write", test_file.name, config, file_ops.text_checksum(file_content) + "write", test_file.name, agent, file_ops.text_checksum(file_content) ) return Path(test_file.name) @pytest.fixture() -def test_directory(config, workspace: Workspace): +def test_directory(workspace: Workspace): return workspace.get_path("test_directory") @pytest.fixture() -def test_nested_file(config, workspace: Workspace): +def test_nested_file(workspace: Workspace): return workspace.get_path("nested/test_file.txt") @@ -117,7 +117,7 @@ def test_file_operations_state(test_file: TextIOWrapper): assert file_ops.file_operations_state(test_file.name) == expected_state -def test_is_duplicate_operation(config: Config, mocker: MockerFixture): +def test_is_duplicate_operation(agent: Agent, mocker: MockerFixture): # Prepare a fake state dictionary for the function to use state = { "path/to/file1.txt": "checksum1", @@ -128,42 +128,48 @@ def test_is_duplicate_operation(config: Config, mocker: MockerFixture): # Test cases with write operations assert ( file_ops.is_duplicate_operation( - "write", "path/to/file1.txt", config, "checksum1" + "write", "path/to/file1.txt", agent.config, "checksum1" ) is True ) assert ( file_ops.is_duplicate_operation( - "write", "path/to/file1.txt", config, "checksum2" + "write", "path/to/file1.txt", agent.config, "checksum2" ) is False ) assert ( file_ops.is_duplicate_operation( - "write", "path/to/file3.txt", config, "checksum3" + "write", "path/to/file3.txt", agent.config, "checksum3" ) is False ) # Test cases with append operations assert ( file_ops.is_duplicate_operation( - "append", "path/to/file1.txt", config, "checksum1" + "append", 
"path/to/file1.txt", agent.config, "checksum1" ) is False ) # Test cases with delete operations assert ( - file_ops.is_duplicate_operation("delete", "path/to/file1.txt", config) is False + file_ops.is_duplicate_operation( + "delete", "path/to/file1.txt", config=agent.config + ) + is False ) assert ( - file_ops.is_duplicate_operation("delete", "path/to/file3.txt", config) is True + file_ops.is_duplicate_operation( + "delete", "path/to/file3.txt", config=agent.config + ) + is True ) # Test logging a file operation -def test_log_operation(config: Config): - file_ops.log_operation("log_test", "path/to/test", config) - with open(config.file_logger_path, "r", encoding="utf-8") as f: +def test_log_operation(agent: Agent): + file_ops.log_operation("log_test", "path/to/test", agent=agent) + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: content = f.read() assert f"log_test: path/to/test\n" in content @@ -175,104 +181,120 @@ def test_text_checksum(file_content: str): assert checksum != different_checksum -def test_log_operation_with_checksum(config: Config): - file_ops.log_operation("log_test", "path/to/test", config, checksum="ABCDEF") - with open(config.file_logger_path, "r", encoding="utf-8") as f: +def test_log_operation_with_checksum(agent: Agent): + file_ops.log_operation("log_test", "path/to/test", agent=agent, checksum="ABCDEF") + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: content = f.read() assert f"log_test: path/to/test #ABCDEF\n" in content -@pytest.mark.parametrize( - "max_length, overlap, content, expected", - [ - ( - 4, - 1, - "abcdefghij", - ["abcd", "defg", "ghij"], - ), - ( - 4, - 0, - "abcdefghijkl", - ["abcd", "efgh", "ijkl"], - ), - ( - 4, - 0, - "abcdefghijklm", - ["abcd", "efgh", "ijkl", "m"], - ), - ( - 4, - 0, - "abcdefghijk", - ["abcd", "efgh", "ijk"], - ), - ], -) -# Test splitting a file into chunks -def test_split_file(max_length, overlap, content, expected): - assert ( - list(file_ops.split_file(content, max_length=max_length, overlap=overlap)) - == expected - ) - - def test_read_file( mock_MemoryItem_from_text, test_file_with_content_path: Path, file_content, - config: Config, + agent: Agent, ): - content = file_ops.read_file(test_file_with_content_path, config) + content = file_ops.read_file(test_file_with_content_path, agent=agent) assert content.replace("\r", "") == file_content -def test_write_to_file(test_file_path: Path, config): +def test_read_file_not_found(agent: Agent): + filename = "does_not_exist.txt" + content = file_ops.read_file(filename, agent=agent) + assert "Error:" in content and filename in content and "no such file" in content + + +def test_write_to_file(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" - file_ops.write_to_file(str(test_file_path), new_content, config) + file_ops.write_to_file(str(test_file_path), new_content, agent=agent) with open(test_file_path, "r", encoding="utf-8") as f: content = f.read() assert content == new_content -def test_write_file_logs_checksum(test_file_path: Path, config): +def test_write_file_logs_checksum(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" new_checksum = file_ops.text_checksum(new_content) - file_ops.write_to_file(str(test_file_path), new_content, config) - with open(config.file_logger_path, "r", encoding="utf-8") as f: + file_ops.write_to_file(str(test_file_path), new_content, agent=agent) + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: log_entry = f.read() assert log_entry 
== f"write: {test_file_path} #{new_checksum}\n" -def test_write_file_fails_if_content_exists(test_file_path: Path, config): +def test_write_file_fails_if_content_exists(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" file_ops.log_operation( "write", str(test_file_path), - config, + agent=agent, checksum=file_ops.text_checksum(new_content), ) - result = file_ops.write_to_file(str(test_file_path), new_content, config) + result = file_ops.write_to_file(str(test_file_path), new_content, agent=agent) assert result == "Error: File has already been updated." def test_write_file_succeeds_if_content_different( - test_file_with_content_path: Path, config + test_file_with_content_path: Path, agent: Agent ): new_content = "This is different content.\n" result = file_ops.write_to_file( - str(test_file_with_content_path), new_content, config + str(test_file_with_content_path), new_content, agent=agent ) assert result == "File written to successfully." -def test_append_to_file(test_nested_file: Path, config): +# Update file testing +def test_replace_in_file_all_occurrences(test_file, test_file_path, agent: Agent): + old_content = "This is a test file.\n we test file here\na test is needed" + expected_content = ( + "This is a update file.\n we update file here\na update is needed" + ) + test_file.write(old_content) + test_file.close() + file_ops.replace_in_file(test_file_path, "test", "update", agent=agent) + with open(test_file_path) as f: + new_content = f.read() + print(new_content) + print(expected_content) + assert new_content == expected_content + + +def test_replace_in_file_one_occurrence(test_file, test_file_path, agent: Agent): + old_content = "This is a test file.\n we test file here\na test is needed" + expected_content = "This is a test file.\n we update file here\na test is needed" + test_file.write(old_content) + test_file.close() + file_ops.replace_in_file( + test_file_path, "test", "update", agent=agent, occurrence_index=1 + ) + with open(test_file_path) as f: + new_content = f.read() + + assert new_content == expected_content + + +def test_replace_in_file_multiline_old_text(test_file, test_file_path, agent: Agent): + old_content = "This is a multi_line\ntest for testing\nhow well this function\nworks when the input\nis multi-lined" + expected_content = "This is a multi_line\nfile. succeeded test\nis multi-lined" + test_file.write(old_content) + test_file.close() + file_ops.replace_in_file( + test_file_path, + "\ntest for testing\nhow well this function\nworks when the input\n", + "\nfile. 
succeeded test\n", + agent=agent, + ) + with open(test_file_path) as f: + new_content = f.read() + + assert new_content == expected_content + + +def test_append_to_file(test_nested_file: Path, agent: Agent): append_text = "This is appended text.\n" - file_ops.write_to_file(test_nested_file, append_text, config) + file_ops.write_to_file(test_nested_file, append_text, agent=agent) - file_ops.append_to_file(test_nested_file, append_text, config) + file_ops.append_to_file(test_nested_file, append_text, agent=agent) with open(test_nested_file, "r") as f: content_after = f.read() @@ -280,11 +302,13 @@ def test_append_to_file(test_nested_file: Path, config): assert content_after == append_text + append_text -def test_append_to_file_uses_checksum_from_appended_file(test_file_path: Path, config): +def test_append_to_file_uses_checksum_from_appended_file( + test_file_path: Path, agent: Agent +): append_text = "This is appended text.\n" - file_ops.append_to_file(test_file_path, append_text, config) - file_ops.append_to_file(test_file_path, append_text, config) - with open(config.file_logger_path, "r", encoding="utf-8") as f: + file_ops.append_to_file(test_file_path, append_text, agent=agent) + file_ops.append_to_file(test_file_path, append_text, agent=agent) + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: log_contents = f.read() digest = hashlib.md5() @@ -298,25 +322,25 @@ def test_append_to_file_uses_checksum_from_appended_file(test_file_path: Path, c ) -def test_delete_file(test_file_with_content_path: Path, config): - result = file_ops.delete_file(str(test_file_with_content_path), config) +def test_delete_file(test_file_with_content_path: Path, agent: Agent): + result = file_ops.delete_file(str(test_file_with_content_path), agent=agent) assert result == "File deleted successfully." 
assert os.path.exists(test_file_with_content_path) is False -def test_delete_missing_file(config): +def test_delete_missing_file(agent: Agent): filename = "path/to/file/which/does/not/exist" # confuse the log - file_ops.log_operation("write", filename, config, checksum="fake") + file_ops.log_operation("write", filename, agent=agent, checksum="fake") try: os.remove(filename) except FileNotFoundError as err: - assert str(err) in file_ops.delete_file(filename, config) + assert str(err) in file_ops.delete_file(filename, agent=agent) return assert False, f"Failed to test delete_file; {filename} not expected to exist" -def test_list_files(workspace: Workspace, test_directory: Path, config): +def test_list_files(workspace: Workspace, test_directory: Path, agent: Agent): # Case 1: Create files A and B, search for A, and ensure we don't return A and B file_a = workspace.get_path("file_a.txt") file_b = workspace.get_path("file_b.txt") @@ -334,7 +358,7 @@ def test_list_files(workspace: Workspace, test_directory: Path, config): with open(os.path.join(test_directory, file_a.name), "w") as f: f.write("This is file A in the subdirectory.") - files = file_ops.list_files(str(workspace.root), config) + files = file_ops.list_files(str(workspace.root), agent=agent) assert file_a.name in files assert file_b.name in files assert os.path.join(Path(test_directory).name, file_a.name) in files @@ -347,17 +371,17 @@ def test_list_files(workspace: Workspace, test_directory: Path, config): # Case 2: Search for a file that does not exist and make sure we don't throw non_existent_file = "non_existent_file.txt" - files = file_ops.list_files("", config) + files = file_ops.list_files("", agent=agent) assert non_existent_file not in files -def test_download_file(workspace: Workspace, config): +def test_download_file(workspace: Workspace, agent: Agent): url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.2.2.tar.gz" local_name = workspace.get_path("auto-gpt.tar.gz") size = 365023 readable_size = readable_file_size(size) assert ( - file_ops.download_file(url, local_name, config) + file_ops.download_file(url, local_name, agent=agent) == f'Successfully downloaded and locally stored file: "{local_name}"! 
(Size: {readable_size})' ) assert os.path.isfile(local_name) is True @@ -365,10 +389,10 @@ def test_download_file(workspace: Workspace, config): url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.0.0.tar.gz" assert "Got an HTTP Error whilst trying to download file" in file_ops.download_file( - url, local_name, config + url, local_name, agent=agent ) url = "https://thiswebsiteiswrong.hmm/v0.0.0.tar.gz" assert "Failed to establish a new connection:" in file_ops.download_file( - url, local_name, config + url, local_name, agent=agent ) diff --git a/tests/unit/test_get_self_feedback.py b/tests/unit/test_get_self_feedback.py index 64268898f4e..ba3e10fecc0 100644 --- a/tests/unit/test_get_self_feedback.py +++ b/tests/unit/test_get_self_feedback.py @@ -1,12 +1,15 @@ from datetime import datetime +from pytest_mock import MockerFixture + from autogpt.agent.agent import Agent from autogpt.config import AIConfig +from autogpt.config.config import Config from autogpt.llm.chat import create_chat_completion from autogpt.log_cycle.log_cycle import LogCycleHandler -def test_get_self_feedback(mocker): +def test_get_self_feedback(config: Config, mocker: MockerFixture): # Define a sample thoughts dictionary thoughts = { "reasoning": "Sample reasoning.", @@ -32,7 +35,8 @@ def test_get_self_feedback(mocker): agent_mock = mocker.MagicMock(spec=Agent) # Mock the config attribute of the Agent instance - agent_mock.config = AIConfig() + agent_mock.config = config + agent_mock.ai_config = AIConfig() # Mock the log_cycle_handler attribute of the Agent instance agent_mock.log_cycle_handler = LogCycleHandler() diff --git a/tests/integration/test_git_commands.py b/tests/unit/test_git_commands.py similarity index 71% rename from tests/integration/test_git_commands.py rename to tests/unit/test_git_commands.py index 375a9cf45ae..a6defdfc32f 100644 --- a/tests/integration/test_git_commands.py +++ b/tests/unit/test_git_commands.py @@ -2,6 +2,7 @@ from git.exc import GitCommandError from git.repo.base import Repo +from autogpt.agent.agent import Agent from autogpt.commands.git_operations import clone_repository @@ -10,7 +11,7 @@ def mock_clone_from(mocker): return mocker.patch.object(Repo, "clone_from") -def test_clone_auto_gpt_repository(workspace, mock_clone_from, config): +def test_clone_auto_gpt_repository(workspace, mock_clone_from, agent: Agent): mock_clone_from.return_value = None repo = "github.com/Significant-Gravitas/Auto-GPT.git" @@ -20,16 +21,16 @@ def test_clone_auto_gpt_repository(workspace, mock_clone_from, config): expected_output = f"Cloned {url} to {clone_path}" - clone_result = clone_repository(url=url, clone_path=clone_path, config=config) + clone_result = clone_repository(url=url, clone_path=clone_path, agent=agent) assert clone_result == expected_output mock_clone_from.assert_called_once_with( - url=f"{scheme}{config.github_username}:{config.github_api_key}@{repo}", + url=f"{scheme}{agent.config.github_username}:{agent.config.github_api_key}@{repo}", to_path=clone_path, ) -def test_clone_repository_error(workspace, mock_clone_from, config): +def test_clone_repository_error(workspace, mock_clone_from, agent: Agent): url = "https://github.com/this-repository/does-not-exist.git" clone_path = str(workspace.get_path("does-not-exist")) @@ -37,6 +38,6 @@ def test_clone_repository_error(workspace, mock_clone_from, config): "clone", "fatal: repository not found", "" ) - result = clone_repository(url=url, clone_path=clone_path, config=config) + result = clone_repository(url=url, 
clone_path=clone_path, agent=agent) assert "Error: " in result diff --git a/tests/integration/test_google_search.py b/tests/unit/test_google_search.py similarity index 87% rename from tests/integration/test_google_search.py rename to tests/unit/test_google_search.py index e379f78e66f..3f039fdb4a4 100644 --- a/tests/integration/test_google_search.py +++ b/tests/unit/test_google_search.py @@ -3,6 +3,7 @@ import pytest from googleapiclient.errors import HttpError +from autogpt.agent.agent import Agent from autogpt.commands.google_search import ( google_official_search, google_search, @@ -39,13 +40,13 @@ def test_safe_google_results_invalid_input(): ], ) def test_google_search( - query, num_results, expected_output, return_value, mocker, config + query, num_results, expected_output, return_value, mocker, agent: Agent ): mock_ddg = mocker.Mock() mock_ddg.return_value = return_value mocker.patch("autogpt.commands.google_search.DDGS.text", mock_ddg) - actual_output = google_search(query, config, num_results=num_results) + actual_output = google_search(query, agent=agent, num_results=num_results) expected_output = safe_google_results(expected_output) assert actual_output == expected_output @@ -79,10 +80,15 @@ def mock_googleapiclient(mocker): ], ) def test_google_official_search( - query, num_results, expected_output, search_results, mock_googleapiclient, config + query, + num_results, + expected_output, + search_results, + mock_googleapiclient, + agent: Agent, ): mock_googleapiclient.return_value = search_results - actual_output = google_official_search(query, config, num_results=num_results) + actual_output = google_official_search(query, agent=agent, num_results=num_results) assert actual_output == safe_google_results(expected_output) @@ -113,7 +119,7 @@ def test_google_official_search_errors( mock_googleapiclient, http_code, error_msg, - config, + agent: Agent, ): class resp: def __init__(self, _status, _reason): @@ -130,5 +136,5 @@ def __init__(self, _status, _reason): ) mock_googleapiclient.side_effect = error - actual_output = google_official_search(query, config, num_results=num_results) + actual_output = google_official_search(query, agent=agent, num_results=num_results) assert actual_output == safe_google_results(expected_output) diff --git a/tests/unit/test_json_parser.py b/tests/unit/test_json_parser.py deleted file mode 100644 index be5f07339f6..00000000000 --- a/tests/unit/test_json_parser.py +++ /dev/null @@ -1,71 +0,0 @@ -from unittest import TestCase - -from autogpt.json_utils.json_fix_llm import fix_and_parse_json - - -class TestParseJson(TestCase): - def test_valid_json(self): - """Test that a valid JSON string is parsed correctly.""" - json_str = '{"name": "John", "age": 30, "city": "New York"}' - obj = fix_and_parse_json(json_str) - self.assertEqual(obj, {"name": "John", "age": 30, "city": "New York"}) - - def test_invalid_json_minor(self): - """Test that an invalid JSON string can not be fixed without gpt""" - json_str = '{"name": "John", "age": 30, "city": "New York",}' - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) - - def test_invalid_json_major_with_gpt(self): - """Test that an invalid JSON string raises an error when try_to_fix_with_gpt is False""" - json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END' - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) - - def test_invalid_json_major_without_gpt(self): - """Test that a REALLY invalid JSON string raises an error 
when try_to_fix_with_gpt is False""" - json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END' - # Assert that this raises an exception: - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) - - def test_invalid_json_leading_sentence_with_gpt(self): - """Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False""" - json_str = """I suggest we start by browsing the repository to find any issues that we can fix. - -{ - "command": { - "name": "browse_website", - "args":{ - "url": "https://github.com/Torantulino/Auto-GPT" - } - }, - "thoughts": - { - "text": "I suggest we start browsing the repository to find any issues that we can fix.", - "reasoning": "Browsing the repository will give us an idea of the current state of the codebase and identify any issues that we can address to improve the repo.", - "plan": "- Look through the repository to find any issues.\n- Investigate any issues to determine what needs to be fixed\n- Identify possible solutions to fix the issues\n- Open Pull Requests with fixes", - "criticism": "I should be careful while browsing so as not to accidentally introduce any new bugs or issues.", - "speak": "I will start browsing the repository to find any issues we can fix." - } -}""" - good_obj = { - "command": { - "name": "browse_website", - "args": {"url": "https://github.com/Torantulino/Auto-GPT"}, - }, - "thoughts": { - "text": "I suggest we start browsing the repository to find any issues that we can fix.", - "reasoning": "Browsing the repository will give us an idea of the current state of the codebase and identify any issues that we can address to improve the repo.", - "plan": "- Look through the repository to find any issues.\n- Investigate any issues to determine what needs to be fixed\n- Identify possible solutions to fix the issues\n- Open Pull Requests with fixes", - "criticism": "I should be careful while browsing so as not to accidentally introduce any new bugs or issues.", - "speak": "I will start browsing the repository to find any issues we can fix.", - }, - } - - # # Assert that this can be fixed with GPT - # self.assertEqual(fix_and_parse_json(json_str), good_obj) - - # Assert that trying to fix this without GPT raises an exception - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) diff --git a/tests/unit/test_json_utils_llm.py b/tests/unit/test_json_utils_llm.py deleted file mode 100644 index 93e01acb924..00000000000 --- a/tests/unit/test_json_utils_llm.py +++ /dev/null @@ -1,114 +0,0 @@ -# Generated by CodiumAI - -from autogpt.json_utils.json_fix_llm import ( - fix_and_parse_json, - fix_json_using_multiple_techniques, -) - -""" -Code Analysis - -Objective: -- The objective of the function is to fix a given JSON string to make it parseable and fully compliant with two techniques. - -Inputs: -- The function takes in a string called 'assistant_reply', which is the JSON string to be fixed. - -Flow: -- The function first calls the 'fix_and_parse_json' function to parse and print the Assistant response. -- If the parsed JSON is an empty dictionary, the function calls the 'attempt_to_fix_json_by_finding_outermost_brackets' function to fix the JSON string. -- If the parsed JSON is not an empty dictionary, the function returns the parsed JSON. -- If the parsed JSON is an empty dictionary and cannot be fixed, the function logs an error and returns an empty dictionary. 
- -Outputs: -- The main output of the function is a dictionary containing the fixed JSON string. - -Additional aspects: -- The function uses two techniques to fix the JSON string: parsing and finding outermost brackets. -- The function logs an error if the JSON string cannot be fixed and returns an empty dictionary. -- The function uses the 'CFG' object to determine whether to speak the error message or not. -""" - - -class TestFixJsonUsingMultipleTechniques: - # Tests that the function successfully fixes and parses a JSON string that is already compliant with both techniques. - def test_fix_and_parse_json_happy_path(self): - # Happy path test case where the JSON string is already compliant with both techniques - json_string = '{"text": "Hello world", "confidence": 0.9}' - expected_output = {"text": "Hello world", "confidence": 0.9} - assert fix_json_using_multiple_techniques(json_string) == expected_output - - # Tests that the function successfully fixes and parses a JSON string that contains only whitespace characters. - # @requires_api_key("OPEN_API_KEY") - def test_fix_and_parse_json_whitespace(self, mocker): - # Happy path test case where the JSON string contains only whitespace characters - json_string = " \n\t " - - # mock try_ai_fix to avoid calling the AI model: - mocker.patch("autogpt.json_utils.json_fix_llm.try_ai_fix", return_value={}) - - expected_output = {} - assert fix_json_using_multiple_techniques(json_string) == expected_output - - # Tests that the function successfully converts a string with arrays to an array - def test_fix_and_parse_json_array(self): - # Happy path test case where the JSON string contains an array of string - json_string = '[ "Add type hints", "Move docstrings", "Consider using" ]' - expected_output = ["Add type hints", "Move docstrings", "Consider using"] - assert fix_json_using_multiple_techniques(json_string) == expected_output - - # Tests that the function returns an empty dictionary when the JSON string is not parseable and cannot be fixed using either technique. - # @requires_api_key("OPEN_API_KEY") - def test_fix_and_parse_json_can_not(self, mocker): - # Edge case test case where the JSON string is not parseable and cannot be fixed using either technique - json_string = "This is not a JSON string" - - # mock try_ai_fix to avoid calling the AI model: - mocker.patch("autogpt.json_utils.json_fix_llm.try_ai_fix", return_value={}) - - expected_output = {} - - # Use the actual function name in the test - result = fix_json_using_multiple_techniques(json_string) - - assert result == expected_output - - # Tests that the function returns an empty dictionary when the JSON string is empty. - # @requires_api_key("OPEN_API_KEY") - def test_fix_and_parse_json_empty_string(self, mocker): - # Arrange - json_string = "" - - # Act - # mock try_ai_fix to avoid calling the AI model: - mocker.patch("autogpt.json_utils.json_fix_llm.try_ai_fix", return_value={}) - - result = fix_and_parse_json(json_string) - - # Assert - assert result == {} - - # Tests that the function successfully fixes and parses a JSON string that contains escape characters. - def test_fix_and_parse_json_escape_characters(self): - # Arrange - json_string = '{"text": "This is a \\"test\\" string."}' - - # Act - result = fix_json_using_multiple_techniques(json_string) - - # Assert - assert result == {"text": 'This is a "test" string.'} - - # Tests that the function successfully fixes and parses a JSON string that contains nested objects or arrays. 
- def test_fix_and_parse_json_nested_objects(self): - # Arrange - json_string = '{"person": {"name": "John", "age": 30}, "hobbies": ["reading", "swimming"]}' - - # Act - result = fix_json_using_multiple_techniques(json_string) - - # Assert - assert result == { - "person": {"name": "John", "age": 30}, - "hobbies": ["reading", "swimming"], - } diff --git a/tests/unit/test_llm_utils.py b/tests/unit/test_llm_utils.py deleted file mode 100644 index 8c4de115df7..00000000000 --- a/tests/unit/test_llm_utils.py +++ /dev/null @@ -1,128 +0,0 @@ -from unittest.mock import patch - -import pytest -from openai.error import APIError, RateLimitError - -from autogpt.llm import utils as llm_utils - - -@pytest.fixture(params=[RateLimitError, APIError]) -def error(request): - if request.param == APIError: - return request.param("Error", http_status=502) - else: - return request.param("Error") - - -def error_factory(error_instance, error_count, retry_count, warn_user=True): - class RaisesError: - def __init__(self): - self.count = 0 - - @llm_utils.retry_openai_api( - num_retries=retry_count, backoff_base=0.001, warn_user=warn_user - ) - def __call__(self): - self.count += 1 - if self.count <= error_count: - raise error_instance - return self.count - - return RaisesError() - - -def test_retry_open_api_no_error(capsys): - @llm_utils.retry_openai_api() - def f(): - return 1 - - result = f() - assert result == 1 - - output = capsys.readouterr() - assert output.out == "" - assert output.err == "" - - -@pytest.mark.parametrize( - "error_count, retry_count, failure", - [(2, 10, False), (2, 2, False), (10, 2, True), (3, 2, True), (1, 0, True)], - ids=["passing", "passing_edge", "failing", "failing_edge", "failing_no_retries"], -) -def test_retry_open_api_passing(capsys, error, error_count, retry_count, failure): - call_count = min(error_count, retry_count) + 1 - - raises = error_factory(error, error_count, retry_count) - if failure: - with pytest.raises(type(error)): - raises() - else: - result = raises() - assert result == call_count - - assert raises.count == call_count - - output = capsys.readouterr() - - if error_count and retry_count: - if type(error) == RateLimitError: - assert "Reached rate limit, passing..." in output.out - assert "Please double check" in output.out - if type(error) == APIError: - assert "API Bad gateway" in output.out - else: - assert output.out == "" - - -def test_retry_open_api_rate_limit_no_warn(capsys): - error_count = 2 - retry_count = 10 - - raises = error_factory(RateLimitError, error_count, retry_count, warn_user=False) - result = raises() - call_count = min(error_count, retry_count) + 1 - assert result == call_count - assert raises.count == call_count - - output = capsys.readouterr() - - assert "Reached rate limit, passing..." in output.out - assert "Please double check" not in output.out - - -def test_retry_openapi_other_api_error(capsys): - error_count = 2 - retry_count = 10 - - raises = error_factory(APIError("Error", http_status=500), error_count, retry_count) - - with pytest.raises(APIError): - raises() - call_count = 1 - assert raises.count == call_count - - output = capsys.readouterr() - assert output.out == "" - - -def test_check_model(api_manager): - """ - Test if check_model() returns original model when valid. - Test if check_model() returns gpt-3.5-turbo when model is invalid. 
- """ - with patch("openai.Model.list") as mock_list_models: - # Test when correct model is returned - mock_list_models.return_value = {"data": [{"id": "gpt-4"}]} - result = llm_utils.check_model("gpt-4", "smart_llm_model") - assert result == "gpt-4" - - # Reset api manager models - api_manager.models = None - - # Test when incorrect model is returned - mock_list_models.return_value = {"data": [{"id": "gpt-3.5-turbo"}]} - result = llm_utils.check_model("gpt-4", "fast_llm_model") - assert result == "gpt-3.5-turbo" - - # Reset api manager models - api_manager.models = None diff --git a/tests/test_logs.py b/tests/unit/test_logs.py similarity index 100% rename from tests/test_logs.py rename to tests/unit/test_logs.py diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py new file mode 100644 index 00000000000..61a7a6f5d93 --- /dev/null +++ b/tests/unit/test_make_agent.py @@ -0,0 +1,25 @@ +from unittest.mock import MagicMock + +from pytest_mock import MockerFixture + +from autogpt.agent.agent import Agent +from autogpt.app import list_agents, start_agent + + +def test_make_agent(agent: Agent, mocker: MockerFixture) -> None: + """Test that an agent can be created""" + mock = mocker.patch("openai.ChatCompletion.create") + + response = MagicMock() + response.choices[0].message.content = "Test message" + response.usage.prompt_tokens = 1 + response.usage.completion_tokens = 1 + del response.error + + mock.return_value = response + start_agent("Test Agent", "chat", "Hello, how are you?", agent, "gpt-3.5-turbo") + agents = list_agents(agent) + assert "List of agents:\n0: chat" == agents + start_agent("Test Agent 2", "write", "Hello, how are you?", agent, "gpt-3.5-turbo") + agents = list_agents(agent.config) + assert "List of agents:\n0: chat\n1: write" == agents diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py new file mode 100644 index 00000000000..6fdf75e61da --- /dev/null +++ b/tests/unit/test_message_history.py @@ -0,0 +1,145 @@ +import math +import time +from unittest.mock import MagicMock + +import pytest + +from autogpt.agent import Agent +from autogpt.config import AIConfig +from autogpt.config.config import Config +from autogpt.llm.base import ChatSequence, Message +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS +from autogpt.llm.utils import count_string_tokens +from autogpt.memory.message_history import MessageHistory + + +@pytest.fixture +def agent(config: Config): + ai_name = "Test AI" + memory = MagicMock() + next_action_count = 0 + command_registry = MagicMock() + ai_config = AIConfig(ai_name=ai_name) + system_prompt = "System prompt" + triggering_prompt = "Triggering prompt" + workspace_directory = "workspace_directory" + + agent = Agent( + ai_name=ai_name, + memory=memory, + next_action_count=next_action_count, + command_registry=command_registry, + ai_config=ai_config, + config=config, + system_prompt=system_prompt, + triggering_prompt=triggering_prompt, + workspace_directory=workspace_directory, + ) + return agent + + +def test_message_history_batch_summary(mocker, agent): + config = Config() + history = MessageHistory(agent) + model = config.fast_llm_model + message_tlength = 0 + message_count = 0 + + # Setting the mock output and inputs + mock_summary_text = "I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings." 
+ mock_summary = mocker.patch( + "autogpt.memory.message_history.create_chat_completion", + return_value=mock_summary_text, + ) + + system_prompt = 'You are AIJobSearcher, an AI designed to search for job openings for software engineer role\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. Find any job openings for software engineers online\n2. Go through each of the websites and job openings to summarize their requirements and URL, and skip that if you already visit the website\n\nIt takes money to let you run. Your API budget is $5.000\n\nConstraints:\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n3. No user assistance\n4. Exclusively use the commands listed in double quotes e.g. "command name"\n\nCommands:\n1. google_search: Google Search, args: "query": ""\n2. browse_website: Browse Website, args: "url": "", "question": ""\n3. task_complete: Task Complete (Shutdown), args: "reason": ""\n\nResources:\n1. Internet access for searches and information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n2. Constructively self-criticize your big-picture behavior constantly.\n3. Reflect on past decisions and strategies to refine your approach.\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n5. 
Write all code to a file.\n\nYou should only respond in JSON format as described below \nResponse Format: \n{\n "thoughts": {\n "text": "thought",\n "reasoning": "reasoning",\n "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",\n "criticism": "constructive self-criticism",\n "speak": "thoughts summary to say to user"\n },\n "command": {\n "name": "command name",\n "args": {\n "arg name": "value"\n }\n }\n} \nEnsure the response can be parsed by Python json.loads' + message_sequence = ChatSequence.for_model( + model, + [ + Message("system", system_prompt), + Message("system", f"The current time and date is {time.strftime('%c')}"), + ], + ) + insertion_index = len(message_sequence) + + user_input = "Determine which next command to use, and respond using the format specified above:'" + user_input_msg = Message("user", user_input) + history.append(user_input_msg) + + # mock a reponse from AI + assistant_reply = '{\n "thoughts": {\n "text": "I will use the \'google_search\' command to find more websites with job openings for software engineering manager role.",\n "reasoning": "Since the previous website did not provide any relevant information, I will use the \'google_search\' command to find more websites with job openings for software engineer role.",\n "plan": "- Use \'google_search\' command to find more websites with job openings for software engineer role",\n "criticism": "I need to ensure that I am able to extract the relevant information from each website and job opening.",\n "speak": "I will now use the \'google_search\' command to find more websites with job openings for software engineer role."\n },\n "command": {\n "name": "google_search",\n "args": {\n "query": "software engineer job openings"\n }\n }\n}' + msg = Message("assistant", assistant_reply, "ai_response") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + # mock some websites returned from google search command in the past + result = "Command google_search returned: [" + for i in range(50): + result += "http://www.job" + str(i) + ".com," + result += "]" + msg = Message("system", result, "action_result") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + user_input = "Determine which next command to use, and respond using the format specified above:'" + user_input_msg = Message("user", user_input) + history.append(user_input_msg) + + # mock numbers of AI response and action results from browse_website commands in the past, doesn't need the thoughts part, as the summarization code discard them anyway + for i in range(50): + assistant_reply = ( + '{\n "command": {\n "name": "browse_website",\n "args": {\n "url": "https://www.job' + + str(i) + + '.com",\n "question": "software engineer"\n }\n }\n}' + ) + msg = Message("assistant", assistant_reply, "ai_response") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + result = ( + "Command browse_website returned: Answer gathered from website: The text in job" + + str(i) + + " does not provide information on specific job requirements or a job URL.]", + ) + msg = Message("system", result, "action_result") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + user_input = "Determine which next command to use, and respond using the format specified above:'" + user_input_msg = Message("user", 
user_input) + history.append(user_input_msg) + + # only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them + for cycle in reversed(list(history.per_cycle())): + messages_to_add = [msg for msg in cycle if msg is not None] + message_sequence.insert(insertion_index, *messages_to_add) + break + + # count the expected token length of the trimmed message by reducing the token length of messages in the last cycle + for message in messages_to_add: + if message.role != "user": + message_tlength -= count_string_tokens(str(message), config.fast_llm_model) + message_count -= 1 + + # test the main trim_message function + new_summary_message, trimmed_messages = history.trim_messages( + current_message_chain=list(message_sequence), + ) + + expected_call_count = math.ceil( + message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm_model).max_tokens) + ) + # Expecting 2 batches because of over max token + assert mock_summary.call_count == expected_call_count # 2 at the time of writing + # Expecting 100 messages because 50 pairs of ai_response and action_result, based on the range set above + assert len(trimmed_messages) == message_count # 100 at the time of writing + assert new_summary_message == Message( + role="system", + content="This reminds you of these events from your past: \n" + + mock_summary_text, + type=None, + ) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 6aa8dd47caa..80aa1b9dd39 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -1,73 +1,111 @@ -import pytest +import os -from autogpt.plugins import denylist_allowlist_check, inspect_zip_for_modules +import yaml + +from autogpt.config.config import Config +from autogpt.plugins import inspect_zip_for_modules, scan_plugins +from autogpt.plugins.plugin_config import PluginConfig PLUGINS_TEST_DIR = "tests/unit/data/test_plugins" PLUGIN_TEST_ZIP_FILE = "Auto-GPT-Plugin-Test-master.zip" PLUGIN_TEST_INIT_PY = "Auto-GPT-Plugin-Test-master/src/auto_gpt_vicuna/__init__.py" +PLUGIN_TEST_OPENAI = "https://weathergpt.vercel.app/" -def test_inspect_zip_for_modules(): - result = inspect_zip_for_modules(str(f"{PLUGINS_TEST_DIR}/{PLUGIN_TEST_ZIP_FILE}")) - assert result == [PLUGIN_TEST_INIT_PY] - - -@pytest.fixture -def mock_config_denylist_allowlist_check(): - class MockConfig: - """Mock config object for testing the denylist_allowlist_check function""" - - plugins_denylist = ["BadPlugin"] - plugins_allowlist = ["GoodPlugin"] - authorise_key = "y" - exit_key = "n" +def test_scan_plugins_openai(config: Config): + config.plugins_openai = [PLUGIN_TEST_OPENAI] + plugins_config = config.plugins_config + plugins_config.plugins[PLUGIN_TEST_OPENAI] = PluginConfig( + name=PLUGIN_TEST_OPENAI, enabled=True + ) - return MockConfig() + # Test that the function returns the correct number of plugins + result = scan_plugins(config, debug=True) + assert len(result) == 1 -def test_denylist_allowlist_check_denylist( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns False when the plugin is in the denylist - monkeypatch.setattr("builtins.input", lambda _: "y") - assert not denylist_allowlist_check( - "BadPlugin", mock_config_denylist_allowlist_check +def test_scan_plugins_generic(config: Config): + # Test that the function returns the correct number of plugins + plugins_config = config.plugins_config + plugins_config.plugins["auto_gpt_guanaco"] = PluginConfig( + name="auto_gpt_guanaco", enabled=True ) + 
plugins_config.plugins["AutoGPTPVicuna"] = PluginConfig( + name="AutoGPTPVicuna", enabled=True + ) + result = scan_plugins(config, debug=True) + plugin_class_names = [plugin.__class__.__name__ for plugin in result] - -def test_denylist_allowlist_check_allowlist( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns True when the plugin is in the allowlist - monkeypatch.setattr("builtins.input", lambda _: "y") - assert denylist_allowlist_check("GoodPlugin", mock_config_denylist_allowlist_check) + assert len(result) == 2 + assert "AutoGPTGuanaco" in plugin_class_names + assert "AutoGPTPVicuna" in plugin_class_names -def test_denylist_allowlist_check_user_input_yes( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns True when the user inputs "y" - monkeypatch.setattr("builtins.input", lambda _: "y") - assert denylist_allowlist_check( - "UnknownPlugin", mock_config_denylist_allowlist_check +def test_scan_plugins_not_enabled(config: Config): + # Test that the function returns the correct number of plugins + plugins_config = config.plugins_config + plugins_config.plugins["auto_gpt_guanaco"] = PluginConfig( + name="auto_gpt_guanaco", enabled=True + ) + plugins_config.plugins["auto_gpt_vicuna"] = PluginConfig( + name="auto_gptp_vicuna", enabled=False ) + result = scan_plugins(config, debug=True) + plugin_class_names = [plugin.__class__.__name__ for plugin in result] + assert len(result) == 1 + assert "AutoGPTGuanaco" in plugin_class_names + assert "AutoGPTPVicuna" not in plugin_class_names -def test_denylist_allowlist_check_user_input_no( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns False when the user inputs "n" - monkeypatch.setattr("builtins.input", lambda _: "n") - assert not denylist_allowlist_check( - "UnknownPlugin", mock_config_denylist_allowlist_check - ) + +def test_inspect_zip_for_modules(): + result = inspect_zip_for_modules(str(f"{PLUGINS_TEST_DIR}/{PLUGIN_TEST_ZIP_FILE}")) + assert result == [PLUGIN_TEST_INIT_PY] -def test_denylist_allowlist_check_user_input_invalid( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns False when the user inputs an invalid value - monkeypatch.setattr("builtins.input", lambda _: "invalid") - assert not denylist_allowlist_check( - "UnknownPlugin", mock_config_denylist_allowlist_check - ) +def test_create_base_config(config: Config): + """Test the backwards-compatibility shim to convert old plugin allow/deny list to a config file""" + config.plugins_allowlist = ["a", "b"] + config.plugins_denylist = ["c", "d"] + + os.remove(config.plugins_config_file) + plugins_config = config.load_plugins_config() + + # Check the structure of the plugins config data + assert len(plugins_config.plugins) == 4 + assert plugins_config.get("a").enabled + assert plugins_config.get("b").enabled + assert not plugins_config.get("c").enabled + assert not plugins_config.get("d").enabled + + # Check the saved config file + with open(config.plugins_config_file, "r") as saved_config_file: + saved_config = yaml.load(saved_config_file, Loader=yaml.FullLoader) + + assert saved_config == { + "a": {"enabled": True, "config": {}}, + "b": {"enabled": True, "config": {}}, + "c": {"enabled": False, "config": {}}, + "d": {"enabled": False, "config": {}}, + } + + +def test_load_config(config: Config): + """Test that the plugin config is loaded correctly from the plugins_config.yaml file""" + # Create a test config and write it to disk 
+ test_config = { + "a": {"enabled": True, "config": {"api_key": "1234"}}, + "b": {"enabled": False, "config": {}}, + } + with open(config.plugins_config_file, "w+") as f: + f.write(yaml.dump(test_config)) + + # Load the config from disk + plugins_config = config.load_plugins_config() + + # Check that the loaded config is equal to the test config + assert len(plugins_config.plugins) == 2 + assert plugins_config.get("a").enabled + assert plugins_config.get("a").config == {"api_key": "1234"} + assert not plugins_config.get("b").enabled + assert plugins_config.get("b").config == {} diff --git a/tests/test_prompt_config.py b/tests/unit/test_prompt_config.py similarity index 100% rename from tests/test_prompt_config.py rename to tests/unit/test_prompt_config.py diff --git a/tests/test_prompt_generator.py b/tests/unit/test_prompt_generator.py similarity index 100% rename from tests/test_prompt_generator.py rename to tests/unit/test_prompt_generator.py diff --git a/tests/test_text_file_parsers.py b/tests/unit/test_text_file_parsers.py similarity index 100% rename from tests/test_text_file_parsers.py rename to tests/unit/test_text_file_parsers.py diff --git a/tests/unit/test_url_validation.py b/tests/unit/test_url_validation.py index 16eb8cd5008..5d6e8124e91 100644 --- a/tests/unit/test_url_validation.py +++ b/tests/unit/test_url_validation.py @@ -49,25 +49,17 @@ def test_url_validation_succeeds(url): assert dummy_method(url) == url -bad_protocol_data = ( - ("htt://example.com"), - ("httppp://example.com"), - (" https://example.com"), +@pytest.mark.parametrize( + "url,expected_error", + [ + ("htt://example.com", "Invalid URL format"), + ("httppp://example.com", "Invalid URL format"), + (" https://example.com", "Invalid URL format"), + ("http://?query=q", "Missing Scheme or Network location"), + ], ) - - -@pytest.mark.parametrize("url", bad_protocol_data) -def test_url_validation_fails_bad_protocol(url): - with raises(ValueError, match="Invalid URL format"): - dummy_method(url) - - -missing_loc = (("http://?query=q"),) - - -@pytest.mark.parametrize("url", missing_loc) -def test_url_validation_fails_bad_protocol(url): - with raises(ValueError, match="Missing Scheme or Network location"): +def test_url_validation_fails_invalid_url(url, expected_error): + with raises(ValueError, match=expected_error): dummy_method(url) diff --git a/tests/test_utils.py b/tests/unit/test_utils.py similarity index 66% rename from tests/test_utils.py rename to tests/unit/test_utils.py index c0ce28cc1fe..f9a471c25ad 100644 --- a/tests/test_utils.py +++ b/tests/unit/test_utils.py @@ -4,6 +4,7 @@ import pytest import requests +from autogpt.json_utils.utilities import extract_json_from_response, validate_json from autogpt.utils import ( get_bulletin_from_web, get_current_git_branch, @@ -14,6 +15,37 @@ from tests.utils import skip_in_ci +@pytest.fixture +def valid_json_response() -> dict: + return { + "thoughts": { + "text": "My task is complete. I will use the 'task_complete' command to shut down.", + "reasoning": "I will use the 'task_complete' command because it allows me to shut down and signal that my task is complete.", + "plan": "I will use the 'task_complete' command with the reason 'Task complete: retrieved Tesla's revenue in 2022.' 
to shut down.", + "criticism": "I need to ensure that I have completed all necessary tasks before shutting down.", + "speak": "", + }, + "command": { + "name": "task_complete", + "args": {"reason": "Task complete: retrieved Tesla's revenue in 2022."}, + }, + } + + +@pytest.fixture +def invalid_json_response() -> dict: + return { + "thoughts": { + "text": "My task is complete. I will use the 'task_complete' command to shut down.", + "reasoning": "I will use the 'task_complete' command because it allows me to shut down and signal that my task is complete.", + "plan": "I will use the 'task_complete' command with the reason 'Task complete: retrieved Tesla's revenue in 2022.' to shut down.", + "criticism": "I need to ensure that I have completed all necessary tasks before shutting down.", + "speak": "", + }, + "command": {"name": "", "args": {}}, + } + + def test_validate_yaml_file_valid(): with open("valid_test_file.yaml", "w") as f: f.write("setting: value") @@ -153,5 +185,23 @@ def test_get_current_git_branch_failure(mock_repo): assert branch_name == "" -if __name__ == "__main__": - pytest.main() +def test_validate_json_valid(valid_json_response): + assert validate_json(valid_json_response) + + +def test_validate_json_invalid(invalid_json_response): + assert not validate_json(valid_json_response) + + +def test_extract_json_from_response(valid_json_response: dict): + emulated_response_from_openai = str(valid_json_response) + assert ( + extract_json_from_response(emulated_response_from_openai) == valid_json_response + ) + + +def test_extract_json_from_response_wrapped_in_code_block(valid_json_response: dict): + emulated_response_from_openai = "```" + str(valid_json_response) + "```" + assert ( + extract_json_from_response(emulated_response_from_openai) == valid_json_response + ) diff --git a/tests/test_workspace.py b/tests/unit/test_workspace.py similarity index 100% rename from tests/test_workspace.py rename to tests/unit/test_workspace.py diff --git a/tests/vcr/__init__.py b/tests/vcr/__init__.py index e69de29bb2d..04ce79fcb1b 100644 --- a/tests/vcr/__init__.py +++ b/tests/vcr/__init__.py @@ -0,0 +1,71 @@ +import os + +import openai.api_requestor +import pytest +from pytest_mock import MockerFixture + +from .vcr_filter import PROXY, before_record_request, before_record_response + +DEFAULT_RECORD_MODE = "new_episodes" +BASE_VCR_CONFIG = { + "before_record_request": before_record_request, + "before_record_response": before_record_response, + "filter_headers": [ + "Authorization", + "X-OpenAI-Client-User-Agent", + "User-Agent", + ], + "match_on": ["method", "body"], +} + + +@pytest.fixture(scope="session") +def vcr_config(get_base_vcr_config): + return get_base_vcr_config + + +@pytest.fixture(scope="session") +def get_base_vcr_config(request): + record_mode = request.config.getoption("--record-mode", default="new_episodes") + config = BASE_VCR_CONFIG + + if record_mode is None: + config["record_mode"] = DEFAULT_RECORD_MODE + + return config + + +@pytest.fixture() +def vcr_cassette_dir(request): + test_name = os.path.splitext(request.node.name)[0] + return os.path.join("tests/Auto-GPT-test-cassettes", test_name) + + +def patch_api_base(requestor): + new_api_base = f"{PROXY}/v1" + requestor.api_base = new_api_base + return requestor + + +@pytest.fixture +def patched_api_requestor(mocker: MockerFixture): + original_init = openai.api_requestor.APIRequestor.__init__ + original_validate_headers = openai.api_requestor.APIRequestor._validate_headers + + def patched_init(requestor, *args, **kwargs): + 
original_init(requestor, *args, **kwargs) + patch_api_base(requestor) + + def patched_validate_headers(self, supplied_headers): + headers = original_validate_headers(self, supplied_headers) + headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") + headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") + return headers + + if PROXY: + mocker.patch("openai.api_requestor.APIRequestor.__init__", new=patched_init) + mocker.patch.object( + openai.api_requestor.APIRequestor, + "_validate_headers", + new=patched_validate_headers, + ) diff --git a/tests/vcr/vcr_filter.py b/tests/vcr/vcr_filter.py index 4cc49fd3019..1ba433a76ec 100644 --- a/tests/vcr/vcr_filter.py +++ b/tests/vcr/vcr_filter.py @@ -1,8 +1,9 @@ import json +import os import re from typing import Any, Dict, List -from tests.conftest import PROXY +PROXY = os.environ.get("PROXY") REPLACEMENTS: List[Dict[str, str]] = [ {