Fix the maximum context length issue by chunking #3222

Merged · 20 commits · May 1, 2023
Showing changes from 7 commits
15 changes: 15 additions & 0 deletions autogpt/config/config.py
@@ -33,6 +33,9 @@
         self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
         self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
         self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
+        self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
+        self.embedding_token_limit = int(os.getenv("EMBEDDING_TOKEN_LIMIT", 8191))
+        self.embedding_encoding = os.getenv("EMBEDDING_ENCODING", "cl100k_base")
         self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 3000))
         self.browse_spacy_language_model = os.getenv(
             "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
@@ -214,6 +217,18 @@
"""Set the smart token limit value."""
self.smart_token_limit = value

def set_embedding_model(self, value: str) -> None:
"""Set the embedding model value."""
self.embedding_model = value

Check warning on line 222 in autogpt/config/config.py

View check run for this annotation

Codecov / codecov/patch

autogpt/config/config.py#L222

Added line #L222 was not covered by tests

def set_embedding_token_limit(self, value: int) -> None:
"""Set the embedding token limit value."""
self.embedding_token_limit = value

Check warning on line 226 in autogpt/config/config.py

View check run for this annotation

Codecov / codecov/patch

autogpt/config/config.py#L226

Added line #L226 was not covered by tests

def set_embedding_encoding(self, value: str) -> None:
"""Set the embedding encoding value."""
self.embedding_encoding = value

Check warning on line 230 in autogpt/config/config.py

View check run for this annotation

Codecov / codecov/patch

autogpt/config/config.py#L230

Added line #L230 was not covered by tests

def set_browse_chunk_max_length(self, value: int) -> None:
"""Set the browse_website command chunk max length value."""
self.browse_chunk_max_length = value
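The three new settings are read from environment variables with the defaults shown above. A hypothetical .env snippet for overriding them (variable names and defaults are taken straight from the diff; 8191 tokens and cl100k_base are the documented input limit and encoding for text-embedding-ada-002):

# Optional overrides for the new embedding settings (defaults shown)
EMBEDDING_MODEL=text-embedding-ada-002
EMBEDDING_TOKEN_LIMIT=8191
EMBEDDING_ENCODING=cl100k_base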
63 changes: 50 additions & 13 deletions autogpt/llm_utils.py
@@ -2,9 +2,12 @@
 
 import functools
 import time
+from itertools import islice
 from typing import List, Optional
 
+import numpy as np
 import openai
+import tiktoken
 from colorama import Fore, Style
 from openai.error import APIError, RateLimitError, Timeout
 
@@ -210,6 +213,23 @@
     return resp
 
 
+def batched(iterable, n):
+    """Batch data into tuples of length n. The last batch may be shorter."""
+    # batched('ABCDEFG', 3) --> ABC DEF G
+    if n < 1:
+        raise ValueError("n must be at least one")
[Codecov / codecov/patch: added line 220 in autogpt/llm_utils.py (the raise above) is not covered by tests.]
+    it = iter(iterable)
+    while batch := tuple(islice(it, n)):
+        yield batch
+
+
+def chunked_tokens(text, encoding_name, chunk_length):
+    encoding = tiktoken.get_encoding(encoding_name)
+    tokens = encoding.encode(text)
+    chunks_iterator = batched(tokens, chunk_length)
+    yield from chunks_iterator
 
 
 def get_ada_embedding(text: str) -> List[float]:
     """Get an embedding from the ada model.
@@ -220,7 +240,7 @@
         List[float]: The embedding.
     """
     cfg = Config()
-    model = "text-embedding-ada-002"
+    model = cfg.embedding_model
     text = text.replace("\n", " ")
 
     if cfg.use_azure:
@@ -229,13 +249,7 @@
         kwargs = {"model": model}
 
     embedding = create_embedding(text, **kwargs)
-    api_manager = ApiManager()
-    api_manager.update_cost(
-        prompt_tokens=embedding.usage.prompt_tokens,
-        completion_tokens=0,
-        model=model,
-    )
-    return embedding["data"][0]["embedding"]
+    return embedding
 
 
 @retry_openai_api()
@@ -254,8 +268,31 @@
         openai.Embedding: The embedding object.
     """
     cfg = Config()
-    return openai.Embedding.create(
-        input=[text],
-        api_key=cfg.openai_api_key,
-        **kwargs,
-    )
+    chunk_embeddings = []
+    chunk_lens = []
+    for chunk in chunked_tokens(
+        text,
+        encoding_name=cfg.embedding_encoding,
+        chunk_length=cfg.embedding_token_limit,
+    ):
+        embedding = openai.Embedding.create(
+            input=[chunk],
+            api_key=cfg.openai_api_key,
+            **kwargs,
+        )
+        api_manager = ApiManager()
+        api_manager.update_cost(
+            prompt_tokens=embedding.usage.prompt_tokens,
+            completion_tokens=0,
+            model=cfg.embedding_model,
+        )
+        chunk_embeddings.append(embedding["data"][0]["embedding"])
+        chunk_lens.append(len(chunk))
+
+    # do weighted avg
+    chunk_embeddings = np.average(chunk_embeddings, axis=0, weights=chunk_lens)
+    chunk_embeddings = chunk_embeddings / np.linalg.norm(
+        chunk_embeddings
+    )  # normalize the length to one
+    chunk_embeddings = chunk_embeddings.tolist()
+    return chunk_embeddings
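To summarize the new create_embedding flow: each token chunk is embedded separately, the per-chunk vectors are averaged with weights equal to the chunks' token counts (so longer chunks contribute proportionally more), and the result is rescaled to unit length so that downstream cosine-similarity comparisons behave the same as a single-call embedding. A standalone sketch of that combining step with toy 2-D vectors (illustrative values only, not part of the diff):

# Toy illustration of the weighted-average-and-normalize step above.
import numpy as np

chunk_embeddings = [[1.0, 0.0], [0.0, 1.0]]  # one vector per chunk (toy 2-D data)
chunk_lens = [3, 1]                          # token count of each chunk

# Weighted mean: (3 * [1, 0] + 1 * [0, 1]) / 4 = [0.75, 0.25]
avg = np.average(chunk_embeddings, axis=0, weights=chunk_lens)

# Rescale to unit length so similarity scores are unaffected by chunk count.
unit = avg / np.linalg.norm(avg)
print(unit.tolist())  # approximately [0.9487, 0.3162]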
40 changes: 31 additions & 9 deletions tests/test_llm_utils.py
@@ -1,7 +1,7 @@
 import pytest
 from openai.error import APIError, RateLimitError
 
-from autogpt.llm_utils import get_ada_embedding, retry_openai_api
+from autogpt.llm_utils import chunked_tokens, get_ada_embedding, retry_openai_api
 from autogpt.modelsinfo import COSTS
 
 
@@ -15,11 +15,8 @@ def error(request):
 
 @pytest.fixture
 def mock_create_embedding(mocker):
-    mock_response = mocker.MagicMock()
-    mock_response.usage.prompt_tokens = 5
-    mock_response.__getitem__.side_effect = lambda key: [{"embedding": [0.1, 0.2, 0.3]}]
     return mocker.patch(
-        "autogpt.llm_utils.create_embedding", return_value=mock_response
+        "autogpt.llm_utils.create_embedding", return_value=[0.1, 0.2, 0.3]
     )
@@ -123,7 +120,32 @@ def test_get_ada_embedding(mock_create_embedding, api_manager):
 
     assert embedding == [0.1, 0.2, 0.3]
 
-    cost = COSTS[model]["prompt"]
-    assert api_manager.get_total_prompt_tokens() == 5
-    assert api_manager.get_total_completion_tokens() == 0
-    assert api_manager.get_total_cost() == (5 * cost) / 1000
+
+def test_chunked_tokens():
+    text = "Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model"
+    expected_output = [
+        (
+            13556,
+            12279,
+            2898,
+            374,
+            459,
+            22772,
+            1825,
+            31874,
+            3851,
+            67908,
+            279,
+            17357,
+            315,
+            279,
+            480,
+            2898,
+            12,
+            19,
+            4221,
+            1646,
+        )
+    ]
+    output = list(chunked_tokens(text, "cl100k_base", 8191))
+    assert output == expected_output
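The expected output above is a single 20-token chunk, since the sample text is far below the 8191-token limit. A possible companion test, not part of this PR, sketching the multi-chunk path (assumes tiktoken is importable in the test environment):

import tiktoken

def test_chunked_tokens_splits_long_input():
    # Hypothetical extra test: with a small chunk_length, the text is split
    # into several chunks whose concatenation equals the full token sequence.
    text = "Auto-GPT is an experimental open-source application"
    chunks = list(chunked_tokens(text, "cl100k_base", 4))
    assert all(len(chunk) <= 4 for chunk in chunks)
    flat = [token for chunk in chunks for token in chunk]
    assert flat == tiktoken.get_encoding("cl100k_base").encode(text)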