Fix the maximum context length issue by chunking #3222

Merged 20 commits on May 1, 2023
Changes from 15 commits
8 changes: 8 additions & 0 deletions .env.template
@@ -49,6 +49,14 @@ OPENAI_API_KEY=your-openai-api-key
# FAST_TOKEN_LIMIT=4000
# SMART_TOKEN_LIMIT=8000

+### EMBEDDINGS
+## EMBEDDING_MODEL - Model to use for creating embeddings
+## EMBEDDING_TOKENIZER - Tokenizer to use for chunking large inputs
+## EMBEDDING_TOKEN_LIMIT - Chunk size limit for large inputs
+# EMBEDDING_MODEL=text-embedding-ada-002
+# EMBEDDING_TOKENIZER=cl100k_base
+# EMBEDDING_TOKEN_LIMIT=8191
+
################################################################################
### MEMORY
################################################################################
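For a sense of how these three settings interact, here is a minimal sketch (illustrative only, not part of the diff): EMBEDDING_TOKENIZER names a tiktoken encoding, and EMBEDDING_TOKEN_LIMIT caps how many tokens go into each embedding request. The 8191 default matches the maximum input size of text-embedding-ada-002, so chunking only kicks in when a single request would be rejected.

import tiktoken

tokenizer = tiktoken.get_encoding("cl100k_base")  # EMBEDDING_TOKENIZER
token_limit = 8191                                # EMBEDDING_TOKEN_LIMIT

tokens = tokenizer.encode("some very long document " * 2000)
# Ceiling division: how many chunks (and embedding calls) this input needs
n_chunks = -(-len(tokens) // token_limit)
print(f"{len(tokens)} tokens -> {n_chunks} chunk(s)")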
15 changes: 15 additions & 0 deletions autogpt/config/config.py
@@ -35,6 +35,9 @@
        self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
        self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
        self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
+        self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")
+        self.embedding_tokenizer = os.getenv("EMBEDDING_TOKENIZER", "cl100k_base")
+        self.embedding_token_limit = int(os.getenv("EMBEDDING_TOKEN_LIMIT", 8191))
        self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 3000))
        self.browse_spacy_language_model = os.getenv(
            "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm"
@@ -216,6 +219,18 @@
        """Set the smart token limit value."""
        self.smart_token_limit = value

+    def set_embedding_model(self, value: str) -> None:
+        """Set the model to use for creating embeddings."""
+        self.embedding_model = value
+
+    def set_embedding_tokenizer(self, value: str) -> None:
+        """Set the tokenizer to use when creating embeddings."""
+        self.embedding_tokenizer = value
+
+    def set_embedding_token_limit(self, value: int) -> None:
+        """Set the token limit for creating embeddings."""
+        self.embedding_token_limit = value
+
    def set_browse_chunk_max_length(self, value: int) -> None:
        """Set the browse_website command chunk max length value."""
        self.browse_chunk_max_length = value
4 changes: 3 additions & 1 deletion autogpt/llm/__init__.py
@@ -11,6 +11,7 @@
from autogpt.llm.chat import chat_with_ai, create_chat_message, generate_context
from autogpt.llm.llm_utils import (
    call_ai_function,
+    chunked_tokens,
    create_chat_completion,
    get_ada_embedding,
)
@@ -32,7 +33,8 @@
    "call_ai_function",
    "create_chat_completion",
    "get_ada_embedding",
-    "COSTS",
+    "chunked_tokens",
+    "COSTS",
    "count_message_tokens",
    "count_string_tokens",
]
63 changes: 50 additions & 13 deletions autogpt/llm/llm_utils.py
@@ -2,9 +2,12 @@

import functools
import time
+from itertools import islice
from typing import List, Optional

+import numpy as np
import openai
+import tiktoken
from colorama import Fore, Style
from openai.error import APIError, RateLimitError, Timeout

@@ -207,6 +210,23 @@
    return resp


+def batched(iterable, n):
+    """Batch data into tuples of length n. The last batch may be shorter."""
+    # batched('ABCDEFG', 3) --> ABC DEF G
+    if n < 1:
+        raise ValueError("n must be at least one")
+    it = iter(iterable)
+    while batch := tuple(islice(it, n)):
+        yield batch
+
+
+def chunked_tokens(text, tokenizer_name, chunk_length):
+    tokenizer = tiktoken.get_encoding(tokenizer_name)
+    tokens = tokenizer.encode(text)
+    chunks_iterator = batched(tokens, chunk_length)
+    yield from chunks_iterator
+
+
def get_ada_embedding(text: str) -> List[float]:
    """Get an embedding from the ada model.

@@ -217,7 +237,7 @@
        List[float]: The embedding.
    """
    cfg = Config()
-    model = "text-embedding-ada-002"
+    model = cfg.embedding_model
    text = text.replace("\n", " ")

    if cfg.use_azure:
@@ -226,13 +246,7 @@
        kwargs = {"model": model}

    embedding = create_embedding(text, **kwargs)
-    api_manager = ApiManager()
-    api_manager.update_cost(
-        prompt_tokens=embedding.usage.prompt_tokens,
-        completion_tokens=0,
-        model=model,
-    )
-    return embedding["data"][0]["embedding"]
+    return embedding


@retry_openai_api()
@@ -251,8 +265,31 @@
        openai.Embedding: The embedding object.
    """
    cfg = Config()
-    return openai.Embedding.create(
-        input=[text],
-        api_key=cfg.openai_api_key,
-        **kwargs,
-    )
+    chunk_embeddings = []
+    chunk_lengths = []
+    for chunk in chunked_tokens(
+        text,
+        tokenizer_name=cfg.embedding_tokenizer,
+        chunk_length=cfg.embedding_token_limit,
+    ):
+        embedding = openai.Embedding.create(
+            input=[chunk],
+            api_key=cfg.openai_api_key,
+            **kwargs,
+        )
+        api_manager = ApiManager()
+        api_manager.update_cost(
+            prompt_tokens=embedding.usage.prompt_tokens,
+            completion_tokens=0,
+            model=cfg.embedding_model,
+        )
+        chunk_embeddings.append(embedding["data"][0]["embedding"])
+        chunk_lengths.append(len(chunk))
+
+    # do weighted avg
+    chunk_embeddings = np.average(chunk_embeddings, axis=0, weights=chunk_lengths)
+    chunk_embeddings = chunk_embeddings / np.linalg.norm(
+        chunk_embeddings
+    )  # normalize the length to one
+    chunk_embeddings = chunk_embeddings.tolist()
+    return chunk_embeddings
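To see what the new create_embedding body computes, here is a self-contained sketch of the chunk-and-average technique (illustrative only, not part of the diff; fake_embed is a stand-in for the openai.Embedding.create call, and the vector size is arbitrary). Weighting each chunk by its token count mirrors OpenAI's cookbook recipe for embedding inputs longer than the model's context window.

import numpy as np

def fake_embed(chunk):
    # Stand-in for the API call: a deterministic unit vector per chunk
    rng = np.random.default_rng(seed=len(chunk))
    vec = rng.normal(size=16)
    return vec / np.linalg.norm(vec)

chunks = [list(range(8191)), list(range(1500))]  # a full chunk and a short tail
embeddings = [fake_embed(c) for c in chunks]
lengths = [len(c) for c in chunks]

# Weight by token count so the short tail contributes proportionally less,
# then rescale to unit length, as the code above does
combined = np.average(embeddings, axis=0, weights=lengths)
combined = (combined / np.linalg.norm(combined)).tolist()

The trade-off: averaging discards cross-chunk word order, but it lets get_ada_embedding keep returning a single vector regardless of input length instead of raising a maximum-context-length error.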
40 changes: 31 additions & 9 deletions tests/test_llm_utils.py
@@ -2,7 +2,7 @@
from openai.error import APIError, RateLimitError

from autogpt.llm import COSTS, get_ada_embedding
-from autogpt.llm.llm_utils import retry_openai_api
+from autogpt.llm.llm_utils import chunked_tokens, retry_openai_api


@pytest.fixture(params=[RateLimitError, APIError])
@@ -15,11 +15,8 @@ def error(request):

@pytest.fixture
def mock_create_embedding(mocker):
-    mock_response = mocker.MagicMock()
-    mock_response.usage.prompt_tokens = 5
-    mock_response.__getitem__.side_effect = lambda key: [{"embedding": [0.1, 0.2, 0.3]}]
    return mocker.patch(
-        "autogpt.llm.llm_utils.create_embedding", return_value=mock_response
+        "autogpt.llm.llm_utils.create_embedding", return_value=[0.1, 0.2, 0.3]
    )


@@ -123,7 +120,32 @@ def test_get_ada_embedding(mock_create_embedding, api_manager):

    assert embedding == [0.1, 0.2, 0.3]

-    cost = COSTS[model]["prompt"]
-    assert api_manager.get_total_prompt_tokens() == 5
-    assert api_manager.get_total_completion_tokens() == 0
-    assert api_manager.get_total_cost() == (5 * cost) / 1000
+
+def test_chunked_tokens():
+    text = "Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model"
+    expected_output = [
+        (
+            13556,
+            12279,
+            2898,
+            374,
+            459,
+            22772,
+            1825,
+            31874,
+            3851,
+            67908,
+            279,
+            17357,
+            315,
+            279,
+            480,
+            2898,
+            12,
+            19,
+            4221,
+            1646,
+        )
+    ]
+    output = list(chunked_tokens(text, "cl100k_base", 8191))
+    assert output == expected_output
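A quick way to sanity-check the expected ids above (a sketch, not part of the PR): encoding with cl100k_base and then decoding is lossless for plain text, and this sentence fits comfortably inside one 8191-token chunk, which is why the expected output is a single tuple.

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
text = (
    "Auto-GPT is an experimental open-source application showcasing "
    "the capabilities of the GPT-4 language model"
)
tokens = enc.encode(text)

assert len(tokens) <= 8191         # one chunk is enough
assert enc.decode(tokens) == text  # round-trip is lossless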