Skip to content

Commit

Permalink
TokenEncoder
Browse files Browse the repository at this point in the history
  • Loading branch information
mrT23 committed Apr 3, 2024
1 parent a13c6e9 commit 9c36732
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 7 deletions.
23 changes: 18 additions & 5 deletions pr_agent/algo/token_handler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
from jinja2 import Environment, StrictUndefined
from tiktoken import encoding_for_model, get_encoding

from pr_agent.config_loader import get_settings
from threading import Lock


class TokenEncoder:
    """Class-level cache of the tiktoken encoder for the configured model.

    The encoder is built on first use and rebuilt whenever the model name
    returned by ``get_settings().config.model`` differs from the one the
    cached encoder was built for.
    """

    _encoder_instance = None  # cached encoder; None until the first successful build
    _model = None  # model name the cached encoder was built for
    _lock = Lock()  # serializes cache rebuilds

    @classmethod
    def get_token_encoder(cls):
        """Return the encoder for the currently configured model.

        Models whose name contains ``"gpt"`` use ``encoding_for_model``; every
        other model falls back to the ``"cl100k_base"`` encoding.

        Returns:
            The cached encoder, rebuilt first if the configured model changed.
        """
        model = get_settings().config.model

        # Fast path without the lock. The model name is read before the
        # encoder, and the slow path publishes the encoder before the model
        # name, so a matching name is never paired with the encoder that was
        # built for the previous model.
        if model == cls._model:
            encoder = cls._encoder_instance
            if encoder is not None:
                return encoder

        with cls._lock:
            # Re-check under the lock: another thread may have rebuilt the
            # cache while this one was waiting.
            if cls._encoder_instance is None or model != cls._model:
                # Build into a local first so that a failure here leaves the
                # cached (model, encoder) pair untouched and consistent.
                encoder = encoding_for_model(model) if "gpt" in model else get_encoding(
                    "cl100k_base")
                cls._encoder_instance = encoder
                cls._model = model
            return cls._encoder_instance

def get_token_encoder():
    """Build a token encoder for the model named in the current settings.

    Model names containing "gpt" get their model-specific encoding; any other
    model falls back to the "cl100k_base" encoding.
    """
    # Guard clause: non-GPT models share the generic encoding.
    if "gpt" not in get_settings().config.model:
        return get_encoding("cl100k_base")
    return encoding_for_model(get_settings().config.model)

class TokenHandler:
"""
Expand All @@ -31,7 +44,7 @@ def __init__(self, pr=None, vars: dict = {}, system="", user=""):
- system: The system string.
- user: The user string.
"""
self.encoder = get_token_encoder()
self.encoder = TokenEncoder.get_token_encoder()
if pr is not None:
self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)

Expand Down
4 changes: 2 additions & 2 deletions pr_agent/algo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from starlette_context import context

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.token_handler import get_token_encoder
from pr_agent.algo.token_handler import TokenEncoder
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.algo.types import FilePatchInfo
from pr_agent.log import get_logger
Expand Down Expand Up @@ -566,7 +566,7 @@ def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
return text

try:
encoder = get_token_encoder()
encoder = TokenEncoder.get_token_encoder()
num_input_tokens = len(encoder.encode(text))
if num_input_tokens <= max_tokens:
return text
Expand Down

0 comments on commit 9c36732

Please sign in to comment.