Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 8 additions & 30 deletions kaizen/llms/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,19 @@

class LLMProvider:
DEFAULT_MODEL = "gpt-3.5-turbo-1106"
DEFAULT_MAX_TOKENS = 2000
DEFAULT_INPUT_TOKEN_COST = 0.0000005
DEFAULT_OUTPUT_TOKEN_COST = 0.0000015
DEFAULT_MAX_TOKENS = 4000
DEFAULT_TEMPERATURE = 0
DEFAULT_MODEL_CONFIG = {"model": DEFAULT_MODEL}

def __init__(
self,
system_prompt=BASIC_SYSTEM_PROMPT,
model=DEFAULT_MODEL,
max_tokens=DEFAULT_MAX_TOKENS,
temperature=DEFAULT_TEMPERATURE,
input_token_cost=DEFAULT_INPUT_TOKEN_COST,
output_token_cost=DEFAULT_OUTPUT_TOKEN_COST,
self, system_prompt=BASIC_SYSTEM_PROMPT, model_config=DEFAULT_MODEL_CONFIG
):
self.config = ConfigData().get_config_data()
self.system_prompt = system_prompt
self.model = model
self.input_token_cost = input_token_cost
self.output_token_cost = output_token_cost
self.max_tokens = max_tokens
self.temperature = temperature
self.model_config = model_config
if "default_model_config" in self.config.get("language_model", {}):
self.model_config = self.config["language_model"]["default_model_config"]
self.model = self.model_config["model"]
if self.config.get("language_model", {}).get(
"enable_observability_logging", False
):
Expand All @@ -38,23 +30,9 @@ def chat_completion(self, prompt, user: str = None):
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": prompt},
]
if "model" in self.config.get("language_model", {}):
self.model = self.config["language_model"]["model"]["name"]
self.input_token_cost = self.config["language_model"]["model"][
"input_token_cost"
]
self.output_token_cost = self.config["language_model"]["model"][
"output_token_cost"
]

response = litellm.completion(
model=self.model,
messages=messages,
max_tokens=self.max_tokens,
temperature=self.temperature,
user=user,
input_cost_per_token=self.input_token_cost,
output_cost_per_token=self.output_token_cost,
messages=messages, user=user, **self.model_config
)
return response["choices"][0]["message"]["content"], response["usage"]

Expand Down
60 changes: 60 additions & 0 deletions tests/llms/test_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import pytest
from unittest.mock import patch, Mock
from kaizen.llms.provider import LLMProvider


@pytest.fixture
def mock_config_data():
    """Replace the ``ConfigData`` class used by the provider module with a mock.

    The mocked ``get_config_data()`` returns a minimal configuration whose
    ``language_model`` section carries a ``default_model_config`` and enables
    observability logging.

    Yields:
        The mock ``ConfigData`` instance (``MockConfigData.return_value``),
        active for the duration of the test.
    """
    # Patch the name where it is looked up. provider.py calls the bare name
    # ``ConfigData()`` and is already imported at the top of this module, so
    # patching ``kaizen.utils.config.ConfigData`` would leave the provider's
    # own reference to the real class untouched.
    with patch("kaizen.llms.provider.ConfigData") as MockConfigData:
        mock_config = MockConfigData.return_value
        mock_config.get_config_data.return_value = {
            "language_model": {
                "default_model_config": {"model": "gpt-3.5-turbo-1106"},
                "enable_observability_logging": True,
            }
        }
        yield mock_config


@pytest.fixture
def llm_provider(mock_config_data):
    """Build an ``LLMProvider`` with default arguments.

    Requesting ``mock_config_data`` ensures that fixture is set up before the
    provider is constructed.
    """
    provider = LLMProvider()
    return provider


def test_initialization(llm_provider):
    """A freshly built provider exposes the configured model name and config."""
    expected_config = {"model": "gpt-3.5-turbo-1106"}
    assert llm_provider.model == "gpt-3.5-turbo-1106"
    assert llm_provider.model_config == expected_config


@patch("kaizen.llms.provider.litellm.completion")
def test_chat_completion(mock_completion, llm_provider):
    """``chat_completion`` returns the message content and the usage payload.

    ``litellm.completion`` is replaced by a mock so no network call is made;
    the mock returns a response with one choice and a ``usage`` section.
    """
    expected_usage = {"prompt_tokens": 10, "completion_tokens": 10}
    mock_completion.return_value = {
        "choices": [{"message": {"content": "response"}}],
        "usage": expected_usage,
    }

    response, usage = llm_provider.chat_completion("test prompt")

    # Pin the exact values rather than merely "not None", so a regression in
    # how the response is unpacked is actually caught.
    assert response == "response"
    assert usage == expected_usage
    mock_completion.assert_called_once()


@patch("kaizen.llms.provider.litellm.token_counter")
@patch("kaizen.llms.provider.litellm.get_max_tokens")
def test_is_inside_token_limit(mock_get_max_tokens, mock_token_counter, llm_provider):
    """Check ``is_inside_token_limit`` for two mocked token counts.

    With a mocked model maximum of 150 tokens, a prompt counted at 100 tokens
    is expected to be inside the limit and one counted at 120 is not.
    """
    mock_get_max_tokens.return_value = 150
    for token_count, expected in ((100, True), (120, False)):
        mock_token_counter.return_value = token_count
        assert llm_provider.is_inside_token_limit("test prompt") is expected


@patch("kaizen.llms.provider.litellm.token_counter")
@patch("kaizen.llms.provider.litellm.get_max_tokens")
def test_available_tokens(mock_get_max_tokens, mock_token_counter, llm_provider):
    """Check ``available_tokens`` against mocked litellm token figures.

    With a mocked maximum of 150 tokens and a message counted at 100 tokens,
    the provider is expected to report 20 tokens available.
    """
    mock_get_max_tokens.return_value = 150
    mock_token_counter.return_value = 100
    remaining = llm_provider.available_tokens("test message")
    assert remaining == 20


@patch("kaizen.llms.provider.litellm.token_counter")
def test_get_token_count(mock_token_counter, llm_provider):
    """``get_token_count`` yields a truthy result when the counter is mocked to 50."""
    mock_token_counter.return_value = 50
    token_count = llm_provider.get_token_count("test message")
    # NOTE(review): only truthiness is checked; presumably this should equal
    # the mocked 50 — confirm against get_token_count before tightening.
    assert token_count