fix: update max token overwrite logic #1694

Merged · 1 commit · Nov 23, 2023
10 changes: 7 additions & 3 deletions backend/llm/api_brain_qa.py

@@ -55,7 +55,7 @@ async def make_completion(
         response = completion(
             model=self.model,
             temperature=self.temperature,
-            max_tokens=2000,
+            max_tokens=self.max_tokens,
             messages=messages,
             functions=functions,
             stream=True,
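
This hunk is the heart of the fix: the completion call used to pin every request to max_tokens=2000, overwriting whatever limit the brain or chat had configured, and now forwards the instance's own setting. A minimal runnable sketch of the idea follows; the class shape and the llm_completion stub are illustrative, not Quivr's actual API:

    def llm_completion(**kwargs):
        # Stand-in for the real LLM call so the sketch runs anywhere.
        return kwargs

    class ApiBrainQA:
        def __init__(self, model: str, temperature: float, max_tokens: int):
            self.model = model
            self.temperature = temperature
            self.max_tokens = max_tokens  # per-brain/per-chat limit, set by the caller

        def make_completion(self, messages: list) -> dict:
            return llm_completion(
                model=self.model,
                temperature=self.temperature,
                max_tokens=self.max_tokens,  # was: max_tokens=2000
                messages=messages,
            )

    qa = ApiBrainQA("gpt-3.5-turbo", temperature=0.1, max_tokens=512)
    assert qa.make_completion([{"role": "user", "content": "hi"}])["max_tokens"] == 512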

@@ -109,10 +109,14 @@ async def make_completion(
                     yield value
 
                 else:
-                    if hasattr(chunk.choices[0], 'delta') and chunk.choices[0].delta and hasattr(chunk.choices[0].delta, 'content'):
+                    if (
+                        hasattr(chunk.choices[0], "delta")
+                        and chunk.choices[0].delta
+                        and hasattr(chunk.choices[0].delta, "content")
+                    ):
                         content = chunk.choices[0].delta.content
                         yield content
-                    else: # pragma: no cover
+                    else:  # pragma: no cover
                         yield "**...**"
                         break
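
The second hunk in this file mostly reformats the chunk guard, but the guard itself is worth noting: in a streamed response not every chunk carries a content delta (the final chunk typically does not), so the attribute checks keep the loop from raising mid-stream. A self-contained sketch of the same defensive pattern, with SimpleNamespace stand-ins for the streamed chunks:

    from types import SimpleNamespace

    def stream_text(chunks):
        # Yield content only when the chunk's delta actually carries some;
        # otherwise emit the placeholder and stop, as the handler does.
        for chunk in chunks:
            choice = chunk.choices[0]
            if hasattr(choice, "delta") and choice.delta and hasattr(choice.delta, "content"):
                yield choice.delta.content
            else:  # e.g. the final chunk of a stream, which has no content delta
                yield "**...**"
                break

    chunks = [
        SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content="Hel"))]),
        SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content="lo"))]),
        SimpleNamespace(choices=[SimpleNamespace(delta=None)]),
    ]
    print("".join(stream_text(chunks)))  # -> Hello**...**
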
66 changes: 40 additions & 26 deletions backend/routes/chat_routes.py

@@ -7,9 +7,10 @@
 from llm.qa_base import QABaseBrainPicking
 from llm.qa_headless import HeadlessQA
 from middlewares.auth import AuthBearer, get_current_user
-from models import Brain, BrainEntity, Chat, ChatQuestion, UserUsage, get_supabase_db
+from models import Chat, ChatQuestion, UserUsage, get_supabase_db
 from models.databases.supabase.chats import QuestionAndAnswer
 from modules.user.entity.user_identity import UserIdentity
+from repository.brain.get_brain_by_id import get_brain_by_id
 from repository.chat import (
     ChatUpdatableProperties,
     CreateChatProperties,
@@ -25,6 +26,7 @@
     get_chat_history_with_notifications,
 )
 from repository.notification.remove_chat_notifications import remove_chat_notifications
+
 from routes.chat.factory import get_chat_strategy
 from routes.chat.utils import (
     NullableUUID,
@@ -133,32 +135,37 @@ async def create_question_handler(

     chat_instance.validate_authorization(user_id=current_user.id, brain_id=brain_id)
 
-    brain = Brain(id=brain_id)
-    brain_details: BrainEntity | None = None
+    fallback_model = "gpt-3.5-turbo"
+    fallback_temperature = 0.1
+    fallback_max_tokens = 512
+
-    userDailyUsage = UserUsage(
+    user_daily_usage = UserUsage(
         id=current_user.id,
         email=current_user.email,
     )
-    userSettings = userDailyUsage.get_user_settings()
-    is_model_ok = (brain_details or chat_question).model in userSettings.get("models", ["gpt-3.5-turbo"])  # type: ignore
+    user_settings = user_daily_usage.get_user_settings()
+    is_model_ok = (chat_question).model in user_settings.get("models", ["gpt-3.5-turbo"])  # type: ignore
 
-    # Retrieve chat model (temperature, max_tokens, model)
-    if (
-        not chat_question.model
-        or not chat_question.temperature
-        or not chat_question.max_tokens
-    ):
-        # TODO: create ChatConfig class (pick config from brain or user or chat) and use it here
-        chat_question.model = chat_question.model or brain.model or "gpt-3.5-turbo"
-        chat_question.temperature = (
-            chat_question.temperature or brain.temperature or 0.1
-        )
-        chat_question.max_tokens = chat_question.max_tokens or brain.max_tokens or 512
+    if brain_id:
+        brain = get_brain_by_id(brain_id)
+        if brain:
+            fallback_model = brain.model or fallback_model
+            fallback_temperature = brain.temperature or fallback_temperature
+            fallback_max_tokens = brain.max_tokens or fallback_max_tokens
+
+    chat_question.model = chat_question.model or fallback_model
+    chat_question.temperature = chat_question.temperature or fallback_temperature
+    chat_question.max_tokens = chat_question.max_tokens or fallback_max_tokens
 
     try:
         check_user_requests_limit(current_user)
-        is_model_ok = (brain_details or chat_question).model in userSettings.get("models", ["gpt-3.5-turbo"])  # type: ignore
+        is_model_ok = (chat_question).model in user_settings.get("models", ["gpt-3.5-turbo"])  # type: ignore
         gpt_answer_generator = chat_instance.get_answer_generator(
             chat_id=str(chat_id),
             model=chat_question.model if is_model_ok else "gpt-3.5-turbo",  # type: ignore
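
The new logic resolves each parameter as: explicit request value, else the brain's stored setting, else a hard default, and it only fetches the brain via get_brain_by_id when a brain_id was actually supplied, instead of constructing Brain(id=brain_id) unconditionally. A standalone sketch of that precedence chain; the names are illustrative and only the resolution order mirrors the handler:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Params:
        model: Optional[str] = None
        temperature: Optional[float] = None
        max_tokens: Optional[int] = None

    def resolve(request: Params, brain: Optional[Params]) -> Params:
        # Hard defaults, matching this handler: gpt-3.5-turbo / 0.1 / 512.
        fallback = Params("gpt-3.5-turbo", 0.1, 512)
        if brain:  # brain settings override the hard defaults when present
            fallback.model = brain.model or fallback.model
            fallback.temperature = brain.temperature or fallback.temperature
            fallback.max_tokens = brain.max_tokens or fallback.max_tokens
        # Explicit request values always win; note that `or` treats 0 as unset,
        # so a requested temperature of 0 falls through to the fallback.
        return Params(
            request.model or fallback.model,
            request.temperature or fallback.temperature,
            request.max_tokens or fallback.max_tokens,
        )

    print(resolve(Params(max_tokens=256), Params(model="gpt-4")))
    # Params(model='gpt-4', temperature=0.1, max_tokens=256)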

@@ -199,39 +206,46 @@ async def create_stream_question_handler(
     chat_instance = get_chat_strategy(brain_id)
     chat_instance.validate_authorization(user_id=current_user.id, brain_id=brain_id)
 
-    brain = Brain(id=brain_id)
-    brain_details: BrainEntity | None = None
-    userDailyUsage = UserUsage(
+    user_daily_usage = UserUsage(
         id=current_user.id,
         email=current_user.email,
     )
 
-    userSettings = userDailyUsage.get_user_settings()
+    user_settings = user_daily_usage.get_user_settings()
 
-    # Retrieve chat model (temperature, max_tokens, model)
-    if (
-        not chat_question.model
-        or chat_question.temperature is None
-        or not chat_question.max_tokens
-    ):
-        # TODO: create ChatConfig class (pick config from brain or user or chat) and use it here
-        chat_question.model = chat_question.model or brain.model or "gpt-3.5-turbo"
-        chat_question.temperature = chat_question.temperature or brain.temperature or 0
-        chat_question.max_tokens = chat_question.max_tokens or brain.max_tokens or 256
+    fallback_model = "gpt-3.5-turbo"
+    fallback_temperature = 0
+    fallback_max_tokens = 256
+
+    if brain_id:
+        brain = get_brain_by_id(brain_id)
+        if brain:
+            fallback_model = brain.model or fallback_model
+            fallback_temperature = brain.temperature or fallback_temperature
+            fallback_max_tokens = brain.max_tokens or fallback_max_tokens
+
+    chat_question.model = chat_question.model or fallback_model
+    chat_question.temperature = chat_question.temperature or fallback_temperature
+    chat_question.max_tokens = chat_question.max_tokens or fallback_max_tokens
 
     try:
         logger.info(f"Streaming request for {chat_question.model}")
         check_user_requests_limit(current_user)
         gpt_answer_generator: HeadlessQA | QABaseBrainPicking
         # TODO check if model is in the list of models available for the user
 
-        is_model_ok = (brain_details or chat_question).model in userSettings.get("models", ["gpt-3.5-turbo"])  # type: ignore
-
+        is_model_ok = chat_question.model in user_settings.get("models", ["gpt-3.5-turbo"])  # type: ignore
         gpt_answer_generator = chat_instance.get_answer_generator(
             chat_id=str(chat_id),
-            model=(brain_details or chat_question).model if is_model_ok else "gpt-3.5-turbo",  # type: ignore
-            max_tokens=(brain_details or chat_question).max_tokens,  # type: ignore
-            temperature=(brain_details or chat_question).temperature,  # type: ignore
+            model=chat_question.model if is_model_ok else "gpt-3.5-turbo",  # type: ignore
+            max_tokens=chat_question.max_tokens,
+            temperature=chat_question.temperature,  # type: ignore
             streaming=True,
             prompt_id=chat_question.prompt_id,
             brain_id=str(brain_id),
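
Both handlers end with the same allow-list gate: the requested model must appear in the user's settings, otherwise the generator is pinned to gpt-3.5-turbo. After this PR the streaming handler reads model, max_tokens, and temperature from chat_question rather than the removed brain_details. A tiny sketch of the gate, assuming the settings shape implied by the user_settings.get("models", ...) calls above:

    def pick_model(requested: str, user_settings: dict) -> str:
        # Pin to the default model unless the user's settings allow the request.
        allowed = user_settings.get("models", ["gpt-3.5-turbo"])
        return requested if requested in allowed else "gpt-3.5-turbo"

    print(pick_model("gpt-4", {"models": ["gpt-3.5-turbo", "gpt-4"]}))  # gpt-4
    print(pick_model("gpt-4", {}))  # gpt-3.5-turbo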