Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions backend/utils/llm/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@


# Base models for general use
-llm_mini = ChatOpenAI(model='gpt-5-mini')
-llm_mini_stream = ChatOpenAI(model='gpt-5-mini', streaming=True)
+llm_mini = ChatOpenAI(model='gpt-4o-mini')
+llm_mini_stream = ChatOpenAI(model='gpt-4o-mini', streaming=True)
llm_large = ChatOpenAI(model='o1-preview')
llm_large_stream = ChatOpenAI(model='o1-preview', streaming=True, temperature=1)
llm_high = ChatOpenAI(model='o4-mini')
llm_high_stream = ChatOpenAI(model='o4-mini', streaming=True, temperature=1)
-llm_medium = ChatOpenAI(model='gpt-5')
-llm_medium_experiment = ChatOpenAI(model='gpt-5')
-llm_medium_stream = ChatOpenAI(model='gpt-5', streaming=True)
+llm_medium = ChatOpenAI(model='gpt-4o')
+llm_medium_experiment = ChatOpenAI(model='gpt-4.1')
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)

# Specialized models for agentic workflows
-llm_agent = ChatOpenAI(model='gpt-5')
-llm_agent_stream = ChatOpenAI(model='gpt-5', streaming=True)
+llm_agent = ChatOpenAI(model='gpt-4.1')
+llm_agent_stream = ChatOpenAI(model='gpt-4.1', streaming=True)
llm_persona_mini_stream = ChatOpenAI(
temperature=0.8,
model="google/gemini-flash-1.5-8b",
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/other/chat_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _ensure_thread_and_assistant(self):
assistant = openai.beta.assistants.create(
name="File Reader",
instructions="You are a helpful assistant that answers questions about the provided file. Use the file_search tool to search the file contents when needed.",
-model="gpt-5",
+model="gpt-4o",
tools=[{"type": "file_search"}],
timeout=timeout,
)
Expand Down
3 changes: 1 addition & 2 deletions backend/utils/retrieval/agentic.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,7 @@ async def execute_agentic_chat_stream(
conversations_collected = []

# Initialize safety guard
-# gpt-5 // 400k
-safety_guard = AgentSafetyGuard(max_tool_calls=10, max_context_tokens=400000)
+safety_guard = AgentSafetyGuard(max_tool_calls=10, max_context_tokens=500000)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The max_context_tokens value of 500000 is hardcoded here. This value is also used as a default in the AgentSafetyGuard constructor in backend/utils/retrieval/safety.py. To avoid duplication and potential inconsistencies, it would be better to define this as a constant in a shared location (e.g., backend/utils/llm/clients.py) and import it where needed. This ensures that if the agent's model changes, its context window size is updated consistently everywhere.


config = {
"configurable": {
Expand Down
4 changes: 2 additions & 2 deletions backend/utils/retrieval/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
from utils.app_integrations import get_github_docs_content
from utils.retrieval.agentic import execute_agentic_chat_stream

-model = ChatOpenAI(model="gpt-5-mini")
-llm_medium_stream = ChatOpenAI(model='gpt-5', streaming=True)
+model = ChatOpenAI(model="gpt-4o-mini")
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)
Comment on lines +45 to +46
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

These model definitions can be improved for better maintainability:

  1. Unused variable: The model variable defined on line 45 is not used anywhere in this file and can be safely removed.
  2. Duplicate definition: llm_medium_stream on line 46 is already defined in backend/utils/llm/clients.py. Instead of redefining it, you should import it from the centralized clients.py file. This avoids code duplication and ensures all model configurations are in a single place.

Please remove these two lines and add the following import at the top of the file with other utils imports:

from utils.llm.clients import llm_medium_stream



class StructuredFilters(TypedDict):
Expand Down
3 changes: 1 addition & 2 deletions backend/utils/retrieval/safety.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ class AgentSafetyGuard:
- Suspicious parameter patterns
"""

-# gpt-5 // 400k
-def __init__(self, max_tool_calls: int = 10, max_context_tokens: int = 400000):
+def __init__(self, max_tool_calls: int = 10, max_context_tokens: int = 500000):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The max_context_tokens parameter is hardcoded here with a default value of 500000. The same value is also hardcoded at the instantiation site in backend/utils/retrieval/agentic.py. This duplication can lead to inconsistencies if the context window for the agent model changes in the future. To improve maintainability, consider defining this value as a constant in a central place, like backend/utils/llm/clients.py where the agent model is defined, and then import and use that constant in both places.

self.max_tool_calls = max_tool_calls
self.max_context_tokens = max_context_tokens

Expand Down