BasedHardware · beastoin · Nov 17, 2025 · Nov 17, 2025 · gemini-code-assist · Nov 17, 2025
diff --git a/backend/utils/llm/clients.py b/backend/utils/llm/clients.py
@@ -9,19 +9,19 @@
 
 
 # Base models for general use
-llm_mini = ChatOpenAI(model='gpt-5-mini')
-llm_mini_stream = ChatOpenAI(model='gpt-5-mini', streaming=True)
+llm_mini = ChatOpenAI(model='gpt-4o-mini')
+llm_mini_stream = ChatOpenAI(model='gpt-4o-mini', streaming=True)
 llm_large = ChatOpenAI(model='o1-preview')
 llm_large_stream = ChatOpenAI(model='o1-preview', streaming=True, temperature=1)
 llm_high = ChatOpenAI(model='o4-mini')
 llm_high_stream = ChatOpenAI(model='o4-mini', streaming=True, temperature=1)
-llm_medium = ChatOpenAI(model='gpt-5')
-llm_medium_experiment = ChatOpenAI(model='gpt-5')
-llm_medium_stream = ChatOpenAI(model='gpt-5', streaming=True)
+llm_medium = ChatOpenAI(model='gpt-4o')
+llm_medium_experiment = ChatOpenAI(model='gpt-4.1')
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)
 
 # Specialized models for agentic workflows
-llm_agent = ChatOpenAI(model='gpt-5')
-llm_agent_stream = ChatOpenAI(model='gpt-5', streaming=True)
+llm_agent = ChatOpenAI(model='gpt-4.1')
+llm_agent_stream = ChatOpenAI(model='gpt-4.1', streaming=True)
 llm_persona_mini_stream = ChatOpenAI(
     temperature=0.8,
     model="google/gemini-flash-1.5-8b",

diff --git a/backend/utils/other/chat_file.py b/backend/utils/other/chat_file.py
@@ -142,7 +142,7 @@ def _ensure_thread_and_assistant(self):
                 assistant = openai.beta.assistants.create(
                     name="File Reader",
                     instructions="You are a helpful assistant that answers questions about the provided file. Use the file_search tool to search the file contents when needed.",
-                    model="gpt-5",
+                    model="gpt-4o",
                     tools=[{"type": "file_search"}],
                     timeout=timeout,
                 )

diff --git a/backend/utils/retrieval/agentic.py b/backend/utils/retrieval/agentic.py
@@ -220,8 +220,7 @@ async def execute_agentic_chat_stream(
     conversations_collected = []
 
     # Initialize safety guard
-    # gpt-5 // 400k
-    safety_guard = AgentSafetyGuard(max_tool_calls=10, max_context_tokens=400000)
+    safety_guard = AgentSafetyGuard(max_tool_calls=10, max_context_tokens=500000)  # NOTE(review): the removed comment tied the old 400k cap to gpt-5; confirm 500k fits the context window of the model the agent now uses
 
     config = {
         "configurable": {

diff --git a/backend/utils/retrieval/graph.py b/backend/utils/retrieval/graph.py
@@ -42,8 +42,8 @@
 from utils.app_integrations import get_github_docs_content
 from utils.retrieval.agentic import execute_agentic_chat_stream
 
-model = ChatOpenAI(model="gpt-5-mini")
-llm_medium_stream = ChatOpenAI(model='gpt-5', streaming=True)
+model = ChatOpenAI(model="gpt-4o-mini")
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)
 
 
 class StructuredFilters(TypedDict):

diff --git a/backend/utils/retrieval/safety.py b/backend/utils/retrieval/safety.py
@@ -23,8 +23,7 @@ class AgentSafetyGuard:
     - Suspicious parameter patterns
     """
 
-    # gpt-5 // 400k
-    def __init__(self, max_tool_calls: int = 10, max_context_tokens: int = 400000):
+    def __init__(self, max_tool_calls: int = 10, max_context_tokens: int = 500000):
         self.max_tool_calls = max_tool_calls
         self.max_context_tokens = max_context_tokens