Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 49 additions & 27 deletions backend/models/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def _sync_app_and_plugin_ids(cls, data: Any) -> Any:

@staticmethod
def get_messages_as_string(
messages: List['Message'], use_user_name_if_available: bool = False, use_plugin_name_if_available: bool = False
messages: List['Message'],
use_user_name_if_available: bool = False,
use_plugin_name_if_available: bool = False,
include_file_info: bool = False,
) -> str:
sorted_messages = sorted(messages, key=lambda m: m.created_at)

Expand All @@ -90,16 +93,27 @@ def get_sender_name(message: Message) -> str:
# return plugin.name RESTORE ME
return message.sender.upper() # TODO: use app id

formatted_messages = [
f"({message.created_at.strftime('%d %b %Y at %H:%M UTC')}) {get_sender_name(message)}: {message.text}"
for message in sorted_messages
]
formatted_messages = []
for message in sorted_messages:
msg_text = (
f"({message.created_at.strftime('%d %b %Y at %H:%M UTC')}) {get_sender_name(message)}: {message.text}"
)

# Add file info if requested and files exist
if include_file_info and message.files_id and len(message.files_id) > 0:
file_info = f" [Files attached: {len(message.files_id)} file(s), IDs: {', '.join(message.files_id)}]"
msg_text += file_info

formatted_messages.append(msg_text)

return '\n'.join(formatted_messages)

@staticmethod
def get_messages_as_xml(
messages: List['Message'], use_user_name_if_available: bool = False, use_plugin_name_if_available: bool = False
messages: List['Message'],
use_user_name_if_available: bool = False,
use_plugin_name_if_available: bool = False,
include_file_info: bool = False,
) -> str:
sorted_messages = sorted(messages, key=lambda m: m.created_at)

Expand All @@ -112,27 +126,35 @@ def get_sender_name(message: Message) -> str:
# return plugin.name RESTORE ME
return message.sender.upper() # TODO: use app id

formatted_messages = [
f"""
<message>
<created_at>
{message.created_at.strftime('%d %b %Y at %H:%M UTC')}
</created_at>
<sender>
{get_sender_name(message)}
</sender>
<content>
{message.text}
</content>
{('<attachments>' + ''.join(f"<file>{file.name}</file>" for file in message.files) + '</attachments>') if message.files and len(message.files) > 0 else ''}
</message>
""".replace(
' ', ''
)
.replace('\n\n\n', '\n\n')
.strip()
for message in sorted_messages
]
formatted_messages = []
for message in sorted_messages:
# Build file section if requested
file_section = ""
if include_file_info and message.files and len(message.files) > 0:
file_section = '<attachments>\n'
for file in message.files:
file_section += f' <file id="{file.id}" name="{file.name}" type="{file.mime_type}"/>\n'
file_section += '</attachments>'
elif include_file_info and message.files_id and len(message.files_id) > 0:
# Fallback if files not loaded but IDs exist
file_section = '<attachments>\n'
for file_id in message.files_id:
file_section += f' <file id="{file_id}"/>\n'
file_section += '</attachments>'
elif message.files and len(message.files) > 0:
# Original behavior when include_file_info is False
file_section = (
'<attachments>' + ''.join(f"<file>{file.name}</file>" for file in message.files) + '</attachments>'
)

msg = f"""<message>
<created_at>{message.created_at.strftime('%d %b %Y at %H:%M UTC')}</created_at>
<sender>{get_sender_name(message)}</sender>
<content>{message.text}</content>
{file_section}
</message>"""

formatted_messages.append(msg.replace(' ', '').strip())

return '\n'.join(formatted_messages)

Expand Down
22 changes: 20 additions & 2 deletions backend/utils/llm/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,14 +392,15 @@ def _get_qa_rag_prompt(
)


def _get_agentic_qa_prompt(uid: str, app: Optional[App] = None) -> str:
def _get_agentic_qa_prompt(uid: str, app: Optional[App] = None, messages: List[Message] = None) -> str:
"""
Build the system prompt for the agentic agent, preserving the structure and instructions
from _get_qa_rag_prompt while adding tool-calling capabilities.

Args:
uid: User ID
app: Optional app/plugin for personalized behavior
messages: Optional message history for file context

Returns:
System prompt string
Expand Down Expand Up @@ -437,13 +438,30 @@ def _get_agentic_qa_prompt(uid: str, app: Optional[App] = None) -> str:
{plugin_info}
</plugin_instructions>

"""

# Add file context if messages contain files
file_context_section = ""
if messages:
message_history_with_files = Message.get_messages_as_string(messages, include_file_info=True)

# Check if any files are present
if '[Files attached:' in message_history_with_files:
file_context_section = f"""
<conversation_history_with_files>
Recent conversation (includes file attachment IDs):
{message_history_with_files}

When you see [Files attached: X file(s), IDs: ...], you can reference those file IDs in search_files_tool.
</conversation_history_with_files>

"""

base_prompt = f"""<assistant_role>
You are Omi, a helpful AI assistant for {user_name}. You are designed to provide accurate, detailed, and comprehensive responses in the most personalized way possible.
</assistant_role>

<current_datetime>
{file_context_section}<current_datetime>
Current date time in {user_name}'s timezone ({tz}): {current_datetime_str}
Current date time ISO format: {current_datetime_iso}
</current_datetime>
Expand Down
1 change: 0 additions & 1 deletion backend/utils/other/chat_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ def _ensure_thread_and_assistant(self):
# Continue anyway - IDs will be recreated next time

def _fill_question(self, uid, question, file_ids: List[str], thread_id: str):

# OpenAI has a limit of 10 items in content array (1 text + max 9 images)
files = chat_db.get_chat_files_desc(uid, files_id=file_ids, limit=9)

Expand Down
8 changes: 6 additions & 2 deletions backend/utils/retrieval/agentic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
update_action_item_tool,
get_omi_product_info_tool,
perplexity_search_tool,
search_files_tool,
)
from utils.retrieval.safety import AgentSafetyGuard, SafetyGuardError
from utils.llm.clients import llm_agent, llm_agent_stream
Expand Down Expand Up @@ -126,6 +127,7 @@ def execute_agentic_chat(
update_action_item_tool,
get_omi_product_info_tool,
perplexity_search_tool,
search_files_tool,
]

# Convert messages to LangChain format and prepend system message
Expand Down Expand Up @@ -185,8 +187,8 @@ async def execute_agentic_chat_stream(
Yields:
Formatted chunks with "data: " or "think: " prefixes
"""
# Build system prompt
system_prompt = _get_agentic_qa_prompt(uid, app)
# Build system prompt with file context
system_prompt = _get_agentic_qa_prompt(uid, app, messages)

# Get all tools
tools = [
Expand All @@ -198,6 +200,7 @@ async def execute_agentic_chat_stream(
update_action_item_tool,
get_omi_product_info_tool,
perplexity_search_tool,
search_files_tool,
]

# Convert messages to LangChain format and prepend system message
Expand Down Expand Up @@ -225,6 +228,7 @@ async def execute_agentic_chat_stream(
"thread_id": str(uuid.uuid4()),
"conversations_collected": conversations_collected,
"safety_guard": safety_guard,
"chat_session_id": chat_session.id if chat_session else None,
}
}

Expand Down
25 changes: 2 additions & 23 deletions backend/utils/retrieval/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,25 +130,13 @@ def determine_conversation_type(
) -> Literal[
"no_context_conversation",
"agentic_context_dependent_conversation",
# "omi_question",
"file_chat_question",
"persona_question",
]:
# chat with files by attachments on the last message
print("determine_conversation_type")
messages = state.get("messages", [])
if len(messages) > 0 and len(messages[-1].files_id) > 0:
return "file_chat_question"

# persona
app: App = state.get("plugin_selected")
if app and app.is_a_persona():
# file
question = state.get("parsed_question", "")
is_file_question = retrieve_is_file_question(question)
if is_file_question:
return "file_chat_question"

return "persona_question"

# chat
Expand All @@ -157,15 +145,6 @@ def determine_conversation_type(
if not question or len(question) == 0:
return "no_context_conversation"

# determine the follow-up question is chatting with files or not
is_file_question = retrieve_is_file_question(question)
if is_file_question:
return "file_chat_question"

# is_omi_question = retrieve_is_an_omi_question(question)
# if is_omi_question:
# return "omi_question"

requires = requires_context(question)
if requires:
return "agentic_context_dependent_conversation"
Expand Down Expand Up @@ -466,13 +445,13 @@ def file_chat_question(state: GraphState):
# workflow.add_node("omi_question", omi_question)
# workflow.add_node("context_dependent_conversation", context_dependent_conversation)
workflow.add_node("agentic_context_dependent_conversation", agentic_context_dependent_conversation)
workflow.add_node("file_chat_question", file_chat_question)
# workflow.add_node("file_chat_question", file_chat_question)
workflow.add_node("persona_question", persona_question)

workflow.add_edge("no_context_conversation", END)
# workflow.add_edge("omi_question", END)
workflow.add_edge("persona_question", END)
workflow.add_edge("file_chat_question", END)
# workflow.add_edge("file_chat_question", END)
workflow.add_edge("agentic_context_dependent_conversation", END)
# workflow.add_edge("context_dependent_conversation", "retrieve_topics_filters")
# workflow.add_edge("context_dependent_conversation", "retrieve_date_filters")
Expand Down
4 changes: 4 additions & 0 deletions backend/utils/retrieval/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from .perplexity_tools import (
perplexity_search_tool,
)
from .file_tools import (
search_files_tool,
)

__all__ = [
'get_conversations_tool',
Expand All @@ -35,4 +38,5 @@
'update_action_item_tool',
'get_omi_product_info_tool',
'perplexity_search_tool',
'search_files_tool',
]
81 changes: 81 additions & 0 deletions backend/utils/retrieval/tools/file_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
File search tools for the agentic chat system.

These tools allow the LLM to search and query files uploaded to chat sessions.
"""

from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from typing import List, Optional
import database.chat as chat_db
from models.chat import ChatSession, FileChat
from utils.other.chat_file import FileChatTool


@tool
def search_files_tool(question: str, file_ids: Optional[List[str]] = None, config: RunnableConfig = None) -> str:
    """
    Search and ask questions about files attached to the current chat session.
    Use this when the user asks about documents, images, PDFs, or any files they've uploaded.

    The conversation history shows which files are attached to which messages in the format:
    [Files attached: X file(s), IDs: file_id_1, file_id_2, ...]

    You can specify which files to search by providing their IDs, or omit file_ids to search all files.

    Examples:
    - User asks "what does the document say?" → Use file_ids from the most recent message with files
    - User asks "compare the two PDFs I uploaded" → Use file_ids from messages with PDFs
    - User asks "summarize all my files" → Don't specify file_ids (searches all)

    Args:
        question: The specific question to ask about the files
        file_ids: Optional list of specific file IDs to search. If not provided, searches all session files.

    Returns:
        Answer based on the file contents
    """
    # NOTE: the docstring above doubles as the tool description sent to the LLM,
    # so it is part of runtime behavior — do not edit it casually.
    # `config` is injected by LangChain at invocation time; it carries the
    # per-run context (user_id, chat_session_id) set up by the agent executor.
    if config is None:
        return "Configuration error: missing config"

    uid = config['configurable']['user_id']
    chat_session_id = config['configurable'].get('chat_session_id')

    if not chat_session_id:
        return "No active chat session. Files are not available."

    try:
        # Load the session record so we can validate file ownership below.
        session_data = chat_db.get_chat_session_by_id(uid, chat_session_id)

        if not session_data:
            return "Chat session not found."

        chat_session = ChatSession(**session_data)

        # Determine which files to search.
        if file_ids:
            # Only search files the LLM asked for AND that actually belong to
            # this session — prevents probing arbitrary file IDs.
            session_file_ids = set(chat_session.file_ids or [])
            file_ids_to_search = [fid for fid in file_ids if fid in session_file_ids]

            if not file_ids_to_search:
                return "The specified files are not available in this chat session."
        else:
            # No explicit selection: search everything uploaded to the session.
            file_ids_to_search = chat_session.file_ids or []

        if not file_ids_to_search:
            return "No files have been uploaded to this chat session yet. Ask the user to upload files first."

        # Delegate the actual retrieval/QA over file contents to FileChatTool.
        fc_tool = FileChatTool(uid, chat_session_id)
        answer = fc_tool.process_chat_with_file(question, file_ids_to_search)

        return answer

    except ValueError as e:
        return f"Session error: {str(e)}"
    except Exception:
        # Deliberate best-effort: surface a friendly message to the agent loop
        # instead of crashing the whole chat turn. Log so the error isn't lost.
        logging.getLogger(__name__).exception("search_files_tool failed (uid=%s, session=%s)", uid, chat_session_id)
        return "I encountered an error while searching the files. Please try again or rephrase your question."