@@ -7,8 +7,8 @@

from dotenv import load_dotenv

-from memos.configs.mem_reader import MultiModelStructMemReaderConfig
-from memos.mem_reader.multi_model_struct import MultiModelStructMemReader
+from memos.configs.mem_reader import MultiModalStructMemReaderConfig
+from memos.mem_reader.multi_modal_struct import MultiModalStructMemReader
from memos.memories.textual.item import (
SourceMessage,
TextualMemoryItem,
@@ -111,11 +111,11 @@ def get_reader_config() -> dict[str, Any]:
"""
Get reader configuration from environment variables.

-    Returns a dictionary that can be used to create MultiModelStructMemReaderConfig.
+    Returns a dictionary that can be used to create MultiModalStructMemReaderConfig.
Similar to APIConfig.get_reader_config() in server_router_api.py.

Returns:
-        Configuration dictionary for MultiModelStructMemReaderConfig
+        Configuration dictionary for MultiModalStructMemReaderConfig
"""
openai_api_key = os.getenv("OPENAI_API_KEY")
openai_base_url = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
@@ -228,13 +228,13 @@ def main():
if openai_api_key:
# Use environment variables (similar to server_router_api.py)
config_dict = get_reader_config()
-        reader_config = MultiModelStructMemReaderConfig.model_validate(config_dict)
+        reader_config = MultiModalStructMemReaderConfig.model_validate(config_dict)
else:
# Fall back to JSON file
-        reader_config = MultiModelStructMemReaderConfig.from_json_file(
+        reader_config = MultiModalStructMemReaderConfig.from_json_file(
"examples/data/config/simple_struct_reader_config.json"
)
-    reader = MultiModelStructMemReader(reader_config)
+    reader = MultiModalStructMemReader(reader_config)

# 2. Define scene data
scene_data = [
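A minimal sketch of the renamed classes in use, assuming the same example JSON config file the diff above already falls back to:

# Usage sketch for the renamed reader (assumes the repo's example JSON config).
from memos.configs.mem_reader import MultiModalStructMemReaderConfig
from memos.mem_reader.multi_modal_struct import MultiModalStructMemReader

reader_config = MultiModalStructMemReaderConfig.from_json_file(
    "examples/data/config/simple_struct_reader_config.json"
)
reader = MultiModalStructMemReader(reader_config)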
1 change: 1 addition & 0 deletions examples/mem_reader/parser/__init__.py
@@ -0,0 +1 @@
"""Parser examples for different message types."""
132 changes: 132 additions & 0 deletions examples/mem_reader/parser/config_utils.py
@@ -0,0 +1,132 @@
"""Shared configuration utilities for parser examples.
This module provides configuration functions that match the configuration
logic in examples/mem_reader/multimodal_struct_reader.py.
"""

import os

from typing import Any

from memos.configs.embedder import EmbedderConfigFactory
from memos.configs.llm import LLMConfigFactory
from memos.embedders.factory import EmbedderFactory
from memos.llms.factory import LLMFactory


def get_reader_config() -> dict[str, Any]:
"""
Get reader configuration from environment variables.
Returns a dictionary that can be used to create MultiModalStructMemReaderConfig.
Matches the configuration logic in examples/mem_reader/multimodal_struct_reader.py.
Returns:
Configuration dictionary with llm, embedder, and chunker configs
"""
openai_api_key = os.getenv("OPENAI_API_KEY")
openai_base_url = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
ollama_api_base = os.getenv("OLLAMA_API_BASE", "http://localhost:11434")

# Get LLM backend and config
llm_backend = os.getenv("MEM_READER_LLM_BACKEND", "openai")
if llm_backend == "ollama":
llm_config = {
"backend": "ollama",
"config": {
"model_name_or_path": os.getenv("MEM_READER_LLM_MODEL", "qwen3:0.6b"),
"api_base": ollama_api_base,
"temperature": float(os.getenv("MEM_READER_LLM_TEMPERATURE", "0.0")),
"remove_think_prefix": os.getenv(
"MEM_READER_LLM_REMOVE_THINK_PREFIX", "true"
).lower()
== "true",
"max_tokens": int(os.getenv("MEM_READER_LLM_MAX_TOKENS", "8192")),
},
}
else: # openai
llm_config = {
"backend": "openai",
"config": {
"model_name_or_path": os.getenv("MEM_READER_LLM_MODEL", "gpt-4o-mini"),
"api_key": openai_api_key or os.getenv("MEMRADER_API_KEY", "EMPTY"),
"api_base": openai_base_url,
"temperature": float(os.getenv("MEM_READER_LLM_TEMPERATURE", "0.5")),
"remove_think_prefix": os.getenv(
"MEM_READER_LLM_REMOVE_THINK_PREFIX", "true"
).lower()
== "true",
"max_tokens": int(os.getenv("MEM_READER_LLM_MAX_TOKENS", "8192")),
},
}

# Get embedder backend and config
embedder_backend = os.getenv(
"MEM_READER_EMBEDDER_BACKEND", os.getenv("MOS_EMBEDDER_BACKEND", "ollama")
)
if embedder_backend == "universal_api":
embedder_config = {
"backend": "universal_api",
"config": {
"provider": os.getenv(
"MEM_READER_EMBEDDER_PROVIDER", os.getenv("MOS_EMBEDDER_PROVIDER", "openai")
),
"api_key": os.getenv(
"MEM_READER_EMBEDDER_API_KEY",
os.getenv("MOS_EMBEDDER_API_KEY", openai_api_key or "sk-xxxx"),
),
"model_name_or_path": os.getenv(
"MEM_READER_EMBEDDER_MODEL",
os.getenv("MOS_EMBEDDER_MODEL", "text-embedding-3-large"),
),
"base_url": os.getenv(
"MEM_READER_EMBEDDER_API_BASE",
os.getenv("MOS_EMBEDDER_API_BASE", openai_base_url),
),
},
}
else: # ollama
embedder_config = {
"backend": "ollama",
"config": {
"model_name_or_path": os.getenv(
"MEM_READER_EMBEDDER_MODEL",
os.getenv("MOS_EMBEDDER_MODEL", "nomic-embed-text:latest"),
),
"api_base": ollama_api_base,
},
}

return {
"llm": llm_config,
"embedder": embedder_config,
"chunker": {
"backend": "sentence",
"config": {
"tokenizer_or_token_counter": "gpt2",
"chunk_size": 512,
"chunk_overlap": 128,
"min_sentences_per_chunk": 1,
},
},
}


def init_embedder_and_llm():
"""
Initialize embedder and LLM from environment variables.
Returns:
Tuple of (embedder, llm) instances
"""
config_dict = get_reader_config()

# Initialize embedder
embedder_config = EmbedderConfigFactory.model_validate(config_dict["embedder"])
embedder = EmbedderFactory.from_config(embedder_config)

# Initialize LLM
llm_config = LLMConfigFactory.model_validate(config_dict["llm"])
llm = LLMFactory.from_config(llm_config)

return embedder, llm
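A minimal sketch of how a parser example consumes these helpers, assuming the repository root is on sys.path (the example scripts below fall back to path manipulation when it is not):

# Usage sketch for the shared helpers; assumes repo root on sys.path.
from examples.mem_reader.parser.config_utils import get_reader_config, init_embedder_and_llm

config_dict = get_reader_config()        # env-driven llm/embedder/chunker dicts
embedder, llm = init_embedder_and_llm()  # validated, ready-to-use instances
print(config_dict["chunker"]["config"]["chunk_size"])  # 512 with the defaults above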
94 changes: 94 additions & 0 deletions examples/mem_reader/parser/example_assistant_parser.py
@@ -0,0 +1,94 @@
"""Example demonstrating AssistantParser usage.
AssistantParser handles assistant messages in chat conversations.
"""

import sys

from pathlib import Path

from dotenv import load_dotenv

from memos.mem_reader.read_multi_modal.assistant_parser import AssistantParser


# Handle imports for both script and module usage
try:
from .config_utils import init_embedder_and_llm
except ImportError:
# When running as script, add parent directory to path
sys.path.insert(0, str(Path(__file__).parent))
from config_utils import init_embedder_and_llm

# Load environment variables
load_dotenv()


def main():
"""Demonstrate AssistantParser usage."""
print("=== AssistantParser Example ===\n")

# 1. Initialize embedder and LLM (using shared config)
embedder, llm = init_embedder_and_llm()

    # 2. Create AssistantParser
parser = AssistantParser(embedder=embedder, llm=llm)

    # 3. Example assistant messages
assistant_messages = [
{
"role": "assistant",
"content": "I'm sorry to hear that you're feeling down. Would you like to talk about what's been going on?",
"chat_time": "2025-01-15T10:00:30",
"message_id": "msg_001",
},
{
"role": "assistant",
"content": "Based on the document you provided, I can see several key points: 1) The project timeline, 2) Budget considerations, and 3) Resource allocation.",
"chat_time": "2025-01-15T10:05:30",
"message_id": "msg_002",
},
{
"role": "assistant",
"content": "Here's a Python solution for your problem:\n```python\ndef solve_problem():\n return 'solution'\n```",
"chat_time": "2025-01-15T10:10:30",
"message_id": "msg_003",
},
]

print("📝 Processing assistant messages:\n")
for i, message in enumerate(assistant_messages, 1):
print(f"Assistant Message {i}:")
print(f" Content: {message['content'][:60]}...")

# Create source from assistant message
info = {"user_id": "user1", "session_id": "session1"}
source = parser.create_source(message, info)

print(" ✅ Created SourceMessage:")
print(f" - Type: {source.type}")
print(f" - Role: {source.role}")
print(f" - Content: {source.content[:60]}...")
print(f" - Chat Time: {source.chat_time}")
print(f" - Message ID: {source.message_id}")
print()

# Parse in fast mode
memory_items = parser.parse_fast(message, info)
print(f" 📊 Fast mode generated {len(memory_items)} memory item(s)")
if memory_items:
print(f" - Memory: {memory_items[0].memory[:60]}...")
print(f" - Memory Type: {memory_items[0].metadata.memory_type}")
print(f" - Tags: {memory_items[0].metadata.tags}")
print()

# Rebuild assistant message from source
rebuilt = parser.rebuild_from_source(source)
print(f" 🔄 Rebuilt message: role={rebuilt['role']}, content={rebuilt['content'][:40]}...")
print()

print("✅ AssistantParser example completed!")


if __name__ == "__main__":
main()
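Given the import fallback at the top of the file, the example runs directly as a script; module-style invocation should also work when the intermediate example directories are importable as packages (both assume the repository root as the working directory and a populated .env):

python examples/mem_reader/parser/example_assistant_parser.py
python -m examples.mem_reader.parser.example_assistant_parser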