Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion methods/evermemos/demo/utils/simple_memory_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,4 @@ def print_summary(self):
print(" - ❌ Won't extract: Too brief, low-information small talk")
print(
" - 🎯 Best practice: Multi-turn conversations, rich context, specific details"
)
)
17 changes: 7 additions & 10 deletions methods/evermemos/env.template
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ LLM_BASE_URL=https://openrouter.ai/api/v1
# OpenRouter Configuration
# Preferred provider naming rule:
# {PROVIDER}_API_KEY / {PROVIDER}_BASE_URL
OPENROUTER_API_KEY=sk-or-v1-xxxx
# Supports multiple keys (comma-separated) for rate-limit distribution:
# OPENROUTER_API_KEY=key1,key2,key3
OPENROUTER_API_KEY=your-openrouter-api-key
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# Also supported:
# {PROVIDER}_LLM_API_KEY / {PROVIDER}_LLM_BASE_URL
Expand Down Expand Up @@ -215,19 +217,14 @@ RERANK_SCORE_THRESHOLD=0.6
AGENTIC_ROUND1_RERANK_TOP_N=10

# ===================
# Agent Memorize Configuration
# Agent Memorize Mode
# ===================

# Agent memorize mode: "online" (real-time, embedding-based clustering;
# LLM-based clustering is WIP) or "fast_skill" (batched, LLM-based clustering;
# skips non-skill memory).
# Default: online
# Controls which MemorizeConfig is used for agent conversations.
# - online: full pipeline, fast skill search (default)
# - fast_skill: skip profile/foresight/eventlog, skip maturity scoring
AGENT_MEMORIZE_MODE=online

# Clustering similarity threshold for agent memory (0.0-1.0)
# Default: 0.5
AGENT_CLUSTER_SIMILARITY_THRESHOLD=0.5

# ===================
# Environment & Logging Configuration
# ===================
Expand Down
4 changes: 2 additions & 2 deletions methods/evermemos/src/agentic_layer/get_mem_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from infra_layer.adapters.out.persistence.repository.agent_skill_raw_repository import (
AgentSkillRawRepository,
)
from biz_layer.memorize_config import AGENT_DEFAULT_MEMORIZE_CONFIG
from biz_layer.memorize_config import DEFAULT_MEMORIZE_CONFIG

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -243,7 +243,7 @@ async def _get_agent_skills(
self, mongo_filter: dict, skip: int, limit: int, sort: list
) -> GetMemResponse:
"""Query v1_agent_skills via repository and return GetMemResponse."""
retire_confidence = AGENT_DEFAULT_MEMORIZE_CONFIG.skill_retire_confidence
retire_confidence = DEFAULT_MEMORIZE_CONFIG.skill_retire_confidence
mongo_filter.setdefault("confidence", {"$gte": retire_confidence})
with timed("query_memories"):
docs, total_count = await self._agent_skill_repo.find_by_query(
Expand Down
3 changes: 3 additions & 0 deletions methods/evermemos/src/agentic_layer/memory_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1077,6 +1077,9 @@ async def do_search(q: str) -> List[Dict]:
round2_results = await asyncio.gather(
*[do_search(q) for q in refined_queries], return_exceptions=True
)
from common_utils.async_utils import reraise_critical_errors

reraise_critical_errors(round2_results)
all_round2 = [
h for r in round2_results if not isinstance(r, Exception) for h in r
]
Expand Down
3 changes: 3 additions & 0 deletions methods/evermemos/src/agentic_layer/retrieval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ async def multi_query_retrieval(
]

multi_query_results = await asyncio.gather(*tasks, return_exceptions=True)
from common_utils.async_utils import reraise_critical_errors

reraise_critical_errors(multi_query_results)

# Collect valid results
valid_results = []
Expand Down
151 changes: 73 additions & 78 deletions methods/evermemos/src/agentic_layer/search_mem_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
from service.raw_message_service import RawMessageService

# Memorize config (for skill_retire_confidence threshold)
from biz_layer.memorize_config import AGENT_DEFAULT_MEMORIZE_CONFIG
from biz_layer.memorize_config import DEFAULT_MEMORIZE_CONFIG

# Constants
from core.oxm.constants import MAGIC_ALL
Expand Down Expand Up @@ -510,6 +510,11 @@ async def search_memories(
results = await asyncio.gather(*search_tasks, return_exceptions=True)
search_duration = time.perf_counter() - search_start

# Propagate critical system errors before processing results
from common_utils.async_utils import reraise_critical_errors

reraise_critical_errors(results)

# Collect results from parallel searches
has_error = False
total_result_count = 0
Expand Down Expand Up @@ -1393,7 +1398,7 @@ async def _fetch_agent_skills_by_ids(
id_filter = self._build_mongo_id_filter(skill_ids)
if not id_filter:
return {}
retire_confidence = AGENT_DEFAULT_MEMORIZE_CONFIG.skill_retire_confidence
retire_confidence = DEFAULT_MEMORIZE_CONFIG.skill_retire_confidence
id_filter["confidence"] = {"$gte": retire_confidence}
docs = await AgentSkillRecord.find_many(id_filter).to_list()
return {str(d.id): d for d in docs}
Expand Down Expand Up @@ -1442,7 +1447,7 @@ async def _search_agent_skills(

agent_skill_mt = MemoryType.AGENT_SKILL.value
with timed("agent_skill_search"):
retire_confidence = AGENT_DEFAULT_MEMORIZE_CONFIG.skill_retire_confidence
retire_confidence = DEFAULT_MEMORIZE_CONFIG.skill_retire_confidence

if method == "keyword":
stage_start = time.perf_counter()
Expand Down Expand Up @@ -1575,12 +1580,74 @@ async def _search_agent_skills(
query=query, filter_values=filter_values, top_k=top_k
)

if AGENT_DEFAULT_MEMORIZE_CONFIG.enable_skill_llm_verify:
if results and query:
results = await self._verify_skill_relevance(query, results)
if results and DEFAULT_MEMORIZE_CONFIG.enable_skill_llm_verify:
results = await self._verify_skill_relevance(query, results)

return results

async def _verify_skill_relevance(
self, query: str, skills: List[SearchAgentSkillItem]
) -> List[SearchAgentSkillItem]:
"""Use LLM to post-verify whether retrieved skills are relevant to the query."""
import json
from common_utils.json_utils import parse_json_response
from memory_layer.prompts import get_prompt_by
from memory_layer.llm.llm_provider import build_default_provider

if not skills or not query:
return skills

skills_for_prompt = [
{
"index": i,
"name": skill.name or "",
"description": skill.description or "",
"content": skill.content or "",
}
for i, skill in enumerate(skills)
]

prompt_template = get_prompt_by("AGENT_SKILL_RELEVANCE_VERIFY_PROMPT")
prompt = prompt_template.format(
query=query, skills_json=json.dumps(skills_for_prompt, ensure_ascii=False)
)

try:
llm_provider = build_default_provider()
response_text = await llm_provider.generate(
prompt, temperature=0.0, response_format={"type": "json_object"}
)

result = parse_json_response(response_text)
score_map = {
item["index"]: item.get("score", 0.0)
for item in result.get("results", [])
}

scored = []
for i, skill in enumerate(skills):
relevance_score = score_map.get(i, 0.0)
if relevance_score >= 0.4:
skill.score = relevance_score
scored.append(skill)

scored.sort(key=lambda s: s.score, reverse=True)

logger.info(
"Skill relevance verification: %d/%d skills passed (threshold=0.4) for query: %s",
len(scored),
len(skills),
query[:60],
)

return scored

except Exception as e:
logger.warning(
"Skill relevance verification failed, returning all results: %s", e
)
return skills

# ------------------------------------------------------------------
# Agentic retrieval for agent memory types
# ------------------------------------------------------------------
Expand Down Expand Up @@ -1628,7 +1695,6 @@ async def _search_agentic_agent_skills(
) -> List[SearchAgentSkillItem]:
"""Search agent skills using agentic retrieval (LLM-guided multi-round).

After retrieval, uses LLM to verify skill relevance to the query.
"""
retrieve_request = RetrieveMemRequest(
query=query,
Expand Down Expand Up @@ -1663,74 +1729,3 @@ async def _search_agentic_agent_skills(
results.append(self._agent_skill_doc_to_item(doc, score=score))

return results

async def _verify_skill_relevance(
self, query: str, skills: List[SearchAgentSkillItem]
) -> List[SearchAgentSkillItem]:
"""Use LLM to post-verify whether retrieved skills are relevant to the query.

Args:
query: The user's search query
skills: List of SearchAgentSkillItem results

Returns:
Filtered list containing only helpful skills
"""
import json
from common_utils.json_utils import parse_json_response
from memory_layer.prompts import get_prompt_by
from memory_layer.llm.llm_provider import build_default_provider

if not skills or not query:
return skills

skills_for_prompt = [
{
"index": i,
"name": skill.name or "",
"description": skill.description or "",
"content": skill.content or "",
}
for i, skill in enumerate(skills)
]

prompt_template = get_prompt_by("AGENT_SKILL_RELEVANCE_VERIFY_PROMPT")
prompt = prompt_template.format(
query=query, skills_json=json.dumps(skills_for_prompt, ensure_ascii=False)
)

try:
llm_provider = build_default_provider()
response_text = await llm_provider.generate(
prompt, temperature=0.0, response_format={"type": "json_object"}
)

result = parse_json_response(response_text)
score_map = {
item["index"]: item.get("score", 0.0)
for item in result.get("results", [])
}

scored = []
for i, skill in enumerate(skills):
relevance_score = score_map.get(i, 0.0)
if relevance_score >= 0.4:
skill.score = relevance_score
scored.append(skill)

scored.sort(key=lambda s: s.score, reverse=True)

logger.info(
"Skill relevance verification: %d/%d skills passed (threshold=0.4) for query: %s",
len(scored),
len(skills),
query[:60],
)

return scored

except Exception as e:
logger.warning(
"Skill relevance verification failed, returning all results: %s", e
)
return skills
31 changes: 0 additions & 31 deletions methods/evermemos/src/api_specs/dtos/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,37 +568,6 @@ class FlushResponse(BaseApiResponse[FlushResult]):
data: FlushResult = Field(default_factory=FlushResult, description="Flush result")


# =============================================================================
# Flush Clustering DTOs (POST /api/v1/memories/agent/flush-clustering)
# =============================================================================


class AgentFlushClusteringRequest(BaseModel):
"""Request to force-drain pending memcells and run batch clustering."""

user_id: str = Field(
...,
description="User ID (used to derive group_id for solo scene)",
)


class AgentFlushClusteringResult(BaseModel):
"""Result of flush-clustering operation."""

request_id: str = Field(default="", description="Request ID")
status: str = Field(default="", description="Flush clustering status")
message: str = Field(default="", description="Status message")


class AgentFlushClusteringResponse(BaseApiResponse[AgentFlushClusteringResult]):
"""Flush-clustering endpoint response."""

data: AgentFlushClusteringResult = Field(
default_factory=AgentFlushClusteringResult,
description="Flush clustering result",
)


# =============================================================================
# Search/Retrieve DTOs (GET /api/v1/memories/search)
# =============================================================================
Expand Down
Loading