Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/memos/memories/textual/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ class PreferenceTextualMemoryMetadata(TextualMemoryMetadata):
preference: str | None = Field(default=None, description="Preference.")
created_at: str | None = Field(default=None, description="Timestamp of the dialog.")
mem_cube_id: str | None = Field(default=None, description="ID of the MemCube.")
score: float | None = Field(default=None, description="Score of the retrieval result.")


class TextualMemoryItem(BaseModel):
Expand Down
29 changes: 17 additions & 12 deletions src/memos/memories/textual/prefer_text_memory/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, A
response = self.llm_provider.generate([{"role": "user", "content": prompt}])
response = response.strip().replace("```json", "").replace("```", "").strip()
result = json.loads(response)
result["preference"] = result.pop("implicit_preference")
for d in result:
d["preference"] = d.pop("implicit_preference")
return result
except Exception as e:
logger.error(f"Error extracting implicit preferences: {e}, return None")
Expand Down Expand Up @@ -136,20 +137,24 @@ def _process_single_chunk_implicit(
if not implicit_pref:
return None

vector_info = {
"embedding": self.embedder.embed([implicit_pref["context_summary"]])[0],
}
memories = []
for pref in implicit_pref:
vector_info = {
"embedding": self.embedder.embed([pref["context_summary"]])[0],
}

extract_info = {**basic_info, **implicit_pref, **vector_info, **info}
extract_info = {**basic_info, **pref, **vector_info, **info}

metadata = PreferenceTextualMemoryMetadata(
type=msg_type, preference_type="implicit_preference", **extract_info
)
memory = TextualMemoryItem(
id=extract_info["dialog_id"], memory=implicit_pref["context_summary"], metadata=metadata
)
metadata = PreferenceTextualMemoryMetadata(
type=msg_type, preference_type="implicit_preference", **extract_info
)
memory = TextualMemoryItem(
id=str(uuid.uuid4()), memory=pref["context_summary"], metadata=metadata
)

return memory
memories.append(memory)

return memories

def extract(
self,
Expand Down
40 changes: 30 additions & 10 deletions src/memos/memories/textual/prefer_text_memory/retrievers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from abc import ABC, abstractmethod
from typing import Any

Expand Down Expand Up @@ -34,9 +36,12 @@ def _naive_reranker(
self, query: str, prefs_mem: list[TextualMemoryItem], top_k: int, **kwargs: Any
) -> list[TextualMemoryItem]:
if self.reranker:
prefs_mem = self.reranker.rerank(query, prefs_mem, top_k)
return [item for item, _ in prefs_mem]
return prefs_mem
prefs_mem_reranked = []
prefs_mem_tuple = self.reranker.rerank(query, prefs_mem, top_k)
for item, score in prefs_mem_tuple:
item.metadata.score = score
prefs_mem_reranked.append(item)
return prefs_mem_reranked

def _original_text_reranker(
self,
Expand All @@ -52,11 +57,22 @@ def _original_text_reranker(
prefs_mem_for_reranker = deepcopy(prefs_mem)
for pref_mem, pref in zip(prefs_mem_for_reranker, prefs, strict=False):
pref_mem.memory = pref_mem.memory + "\n" + pref.original_text
prefs_mem_for_reranker = self.reranker.rerank(query, prefs_mem_for_reranker, top_k)
prefs_mem_for_reranker = [item for item, _ in prefs_mem_for_reranker]
reranked_results = self.reranker.rerank(query, prefs_mem_for_reranker, top_k)
prefs_mem_for_reranker = [item for item, _ in reranked_results]
prefs_ids = [item.id for item in prefs_mem_for_reranker]
prefs_dict = {item.id: item for item in prefs_mem}
return [prefs_dict[item_id] for item_id in prefs_ids if item_id in prefs_dict]

# Create mapping from id to score from reranked results
reranked_scores = {item.id: score for item, score in reranked_results}

# Assign scores to the original items
result_items = []
for item_id in prefs_ids:
if item_id in prefs_dict:
original_item = prefs_dict[item_id]
original_item.metadata.score = reranked_scores.get(item_id)
result_items.append(original_item)
return result_items
return prefs_mem

def retrieve(
Expand Down Expand Up @@ -119,9 +135,6 @@ def retrieve(
if pref.payload.get("preference", None)
]

# store explicit id and score, use it after reranker
explicit_id_scores = {item.id: item.score for item in explicit_prefs}

reranker_map = {
"naive": self._naive_reranker,
"original_text": self._original_text_reranker,
Expand All @@ -136,7 +149,14 @@ def retrieve(

# filter explicit mem by score bigger than threshold
explicit_prefs_mem = [
item for item in explicit_prefs_mem if explicit_id_scores.get(item.id, 0) >= 0.0
item
for item in explicit_prefs_mem
if item.metadata.score >= float(os.getenv("PREFERENCE_SEARCH_THRESHOLD", 0.0))
]
implicit_prefs_mem = [
item
for item in implicit_prefs_mem
if item.metadata.score >= float(os.getenv("PREFERENCE_SEARCH_THRESHOLD", 0.0))
]

return explicit_prefs_mem + implicit_prefs_mem
46 changes: 29 additions & 17 deletions src/memos/templates/prefer_complete_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
Requirements:
1. Keep only the preferences explicitly mentioned by the user. Do not infer or assume. If the user mentions reasons for their preferences, include those reasons as well.
2. Output should be a list of entries concise natural language summaries and the corresponding context summary, context summary must contain complete information of the conversation fragment that the preference is mentioned.
3. If multiple preferences are mentioned within the same topic or domain, you MUST combine them into a single entry, keep each entry information complete.
3. If multiple preferences are mentioned within the same topic or domain, you MUST combine them into a single entry, keep each entry information complete. Different topics of preferences should be divided into multiple entries.
4. If no explicit preference can be reasonably extracted, return [].

Conversation:
{qa_pair}
Expand All @@ -23,6 +24,7 @@
"explicit_preference": "A short natural language summary of the preferences",
"context_summary": "The corresponding context summary, which is a summary of the corresponding conversation, do not lack any scenario information",
"reasoning": "reasoning process to find the explicit preferences"
"topic": "preference topic, which can only belong to one topic or domain, such as: sports, hotel, education, etc.",
},
]
```
Expand All @@ -42,7 +44,8 @@
要求:
1. 只保留用户明确提到的偏好,不要推断或假设。如果用户提到了偏好的原因,也要包含这些原因。
2. 输出应该是一个条目列表,包含简洁的自然语言摘要和相应的上下文摘要,上下文摘要必须包含提到偏好的对话片段的完整信息。
3. 如果在同一主题或领域内提到了多个偏好,你必须将它们合并为一个条目,保持每个条目信息完整。
3. 如果在同一主题或领域内提到了多个偏好,你必须将它们合并为一个条目,保持每个条目信息完整。不同话题的偏好要分为多个条目。
4. 如果没有可以合理提取的显式偏好,返回[]。

对话:
{qa_pair}
Expand All @@ -51,9 +54,10 @@
```json
[
{
"explicit_preference": "偏好的简短自然语言摘要",
"explicit_preference": "偏好的简短自然语言摘要,需要描述为“用户偏好于/不喜欢/想要/不想要/偏好什么”",
"context_summary": "对应的上下文摘要,即对应对话的摘要,不要遗漏任何场景信息",
"reasoning": "寻找显式偏好的推理过程"
"reasoning": "寻找显式偏好的推理过程",
"topic": "偏好所属的主题或领域,例如:体育、酒店、教育等, topic只能属于一个主题或领域",
},
]
```
Expand All @@ -79,18 +83,22 @@
2. Inferred implicit preferences must not conflict with explicit preferences.
3. For implicit_preference: only output the preference statement itself; do not include any extra explanation, reasoning, or confidence information. Put all reasoning and explanation in the reasoning field.
4. In the reasoning field, explicitly explain the underlying logic and hidden motivations you identified.
5. If no implicit preference can be reasonably inferred, leave the implicit_preference field empty (do not output anything else).
5. Different topics of preferences should be divided into multiple entries.
6. If no implicit preference can be reasonably inferred, return [].

Conversation:
{qa_pair}

Output format:
```json
{
"implicit_preference": "A concise natural language statement of the implicit preferences reasonably inferred from the conversation, or an empty string",
"context_summary": "The corresponding context summary, which is a summary of the corresponding conversation, do not lack any scenario information",
"reasoning": "Explain the underlying logic, hidden motivations, and behavioral patterns that led to this inference"
}
[
{
"implicit_preference": "A concise natural language statement of the implicit preferences reasonably inferred from the conversation, or an empty string",
"context_summary": "The corresponding context summary, which is a summary of the corresponding conversation, do not lack any scenario information",
"reasoning": "Explain the underlying logic, hidden motivations, and behavioral patterns that led to this inference",
"topic": "preference topic, which can only belong to one topic or domain, such as: sports, hotel, education, etc.",
}
]
```
Don't output anything except the JSON.
"""
Expand All @@ -115,18 +123,22 @@
2. 推断的隐式偏好不得与显式偏好冲突。
3. 对于 implicit_preference:仅输出偏好陈述本身;不要包含任何额外的解释、推理或置信度信息。将所有推理和解释放在 reasoning 字段中。
4. 在 reasoning 字段中,明确解释你识别出的底层逻辑和隐藏动机。
5. 如果无法合理推断出隐式偏好,则将 implicit_preference 字段留空(不要输出其他任何内容)。
5. 如果在同一主题或领域内提到了多个偏好,你必须将它们合并为一个条目,保持每个条目信息完整。不同话题的偏好要分为多个条目。
6. 如果没有可以合理推断的隐式偏好,返回[]。

对话:
{qa_pair}

输出格式:
```json
{
"implicit_preference": "从对话中合理推断出的隐式偏好的简洁自然语言陈述,或空字符串",
"context_summary": "对应的上下文摘要,即对应对话的摘要,不要遗漏任何场景信息",
"reasoning": "解释推断出该偏好的底层逻辑、隐藏动机和行为模式"
}
[
{
"implicit_preference": "从对话中合理推断出的隐式偏好的简洁自然语言陈述,或空字符串",
"context_summary": "对应的上下文摘要,即对应对话的摘要,不要遗漏任何场景信息",
"reasoning": "解释推断出该偏好的底层逻辑、隐藏动机和行为模式",
"topic": "偏好所属的主题或领域,例如:体育、酒店、教育等, topic只能属于一个主题或领域",
}
]
```
除JSON外不要输出任何其他内容。
"""
Expand Down
Loading