MemTensor · CaralHsi · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
diff --git a/examples/mem_reader/multimodal_struct_reader.py b/examples/mem_reader/multimodal_struct_reader.py
@@ -327,6 +327,102 @@ def get_info(self) -> dict[str, Any]:
             ]
         ],
     ),
+    TestCase(
+        name="oss_text_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "@http://139.196.232.20:9090/graph-test/algorithm/2025_11_13/1763043889_1763043782_PM1%E8%BD%A6%E9%97%B4PMT%E9%9D%B4%E5%8E%8B%E8%BE%B9%E5%8E%8B%E5%8E%8B%E5%8A%9B%E6%97%A0%E6%B3%95%E5%BB%BA%E7%AB%8B%E6%95%85%E9%9A%9C%E6%8A%A5%E5%91%8A20240720.md",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
+    TestCase(
+        name="pure_data_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "明文记忆是系统与用户对话、操作等交互中动态习得，以及外部提供的、可显式管理的结构化知识形态，通常以文档、提示模板、图结构或用户规则等形式存在。它具备编辑性、可共享性与治理友好性，适合存储需要频繁修改、可审计或多方协同使用的信息。 在 MemOS 中，明文记忆可用于动态生成推理上下文、个性化偏好注入、多代理协作共享等场景，成为连接人类输入与模型认知的关键桥梁。激活记忆是指模型在推理过程中产生的瞬时性认知状态，包括 KV cache、隐藏层激活、注意力权重等中间张量结构。它通常用于维持上下文连续性、对话一致性与行为风格控制。 MemOS 将激活记忆抽象为可调度资源，支持按需唤醒、延迟卸载与结构变换。例如，某些上下文状态可以被压缩为“半结构化记忆片段”用于未来复用，也可以在任务级别转化为参数化模块，支持短期记忆的长期化演进。这一机制为模型行为一致性、风格保持与状态持续性提供了基础。",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
+    TestCase(
+        name="local_data_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "./my_local_file/report.pdf",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
+    TestCase(
+        name="internet_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "https://upload.wikimedia.org/wikipedia/commons/c/cb/NLC416-16jh004830-88775_%E7%B4%85%E6%A8%93%E5%A4%A2.pdf",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
     TestCase(
         name="multimodal_mixed",
         description="Mixed multimodal message (text + file + image)",
@@ -661,6 +757,12 @@ def get_reader_config() -> dict[str, Any]:
             },
         }
 
+    # Get direct markdown hostnames from environment variable
+    direct_markdown_hostnames = None
+    env_hostnames = os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "139.196.232.20")
+    if env_hostnames:
+        direct_markdown_hostnames = [h.strip() for h in env_hostnames.split(",") if h.strip()]
+
     return {
         "llm": llm_config,
         "embedder": embedder_config,
@@ -673,6 +775,7 @@ def get_reader_config() -> dict[str, Any]:
                 "min_sentences_per_chunk": 1,
             },
         },
+        "direct_markdown_hostnames": direct_markdown_hostnames,
     }
 
 
@@ -863,13 +966,13 @@ def main():
     parser.add_argument(
         "--example",
         type=str,
-        default="all",
+        default="oss_text_file",
         help="Test case name, category name, or 'all' to run all cases (default: all)",
     )
     parser.add_argument(
         "--mode",
         choices=["fast", "fine"],
-        default="fast",
+        default="fine",
         help="Processing mode: fast (quick) or fine (with LLM) (default: fast)",
     )
     parser.add_argument(

diff --git a/src/memos/api/config.py b/src/memos/api/config.py
@@ -707,6 +707,13 @@ def get_product_default_config() -> dict[str, Any]:
                         },
                     },
                     "chat_chunker": reader_config,
+                    "direct_markdown_hostnames": [
+                        h.strip()
+                        for h in os.getenv(
+                            "FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "139.196.232.20"
+                        ).split(",")
+                        if h.strip()
+                    ],
                 },
             },
             "enable_textual_memory": True,

diff --git a/src/memos/configs/mem_reader.py b/src/memos/configs/mem_reader.py
@@ -48,6 +48,12 @@ class SimpleStructMemReaderConfig(BaseMemReaderConfig):
 class MultiModalStructMemReaderConfig(BaseMemReaderConfig):
     """MultiModalStruct MemReader configuration class."""
 
+    direct_markdown_hostnames: list[str] | None = Field(
+        default=None,
+        description="List of hostnames that should return markdown directly without parsing. "
+        "If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES environment variable.",
+    )
+
 
 class StrategyStructMemReaderConfig(BaseMemReaderConfig):
     """StrategyStruct MemReader configuration class."""

diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
@@ -29,7 +29,13 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
         """
         from memos.configs.mem_reader import SimpleStructMemReaderConfig
 
+        # Extract direct_markdown_hostnames before converting to SimpleStructMemReaderConfig
+        direct_markdown_hostnames = getattr(config, "direct_markdown_hostnames", None)
+
+        # Create config_dict excluding direct_markdown_hostnames for SimpleStructMemReaderConfig
         config_dict = config.model_dump(exclude_none=True)
+        config_dict.pop("direct_markdown_hostnames", None)
+
         simple_config = SimpleStructMemReaderConfig(**config_dict)
         super().__init__(simple_config)
 
@@ -38,6 +44,7 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
             embedder=self.embedder,
             llm=self.llm,
             parser=None,
+            direct_markdown_hostnames=direct_markdown_hostnames,
         )
 
     def _concat_multi_modal_memories(
@@ -271,7 +278,7 @@ def _process_multi_modal_data(
                 sources = fast_item.metadata.sources
                 for source in sources:
                     items = self.multi_modal_parser.process_transfer(
-                        source, context_items=[fast_item], custom_tags=custom_tags
+                        source, context_items=[fast_item], custom_tags=custom_tags, info=info
                     )
                     fine_memory_items.extend(items)
             return fine_memory_items