In [7]:
import json
import re
from xml.etree import ElementTree as ET

def build_memory_messages(old_memory, retrieved_facts):
    """Assemble the chat messages that ask a model to reconcile memory with new facts.

    Args:
        old_memory: Existing memory entries. They are serialised with
            ``json.dumps`` into the ``Old:`` line of the user message; the
            system prompt describes them as ``{id, text}`` objects.
        retrieved_facts: Newly retrieved facts, serialised with ``json.dumps``
            into the ``Facts:`` line of the user message.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        instructions (with surrounding whitespace stripped) followed by the
        user prompt, which ends with ``Output:``.
    """
    # The instruction text is sent to the model verbatim (after strip()).
    instructions = '''
You are a memory manager for a system.
You must compare a list of **retrieved facts** with the **existing memory** (an array of `{id, text}` objects).  
For each memory item, decide one of four operations: **ADD**, **UPDATE**, **DELETE**, or **NONE**.  
Your output must follow the exact XML format described.

---

## Rules
1. **ADD**:  
   - If a retrieved fact is new (no existing memory on that topic), create a new `<item>` with a new `id` (numeric, non-colliding).
   - Always include `<text>` with the new fact.

2. **UPDATE**:  
   - If a retrieved fact replaces, contradicts, or refines an existing memory, update that memory instead of deleting and adding.  
   - Keep the same `id`.  
   - Always include `<text>` with the new fact.  
   - Always include `<old_memory>` with the previous memory text.  
   - If multiple memories are about the same topic, update **all of them** to the new fact (consolidation).

3. **DELETE**:  
   - Use only when a retrieved fact explicitly invalidates or negates a memory (e.g., “I no longer like pizza”).  
   - Keep the same `id`.  
   - Always include `<text>` with the old memory value so the XML remains well-formed.

4. **NONE**:  
   - If the memory is unchanged and still valid.  
   - Keep the same `id`.  
   - Always include `<text>` with the existing value.

---

## Output format (strict XML only)

<result>
  <memory>
    <item id="STRING" event="ADD|UPDATE|DELETE|NONE">
      <text>FINAL OR EXISTING MEMORY TEXT HERE</text>
      <!-- Only for UPDATE -->
      <old_memory>PREVIOUS MEMORY TEXT HERE</old_memory>
    </item>
  </memory>
</result>

---

## Examples

### Example 1 (Preference Update)
Old: `[{"id": "0", "text": "My name is John"}, {"id": "1", "text": "My favorite fruit is oranges"}]`  
Facts: `["My favorite fruit is apple"]`  

Output:
<result>
  <memory>
    <item id="0" event="NONE">
      <text>My name is John</text>
    </item>
    <item id="1" event="UPDATE">
      <text>My favorite fruit is apple</text>
      <old_memory>My favorite fruit is oranges</old_memory>
    </item>
  </memory>
</result>

### Example 2 (Contradiction / Deletion)
Old: `[{"id": "0", "text": "I like pizza"}]`  
Facts: `["I no longer like pizza"]`  

Output:
<result>
  <memory>
    <item id="0" event="DELETE">
      <text>I like pizza</text>
    </item>
  </memory>
</result>

---

**Important constraints**:
- Never output both DELETE and ADD for the same topic; use UPDATE instead.  
- Every `<item>` must contain `<text>`.  
- Only include `<old_memory>` for UPDATE events.  
- Do not output any text outside `<result>...</result>`.

'''
    # ensure_ascii=False keeps non-ASCII characters readable in the prompt.
    old_line = "Old: " + json.dumps(old_memory, ensure_ascii=False)
    facts_line = "Facts: " + json.dumps(retrieved_facts, ensure_ascii=False)
    user_prompt = "\n".join([old_line, facts_line, "Output:"])

    system_message = {"role": "system", "content": instructions.strip()}
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]

def extract_result_xml(text):
    """Return the first ``<result>...</result>`` span found in ``text``.

    Anything before or after that span is ignored. The match is non-greedy,
    so it ends at the first closing ``</result>`` tag. Returns ``None`` when
    no such span exists.
    """
    found = re.search(r"(?s)<result\b.*?</result>", text)
    if found is None:
        return None
    return found.group(0)

def parse_memory_xml(xml_string):
    """Convert a ``<result>`` XML document into a list of plain dicts.

    Every ``<item>`` found under any ``<memory>`` child of the root yields a
    dict with ``id`` and ``event`` (attribute values, ``None`` when absent)
    and ``text`` (stripped, ``""`` when missing or empty). ``old_memory`` is
    added, stripped, only when the item has an ``<old_memory>`` child.

    No validation is performed; malformed XML raises ``ET.ParseError``.

    NOTE: a later cell defines another function with this same name; once
    that cell has run, the name refers to that later definition.
    """
    def _to_record(node):
        record = {
            "id": node.get("id"),
            "event": node.get("event"),
            "text": (node.findtext("text") or "").strip(),
        }
        previous = node.findtext("old_memory")
        if previous is not None:
            record["old_memory"] = previous.strip()
        return record

    document = ET.fromstring(xml_string)
    return [_to_record(node) for node in document.findall("./memory/item")]

# Existing memory store: one {id, text} record per remembered statement.
old_memory = [
    {"id": "0", "text": "My name is John"},
    {"id": "1", "text": "My favorite fruit is orange"},
]

# Facts to reconcile against the memory above. Alternative inputs are kept
# commented out so they can be switched back on for quick experiments.
# retrieved_facts = ["My favorite fruit is apple"]
retrieved_facts = [
    "User is aware of the potential dangers of advanced AI",
    # "User mentions concern about AGI being able to hide problems and manipulate humans",
    "User notes that standard safety methods and societal processes may not be sufficient for dealing with AGI",
    "User provides examples of other technologies (CFCs, planes, nuclear power) and their issues that were later addressed",
    "User mentions the risk of AGI being unrecoverable if control is lost",
    "User states humanity has managed to avoid global thermonuclear war due to development of new technology",
    "User recommends aisafety.info for learning more about AI safety",
]

# Build the system + user chat messages that are sent to the model.
messages = build_memory_messages(old_memory, retrieved_facts)

In [2]:
# from huggingface_hub import snapshot_download
# from pathlib import Path
# from transformers import pipeline

# mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
# mistral_models_path.mkdir(parents=True, exist_ok=True)

# snapshot_download(repo_id="mistralai/Mistral-7B-Instruct-v0.3", allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"], local_dir=mistral_models_path)


# messages = [
#     {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
#     {"role": "user", "content": "Who are you?"},
# ]
# chatbot = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.3")
# response = chatbot(messages)




  from .autonotebook import tqdm as notebook_tqdm
Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 64527.75it/s]


'/home/doraemon/mistral_models/7B-Instruct-v0.3'

In [9]:
from openai import OpenAI

# Client pointed at an OpenAI-compatible HTTP endpoint on localhost:11434.
# NOTE(review): presumably a local Ollama server, with "ollama" as a placeholder key — confirm.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

# Send the prepared system + user messages as a single chat completion request.
resp = client.chat.completions.create(
    model="mistral:7b-instruct-v0.3-q8_0",
    messages=messages,
    # NOTE(review): these tags look intended for Langfuse tracing, but no Langfuse
    # integration is visible in this notebook — confirm something consumes them.
    metadata={"langfuse_tags":["local","ollama"]},
)
# Print the text content of the first returned choice.
print(resp.choices[0].message.content)

 <result>
  <memory>
    <item id="0" event="NONE">
      <text>My name is John</text>
    </item>
    <item id="1" event="UPDATE">
      <text>My favorite fruit is unknown (User expressed concerns and potential dangers about AI)</text>
      <old_memory>My favorite fruit is orange</old_memory>
    </item>
  </memory>
  <!-- New memory items due to new facts -->
  <memory>
    <item id="2" event="ADD">
      <text>User is aware of the potential dangers of advanced AI</text>
    </item>
    <item id="3" event="ADD">
      <text>User mentions concern about AGI being able to hide problems and manipulate humans</text>
    </item>
    <item id="4" event="ADD">
      <text>User notes that standard safety methods and societal processes may not be sufficient for dealing with AGI</text>
    </item>
    <item id="5" event="ADD">
      <text>User provides examples of other technologies (CFCs, planes, nuclear power) and their issues that were later addressed</text>
    </item>
    <item id="6" eve

In [None]:
from dataclasses import dataclass, asdict
from typing import List, Optional, Literal, Dict, Any
import xml.etree.ElementTree as ET
import re

# The four operations an <item> may carry.
Event = Literal["ADD", "UPDATE", "DELETE", "NONE"]

@dataclass(frozen=True)
class MemoryItem:
    """One memory entry: its id, the operation applied to it, and its text.

    Instances are immutable (``frozen=True``).
    """
    id: str  # identifier of the memory entry
    event: Event  # one of "ADD", "UPDATE", "DELETE", "NONE"
    text: str  # memory text for this entry
    old_memory: Optional[str] = None  # previous text; defaults to None when not supplied

class MemoryXMLParseError(ValueError):
    """Raised when memory XML cannot be parsed or fails validation."""

_NUMERIC_ID = re.compile(r"^\d+$")
_ALLOWED_EVENTS = {"ADD", "UPDATE", "DELETE", "NONE"}

def _parse_memory_item(item, seen_ids) -> MemoryItem:
    """Validate a single ``<item>`` element and return it as a MemoryItem.

    ``seen_ids`` is the set of ids accepted so far; the item's id is added to
    it so duplicates are detected across the whole document.
    """
    # Attributes
    item_id = item.get("id")
    event = item.get("event")

    if not item_id:
        raise MemoryXMLParseError("<item> is missing required 'id' attribute.")
    # fullmatch: with match(), ``$`` would also accept an id ending in a newline.
    if not _NUMERIC_ID.fullmatch(item_id):
        raise MemoryXMLParseError(f"id must be numeric: {item_id!r}")
    if item_id in seen_ids:
        raise MemoryXMLParseError(f"Duplicate id detected: {item_id}")
    seen_ids.add(item_id)

    if event not in _ALLOWED_EVENTS:
        raise MemoryXMLParseError(f"Invalid event {event!r} for id {item_id}.")

    # Children
    text_el = item.find("text")
    text_val = (text_el.text or "").strip() if text_el is not None else ""
    if text_val == "":
        raise MemoryXMLParseError(f"<text> is required and non-empty for id {item_id}.")

    old_el = item.find("old_memory")
    old_val = (old_el.text or "").strip() if old_el is not None else None

    # Event-specific validation: <old_memory> is mandatory for UPDATE and
    # forbidden for every other event.
    if event == "UPDATE":
        if old_el is None or old_val == "":
            raise MemoryXMLParseError(f"<old_memory> is required and non-empty for UPDATE (id {item_id}).")
    elif old_el is not None:
        raise MemoryXMLParseError(f"<old_memory> must only appear for UPDATE (id {item_id}).")

    return MemoryItem(id=item_id, event=event, text=text_val, old_memory=old_val)

def parse_memory_xml(xml_str: str) -> List[MemoryItem]:
    """Parse and validate the strict memory XML. Returns a list of MemoryItem.

    Items are collected from every ``<memory>`` child of ``<result>``, in
    document order. Reading only the first section would silently drop items
    whenever the XML contains more than one ``<memory>`` section.

    Args:
        xml_str: XML text whose root element is ``<result>``. Surrounding
            whitespace is stripped before parsing.

    Returns:
        The validated items, one MemoryItem per ``<item>`` element.

    Raises:
        MemoryXMLParseError: if the input is ``None`` or not well-formed XML,
            the root is not ``<result>``, there is no ``<memory>`` section,
            there are no items, or any item fails validation (missing or
            non-numeric id, duplicate id, unknown event, missing or empty
            ``<text>``, or ``<old_memory>`` missing for UPDATE / present for
            any other event).
    """
    # An upstream extraction step may hand over None when it finds no XML;
    # report that as a parse error instead of an AttributeError.
    if xml_str is None:
        raise MemoryXMLParseError("Invalid XML: input is None.")

    try:
        root = ET.fromstring(xml_str.strip())
    except ET.ParseError as e:
        raise MemoryXMLParseError(f"Invalid XML: {e}") from e

    if root.tag != "result":
        raise MemoryXMLParseError("Root element must be <result>.")

    sections = root.findall("memory")
    if not sections:
        raise MemoryXMLParseError("<memory> section is required.")

    items: List[MemoryItem] = []
    seen_ids = set()

    for memory in sections:
        for item in memory.findall("item"):
            items.append(_parse_memory_item(item, seen_ids))

    if not items:
        raise MemoryXMLParseError("No <item> elements found in <memory>.")

    return items

def items_to_json(items: List[MemoryItem]) -> Dict[str, Any]:
    """Turn parsed items into a JSON-ready ``{"memory": [...]}`` mapping.

    Each entry carries ``id``, ``event`` and ``text``; ``old_memory`` is
    included only for entries whose event is ``"UPDATE"``.
    """
    return {
        "memory": [
            {
                "id": entry.id,
                "event": entry.event,
                "text": entry.text,
                # Merged in last so the key order stays id, event, text, old_memory.
                **({"old_memory": entry.old_memory} if entry.event == "UPDATE" else {}),
            }
            for entry in items
        ]
    }

def extract_assistant_xml_from_payload(payload: List[Dict[str, Any]]) -> str:
    """
    Return the content of the first assistant message in a wrapped payload.

    Expected structure:
    payload -> [{'generated_text': [{'role': 'system', ...}, {'role': 'user', ...}, {'role': 'assistant', 'content': '...xml...'}]}]

    Raises MemoryXMLParseError when the payload does not have that shape or
    contains no message whose role is 'assistant'.
    """
    try:
        conversation = payload[0]["generated_text"]
        # Lazy scan: stops at the first assistant turn, later turns are not touched.
        assistant_turns = (turn for turn in conversation if turn.get("role") == "assistant")
        first_turn = next(assistant_turns, None)
        if first_turn is not None:
            return first_turn["content"]
    except Exception as exc:
        raise MemoryXMLParseError(f"Could not extract assistant XML: {exc}") from exc
    raise MemoryXMLParseError("Assistant message with XML not found.")


In [None]:
# Pull the assistant's XML out of the payload, validate it, and print the JSON-shaped result.
# NOTE(review): `response` is not assigned by any active code visible in this notebook
# (the only assignment sits in a commented-out cell) — confirm where it comes from.
xml = extract_assistant_xml_from_payload(response)
items = parse_memory_xml(xml)
# print(items)                 # list of MemoryItem
print(items_to_json(items))  # {'memory': [...]} for downstream use


In [None]:
# Show the raw payload as this cell's output for inspection.
# NOTE(review): relies on `response` having been defined earlier — confirm.
response