In [None]:
# nb29_memory_and_scratchpad.ipynb
# Stage 3: Memory and Scratchpad for Multi-Step Reasoning


import os
import pathlib
import torch
import json
import time
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from enum import Enum
import asyncio
import logging

import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (copy this cell into every notebook)
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Each cache-related environment variable is pointed at a sub-directory of the
# shared root, and that directory is created up front if it does not exist.
_CACHE_SUBDIRS = (
    ("HF_HOME", "hf"),
    ("TRANSFORMERS_CACHE", "hf/transformers"),
    ("HF_DATASETS_CACHE", "hf/datasets"),
    ("HUGGINGFACE_HUB_CACHE", "hf/hub"),
    ("TORCH_HOME", "torch"),
)
for k, _subdir in _CACHE_SUBDIRS:
    v = f"{AI_CACHE_ROOT}/{_subdir}"
    os.environ[k] = v
    os.makedirs(v, exist_ok=True)

# One-line status report: cache root in use and whether CUDA is visible to torch
print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

In [None]:
## Cell 2: Import Dependencies
import json
import time
from datetime import datetime
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
from collections import deque
import re

# For LLM (minimal transformers setup)
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [None]:
## Cell 3: Memory Entry Data Structure
@dataclass
class MemoryEntry:
    """One scratchpad record plus the metadata used to rank and filter it.

    ``tags`` may be omitted (or passed as ``None``); it is then replaced by a
    fresh empty list so instances never share a mutable default.
    """

    timestamp: str  # creation time, stored as a string
    step_id: int  # reasoning step the entry belongs to
    action_type: str  # "thought", "action", "observation", "note"
    content: str  # the remembered text itself
    importance: float = 0.5  # 0.0 to 1.0
    tags: Optional[List[str]] = None  # free-form labels used for filtering

    def __post_init__(self):
        # Normalise the "no tags" sentinel to an empty list; a caller-supplied
        # list is kept as-is (same object).
        self.tags = [] if self.tags is None else self.tags

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict of its fields."""
        as_mapping = asdict(self)
        return as_mapping

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MemoryEntry":
        """Build an entry from a dict whose keys are the field names."""
        entry = cls(**data)
        return entry

In [None]:
## Cell 4: Scratchpad Memory Manager
class ScratchpadMemory:
    """Manages short-term memory with capacity limits and importance scoring"""

    def __init__(self, max_entries: int = 20, max_tokens: int = 1500):
        self.max_entries = max_entries
        self.max_tokens = max_tokens
        self.entries: deque = deque(maxlen=max_entries)
        self.current_step = 0

    def add_entry(
        self,
        action_type: str,
        content: str,
        importance: float = 0.5,
        tags: List[str] = None,
    ) -> MemoryEntry:
        """Add new memory entry"""
        entry = MemoryEntry(
            timestamp=datetime.now().isoformat(),
            step_id=self.current_step,
            action_type=action_type,
            content=content,
            importance=importance,
            tags=tags or [],
        )
        self.entries.append(entry)
        self._manage_capacity()
        return entry

    def _manage_capacity(self):
        """Remove low-importance entries if over token limit"""
        while len(self.entries) > 0 and self._estimate_tokens() > self.max_tokens:
            # Remove least important entry
            min_entry = min(self.entries, key=lambda x: x.importance)
            self.entries.remove(min_entry)

    def _estimate_tokens(self) -> int:
        """Rough token estimation for all entries"""
        total_chars = sum(len(entry.content) for entry in self.entries)
        return total_chars // 4  # Rough estimate: 4 chars per token

    def get_context_summary(self, include_tags: List[str] = None) -> str:
        """Generate context summary for LLM prompt"""
        if not self.entries:
            return "## Memory: Empty\n"

        summary_lines = ["## Recent Memory:"]

        # Filter by tags if specified
        relevant_entries = list(self.entries)
        if include_tags:
            relevant_entries = [
                e for e in self.entries if any(tag in e.tags for tag in include_tags)
            ]

        # Sort by importance (descending) and recency
        relevant_entries.sort(key=lambda x: (x.importance, x.step_id), reverse=True)

        for entry in relevant_entries[:10]:  # Top 10 most relevant
            summary_lines.append(
                f"Step {entry.step_id} [{entry.action_type}]: {entry.content}"
            )

        return "\n".join(summary_lines) + "\n"

    def add_thought(self, thought: str, importance: float = 0.6):
        return self.add_entry("thought", thought, importance, ["reasoning"])

    def add_action(self, action: str, importance: float = 0.7):
        return self.add_entry("action", action, importance, ["execution"])

    def add_observation(self, observation: str, importance: float = 0.8):
        return self.add_entry("observation", observation, importance, ["result"])

    def add_note(self, note: str, importance: float = 0.9):
        return self.add_entry("note", note, importance, ["important"])

    def next_step(self):
        """Increment step counter"""
        self.current_step += 1

    def export_history(self) -> List[Dict]:
        """Export all entries as JSON-serializable list"""
        return [entry.to_dict() for entry in self.entries]

In [None]:
## Cell 5: Key Information Extractor
class KeyInfoExtractor:
    """Extract important information from text for memory storage.

    Two heuristics are combined: regular-expression patterns for labelled
    values, money amounts, dates, e-mails and URLs, and a keyword scan that
    keeps whole sentences mentioning results, errors and similar terms.
    """

    # A sentence containing any of these (case-insensitive) is kept as a whole.
    HIGH_VALUE_KEYWORDS = (
        'result', 'answer', 'found', 'discovered', 'calculated',
        'error', 'failed', 'succeeded', 'completed', 'important'
    )

    def __init__(self):
        # Patterns for detecting important information. Each pattern has a
        # single capture group holding the value worth remembering.
        self.important_patterns = [
            r'result:\s*(.+)',
            r'answer:\s*(.+)',
            r'found:\s*(.+)',
            r'error:\s*(.+)',
            # Money amounts: "$29.99" (prefix sign) as well as "29.99 dollars",
            # "29.99 USD" or "29.99$". The look-ahead stops "5 $6" from being
            # read as "5 $".
            r'(\$\s*\d+(?:\.\d+)?|\d+\.?\d*\s*(?:dollars?|USD|\$(?!\s*\d)))',
            r'(\d{4}-\d{2}-\d{2})',  # Dates
            r'(\w+@\w+\.\w+)',  # Emails
            r'(https?://\S+)',  # URLs
        ]

    def extract_key_info(self, text: str, max_items: int = 5) -> List[str]:
        """Extract key information pieces from text.

        Args:
            text: Text to scan (e.g. a tool observation).
            max_items: Maximum number of pieces returned (default 5).

        Returns:
            Distinct, whitespace-stripped strings in discovery order: pattern
            matches first (in the order of ``important_patterns``), then
            keyword sentences. Empty list for empty input.
        """
        if not text or max_items <= 0:
            return []

        key_info: List[str] = []

        def remember(piece: str) -> None:
            # Skip blanks and anything already collected.
            piece = piece.strip()
            if piece and piece not in key_info:
                key_info.append(piece)

        # Extract using patterns. finditer (rather than findall) always yields
        # match objects, so a pattern with several groups contributes its
        # matched text instead of the repr of a tuple.
        for pattern in self.important_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                value = match.group(1) if match.re.groups == 1 else match.group(0)
                remember(value or "")

        # Extract sentences with high-value keywords. Only a period followed
        # by whitespace (or the end of the text) ends a sentence, so decimals
        # ("29.99"), e-mail addresses and URLs are not cut in half.
        for sentence in re.split(r'\.(?=\s|$)', text):
            lowered = sentence.lower()
            if any(keyword in lowered for keyword in self.HIGH_VALUE_KEYWORDS):
                remember(sentence)

        return key_info[:max_items]

    def calculate_importance(self, text: str) -> float:
        """Score ``text`` between 0.5 and 1.0 using simple content heuristics.

        Starting from 0.5, the score gains 0.3 for error words, 0.2 for
        success words, 0.1 if any digit is present and 0.1 if the text is
        longer than 100 characters; the total is capped at 1.0.
        """
        importance = 0.5  # Base importance
        lowered = text.lower()

        # Boost for error/success indicators
        if any(word in lowered for word in ['error', 'failed', 'exception']):
            importance += 0.3
        if any(word in lowered for word in ['success', 'completed', 'found']):
            importance += 0.2

        # Boost for numbers and specific data
        if re.search(r'\d+', text):
            importance += 0.1

        # Boost for longer, detailed content
        if len(text) > 100:
            importance += 0.1

        return min(importance, 1.0)

In [None]:
## Cell 6: Memory-Enhanced ReAct Agent
class MemoryReActAgent:
    """ReAct (Thought / Action / Observation) agent backed by scratchpad memory.

    Each iteration builds a prompt from the current memory summary, asks the
    local LLM for a thought and an action, executes the action through one of
    the registered tools and stores thought, action, observation and any
    extracted key information back into memory.
    """

    # The only callables available inside calculator expressions. Everything
    # else (open, __import__, getattr, ...) is unavailable to the expression.
    _CALCULATOR_NAMES = {
        "abs": abs,
        "divmod": divmod,
        "float": float,
        "int": int,
        "len": len,
        "max": max,
        "min": min,
        "pow": pow,
        "round": round,
        "sum": sum,
    }

    def __init__(self, model_id: str = "Qwen/Qwen2.5-7B-Instruct"):
        """Load tokenizer and 8-bit quantized model, then set up memory and tools.

        Args:
            model_id: Hugging Face model identifier of a causal LM.
        """
        # Imported here because it is only needed while loading the model.
        from transformers import BitsAndBytesConfig

        # Load model with low-VRAM settings
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            # 8-bit weights for VRAM efficiency. Passing ``load_in_8bit``
            # directly to from_pretrained is deprecated in favour of an
            # explicit quantization config.
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        )

        self.memory = ScratchpadMemory(max_entries=25, max_tokens=2000)
        self.extractor = KeyInfoExtractor()

        # Available tools (simplified for demo)
        self.tools = {"calculator": self._calculator, "memory_note": self._memory_note}

    def _calculator(self, expression: str) -> str:
        """Evaluate an arithmetic expression and report the result as text.

        Args:
            expression: Python expression, typically produced by the model.

        Returns:
            ``"Result: <value>"`` on success, ``"Error: Unsafe expression"``
            when a blocked token is present, otherwise ``"Error: <message>"``.
        """
        try:
            # Simple safety check: reject dunder access and code-loading words
            if any(token in expression for token in ["import", "exec", "eval", "__"]):
                return "Error: Unsafe expression"

            # NOTE(security): the expression comes from model output, i.e. it
            # is untrusted. Builtins are removed and only a few numeric helpers
            # are exposed, which blocks file/OS access, but eval() is still
            # not a sandbox (e.g. "9**9**9" can exhaust CPU or memory). Use an
            # AST-based evaluator before exposing this outside a demo.
            result = eval(
                expression, {"__builtins__": {}}, dict(self._CALCULATOR_NAMES)
            )
            return f"Result: {result}"
        except Exception as e:
            return f"Error: {str(e)}"

    def _memory_note(self, note: str) -> str:
        """Add important note to memory"""
        self.memory.add_note(note, importance=0.9)
        return f"Note saved to memory: {note}"

    def _generate_response(self, prompt: str, max_tokens: int = 150) -> str:
        """Generate a continuation of ``prompt`` with the local LLM.

        Args:
            prompt: Full prompt text (truncated to 2048 tokens).
            max_tokens: Maximum number of newly generated tokens.

        Returns:
            Only the newly generated text, stripped of surrounding whitespace.
            Sampling is enabled, so the output is not deterministic.
        """
        inputs = self.tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=2048
        )
        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # The generated sequence starts with the prompt tokens. Slice them off
        # by token count; cutting the decoded string by character length is
        # fragile because decoding does not always reproduce the prompt text
        # character for character.
        prompt_token_count = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    def solve_task(self, task: str, max_iterations: int = 5) -> Dict[str, Any]:
        """Run the memory-enhanced ReAct loop for ``task``.

        Args:
            task: Natural-language task description.
            max_iterations: Upper bound on Thought/Action rounds.

        Returns:
            Dict with ``final_answer`` (or "Max iterations reached"),
            ``iterations`` (completed rounds before finishing),
            ``memory_entries``, ``memory_summary`` and ``full_history``.
        """
        self.memory.add_entry(
            "task", f"Starting task: {task}", importance=1.0, tags=["task"]
        )

        iteration = 0
        final_answer = None

        while iteration < max_iterations:
            # Build prompt with memory context
            memory_context = self.memory.get_context_summary()

            prompt = f"""You are a helpful assistant with access to tools and memory.

{memory_context}

Current Task: {task}

Available tools: calculator(expression), memory_note(note)

Think step by step. Use this format:
Thought: [your reasoning]
Action: [tool_name(arguments) or FINISH]
Observation: [tool result]

Continue until you can provide a final answer."""

            # Generate thought/action
            response = self._generate_response(prompt, max_tokens=200)

            # Parse response: only the first Thought and the first Action are
            # used; any Observation text written by the model is ignored.
            thought_match = re.search(
                r"Thought:\s*(.+?)(?=Action:|$)", response, re.DOTALL
            )
            action_match = re.search(
                r"Action:\s*(.+?)(?=Observation:|$)", response, re.DOTALL
            )

            thought = None
            if thought_match:
                thought = thought_match.group(1).strip()
                self.memory.add_thought(thought)
                print(f"💭 Thought: {thought}")

            if action_match:
                action = action_match.group(1).strip()
                self.memory.add_action(action)
                print(f"🔧 Action: {action}")

                # Models often write "FINISH." or add text after the keyword,
                # so accept any action that starts with it.
                if action.upper().startswith("FINISH"):
                    final_answer = thought or "Task completed"
                    break

                # Parse and execute tool call
                observation = self._execute_action(action)
                self.memory.add_observation(observation)
                print(f"👁️ Observation: {observation}")

                # Extract key information for memory
                key_info = self.extractor.extract_key_info(observation)
                for info in key_info:
                    importance = self.extractor.calculate_importance(info)
                    self.memory.add_entry("key_info", info, importance, ["extracted"])

            self.memory.next_step()
            iteration += 1
            print(f"--- Step {iteration} completed ---\n")

        return {
            "final_answer": final_answer or "Max iterations reached",
            "iterations": iteration,
            "memory_entries": len(self.memory.entries),
            "memory_summary": self.memory.get_context_summary(),
            "full_history": self.memory.export_history(),
        }

    def _execute_action(self, action: str) -> str:
        """Parse ``tool_name(arguments)`` and run the matching tool.

        Args:
            action: Action text from the model, e.g. ``"calculator(2 + 3)"``.

        Returns:
            The tool's string result, or an ``"Error: ..."`` message when the
            text is not a tool call, the tool is unknown or the tool raises.
        """
        # Parse tool call
        tool_match = re.match(r"(\w+)\((.+)\)", action)
        if not tool_match:
            return "Error: Invalid action format"

        tool_name, args = tool_match.groups()

        if tool_name not in self.tools:
            return f"Error: Unknown tool '{tool_name}'"

        try:
            # Simple argument parsing (remove quotes)
            args = args.strip("'\"")
            return self.tools[tool_name](args)
        except Exception as e:
            return f"Error executing {tool_name}: {str(e)}"

In [None]:
## Cell 7: Smoke Test - Basic Memory Operations
print("=== Testing Basic Memory Operations ===")

# A deliberately small scratchpad so the limits are reached quickly below
memory = ScratchpadMemory(max_entries=5, max_tokens=500)

# One entry through each convenience helper (thought / action / observation / note)
memory.add_thought(thought="I need to solve a math problem")
memory.add_action(action="calculator(2 + 3)")
memory.add_observation(observation="Result: 5")
memory.add_note(note="The answer is 5")

print("Memory entries:", len(memory.entries))
print("\nMemory summary:", memory.get_context_summary(), sep="\n")

# Push ten long, low-importance entries to exercise the capacity limits
print("\n=== Testing Capacity Management ===")
for i in range(10):
    memory.add_entry(
        action_type="test",
        content=f"Long test entry {i} " * 20,
        importance=0.1,
    )

print("Entries after capacity management:", len(memory.entries))
print("Estimated tokens:", memory._estimate_tokens())



In [None]:
## Cell 8: Smoke Test - Key Information Extraction
print("\n=== Testing Key Information Extraction ===")

extractor = KeyInfoExtractor()

# Sample text mixing a price, an e-mail address, an error line, a date and a
# labelled result, so several extraction heuristics have something to find.
test_text = """
I searched the web and found that the price is $29.99.
The result was successful and the email contact is support@example.com.
Error: Connection timeout occurred.
The date is 2024-01-15 and the calculation result: 142.5
"""

key_info = extractor.extract_key_info(text=test_text)

# List every extracted piece together with its heuristic importance score
print("Key information extracted:")
for i, info in enumerate(key_info, start=1):
    importance = extractor.calculate_importance(text=info)
    print(f"{i}. {info} (importance: {importance:.2f})")

In [None]:
## Cell 9: Smoke Test - Memory-Enhanced ReAct
print("\n=== Testing Memory-Enhanced ReAct ===")

# Note: constructing the agent loads the tokenizer and the quantized model.
# Only the ReAct steps below are simulated by hand instead of calling the LLM.
agent = MemoryReActAgent()

# A simple task: one calculation plus one explicit memory note
task = "Calculate 15 * 8 and remember the result"

print(f"Task: {task}", "\nStarting ReAct with memory...", sep="\n")

# Step 1 (simulated): the thought
print("💭 Thought: I need to calculate 15 * 8")
agent.memory.add_thought(thought="I need to calculate 15 * 8")

# Step 2 (simulated): run the calculator tool and record the action
print("🔧 Action: calculator(15 * 8)")
result = agent._calculator(expression="15 * 8")
agent.memory.add_action(action="calculator(15 * 8)")

# Step 3 (simulated): record what the tool returned
print(f"👁️ Observation: {result}")
agent.memory.add_observation(observation=result)

# Step 4 (simulated): store the outcome as a high-importance note
print("🔧 Action: memory_note(15 * 8 = 120)")
note_result = agent._memory_note(note="15 * 8 = 120")
print(f"👁️ Observation: {note_result}")

print("\nFinal memory summary:", agent.memory.get_context_summary(), sep="\n")

print("\n✅ Smoke test completed!")

In [None]:
## Cell 10: Advanced Features Demo
print("\n=== Advanced Memory Features ===")

# Fresh scratchpad (default limits) used to demonstrate tag filtering
memory_advanced = ScratchpadMemory()

# Demo entries as (action_type, content, importance, tags)
_demo_entries = [
    ("action", "Searched for weather", 0.6, ["search", "weather"]),
    ("result", "Temperature is 25°C", 0.8, ["weather", "data"]),
    ("action", "Calculated sum", 0.5, ["math", "calculation"]),
    ("error", "Network timeout", 0.9, ["error", "network"]),
]
for _kind, _text, _weight, _labels in _demo_entries:
    memory_advanced.add_entry(_kind, _text, importance=_weight, tags=_labels)

# Unfiltered view first, then two tag-filtered views
print("All entries:", memory_advanced.get_context_summary(), sep="\n")

print(
    "\nWeather-related entries only:",
    memory_advanced.get_context_summary(include_tags=["weather"]),
    sep="\n",
)

print(
    "\nError entries only:",
    memory_advanced.get_context_summary(include_tags=["error"]),
    sep="\n",
)

# Export the history and show its first entry as JSON
print("\nMemory export (JSON):")
exported = memory_advanced.export_history()
first_exported = exported[0]
print(json.dumps(first_exported, indent=2))  # Show first entry

print("\n=== Memory and Scratchpad Demo Complete ===")

In [None]:
## Cell 11: Key Parameters and Configuration
print("\n=== Key Parameters for Production Use ===")

# Reference configuration, grouped by concern. It is only printed here.
config = {
    # Scratchpad limits
    "memory": dict(
        max_entries=25,  # Adjust based on context window
        max_tokens=2000,  # Token budget for memory
        importance_threshold=0.3,  # Minimum importance to keep
    ),
    # Key-information extraction
    "extraction": dict(
        max_key_info=5,  # Max pieces per observation
        importance_boost=dict(
            error=0.3,  # Boost for errors
            success=0.2,  # Boost for success indicators
            numbers=0.1,  # Boost for numerical data
        ),
    ),
    # Generation / loading options
    "model": dict(
        max_new_tokens=200,  # For memory-aware generation
        temperature=0.7,  # Balance creativity/consistency
        load_in_8bit=True,  # For VRAM efficiency
    ),
}

print("Production configuration:", json.dumps(config, indent=2), sep="\n")

# Tuning hints for GPUs with little memory
print(
    "\n".join(
        [
            "\n📝 Low-VRAM options:",
            "- Use load_in_8bit or load_in_4bit",
            "- Reduce max_entries to 15-20",
            "- Lower max_tokens to 1000-1500",
            "- Use CPU offload for embeddings",
        ]
    )
)

In [None]:
## Cell 12: When to Use This Pattern
print("\n=== When to Use Memory and Scratchpad ===")

# Good fits first (✅), then cases where the pattern is not worth it (❌)
_good_fits = [
    "✅ Multi-step calculations with intermediate results",
    "✅ Research tasks requiring information synthesis",
    "✅ Debugging scenarios where error tracking is crucial",
    "✅ Long conversations where context matters",
    "✅ Sequential problem-solving with dependencies",
]
_poor_fits = [
    "❌ Simple, single-step tasks",
    "❌ Stateless API endpoints with no session",
    "❌ Tasks with very limited context windows",
]
use_cases = _good_fits + _poor_fits

print("Use cases:")
for case in use_cases:
    print(f"  {case}")

# Practical notes for wiring the scratchpad into a larger system
print(
    "\n".join(
        [
            "\n🔧 Integration notes:",
            "- Memory persists within a session/conversation",
            "- Export/import for session restoration",
            "- Combine with RAG for long-term knowledge",
            "- Use importance scoring to prioritize relevant info",
            "- Consider privacy: don't store sensitive data",
        ]
    )
)

print("\n🎯 Ready for Stage 4 multi-agent coordination!")

In [None]:
# Basic memory operations
memory = ScratchpadMemory(max_entries=5, max_tokens=500)
memory.add_thought(thought="需要解決數學問題")
memory.add_action(action="calculator(2 + 3)")
memory.add_observation(observation="結果：5")

# Capacity management test: ten short, low-importance entries
for i in range(10):
    memory.add_entry(action_type="test", content=f"長測試條目 {i}", importance=0.1)

# Key information extraction
extractor = KeyInfoExtractor()
key_info = extractor.extract_key_info(text="結果：$29.99，錯誤：連線逾時")