In [4]:
import os
# Pull the API key from the environment; this is None when the variable is unset.
ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')

In [13]:
# System prompt passed as `system=` on every Messages API request below.
# It tells the model to restructure the document into a hierarchical bullet
# outline while copying the source text verbatim, and describes the
# read-outline / get-batch / write-outline workflow the tools implement.
#
# NOTE(review): the few-shot examples appear to contradict the rules, which may
# teach the model to paraphrase:
#   * Example 1 is titled "Nested Elements" but its output has no nested
#     "Element:" bullet, and it rewrites "For example, the substrate ..." as
#     "Example: the substrate ...", while rule 1 demands exact copying.
#   * Example 2 drops "The housing can include a sealing mechanism which
#     functions to ..." and keeps only "Function: Prevent moisture ingress.",
#     while rule 7 says to keep the full introducing sentence.
# Confirm whether the rules or the examples reflect the intended output.
system_prompt = """You are converting a technical document into a structured outline. This is NOT a summarization task - you must preserve ALL original text exactly.

GOAL: Break prose into bullet points and ARRANGE them hierarchically based on their semantic relationships. No synthesis, no summarization - just structural transformation.

RULES:
1. PRESERVE ALL TEXT: Copy every sentence, clause, parenthetical, and figure reference exactly as written. Do not paraphrase, shorten, or omit anything.

2. IDENTIFY ELEMENTS: When you encounter a new component/element/concept (e.g., "Substrates", "Control system", "Power supply"), create a top-level bullet:
   - Element: [Name]

3. DETECT NESTED ELEMENTS: Recognize when text describes sub-elements or components contained within another element:

   INCLUSION PATTERNS - these indicate nested elements. For example: 
   - "X can include Y" → Y is a nested element under X

   When detected, create a nested structure:
   - Element: [Parent Element Name]
     - [Full sentence about the parent]
       - Element: [Nested Element Name]
         - [Properties/details of nested element]

4. ORGANIZE HIERARCHICALLY:
   - Level 0: Top-level elements (components, systems, concepts)
   - Level 1 (under element): Main properties, functions, characteristics, or sentences introducing sub-elements
     * Use "Function:" prefix for functional descriptions
     * Keep complete sentences with all examples and parentheticals
   - Level 2: Examples, elaborations, alternatives, OR nested elements (triggered by "For example", "However", "Additionally", inclusion patterns, etc.)
   - Level 3+: Further nested details, properties of nested elements

5. FLATTEN NUMBERED LISTS: Convert numbered paragraphs into nested bullets, but preserve all their content.

6. SEMANTIC GROUPING: Group related sentences under the same bullet when they discuss the same property/aspect.

7. MAINTAIN CONTEXT: When a sentence introduces a nested element, keep the full sentence, then nest the element beneath it.

WORKFLOW:
1. Read current outline.md
2. Get next batch of paragraphs
3. Identify if batch introduces NEW element(s) or continues existing element
4. Check for nested element patterns (inclusion language)
5. Extract and organize ALL text into proper hierarchy with nested elements where appropriate
6. Write complete updated outline
7. Repeat

Example transformations:

Example 1 - Nested Elements:
Input: "The substrate is preferably flexible, but can alternatively be rigid. For example, the substrate can include flexible polymers such as polyimide."
Output:
- Element: Substrate
  - The substrate is preferably flexible, but can alternatively be rigid.
  - Example: the substrate can include flexible polymers such as polyimide.

Example 2 - Properties with nested elements:
Input: "The housing is preferably waterproof. The housing can include a sealing mechanism which functions to prevent moisture ingress."
Output:
- Element: Housing
  - The housing is preferably waterproof.
  - Element: Sealing mechanism
    - Function: Prevent moisture ingress.

CRITICAL: Every word, parenthetical, figure reference, and technical detail must appear in the output. When you detect nested element relationships, create the proper Element: [Name] structure at the appropriate nesting level."""

In [14]:
import anthropic
from docx import Document

def read_outline(path="outline.md"):
    """Return the current outline text, or a placeholder if no outline exists yet.

    Args:
        path: Outline file to read. Defaults to "outline.md" in the working
            directory, which is what the agent's tool calls use.

    Returns:
        The file contents as a string, or "# Outline\\n\\n(empty)" when the file
        has not been created yet.
    """
    try:
        # Explicit UTF-8: the outline contains model-generated text (arrows,
        # accented characters, ...) and must not depend on the platform's
        # default encoding.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return "# Outline\n\n(empty)"


def write_outline(content, path="outline.md"):
    """Overwrite the outline file with ``content`` and return a status message.

    Args:
        content: Full markdown text of the outline; replaces any previous content.
        path: Outline file to write. Defaults to "outline.md".

    Returns:
        The string "Outline updated successfully", which is sent back to the
        model as the tool result.
    """
    # Same encoding as read_outline so a write/read round trip is lossless.
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return "Outline updated successfully"

# Read the Word document, keeping only paragraphs that contain visible text
doc = Document("input.docx")
paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]

# Join each consecutive run of three paragraphs into one blank-line-separated batch
batches = [
    "\n\n".join(paragraphs[start:start + 3])
    for start in range(0, len(paragraphs), 3)
]

# Cursor for the next batch to hand out, plus the API client
current_batch_idx = 0
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

# Tool definitions advertised to the model on every request. Each entry gives
# the tool's name, a natural-language description, and a JSON schema for its
# input. The names must match the dispatch on `block.name` in the agent loop.
tools = [
    {
        # Takes no arguments; returns the current outline text.
        "name": "read_outline",
        "description": "Read the current state of outline.md",
        "input_schema": {"type": "object", "properties": {}}
    },
    {
        # Requires the complete new outline; the file is overwritten, not appended to.
        "name": "write_outline", 
        "description": "Write updated content to outline.md",
        "input_schema": {
            "type": "object",
            "properties": {
                "content": {"type": "string", "description": "Full markdown content"}
            },
            "required": ["content"]
        }
    },
    {
        # Takes no arguments. The "up to 3 paragraphs" wording mirrors the
        # batch size of 3 used when `batches` is built; keep the two in sync.
        "name": "get_next_batch",
        "description": "Get the next batch of paragraphs to process (up to 3 paragraphs)",
        "input_schema": {"type": "object", "properties": {}}
    }
]

messages = [{"role": "user", "content": "Start processing the document."}]


def _request_next_turn():
    """Send the conversation so far to the model and return its response."""
    return client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=16000,
        system=system_prompt,
        tools=tools,
        messages=messages,
    )


def _run_tool(name, tool_input):
    """Execute one tool call requested by the model and return its text result.

    Unknown tool names and a missing ``content`` argument are reported back to
    the model as an error string rather than raising, so every ``tool_use``
    block always receives a matching ``tool_result``.
    """
    global current_batch_idx

    if name == "read_outline":
        return read_outline()
    if name == "write_outline":
        content = tool_input.get("content")
        if content is None:
            return "Error: write_outline requires a 'content' string."
        return write_outline(content)
    if name == "get_next_batch":
        if current_batch_idx >= len(batches):
            return "No more batches."
        batch_text = f"Batch {current_batch_idx + 1}/{len(batches)}:\n\n{batches[current_batch_idx]}"
        current_batch_idx += 1
        return batch_text
    return f"Error: unknown tool '{name}'."


# Main agentic loop: exchange turns with the model until every batch has been
# handed out and the model stops requesting tools.
# NOTE(review): every write_outline call embeds the full outline in `messages`,
# so each request grows with the document - consider trimming old turns if long
# inputs become slow or exceed the context window.
nudged = False        # True once we have asked the model to continue without a tool call since
done = not batches    # an empty document needs no API calls at all
while not done:
    response = _request_next_turn()

    # A response cut off at max_tokens may contain an incomplete tool_use block;
    # executing it could overwrite outline.md with truncated content.
    if response.stop_reason == "max_tokens":
        raise RuntimeError(
            "Model response hit max_tokens before finishing; "
            "raise max_tokens or reduce the outline size."
        )

    messages.append({"role": "assistant", "content": response.content})

    tool_calls = [block for block in response.content if block.type == "tool_use"]
    if tool_calls:
        nudged = False
        # Answer every tool_use block in a single user turn, as the API requires.
        messages.append({
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": _run_tool(block.name, block.input),
                }
                for block in tool_calls
            ],
        })
        continue

    # The model ended its turn without requesting a tool.
    print(f"Processed batch {current_batch_idx}/{len(batches)}")
    if current_batch_idx >= len(batches) or nudged:
        done = True
    else:
        # Batches remain: ask once more with a user turn. Re-sending a
        # conversation that ends on an assistant message would be treated as a
        # prefill rather than a new turn.
        messages.append({"role": "user", "content": "Continue processing the remaining batches."})
        nudged = True

if current_batch_idx < len(batches):
    print(f"Stopped early after batch {current_batch_idx}/{len(batches)}; outline.md is incomplete.")
else:
    print("Complete! Check outline.md")

KeyboardInterrupt: 