# Agentspec Example Builder

## ⚠️ WORKFLOW (FOLLOW IN ORDER)

1. **Run Cell 1** (Imports & Setup)
2. **Run Cell 2** ← TEXT INPUT BOXES WILL APPEAR HERE
3. **Type/Paste in the boxes** that appear after Cell 2
4. **Run Cell 3** (API call)
5. **Run Cell 4** (Parse & Save)

### Step by step:
- Click **Cell 1** → Press **Shift+Enter**
- Click **Cell 2** → Press **Shift+Enter**
- **Two text boxes will appear below Cell 2**
- Click in a box and paste your content
- Click **Cell 3** → Press **Shift+Enter**
- Click **Cell 4** → Press **Shift+Enter**

In [1]:
from IPython.display import display
from agentspec.notebook_ui import create_ui_only_toggle

# Build the "UI only" toggle widget and render it as this cell's output.
# The name is kept at module level so the widget object stays referenced.
ui_toggle_top = create_ui_only_toggle("UI only")
display(ui_toggle_top)


Button(description='UI only', layout=Layout(width='auto'), style=ButtonStyle(), tooltip='Toggle UI-only view')

In [2]:
import os, json, pathlib, requests
from dotenv import load_dotenv, find_dotenv

# Load .env (walk up from CWD to repo root)
# env_path is falsy when no .env file was found; in that case only variables
# already present in the process environment are used below.
env_path = find_dotenv(filename=".env", usecwd=True)
if env_path:
    load_dotenv(env_path)

# Read config from env with sane defaults
# Each setting prefers the first variable name and falls back to the second
# name before using the literal default.
PROVIDER = os.getenv("PROVIDER", os.getenv("AGENTSPEC_PROVIDER", "ollama"))
MODEL = os.getenv("MODEL", os.getenv("AGENTSPEC_MODEL", "qwen3-coder:30b"))
# NOTE(review): the default BASE_URL points at a local server on port 11434
# regardless of PROVIDER -- confirm this is intended when PROVIDER is "openai".
BASE_URL = os.getenv("BASE_URL", os.getenv("OPENAI_BASE_URL", "http://localhost:11434/v1"))
# float()/int() raise ValueError if the variable is set to a non-numeric string.
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.0"))
MAX_OUTPUT_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "1500"))

# API keys (don't print these)
# A missing key becomes "" (not None), so later truthiness checks work.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Prompts dir (relative still fine)
# The path is resolved against the current working directory; the three
# read_text calls below fail immediately if a prompt file is missing.
PROMPTS_DIR = pathlib.Path("../agentspec/prompts")
SYSTEM_PROMPT = (PROMPTS_DIR / "agentspec_yaml.md").read_text(encoding="utf-8")
INSTRUCTION_TEMPLATE = (PROMPTS_DIR / "example_builder_instructions.md").read_text(encoding="utf-8")
GRAMMAR = (PROMPTS_DIR / "agentspec_yaml.lark").read_text(encoding="utf-8")

# Only the .env location is reported; key values are never printed.
print("‚úì Ready (env loaded from:", env_path or "none", ")")

‚úì Ready (env loaded from: /Users/davidmontgomery/agentspec/.env )


In [3]:
# PREFILL EXAMPLE 1: Chat Completions API mistake
# Default contents for the two input boxes: PREFILL_EXAMPLE is the initial
# value of the example Textarea and PREFILL_CRITIQUE the initial value of the
# critique Textarea created later in this cell. Both are plain text that is
# sent to the model; the code inside PREFILL_EXAMPLE is never executed here.
PREFILL_EXAMPLE = '''def generate_documentation(code: str, model: str = "gpt-4o-mini") -> str:
    """Generate documentation."""
    import openai
    client = openai.OpenAI()
    response = client.chat.completions.create(
        model=model,
        max_tokens=2000,
        messages=[{"role": "system", "content": "Generate docs"}, {"role": "user", "content": code}]
    )
    return response.choices[0].message.content'''

# Reviewer-style critique describing what good documentation of the example
# above should call out.
PREFILL_CRITIQUE = "Bad agent would say this generates documentation correctly. Good documentation should warn this uses DEPRECATED chat.completions.create instead of responses.create, wrong model gpt-4o-mini instead of gpt-5, and wrong parameter max_tokens instead of max_output_tokens. Should add ASK USER guardrail before changing APIs."

from IPython.display import display, HTML, Javascript
from ipywidgets import Textarea, VBox, HBox, Button, HTML as WidgetHTML, Layout

# CSS inside a hidden HTML widget to avoid extra output spacing
# All rules are scoped to the `.dark-notebook` class, which is added to the
# input container built later in this cell, so other widgets are unaffected.
# The `.ui-only` rules hide every notebook cell that does not contain a
# `.dark-notebook` element; they rely on the CSS `:has()` selector, so
# browsers without `:has()` support ignore them.
# NOTE(review): presumably create_ui_only_toggle toggles the `ui-only` class
# on an ancestor element -- confirm in agentspec.notebook_ui.
style_widget = WidgetHTML('''
<style>
/* Root container styling */
.dark-notebook {
    background-color: #1e1e1e !important;
    border-radius: 4px !important;
}

/* Headings and paragraphs */
.dark-notebook .widget-html-content h3 {
    color: #e0e0e0 !important;
    margin: 0 0 6px 0 !important;
}
.dark-notebook .widget-html-content p {
    color: #b0b0b0 !important;
    margin: 0 0 8px 0 !important;
}

/* Textareas */
.dark-notebook .widget-textarea textarea {
    background-color: #2d2d2d !important;
    color: #e0e0e0 !important;
    border: 1px solid #444 !important;
    font-family: monospace !important;
    caret-color: #e0e0e0 !important;
}
.dark-notebook .widget-textarea textarea:focus {
    background-color: #333 !important;
    border-color: #666 !important;
    outline: none;
}
.dark-notebook .widget-textarea textarea::placeholder {
    color: #666 !important;
}

/* UI-only mode: hide all non-UI cells when toggled */
.ui-only .jp-Cell:not(:has(.dark-notebook)) { display: none !important; }
.ui-only .cell:not(:has(.dark-notebook)) { display: none !important; }
.ui-only .jp-OutputArea-output:has(.dark-notebook) { padding: 0 !important; }
</style>
''')

# Create input widgets with prefilled values
# example_input and critic_input are read by the next cell via `.value`, so
# whatever the user types or pastes here is what gets sent to the model.
example_label = WidgetHTML('<h3>üìù Example</h3><p>Paste code or agentspec (Python, JS, TS, or description):</p>')
example_input = Textarea(value=PREFILL_EXAMPLE, rows=10, layout=Layout(width='100%', margin='0 0 12px 0'))

critic_label = WidgetHTML('<h3>üí¨ Critique</h3><p>Paste 1-2 sentences:</p>')
critic_input = Textarea(value=PREFILL_CRITIQUE, rows=5, layout=Layout(width='100%', margin='0'))

# Static hint shown under the inputs; this cell never updates it.
status_label = WidgetHTML('<p style="color:#b0b0b0;margin:8px 0 0 0;">‚úì Ready to input. Type in the boxes above, then run Cell 3.</p>')

# Toggle button to switch UI-only mode (reusable, tested)
# NOTE(review): ui_toggle is created here but is neither added to the
# container's children nor displayed in this cell -- confirm whether it was
# meant to be part of the visible UI.
from agentspec.notebook_ui import create_ui_only_toggle
ui_toggle = create_ui_only_toggle("UI only")

# Container with uniform padding and no margins
# style_widget is the first child so the stylesheet is emitted together with
# the widgets it styles, in a single output area.
container = VBox(
    [style_widget, example_label, example_input, critic_label, critic_input, status_label],
    layout=Layout(
        width='100%',
        padding='12px',  # uniform on all sides
        margin='0',
        display='flex',
        flex_flow='column nowrap'
    )
)
# Apply class for background + radius via CSS
container.add_class('dark-notebook')

# Display single output (no extra spacing above)
display(container)

VBox(children=(HTML(value='\n<style>\n/* Root container styling */\n.dark-notebook {\n    background-color: #1‚Ä¶

In [4]:
# Extract values from widgets
EXAMPLE_TEXT = example_input.value
CRITIQUE = critic_input.value

if not EXAMPLE_TEXT or not CRITIQUE:
    print('ERROR: Both fields must be filled')
else:
    print(f'‚úì Example: {len(EXAMPLE_TEXT)} chars')
    print(f'‚úì Critique: {len(CRITIQUE)} chars')
    
    # Build user message
    USER_MESSAGE = f"{INSTRUCTION_TEMPLATE}\n\nEXAMPLE:\n```\n{EXAMPLE_TEXT}\n```\n\nCRITIQUE:\n{CRITIQUE}\n\nReturn a single JSON object only."
    
    # API call functions
    def call_openai_responses(model, base_url, system, user, api_key, grammar, temp, max_tokens):
        """POST to an OpenAI-style ``/responses`` endpoint and return the generated text.

        The system and user prompts are concatenated into a single ``input``
        string, and one custom tool named ``agentspec_yaml`` is attached whose
        format is the supplied Lark grammar.

        Args:
            model: Model identifier sent in the payload.
            base_url: API root; ``/responses`` is appended after stripping any
                trailing slash.
            system: System prompt text.
            user: User message text.
            api_key: Bearer token for the ``Authorization`` header.
            grammar: Lark grammar definition for the custom tool.
            temp: Sampling temperature.
            max_tokens: Sent as ``max_output_tokens``.

        Returns:
            The generated text as a ``str``; ``''`` when the response carries
            no text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        def _collect_text(data):
            """Pull generated text out of a decoded ``/responses`` payload."""
            if not isinstance(data, dict):
                return ''
            # Some servers expose a flat `output_text` string; honour it first.
            flat = data.get('output_text')
            if isinstance(flat, str) and flat:
                return flat
            # The REST response nests text inside output[].content[] parts of
            # type "output_text"; a custom tool call carries its text in `input`.
            message_parts = []
            tool_parts = []
            for item in data.get('output') or []:
                if not isinstance(item, dict):
                    continue
                if item.get('type') == 'custom_tool_call' and isinstance(item.get('input'), str):
                    tool_parts.append(item['input'])
                    continue
                for part in item.get('content') or []:
                    if (
                        isinstance(part, dict)
                        and part.get('type') == 'output_text'
                        and isinstance(part.get('text'), str)
                    ):
                        message_parts.append(part['text'])
            if message_parts:
                return ''.join(message_parts)
            if tool_parts:
                return ''.join(tool_parts)
            # Top-level `text` is only accepted when it is a plain string, so a
            # non-string value is never handed to the JSON extractor.
            legacy = data.get('text')
            return legacy if isinstance(legacy, str) else ''

        url = base_url.rstrip('/') + '/responses'
        headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}
        payload = {
            'model': model,
            'input': f'{system}\n\n{user}',
            'temperature': temp,
            'max_output_tokens': max_tokens,
            'tools': [{'type': 'custom', 'name': 'agentspec_yaml', 'format': {'type': 'grammar', 'syntax': 'lark', 'definition': grammar}}]
        }
        r = requests.post(url, headers=headers, json=payload, timeout=120)
        r.raise_for_status()
        # Decode the body once and reuse it.
        return _collect_text(r.json())
    
    def call_anthropic(model, system, user, api_key, temp, max_tokens):
        """POST to the Anthropic Messages endpoint and return the reply text.

        The system and user prompts are concatenated and sent as one ``user``
        message.

        Args:
            model: Model identifier sent in the payload.
            system: System prompt text.
            user: User message text.
            api_key: Value for the ``x-api-key`` header.
            temp: Sampling temperature.
            max_tokens: Sent as ``max_tokens``.

        Returns:
            The ``text`` of the first content block, or the whole decoded
            response re-serialised as a JSON string when the response does not
            have that shape.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = 'https://api.anthropic.com/v1/messages'
        headers = {'x-api-key': api_key, 'anthropic-version': '2023-06-01', 'Content-Type': 'application/json'}
        payload = {'model': model, 'max_tokens': max_tokens, 'temperature': temp, 'messages': [{'role': 'user', 'content': f'{system}\n\n{user}'}]}
        r = requests.post(url, headers=headers, json=payload, timeout=120)
        r.raise_for_status()
        # Decode once; a non-JSON body raises here instead of being masked.
        data = r.json()
        try:
            return data['content'][0]['text']
        except (KeyError, IndexError, TypeError):
            # Unexpected shape: hand back the raw payload so it can be inspected.
            return json.dumps(data)
    
    def call_ollama_chat(base_url, model, system, user, temp, max_tokens):
        """POST to an OpenAI-compatible ``/chat/completions`` endpoint and return the reply text.

        No authentication header is sent.

        Args:
            base_url: API root; ``/chat/completions`` is appended after
                stripping any trailing slash.
            model: Model identifier sent in the payload.
            system: Content of the ``system`` message.
            user: Content of the ``user`` message.
            temp: Sampling temperature.
            max_tokens: Sent as ``max_tokens``.

        Returns:
            The content of the first choice's message, or the whole decoded
            response re-serialised as a JSON string when the response does not
            have that shape.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = base_url.rstrip('/') + '/chat/completions'
        payload = {'model': model, 'messages': [{'role': 'system', 'content': system}, {'role': 'user', 'content': user}], 'temperature': temp, 'max_tokens': max_tokens}
        r = requests.post(url, json=payload, timeout=120)
        r.raise_for_status()
        # Decode once; a non-JSON body raises here instead of being masked.
        data = r.json()
        try:
            return data['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError):
            # Unexpected shape: hand back the raw payload so it can be inspected.
            return json.dumps(data)
    
    # Route the request to the configured backend. Any provider other than
    # 'openai' or 'anthropic' is treated as an Ollama-style server at BASE_URL.
    if PROVIDER not in ('openai', 'anthropic'):
        print(f'üîπ Using Ollama at {BASE_URL}')
        RAW = call_ollama_chat(
            base_url=BASE_URL,
            model=MODEL,
            system=SYSTEM_PROMPT,
            user=USER_MESSAGE,
            temp=TEMPERATURE,
            max_tokens=MAX_OUTPUT_TOKENS,
        )
    elif PROVIDER == 'anthropic':
        # The hosted providers need a key; stop before any request is made.
        assert ANTHROPIC_API_KEY, 'Set ANTHROPIC_API_KEY'
        print(f'üîπ Using Anthropic')
        RAW = call_anthropic(
            model=MODEL,
            system=SYSTEM_PROMPT,
            user=USER_MESSAGE,
            api_key=ANTHROPIC_API_KEY,
            temp=TEMPERATURE,
            max_tokens=MAX_OUTPUT_TOKENS,
        )
    else:
        assert OPENAI_API_KEY, 'Set OPENAI_API_KEY'
        print(f'üîπ Using OpenAI /responses')
        RAW = call_openai_responses(
            model=MODEL,
            base_url=BASE_URL,
            system=SYSTEM_PROMPT,
            user=USER_MESSAGE,
            api_key=OPENAI_API_KEY,
            grammar=GRAMMAR,
            temp=TEMPERATURE,
            max_tokens=MAX_OUTPUT_TOKENS,
        )

    # Show the raw model output under a heading (blank line first).
    print('\nüìÑ Response:', RAW, sep='\n')

‚úì Example: 401 chars
‚úì Critique: 319 chars
üîπ Using Ollama at http://localhost:11434/v1



üìÑ Response:
```json
{
  "version": "1.0",
  "last_updated": "2025-11-02",
  "examples": [
    {
      "id": "deprecated_api_call",
      "type": "negative_then_positive",
      "language": "python",
      "code": "def generate_documentation(code: str, model: str = \"gpt-4o-mini\") -> str:\n    import openai\n    client = openai.OpenAI()\n    response = client.chat.completions.create(\n        model=model,\n        max_tokens=2000,\n        messages=[{\"role\": \"system\", \"content\": \"Generate docs\"}, {\"role\": \"user\", \"content\": code}]\n    )\n    return response.choices[0].message.content",
      "code_context": {
        "file": "utils/documentation_generator.py",
        "function": "generate_documentation",
        "subject_function": "openai.chat.completions.create"
      },
      "bad_documentation": {
        "what": "This function correctly generates documentation using the OpenAI API with optimal parameters and model selection",
        "why_bad": "Overstates the 

In [5]:
import re

def extract_json(text):
    """Return the first JSON object embedded in *text*.

    Decoding starts at the first ``{`` and stops where that object ends, so
    Markdown code fences, leading prose without braces, and any trailing text
    (including text that itself contains braces or a second object) are
    ignored.

    Only the first ``{`` is tried: if the object starting there is malformed
    or truncated the error is raised rather than silently returning some
    nested fragment found later in the text.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON object.

    Raises:
        TypeError: If *text* is not a string.
        ValueError: With message ``'No JSON found'`` when *text* contains no
            ``{``; ``json.JSONDecodeError`` (a ``ValueError`` subclass) when
            the object starting at the first ``{`` is not valid JSON.
    """
    if not isinstance(text, str):
        raise TypeError('expected string or bytes-like object')
    start = text.find('{')
    if start == -1:
        raise ValueError('No JSON found')
    # raw_decode parses one value and reports where it ended, so anything
    # after the object does not have to be stripped beforehand.
    obj, _end = json.JSONDecoder().raw_decode(text, start)
    return obj

OBJ = extract_json(RAW)

# The model may wrap the example in {"examples": [...]}; use the first entry
# when that list is present and non-empty, otherwise treat OBJ as the example.
if 'examples' in OBJ and OBJ['examples']:
    EX = OBJ['examples'][0]
else:
    EX = OBJ

# Rename a `code` field to `code_snippet` unless `code_snippet` already exists.
if 'code_snippet' not in EX and 'code' in EX:
    EX['code_snippet'] = EX.pop('code')

print('‚úì Parsed JSON:\n')
print(json.dumps(EX, indent=2))

# Flip to True to append the parsed example to the examples file on disk.
APPEND = False
if APPEND:
    examples_path = pathlib.Path('../agentspec/prompts/examples.json')
    data = json.loads(examples_path.read_text(encoding='utf-8'))
    if 'examples' not in data:
        data['examples'] = []
    data['examples'].append(EX)
    # Serialise fully before writing so the file is only touched on success.
    serialized = json.dumps(data, indent=2)
    examples_path.write_text(serialized, encoding='utf-8')
    print(f'\n‚úì Appended to {examples_path}')

‚úì Parsed JSON:

{
  "id": "deprecated_api_call",
  "type": "negative_then_positive",
  "language": "python",
  "code_context": {
    "file": "utils/documentation_generator.py",
    "function": "generate_documentation",
    "subject_function": "openai.chat.completions.create"
  },
  "bad_documentation": {
    "what": "This function correctly generates documentation using the OpenAI API with optimal parameters and model selection",
    "why_bad": "Overstates the accuracy of the implementation; it uses deprecated API calls, incorrect model, and wrong token parameter"
  },
  "good_documentation": {
    "what": "Uses deprecated openai.ChatCompletion.create() API instead of the modern client.responses.create(). Uses outdated gpt-4o-mini model and max_tokens instead of max_output_tokens.",
    "why": "This function demonstrates an outdated integration with OpenAI's API. It is not recommended for production use due to API deprecation and suboptimal parameter choices.",
    "guardrails": [
  