# Coding Agent Feature Test Notebook

This notebook validates the core features implemented in the finance coding agent:
- session/checkpointed runner flow
- schema-driven multi-pass extraction
- task-level tool policy override
- section-aware retrieval tooling
- JSONL memory artifact generation


In [None]:
from pathlib import Path
import json
import tempfile
import os

# Make the project's ``src`` directory importable for the cells below.
# ``sys`` is imported explicitly here: the previous ``os.sys`` spelling only
# worked because the ``os`` module happens to import ``sys`` internally, which
# is an implementation detail rather than a documented part of ``os``.
import sys

# ROOT is the resolved current working directory.
# NOTE(review): this presumes the notebook is launched from the repository
# root, so that ``src`` and ``examples`` sit directly under it - confirm.
ROOT = Path.cwd().resolve()
SRC = ROOT / 'src'

# Prepend only when missing, so re-running this cell does not add duplicates.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

# Cell output: show the resolved root for a quick sanity check.
ROOT

In [None]:
from profiles.finance_docs import build_finance_docs_profile
from agent_core.runner import GenericHeadlessAgent
from agent_core.models import TaskRequest

# Build the finance-docs profile and hand its tool registry and policy to a
# headless runner; ``profile`` and ``runner`` are reused by the cells below.
profile = build_finance_docs_profile()
runner = GenericHeadlessAgent(profile.registry, profile.policy)
# Cell output: the keys of the registry's ``tools`` mapping, sorted.
sorted(profile.registry.tools.keys())

## 1) Interactive Response Flow (Credit Agreement)

In [None]:
# Sample credit agreement under ``examples``; later cells reuse ``credit_doc``.
credit_doc = ROOT.joinpath('examples', 'sample_credit_agreement.txt')

# Request metadata: names the document type and skill pack, and passes a
# task-level tool policy override that denies ``safe_bash`` for this call.
respond_metadata = {
    'document_type': 'credit_agreement',
    'skill_pack': 'finance-docs',
    'tool_policy_override': {'deny': ['safe_bash']},
}

# ``respond`` yields a pair that is unpacked into the response and its trace.
response, trace = runner.respond(
    instruction='Answer with evidence and extract key terms',
    documents=[credit_doc],
    query='What is the maturity date?',
    metadata=respond_metadata,
)

# Print only the leading 2000-element slice of the response, then the number
# of trace entries.
print(response[:2000])
print('\ntrace events:', len(trace))

In [None]:
# Event names of interest when inspecting the trace from the previous cell.
tracked_event_names = {'tool_started', 'tool_finished', 'checkpoint'}

# Keep only the trace entries whose 'event' value is one of the tracked names.
tool_events = [
    entry
    for entry in trace
    if entry.get('event') in tracked_event_names
]

# Cell output: the first eight matching entries.
tool_events[:8]

## 2) Headless Run + Artifact Validation (JSONL memory + session trace)

In [None]:
# Fresh scratch directory for this run's output. It is not removed here
# because later cells read artifacts from it and write a sample file into it.
tmp_dir = Path(tempfile.mkdtemp(prefix='agent-feature-run-'))

# Questions passed along with the task.
task_questions = [
    'What is the facility amount?',
    'What are the financial covenants?',
    'What is the maturity date?',
]

# Same metadata shape as the interactive call: document type, skill pack, and
# a task-level policy override denying ``safe_bash``.
task_metadata = {
    'document_type': 'credit_agreement',
    'skill_pack': 'finance-docs',
    'tool_policy_override': {'deny': ['safe_bash']},
}

task = TaskRequest(
    instruction='Extract terms and answer questions with reasons',
    documents=[credit_doc],
    questions=task_questions,
    output_modes=['report', 'json'],
    metadata=task_metadata,
)

# Execute the task headlessly, directing output to the scratch directory.
run_result = runner.run(task=task, output_dir=tmp_dir)
print('success:', run_result.success)
print('output_dir:', tmp_dir)

# Cell output: the path of every artifact reported by the run, as strings.
[str(artifact.path) for artifact in run_result.artifacts]

In [None]:
# Files this cell expects to find in the run's output directory.
trace_path = tmp_dir.joinpath('run_trace.json')
extraction_path = tmp_dir.joinpath('extraction.json')
memory_path = tmp_dir.joinpath('memory.jsonl')

# The trace and extraction files are parsed as whole JSON documents; the
# memory file is split into lines after trimming surrounding whitespace.
trace_payload = json.loads(trace_path.read_text())
extract_payload = json.loads(extraction_path.read_text())
memory_text = memory_path.read_text()
memory_lines = memory_text.strip().splitlines()

print('trace rows:', len(trace_payload))

# Collect the 'step' value of every trace row marked as a checkpoint.
checkpoint_steps = [
    row.get('step')
    for row in trace_payload
    if row.get('event') == 'checkpoint'
]
print('checkpoints:', checkpoint_steps)

# Both summary fields live under the top-level 'extraction' key.
extraction = extract_payload['extraction']
print('document_type:', extraction.get('document_type'))
print('consistency:', extraction.get('consistency'))
print('memory.jsonl lines:', len(memory_lines))

# Only parse the first memory row when the file actually has one.
if memory_lines:
    first_row_keys = list(json.loads(memory_lines[0]).keys())
else:
    first_row_keys = []
print('first memory row keys:', first_row_keys)

## 3) Document-Type Switch Test (Compliance Certificate schema)

In [None]:
# Minimal compliance certificate used to exercise a second document type.
compliance_text = '''
COMPLIANCE CERTIFICATE
For the period ended March 31, 2026
Total Leverage Ratio: 3.25x
The Borrower is in compliance with all financial covenants.
'''

# Write the sample document into the scratch directory from the headless run.
compliance_file = tmp_dir.joinpath('sample_compliance_certificate.txt')
compliance_file.write_text(compliance_text)

# Switch the document type and keep ``safe_bash`` denied for this call.
compliance_metadata = {
    'document_type': 'compliance_certificate',
    'tool_policy_override': {'deny': ['safe_bash']},
}

response2, trace2 = runner.respond(
    instruction='Extract compliance certificate terms',
    documents=[compliance_file],
    query='What is the leverage ratio and compliance status?',
    metadata=compliance_metadata,
)

# Print only the leading 1500-element slice of the response.
print(response2[:1500])

## 4) Section-Aware Retrieval Tools

In [None]:
# Look up each retrieval tool by name in the profile's registry.
resolve_tool = profile.registry.resolve
load_documents = resolve_tool('load_documents')
build_doc_map = resolve_tool('build_doc_map')
chunk_doc_map_sections = resolve_tool('chunk_doc_map_sections')
build_chunk_index = resolve_tool('build_chunk_index')
retrieve_chunks = resolve_tool('retrieve_chunks')

# Chain the tools: each step's result is the next step's input.
store = load_documents.run(documents=[str(credit_doc)])
doc_map = build_doc_map.run(document_store=store)
section_chunks = chunk_doc_map_sections.run(
    doc_map=doc_map,
    max_chars=500,
)
index = build_chunk_index.run(chunks=section_chunks)
hits = retrieve_chunks.run(
    query='Applicable Margin means',
    index=index,
    top_k=5,
)

print('section-aware chunks:', len(section_chunks))

# Cell output: the first three retrieval hits.
hits[:3]

## 5) Tool Policy Override Test (deny should win)

In [None]:
# The override lists ``safe_bash`` under both 'allow' and 'deny'. Any exception
# is reported rather than raised, so the cell always reaches its final print.
conflicting_override = {'allow': ['safe_bash'], 'deny': ['safe_bash']}

try:
    runner.respond(
        instruction='Policy test',
        documents=[credit_doc],
        query='What is facility amount?',
        metadata={'tool_policy_override': conflicting_override},
    )
except Exception as exc:
    print('error:', str(exc))
else:
    print('respond completed; deny rule still applied if tool is called')

print('Policy override test completed.')