In [None]:
# Setup: make the project root importable, then load the extraction framework.
import json
import sys
from pathlib import Path

# If the kernel was started inside a folder named `notebooks`, the project
# root is taken to be its parent; otherwise the working directory itself.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent if notebook_dir.name == 'notebooks' else notebook_dir

# Guarded so re-running this cell does not stack duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Project-local import: must come after the sys.path tweak above.
from core.policy_context_framework import (
    PolicyContextFramework,
    PolicyDomain,
)

print("✅ Policy extraction framework loaded!")

---

## 1. The Challenge

Policy documents are complex:
- **Length**: Bills can be hundreds of pages
- **Language**: Legal/technical jargon
- **Structure**: Multiple sections, amendments, references
- **Implicit mechanisms**: Not always explicitly stated

### Manual Analysis Problems
- Time-consuming (days to weeks per bill)
- Inconsistent between analysts
- Difficult to compare across policies
- Hard to update when bills change

---

## 2. Context-Aware Framework

PoliSim's extraction framework:

```
Policy Text
    ↓
[Identify Domain] → Healthcare? Tax? Social Security?
    ↓
[Extract Concepts] → Funding, coverage, timeline
    ↓
[Compose Themes] → Group related mechanisms
    ↓
[Extract Quantities] → Parse %, $, dates
    ↓
[Validate] → Check completeness
    ↓
Structured Mechanics
```

In [None]:
# Initialize the framework; later cells call `framework.extract_mechanics(...)`.
framework = PolicyContextFramework()

# List every member of PolicyDomain by its `.value`.
# NOTE(review): iteration + `.value` suggests PolicyDomain is an Enum — confirm.
print("📋 Supported Policy Domains:")
for domain in PolicyDomain:
    print(f"   • {domain.value}")

In [None]:
# Example: Extract from Medicare-for-All text
m4a_sample = """
Section 1. Medicare-for-All Program

(a) UNIVERSAL COVERAGE - All United States residents shall be entitled 
to benefits under this Act, effective January 1, 2027.

(b) FUNDING - The program shall be funded through:
    (1) A 7.5% payroll tax on employers
    (2) A 4% income tax on households earning above $29,000
    (3) Elimination of private insurance subsidies ($500B annually)
    (4) Administrative savings of 15% compared to current system

(c) BENEFITS - Covered services include:
    (1) Hospital and outpatient care
    (2) Primary and preventive care
    (3) Prescription drugs
    (4) Mental health services
    (5) Dental and vision care
    
(d) TRANSITION - Private insurance plans may continue for 4 years 
from enactment, after which supplemental coverage only.
"""

# Preview only the head of the text; the ellipsis is added only when
# something was actually cut off.
PREVIEW_CHARS = 500
m4a_preview = m4a_sample[:PREVIEW_CHARS]
if len(m4a_sample) > PREVIEW_CHARS:
    m4a_preview += "..."

print("📄 Sample M4A Text:")
print(m4a_preview)

# Extract mechanics. `result` is also read by the validation cell further down,
# so its name must stay as is.
# NOTE(review): the `.get(...)` calls below assume extract_mechanics returns a
# dict-like object — confirm against the framework.
print("\n🔄 Extracting policy mechanics...")
result = framework.extract_mechanics(m4a_sample)

print("\n✅ Extraction complete!")
print("\n📊 Extraction Results:")
print(f"   • Domain: {result.get('domain', 'Unknown')}")
print(f"   • Confidence: {result.get('confidence', 0):.2f}")
print(f"   • Funding mechanisms found: {len(result.get('funding', []))}")
print(f"   • Coverage details: {len(result.get('coverage', []))}")
print(f"   • Timeline events: {len(result.get('timeline', []))}")

# Display details (a missing key is treated as an empty list).
print("\n💰 Funding Mechanisms:")
for mech in result.get('funding', []):
    print(f"   • {mech}")

print("\n🏥 Coverage:")
for cov in result.get('coverage', []):
    print(f"   • {cov}")

---

## 3. ACA-Style Extraction

Let's try a different policy format:

In [None]:
# ACA-style policy text
aca_sample = """
The Affordable Care Act establishes:

1. INDIVIDUAL MANDATE - All individuals must maintain minimum 
   essential coverage or pay a penalty (eliminated in 2019).

2. EMPLOYER MANDATE - Employers with 50+ FTE employees must 
   offer coverage or pay $2,000 per employee penalty.

3. EXCHANGES - State-based marketplaces for individual coverage
   with income-based subsidies for those earning 100-400% FPL.

4. MEDICAID EXPANSION - States may expand Medicaid to adults 
   earning up to 138% of FPL, with 90% federal matching.

5. ESSENTIAL HEALTH BENEFITS - All plans must cover:
   - Hospitalization
   - Prescription drugs
   - Maternity care
   - Mental health
   - Preventive services (no cost-sharing)
"""

# `aca_result` is also read by the validation cell further down.
# NOTE(review): the `.get(...)` / `.items()` calls below assume
# extract_mechanics returns a dict-like object — confirm against the framework.
print("🔄 Extracting ACA mechanics...")
aca_result = framework.extract_mechanics(aca_sample)

print("\n📊 ACA Extraction Results:")
print(f"   • Domain: {aca_result.get('domain', 'Unknown')}")
print(f"   • Confidence: {aca_result.get('confidence', 0):.2f}")

# Print every non-empty list-valued entry, capped to keep the output short.
MAX_ITEMS_SHOWN = 3
print("\n📋 Key Mechanisms:")
for key, value in aca_result.items():
    if isinstance(value, list) and value:
        print(f"\n   {key.upper()}:")
        for item in value[:MAX_ITEMS_SHOWN]:
            print(f"      • {item}")

---

## 4. Validation

Extraction quality depends on:
- Text clarity
- Explicit quantification
- Domain recognition

PoliSim reports confidence scores and flags missing elements.

In [None]:
# Validation example: print confidence, completeness and missing concepts
# for both extractions produced by the earlier cells.
print("📊 Extraction Validation")
print("=" * 50)

# Compare M4A and ACA extractions (`result` and `aca_result` come from the
# two extraction cells above; run those first).
comparisons = [
    ('M4A Sample', result),
    ('ACA Sample', aca_result)
]

for name, res in comparisons:
    print(f"\n{name}:")
    print(f"   • Confidence: {res.get('confidence', 0):.2f}")
    print(f"   • Completeness: {res.get('completeness', 'N/A')}")

    missing = res.get('missing_concepts', [])
    if missing:
        # Stringify each entry so non-str items cannot break the join.
        print(f"   • Missing concepts: {', '.join(map(str, missing))}")
    else:
        print("   • Missing concepts: None identified")

print("\n💡 Tip: Higher confidence and completeness indicate better extraction quality.")

---

## 🎓 What You've Learned

✅ Policy documents contain complex, implicit mechanisms  
✅ PoliSim's framework extracts funding, coverage, and timeline  
✅ Confidence scores help validate extraction quality  
✅ Different policy formats require different parsing approaches  

---

## 📚 Next Steps

| Notebook | Topic | Time |
|----------|-------|------|
| **08** | API Integration | 30-45 min |
| **09** | Custom Policies | 60-90 min |
| **10** | Capstone | 2-3 hours |

---

*Continue to Notebook 08: API Integration* →