In [None]:
# Display a sample of the results that would be saved: the first few records
# of each method, pretty-printed as JSON.
preview_count = 5
print("Sample output (first 5 examples):")
print(json.dumps({
    "baseline": results["baseline"][:preview_count],
    "proposed": results["proposed"][:preview_count],
}, indent=2))

# Clamp at zero so a run with fewer than five records never reports a
# negative number of remaining examples.
remaining = max(0, len(results["baseline"]) - preview_count)
print(f"\n... and {remaining} more examples")

# Optionally save results to file (uncomment to enable)
# with open("method_out.json", "w") as f:
#     json.dump(results, f, indent=2)
# print("Results saved to method_out.json")

# The emoji was previously stored as mis-decoded UTF-8 bytes ("ðŸŽ‰").
print("\n🎉 Notebook demo complete! You can now experiment with different parameters above.")

## Output Results

The original script would save results to `method_out.json`. Here's what that output would look like:

In [None]:
# Try different parameter settings
def test_controller_params(epsilon_target=0.10, delta=0.05, min_samples=100, hysteresis=0.05):
    """Replay ``sample_data`` through a controller built from the given settings.

    The global NumPy random state is re-seeded with 42 on every call so that
    different parameter settings see the same simulated error sequence.
    A one-line parameter summary and a one-line result summary are printed.

    Args:
        epsilon_target: Passed to ``DKWController`` as ``epsilon_target``.
        delta: Passed to ``DKWController`` as ``delta``.
        min_samples: Passed to ``DKWController`` as ``min_samples``.
        hysteresis: Passed to ``DKWController`` as ``hysteresis``.

    Returns:
        A dict with an empty ``"baseline"`` list and a ``"proposed"`` list
        holding one ``{"id", "decision", "error"}`` record per example.
    """
    controller = DKWController(
        epsilon_target=epsilon_target,
        delta=delta,
        min_samples=min_samples,
        hysteresis=hysteresis,
    )

    np.random.seed(42)  # For reproducible results

    proposed = []
    for item in sample_data:
        # An error occurs with probability equal to the example's difficulty.
        had_error = np.random.random() < item["difficulty"]
        # The observation must be recorded before asking for a decision.
        controller.add_observation(float(had_error))
        proposed.append({
            "id": item["id"],
            "decision": controller.decide(),
            "error": had_error,
        })

    custom_results = {"baseline": [], "proposed": proposed}

    # Summary metrics over the simulated run.
    total = len(proposed)
    errors = len([rec for rec in proposed if rec['error']])
    fusions = len([rec for rec in proposed if rec['decision'] == 'fusion'])

    print(f"Parameters: target={epsilon_target:.2f}, delta={delta:.2f}, min_samples={min_samples}, hysteresis={hysteresis:.2f}")
    print(f"Results: {errors}/{total} errors ({errors/total*100:.1f}%), {fusions}/{total} fusions ({fusions/total*100:.1f}%)")

    return custom_results

# Baseline run with the controller's default settings.
print("=== Default Parameters ===")
test_controller_params()

# More aggressive: a higher error-rate target (0.15 vs. the default 0.10) and
# a narrower hysteresis band both raise the level the bound must drop below
# before the controller switches to "fusion".
print("\n=== More Aggressive (higher target, less hysteresis) ===")
test_controller_params(epsilon_target=0.15, hysteresis=0.02)

# More conservative: more observations are required before any decision and
# the hysteresis band is wider.
print("\n=== More Conservative (higher min samples, more hysteresis) ===")
test_controller_params(min_samples=150, hysteresis=0.08)

## Interactive Exploration

Try experimenting with different controller parameters to see how they affect the behavior:

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: a decision timeline (left) and running error
# rates for both methods (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))


def _finish_axes(ax, xlabel, ylabel, title):
    """Apply the axis labels, title, legend and light grid shared by both panels."""
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


def _running_error_rate(records):
    """Return the cumulative mean of the ``error`` flag over ``records``."""
    flags = [rec['error'] for rec in records]
    return np.cumsum(flags) / np.arange(1, len(flags) + 1)


# Panel 1: decisions and errors for the first 50 examples, encoded as 0/1.
first_fifty = results['proposed'][:50]
decisions = [int(rec['decision'] == 'fusion') for rec in first_fifty]
errors = [int(bool(rec['error'])) for rec in first_fifty]
point_colors = ['red' if flag else 'green' for flag in errors]

ax1.plot(decisions, 'b-', linewidth=2, label='Decision (1=fusion, 0=fission)')
ax1.scatter(
    range(len(errors)),
    errors,
    c=point_colors,
    alpha=0.6,
    label='Errors (red=error, green=success)',
)
_finish_axes(
    ax1,
    'Example Number',
    'Decision/Error',
    'DKW Controller Decisions vs Errors (First 50 Examples)',
)

# Panel 2: cumulative error rate after each example, for both methods.
cumulative_proposed = _running_error_rate(results['proposed'])
cumulative_baseline = _running_error_rate(results['baseline'])

ax2.plot(cumulative_proposed, 'b-', linewidth=2, label='DKW Controller')
ax2.plot(cumulative_baseline, 'r--', linewidth=2, label='Baseline (always fission)')
# Horizontal reference line at the 10% target error rate.
ax2.axhline(y=0.10, color='gray', linestyle=':', alpha=0.7, label='Target error rate (10%)')
_finish_axes(ax2, 'Example Number', 'Cumulative Error Rate', 'Cumulative Error Rates')

plt.tight_layout()
plt.show()

In [None]:
# Seed NumPy's global random state so the simulated errors are the same on
# every run, then execute the experiment.
np.random.seed(42)
results = run_experiment(sample_data)

baseline_records = results['baseline']
proposed_records = results['proposed']
n_baseline = len(baseline_records)
n_proposed = len(proposed_records)

# Summary header
print("=== EXPERIMENT RESULTS ===")
print(f"Total examples processed: {n_baseline}")
print()

# Baseline (always fission): count errors and fission decisions.
baseline_errors = len([rec for rec in baseline_records if rec['error']])
baseline_fissions = len([rec for rec in baseline_records if rec['decision'] == 'fission'])
print("BASELINE (always fission):")
print(f"  Errors: {baseline_errors}/{n_baseline} ({baseline_errors / n_baseline * 100:.1f}%)")
print(f"  Fission decisions: {baseline_fissions}/{n_baseline} ({baseline_fissions / n_baseline * 100:.1f}%)")
print()

# Proposed method (DKW controller): count errors and each decision type.
proposed_errors = len([rec for rec in proposed_records if rec['error']])
proposed_fusions = len([rec for rec in proposed_records if rec['decision'] == 'fusion'])
proposed_fissions = len([rec for rec in proposed_records if rec['decision'] == 'fission'])
print("PROPOSED (DKW controller):")
print(f"  Errors: {proposed_errors}/{n_proposed} ({proposed_errors / n_proposed * 100:.1f}%)")
print(f"  Fusion decisions: {proposed_fusions}/{n_proposed} ({proposed_fusions / n_proposed * 100:.1f}%)")
print(f"  Fission decisions: {proposed_fissions}/{n_proposed} ({proposed_fissions / n_proposed * 100:.1f}%)")

## Running the Experiment

Now let's run the DKW controller experiment and compare it against a baseline that always chooses the conservative "fission" mode.

In [None]:
def run_experiment(data, controller=None):
    """Run the controller over ``data`` alongside an always-fission baseline.

    For each example an error is simulated with probability equal to the
    example's ``"difficulty"``. The error is fed to the controller, whose
    decision is then recorded; the baseline records the same error with a
    fixed ``"fission"`` decision.

    Args:
        data: Iterable of dicts, each providing an ``"id"`` and a
            ``"difficulty"`` value.
        controller: Optional object exposing ``add_observation(error)`` and
            ``decide()``. When omitted, a ``DKWController`` with default
            settings is created, which matches the previous behaviour.

    Returns:
        A dict with ``"baseline"`` and ``"proposed"`` lists, each holding one
        ``{"id", "decision", "error"}`` record per example, in input order.
    """
    if controller is None:
        controller = DKWController()
    results = {"baseline": [], "proposed": []}

    for example in data:
        # Simulate error occurrence based on difficulty. bool() keeps the
        # record a plain Python bool even when the difficulty is a NumPy
        # scalar; a numpy.bool_ would make json.dumps(results) fail.
        error = bool(np.random.random() < example["difficulty"])
        # The observation must be recorded before asking for a decision.
        controller.add_observation(float(error))
        decision = controller.decide()

        results["proposed"].append({
            "id": example["id"],
            "decision": decision,
            "error": error,
        })
        results["baseline"].append({
            "id": example["id"],
            "decision": "fission",  # Always conservative
            "error": error,
        })

    return results

print("Experiment function defined successfully!")

In [None]:
# Sample input data (inlined to make notebook self-contained)
# This replaces the original data loading from "../dataset_001/data_out.json"

sample_data = [
    {"id": "example_000", "difficulty": 0.05},
    {"id": "example_001", "difficulty": 0.08},
    {"id": "example_002", "difficulty": 0.15},
    {"id": "example_003", "difficulty": 0.03},
    {"id": "example_004", "difficulty": 0.12},
    {"id": "example_005", "difficulty": 0.07},
    {"id": "example_006", "difficulty": 0.20},
    {"id": "example_007", "difficulty": 0.02},
    {"id": "example_008", "difficulty": 0.18},
    {"id": "example_009", "difficulty": 0.11},
]

# Add more examples to reach minimum sample threshold.
# A dedicated, seeded generator makes the synthetic difficulties identical on
# every run (the draw used to come from the unseeded global random state) and
# leaves NumPy's global random state untouched.
data_rng = np.random.default_rng(42)
for i in range(10, 150):
    # Beta(2, 8) has mean 0.2, so most examples have low difficulty.
    # float() keeps the value a plain Python float.
    difficulty = float(data_rng.beta(2, 8))
    sample_data.append({
        "id": f"example_{i:03d}",
        "difficulty": difficulty,
    })

difficulties = [ex["difficulty"] for ex in sample_data]
print(f"Generated {len(sample_data)} sample examples")
print(f"Difficulty range: {min(difficulties):.3f} - {max(difficulties):.3f}")

## Understanding the DKW Method

The **Dvoretzky-Kiefer-Wolfowitz (DKW) inequality** provides statistical confidence bounds for empirical error rates. 

Key parameters:
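- `epsilon_target`: Target error rate threshold (0.10, i.e. 10%, by default)
- `delta`: Allowed failure probability of the confidence bound (0.05 by default, giving 95% confidence)
- `min_samples`: Minimum number of observations before the controller may change state (100 by default); the empirical error rate is averaged over the most recent `min_samples` observations
- `hysteresis`: Margin around `epsilon_target` that prevents oscillation between states (0.05 by default): the controller switches to fusion only when the bound falls below `epsilon_target - hysteresis`, and back to fission only when it rises above `epsilon_target + hysteresis`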

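The controller computes an upper confidence bound on the error rate, namely the empirical error rate plus the DKW margin $\epsilon = \sqrt{\ln(2/\delta) / (2n)}$ for $n$ samples, and uses it to make reliable fusion/fission decisions.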

In [None]:
"""DKW Controller Implementation."""
import json
import numpy as np
from dataclasses import dataclass, field

@dataclass
class DKWController:
    """DKW-guided fusion/fission controller.

    Error observations are collected with ``add_observation``. ``decide``
    forms an upper bound on the recent error rate (empirical mean plus the
    DKW margin) and switches between the ``"fusion"`` and ``"fission"``
    states, using a hysteresis band around ``epsilon_target``.
    """
    # Target error rate the upper bound is compared against.
    epsilon_target: float = 0.10
    # Failure probability used in the DKW margin.
    delta: float = 0.05
    # Observations required before any decision; also the averaging window.
    min_samples: int = 100
    # Half-width of the band around epsilon_target used when switching state.
    hysteresis: float = 0.05

    # Every error observation recorded so far, oldest first.
    samples: list = field(default_factory=list)
    # Current mode; the controller starts in "fission".
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Return the DKW margin ``sqrt(ln(2 / delta) / (2 * n))`` for n samples.

        For fewer than two samples the margin is fixed at 1.0.
        """
        if n < 2:
            return 1.0
        return float(np.sqrt(np.log(2 / self.delta) / (2 * n)))

    def add_observation(self, error: float) -> None:
        """Add error observation for calibration."""
        self.samples.append(error)

    def decide(self) -> str:
        """Make fusion/fission decision with DKW guarantee.

        Returns the current state unchanged until at least ``min_samples``
        observations exist. After that, the upper bound is the mean of the
        most recent ``min_samples`` observations plus the DKW margin for that
        many samples.

        Returns:
            ``"fusion"`` or ``"fission"`` (the possibly updated state).
        """
        if len(self.samples) < self.min_samples:
            return self.current_state

        window = self.samples[-self.min_samples:]
        if not window:
            # Nothing to average (e.g. min_samples == 0 and no observations).
            return self.current_state

        # The margin must match the number of observations actually averaged.
        # Using the total sample count here would make the bound tighter than
        # the windowed mean justifies.
        epsilon = self.dkw_epsilon(len(window))
        empirical_error = np.mean(window)
        error_upper_bound = empirical_error + epsilon

        if self.current_state == "fusion":
            # Leave fusion only once the bound clearly exceeds the target.
            if error_upper_bound > self.epsilon_target + self.hysteresis:
                self.current_state = "fission"
        else:
            # Enter fusion only once the bound is clearly below the target.
            if error_upper_bound < self.epsilon_target - self.hysteresis:
                self.current_state = "fusion"

        return self.current_state

print("DKW Controller class loaded successfully!")

# DKW Controller Implementation Demo

**Artifact ID:** experiment_001  
**Original File:** method.py

This notebook demonstrates a DKW-guided fusion/fission controller that makes adaptive decisions based on error observations while providing statistical guarantees through the Dvoretzky-Kiefer-Wolfowitz (DKW) inequality.

## Overview

The DKW controller adaptively switches between:
- **Fusion mode**: More aggressive/efficient processing
- **Fission mode**: Conservative/safe processing

The controller uses statistical confidence bounds to ensure reliable decision-making with probabilistic guarantees.