In [None]:
# Run the experiment on the inline sample data.
results = run_experiment(sample_data)

# Display results
print("=== EXPERIMENT RESULTS ===\n")

# Baseline records always carry the "fission" decision, so the set printed
# below collapses to a single element.
print("📊 BASELINE RESULTS (Always Fission):")
baseline_decisions = [r["decision"] for r in results["baseline"]]
print(f"  Decisions: {set(baseline_decisions)} (all same)")
print(f"  Number of errors: {sum(r['error'] for r in results['baseline'])}")

# Proposed method: tally how often the controller chose each strategy.
print("\n🧠 PROPOSED RESULTS (DKW Controller):")
proposed_decisions = [r["decision"] for r in results["proposed"]]
fusion_count = proposed_decisions.count("fusion")
fission_count = proposed_decisions.count("fission")
print(f"  Fusion decisions: {fusion_count}")
print(f"  Fission decisions: {fission_count}")
print(f"  Number of errors: {sum(r['error'] for r in results['proposed'])}")

print("\n📈 COMPARISON:")
print("  Baseline uses fission 100% of the time")
# Guard the percentage so an empty dataset reports 0.0% instead of raising
# ZeroDivisionError.
total_examples = len(sample_data)
fusion_pct = 100 * fusion_count / total_examples if total_examples else 0.0
print(f"  Proposed uses fusion {fusion_count}/{total_examples} times ({fusion_pct:.1f}%)")

## 5. Run Experiment and View Results

Let's run the experiment and compare the results between the baseline and proposed methods.

In [None]:
def run_experiment(data):
    """Run the DKW controller over ``data`` alongside an always-fission baseline.

    For every example an error is simulated with probability equal to the
    example's ``"difficulty"``. The outcome is fed to a fresh
    ``DKWController`` and the controller's decision is recorded; the baseline
    records the same outcome with a fixed ``"fission"`` decision.

    Args:
        data: Iterable of dicts, each providing an ``"id"`` and a
            ``"difficulty"`` entry.

    Returns:
        A dict with two lists, ``"baseline"`` and ``"proposed"``. Each list
        holds one record per example with the keys ``"id"``, ``"decision"``,
        ``"error"`` (a plain ``bool``) and ``"difficulty"``.
    """
    import zlib  # local import: only needed to derive a stable per-example seed

    controller = DKWController()
    results = {"baseline": [], "proposed": []}

    for example in data:
        example_id = example["id"]
        difficulty = example["difficulty"]

        # Seed from a CRC32 of the id rather than the built-in hash(): str
        # hashes are salted per interpreter process, so hash()-based seeds
        # give different outcomes on every run. A local Generator also
        # avoids clobbering NumPy's global random state.
        seed = zlib.crc32(str(example_id).encode("utf-8"))
        rng = np.random.default_rng(seed)
        # Cast to a plain bool so the records stay JSON-serialisable.
        error = bool(rng.random() < difficulty)

        controller.add_observation(float(error))
        decision = controller.decide()

        results["proposed"].append({
            "id": example_id,
            "decision": decision,
            "error": error,
            "difficulty": difficulty,
        })
        results["baseline"].append({
            "id": example_id,
            "decision": "fission",  # Always conservative
            "error": error,
            "difficulty": difficulty,
        })

    return results

# Progress message only: this statement does not call run_experiment itself.
print("Running experiment...")

## 4. Experiment Function

The `run_experiment` function compares two strategies:
1. **Proposed Method**: Uses DKW controller to adaptively choose fusion/fission
2. **Baseline Method**: Always uses conservative "fission" strategy

The function simulates error occurrence based on example difficulty and tracks the decisions made by each approach.

In [None]:
@dataclass
class DKWController:
    """DKW-guided fusion/fission controller.

    Error observations are accumulated in ``samples``. Once at least
    ``min_samples`` observations exist, ``decide`` compares an upper bound on
    the recent error rate (recent mean plus the DKW epsilon) against
    ``epsilon_target`` widened by ``hysteresis``, and switches
    ``current_state`` between ``"fission"`` and ``"fusion"`` accordingly.
    """
    # Error level the upper bound is compared against.
    epsilon_target: float = 0.10
    # Failure probability plugged into the DKW epsilon formula.
    delta: float = 0.05
    # Observations required before any switch, and size of the averaging window.
    min_samples: int = 100
    # Margin around epsilon_target that must be crossed to change state.
    hysteresis: float = 0.05

    # Every observation ever added, oldest first.
    samples: list = field(default_factory=list)
    # Current strategy: "fission" or "fusion".
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Return the DKW epsilon ``sqrt(ln(2 / delta) / (2 * n))`` for ``n`` samples.

        For ``n < 2`` the trivial value ``1.0`` is returned.
        """
        if n < 2:
            return 1.0
        # Cast so callers get a built-in float, as the annotation promises,
        # rather than a NumPy scalar.
        return float(np.sqrt(np.log(2 / self.delta) / (2 * n)))

    def add_observation(self, error: float) -> None:
        """Append one error observation to ``samples``."""
        self.samples.append(error)

    def decide(self) -> str:
        """Return the current strategy, switching state when the bound allows.

        While fewer than ``min_samples`` observations exist, or there is
        nothing to average, the current state is returned unchanged.
        """
        n = len(self.samples)
        if n < self.min_samples:
            return self.current_state

        # A non-positive min_samples would make the negative slice misbehave;
        # fall back to the full history in that case.
        window = self.samples[-self.min_samples:] if self.min_samples > 0 else self.samples
        if not window:
            # Nothing observed yet: avoid np.mean([]) (NaN plus RuntimeWarning).
            return self.current_state

        # NOTE(review): epsilon is computed from the total sample count while
        # the mean only covers the most recent window - confirm this is the
        # intended bound before relying on its statistical guarantee.
        epsilon = self.dkw_epsilon(n)
        empirical_error = float(np.mean(window))
        error_upper_bound = empirical_error + epsilon

        if self.current_state == "fusion":
            # Leave fusion only once the bound clearly exceeds the target.
            if error_upper_bound > self.epsilon_target + self.hysteresis:
                self.current_state = "fission"
        else:
            # Enter fusion only once the bound is clearly below the target.
            if error_upper_bound < self.epsilon_target - self.hysteresis:
                self.current_state = "fusion"

        return self.current_state

# Build a controller with default settings and report its starting configuration.
controller = DKWController()
print(
    f"Controller initialized with target epsilon: {controller.epsilon_target}",
    f"Initial state: {controller.current_state}",
    f"Minimum samples required: {controller.min_samples}",
    sep="\n",
)

## 3. DKW Controller Class

The `DKWController` implements a statistically-grounded decision controller that uses the Dvoretzky-Kiefer-Wolfowitz inequality to provide confidence bounds on empirical error rates.

### Key Components:
- **DKW Epsilon**: Computes the confidence margin from the DKW inequality, $\varepsilon = \sqrt{\ln(2/\delta) / (2n)}$ for $n$ samples
- **Error Tracking**: Maintains a history of error observations 
- **State Management**: Switches between "fusion" and "fission" based on error bounds
- **Hysteresis**: Prevents rapid oscillation between states

In [None]:
# Inline sample dataset so the notebook does not depend on external files.
# Difficulty levels are listed in id order: position i becomes "example_00i".
_SAMPLE_DIFFICULTIES = (
    0.1,   # Easy example
    0.2,   # Easy-medium example
    0.8,   # Hard example
    0.3,   # Medium example
    0.9,   # Very hard example
    0.15,  # Easy example
    0.7,   # Hard example
    0.25,  # Medium example
    0.6,   # Medium-hard example
    0.4,   # Medium example
)

sample_data = [
    {"id": f"example_{index:03d}", "difficulty": level}
    for index, level in enumerate(_SAMPLE_DIFFICULTIES)
]

print(f"Loaded {len(sample_data)} examples")
print("Sample data structure:", sample_data[0])

## 2. Sample Data

Instead of reading from external JSON files, we'll define our sample data inline. This data represents examples with different difficulty levels that our controller will process.

In [None]:
"""DKW Controller Implementation."""
import json
import numpy as np
from dataclasses import dataclass, field

## 1. Import Required Libraries

Let's start by importing all the necessary libraries for our DKW controller implementation.

# DKW Controller Implementation (experiment_001)

This notebook demonstrates a **DKW-guided fusion/fission controller** that uses the Dvoretzky-Kiefer-Wolfowitz (DKW) inequality to make statistically-grounded decisions between fusion and fission strategies based on error observations.

## Overview
- **DKW Controller**: Adaptive controller that switches between fusion/fission based on error bounds
- **Statistical Guarantee**: Uses DKW inequality to provide confidence bounds on empirical error rates
- **Hysteresis**: Includes hysteresis to prevent oscillation between states