In [None]:
def run_experiment(data):
    """Run the DKW controller and an always-"fission" baseline over ``data``.

    For each example an error is simulated by drawing one uniform random
    number and comparing it with the example's ``"difficulty"``; the outcome
    is fed to a fresh ``DKWController`` and the controller's decision is
    recorded. The baseline records the same simulated error but always
    reports ``"fission"``.

    Args:
        data: Iterable of mappings, each providing ``"id"`` and
            ``"difficulty"`` keys. ``"difficulty"`` is used as the
            probability that an error occurs.

    Returns:
        A ``(results, controller)`` tuple. ``results`` is a dict with
        ``"baseline"`` and ``"proposed"`` lists holding one record per
        example (keys ``id``, ``decision``, ``error``, ``difficulty``);
        ``controller`` is the controller after all observations.
    """
    controller = DKWController()
    results = {"baseline": [], "proposed": []}

    for example in data:
        example_id = example["id"]
        difficulty = example["difficulty"]

        # Simulate error occurrence based on difficulty. The comparison yields
        # a numpy.bool_, which json cannot serialize, so store a builtin bool.
        error = bool(np.random.random() < difficulty)
        controller.add_observation(float(error))
        decision = controller.decide()

        results["proposed"].append({
            "id": example_id,
            "decision": decision,
            "error": error,
            "difficulty": difficulty,
        })
        results["baseline"].append({
            "id": example_id,
            "decision": "fission",  # Always conservative
            "error": error,
            "difficulty": difficulty,
        })

    return results, controller

## Experiment Function

The experiment function runs both the proposed DKW controller and a baseline (always conservative) approach:

In [None]:
# Inline sample dataset (nothing is read from disk). IDs are generated from
# the position of each difficulty value: example_000, example_001, ...
sample_data = [
    {"id": f"example_{position:03d}", "difficulty": level}
    for position, level in enumerate(
        (0.02, 0.05, 0.15, 0.08, 0.12, 0.03, 0.18, 0.07, 0.09, 0.14)
    )
]

# Report the dataset size and preview the first three entries.
print(f"Created dataset with {len(sample_data)} examples")
print("Sample entries:")
for i, example in enumerate(sample_data[:3]):
    print(f"  {i+1}. ID: {example['id']}, Difficulty: {example['difficulty']}")

## Sample Data

Let's create some sample data to demonstrate the controller. Each example has an ID and a difficulty level (probability of error):

In [None]:
@dataclass
class DKWController:
    """Fusion/fission switch driven by a DKW confidence bound.

    Observations are collected with ``add_observation``. ``decide`` compares
    an upper bound on the error rate with ``epsilon_target`` (shifted by
    ``hysteresis``) and updates ``current_state`` when the bound crosses the
    relevant threshold.
    """

    # Error-rate threshold the upper bound is compared against.
    epsilon_target: float = 0.10
    # Parameter of the DKW bound; enters as log(2 / delta).
    delta: float = 0.05
    # Observations required before the state may change; also the length of
    # the trailing window averaged in decide().
    min_samples: int = 100
    # Margin added to / subtracted from epsilon_target when switching state.
    hysteresis: float = 0.05

    # Every observation recorded so far, in arrival order.
    samples: list = field(default_factory=list)
    # Current mode, "fission" or "fusion"; starts in "fission".
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Return sqrt(log(2 / delta) / (2 * n)), or 1.0 when ``n`` is below 2."""
        if n >= 2:
            return np.sqrt(np.log(2 / self.delta) / (2 * n))
        return 1.0

    def add_observation(self, error: float) -> None:
        """Append one error observation to ``samples``."""
        self.samples.append(error)

    def decide(self) -> str:
        """Update ``current_state`` from the recorded samples and return it.

        With fewer than ``min_samples`` observations the state is returned
        unchanged. Otherwise the mean of the last ``min_samples`` observations
        plus the DKW epsilon forms an upper bound: in "fusion" the state drops
        to "fission" when the bound exceeds ``epsilon_target + hysteresis``;
        in any other state it moves to "fusion" when the bound is below
        ``epsilon_target - hysteresis``.
        """
        total = len(self.samples)
        if total < self.min_samples:
            return self.current_state

        # NOTE(review): the bound width uses the total sample count while the
        # mean uses only the trailing window - confirm this is intended.
        margin = self.dkw_epsilon(total)
        recent = self.samples[-self.min_samples:]
        upper_bound = np.mean(recent) + margin

        in_fusion = self.current_state == "fusion"
        if in_fusion and upper_bound > self.epsilon_target + self.hysteresis:
            self.current_state = "fission"
        elif not in_fusion and upper_bound < self.epsilon_target - self.hysteresis:
            self.current_state = "fusion"

        return self.current_state

## DKW Controller Class

The `DKWController` class implements the core logic:

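- **epsilon_target**: Target error rate threshold (10%)
- **delta**: Failure probability of the DKW bound (5%, i.e. 95% confidence)
- **min_samples**: Minimum number of observations before the state can change; also the size of the trailing window used to estimate the error rate (100)
- **hysteresis**: Margin around `epsilon_target` that the error bound must cross before the state switches, which prevents oscillation between states (5%)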

In [None]:
import json
import numpy as np
from dataclasses import dataclass, field
import matplotlib.pyplot as plt
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

## Imports and Setup

First, let's import the required libraries:

# DKW Controller Implementation

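This notebook demonstrates a **DKW-guided fusion/fission controller** implementation. The DKW (Dvoretzky-Kiefer-Wolfowitz) inequality bounds how far an empirical distribution built from $n$ samples can deviate from the true one: with probability at least $1 - \delta$ the deviation is at most $\varepsilon = \sqrt{\ln(2/\delta) / (2n)}$. The controller adds this $\varepsilon$ to the observed error rate to obtain an upper bound on the error rate.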

## Overview

The controller makes decisions between "fusion" and "fission" modes based on observed error rates, using the DKW inequality to provide confidence bounds on the true error rate.