## 8. Interactive Experimentation

You can modify the controller parameters and re-run the experiment to see how the changes affect its behavior:

In [None]:
# Reduce every record to the three fields used by the original
# method_out.json. The two result lists are walked in lockstep, so both
# exported lists end up with the same number of entries.
paired_results = list(zip(results["baseline"], results["proposed"]))
output_results = {
    "baseline": [
        {"id": base["id"], "decision": base["decision"], "error": base["error"]}
        for base, _ in paired_results
    ],
    "proposed": [
        {"id": prop["id"], "decision": prop["decision"], "error": prop["error"]}
        for _, prop in paired_results
    ],
}

# Persist the trimmed results to disk (optional step)
with open("method_out.json", "w") as out_file:
    json.dump(output_results, out_file, indent=2)

print("Results saved to 'method_out.json'")

# Preview only the first three records of each method
preview = {method: records[:3] for method, records in output_results.items()}
print(f"\nSample of first 3 results:")
print(json.dumps(preview, indent=2))

## 7. Export Results

Export the results in JSON format (equivalent to the original method_out.json output):

In [None]:
# Side-by-side table of the first (up to) 10 samples
table_header = f"{'ID':<12} {'Difficulty':<10} {'Error':<7} {'Baseline':<10} {'Proposed':<10}"
print("First 10 Results Comparison:")
print("=" * 60)
print(table_header)
print("-" * 60)

for idx, sample in enumerate(sample_data[:10]):
    base = results["baseline"][idx]
    prop = results["proposed"][idx]
    # str() is needed because the '<7' alignment is applied to the text form
    # of the boolean error flag.
    row = (f"{sample['id']:<12} {sample['difficulty']:<10.3f} {str(base['error']):<7} "
           f"{base['decision']:<10} {prop['decision']:<10}")
    print(row)

# Collect every point where the DKW controller's decision differs from the
# decision it made on the previous sample.
print(f"\n\nDecision Transitions in DKW Controller:")
print("=" * 40)
decision_sequence = [entry["decision"] for entry in results["proposed"]]
transitions = [
    (before, after)
    for before, after in zip(decision_sequence, decision_sequence[1:])
    if before != after
]

print(f"Total decision changes: {len(transitions)}")
if transitions:
    fission_to_fusion = transitions.count(("fission", "fusion"))
    fusion_to_fission = transitions.count(("fusion", "fission"))
    print(f"Fission → Fusion: {fission_to_fusion}")
    print(f"Fusion → Fission: {fusion_to_fission}")

## 6. Detailed Analysis

Let's examine the first few results and analyze the decision-making pattern:

In [None]:
# Execute the experiment on the inline sample data
results = run_experiment(sample_data)

# Gather per-method decision lists and error totals before printing
baseline_decisions = [entry["decision"] for entry in results["baseline"]]
proposed_decisions = [entry["decision"] for entry in results["proposed"]]
baseline_errors = sum(entry["error"] for entry in results["baseline"])
proposed_errors = sum(entry["error"] for entry in results["proposed"])

# Summary header
print("Experiment Results Summary:")
print("=" * 50)

# Decision counts per method
print(f"\nBaseline (always fission):")
print(f"  Fission decisions: {baseline_decisions.count('fission')}")
print(f"  Fusion decisions: {baseline_decisions.count('fusion')}")

print(f"\nProposed (DKW controller):")
print(f"  Fission decisions: {proposed_decisions.count('fission')}")
print(f"  Fusion decisions: {proposed_decisions.count('fusion')}")

# Error rate = number of recorded errors / number of records for that method
print(f"\nError Rates:")
print(f"  Baseline error rate: {baseline_errors / len(results['baseline']):.3f}")
print(f"  Proposed error rate: {proposed_errors / len(results['proposed']):.3f}")

print(f"\nTotal samples processed: {len(sample_data)}")

## 5. Run the Experiment

Let's run the experiment and analyze the results:

In [None]:
def run_experiment(data, controller=None):
    """Run the DKW controller experiment on inline data.

    For every example an error is simulated by comparing a draw from
    ``np.random.random()`` with the example's ``"difficulty"``. The same
    simulated error is fed to the controller and recorded for both the
    proposed method and the always-"fission" baseline.

    Args:
        data: Iterable of dicts, each with an ``"id"`` and a ``"difficulty"``
            entry.
        controller: Optional controller object exposing
            ``add_observation(error)`` and ``decide()``. When omitted, a
            ``DKWController`` with default parameters is created, which is
            the original behavior. Passing one allows experimenting with
            non-default controller parameters without editing this function.

    Returns:
        A dict with ``"baseline"`` and ``"proposed"`` lists; each entry is a
        dict with ``"id"``, ``"decision"`` and ``"error"`` keys.
    """
    if controller is None:
        controller = DKWController()
    results = {"baseline": [], "proposed": []}

    for example in data:
        # Simulate error occurrence based on difficulty. bool() keeps the
        # flag a plain Python bool even if the difficulty is a NumPy scalar,
        # so the records stay serializable with the json module.
        error = bool(np.random.random() < example["difficulty"])
        controller.add_observation(float(error))
        decision = controller.decide()

        results["proposed"].append({
            "id": example["id"],
            "decision": decision,
            "error": error,
        })
        results["baseline"].append({
            "id": example["id"],
            "decision": "fission",  # Always conservative
            "error": error,
        })

    return results

## 4. Experiment Function

The experiment function compares the DKW controller's decisions against a baseline that always chooses the conservative "fission" mode.

In [None]:
# Sample experimental data (replaces reading from "../dataset_001/data_out.json")
# One cycle of difficulty values; the cycle is repeated to get enough samples.
_DIFFICULTY_CYCLE = (0.05, 0.03, 0.15, 0.08, 0.02, 0.12, 0.07, 0.18, 0.04, 0.11)
_CYCLE_REPEATS = 15  # 10 difficulties x 15 repeats = 150 samples

# Build a separate dict per sample. Repeating a list of dicts with `* 15`
# would only repeat references to the same 10 dicts, so assigning ids
# afterwards would overwrite them and leave just 10 distinct ids.
sample_data = [
    {"id": f"example_{i:03d}", "difficulty": difficulty}
    for i, difficulty in enumerate(_DIFFICULTY_CYCLE * _CYCLE_REPEATS)
]

print(f"Created {len(sample_data)} sample data points")
print(f"Sample difficulty range: {min(d['difficulty'] for d in sample_data):.3f} - {max(d['difficulty'] for d in sample_data):.3f}")

## 3. Sample Data

Instead of reading from external JSON files, we'll define our experimental data inline. This data simulates examples with varying difficulty levels.

In [None]:
@dataclass
class DKWController:
    """Fusion/fission decision controller guided by a DKW-style bound.

    Error observations are accumulated with ``add_observation``; ``decide``
    compares an upper bound on the error rate against ``epsilon_target``
    (widened by ``hysteresis``) and switches ``current_state`` accordingly.
    """
    # Target error rate threshold.
    epsilon_target: float = 0.10
    # Confidence parameter used in the DKW epsilon formula.
    delta: float = 0.05
    # Observations required before decide() may change state; also the length
    # of the trailing window averaged in decide().
    min_samples: int = 100
    # Margin around epsilon_target that must be crossed to switch state.
    hysteresis: float = 0.05

    # Recorded error observations, oldest first.
    samples: list = field(default_factory=list)
    # Current mode, "fission" or "fusion"; starts in "fission".
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Return sqrt(ln(2 / delta) / (2 * n)), or 1.0 when n < 2."""
        if n >= 2:
            log_term = np.log(2 / self.delta)
            return np.sqrt(log_term / (2 * n))
        return 1.0

    def add_observation(self, error: float) -> None:
        """Record one error observation."""
        self.samples.append(error)

    def decide(self) -> str:
        """Update the mode if warranted and return the current mode.

        Until ``min_samples`` observations exist, the state is returned
        unchanged.
        """
        total = len(self.samples)
        if total >= self.min_samples:
            # NOTE(review): epsilon is computed from the total number of
            # observations, while the mean covers only the trailing
            # min_samples window; confirm this mismatch is intended.
            epsilon = self.dkw_epsilon(total)
            recent = self.samples[-self.min_samples:]
            error_upper_bound = np.mean(recent) + epsilon

            if self.current_state == "fusion":
                # Leave fusion only once the bound exceeds target + margin.
                if error_upper_bound > self.epsilon_target + self.hysteresis:
                    self.current_state = "fission"
            elif error_upper_bound < self.epsilon_target - self.hysteresis:
                # Enter fusion only once the bound drops below target - margin.
                self.current_state = "fusion"

        return self.current_state

## 2. DKW Controller Class

The `DKWController` class implements a decision controller that uses the Dvoretzky-Kiefer-Wolfowitz inequality to make statistically principled decisions between "fusion" and "fission" modes.

### Key Parameters:
- `epsilon_target`: Target error rate threshold
- `delta`: Confidence parameter for DKW bound  
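- `min_samples`: Minimum number of observations before the controller makes decisions; also the size of the trailing window used to compute the empirical error rate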
- `hysteresis`: Prevents rapid oscillation between modes

In [None]:
"""DKW Controller Implementation."""
import json
import numpy as np
from dataclasses import dataclass, field

# Set random seed for reproducible results
np.random.seed(42)

## 1. Dependencies and Imports

# DKW Controller Implementation Demo

This notebook demonstrates a **DKW-guided fusion/fission controller** that makes decisions based on error observations with statistical guarantees.

## Overview
The controller uses the Dvoretzky-Kiefer-Wolfowitz (DKW) inequality to provide confidence bounds on empirical error rates, enabling principled decisions between "fusion" and "fission" modes with theoretical guarantees.