In [None]:
def _json_default(value):
    """Fallback encoder for ``json``: unwrap NumPy scalars into plain Python values.

    ``results`` may contain NumPy scalars (for example ``np.bool_`` error
    flags), which the ``json`` module cannot encode on its own.

    Args:
        value: The object ``json`` failed to serialize.

    Returns:
        The equivalent built-in Python scalar.

    Raises:
        TypeError: If ``value`` is not a NumPy scalar, mirroring the error
            ``json`` raises for unsupported types.
    """
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Optional: Save results to JSON file (same as original script)
# Uncomment the lines below to save results
# with open("method_out.json", "w") as f:
#     json.dump(results, f, indent=2, default=_json_default)
# print("Results saved to method_out.json")

# Display the results as JSON for reference
print("Results in JSON format:")
print(json.dumps(results, indent=2, default=_json_default))

## 7. Save Results (Optional)

The original script saved results to a JSON file. You can do the same here if needed:

In [None]:
def _percent(count, total):
    """Return ``count`` as a percentage of ``total``, or 0.0 when ``total`` is 0."""
    return count / total * 100 if total else 0.0


def _print_records(records):
    """Print a divider followed by one ``id: decision, error=...`` line per record."""
    print("-" * 40)
    for record in records:
        print(f"{record['id']}: {record['decision']}, error={record['error']}")


def _print_decision_split(label, fissions, total):
    """Print the fission/fusion counts and percentages for one approach."""
    fusions = total - fissions
    print(f"{label}:")
    print(f"  - Fission decisions: {fissions}/{total} ({_percent(fissions, total):.1f}%)")
    print(f"  - Fusion decisions: {fusions}/{total} ({_percent(fusions, total):.1f}%)")


# Display detailed results
print("BASELINE RESULTS (Always Fission):")
_print_records(results["baseline"])

print("\nPROPOSED RESULTS (DKW Controller):")
_print_records(results["proposed"])

# Calculate summary statistics
baseline_errors = sum(1 for r in results["baseline"] if r["error"])
proposed_errors = sum(1 for r in results["proposed"] if r["error"])
baseline_fissions = sum(1 for r in results["baseline"] if r["decision"] == "fission")
proposed_fissions = sum(1 for r in results["proposed"] if r["decision"] == "fission")

total_examples = len(results["baseline"])

# Percentages go through _percent so an empty result set prints 0.0% instead
# of raising ZeroDivisionError.
print(f"\nSUMMARY STATISTICS:")
print(f"Total examples: {total_examples}")
print(f"Errors occurred: {baseline_errors} ({_percent(baseline_errors, total_examples):.1f}%)")
print()
_print_decision_split("Baseline (Always Fission)", baseline_fissions, total_examples)
print()
_print_decision_split("Proposed (DKW Controller)", proposed_fissions, total_examples)

## 6. Results Analysis

Let's analyze the results and compare the performance of both approaches:

In [None]:
# Seed NumPy's global RNG so the simulated errors repeat on every run
np.random.seed(42)

# Execute the experiment on the inline dataset
results = run_experiment(sample_data)

# Banner separating the run log from the results that follow
banner = "=" * 50
print(f"\n{banner}")
print("EXPERIMENT RESULTS")
print(banner)

## 5. Run the Experiment

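Now let's run the experiment with our sample data and see how the DKW controller performs compared to the baseline. Note that `sample_data` contains only 10 examples while the controller's default `min_samples` is 100, so with the default settings the controller never leaves its initial "fission" state and its decisions match the baseline: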

In [None]:
def run_experiment(data, controller=None):
    """Run the DKW controller experiment on inline data.

    For every example an error is simulated by drawing from NumPy's global
    RNG and comparing the draw against the example's ``difficulty``. The
    outcome is fed to the controller, and both the controller's decision
    ("proposed") and an always-"fission" decision ("baseline") are recorded.

    Args:
        data: Sized sequence of dicts, each with an ``"id"`` and a
            ``"difficulty"`` (probability that an error occurs).
        controller: Optional controller exposing ``add_observation(float)``
            and ``decide()``. Defaults to a fresh ``DKWController()``.

    Returns:
        Dict with ``"baseline"`` and ``"proposed"`` lists. Each entry is a
        dict with ``"id"``, ``"decision"`` and ``"error"`` (a plain ``bool``).
    """
    if controller is None:
        controller = DKWController()
    results = {"baseline": [], "proposed": []}

    print("Running experiment...")
    for i, example in enumerate(data):
        # Simulate error occurrence based on difficulty. The comparison yields
        # a NumPy bool, which json cannot serialize, so convert it to a
        # built-in bool before storing it in the results.
        error = bool(np.random.random() < example["difficulty"])
        controller.add_observation(float(error))
        decision = controller.decide()

        results["proposed"].append({
            "id": example["id"],
            "decision": decision,
            "error": error,
        })
        results["baseline"].append({
            "id": example["id"],
            "decision": "fission",  # Always conservative
            "error": error,
        })

        # Print progress for first few examples
        if i < 5:
            print(f"  {example['id']}: difficulty={example['difficulty']:.2f}, "
                  f"error={error}, decision={decision}")

    print(f"Experiment completed! Processed {len(data)} examples.")
    return results

print("Experiment function defined!")

## 4. Experiment Function

The `run_experiment` function processes the input data and compares two approaches:
1. **Baseline**: Always uses "fission" mode (conservative approach)
2. **Proposed**: Uses the DKW controller for adaptive decision making

In [None]:
# Inline stand-in for the records previously read from ../dataset_001/data_out.json.
# Every record pairs an 'id' with a 'difficulty', i.e. the probability that an
# error occurs on that example.
_difficulties = [
    0.05,  # Easy example
    0.08,  # Easy example
    0.15,  # Medium example
    0.12,  # Medium example
    0.20,  # Hard example
    0.07,  # Easy example
    0.25,  # Very hard example
    0.10,  # Medium example
    0.18,  # Hard example
    0.06,  # Easy example
]

# IDs are zero-padded positions: example_000, example_001, ...
sample_data = [
    {"id": f"example_{index:03d}", "difficulty": level}
    for index, level in enumerate(_difficulties)
]

print(f"Created sample dataset with {len(sample_data)} examples")
easiest = min(_difficulties)
hardest = max(_difficulties)
print("Difficulty range:", easiest, "to", hardest)

## 3. Sample Input Data

Instead of reading from external JSON files, we'll define the input data inline to make this notebook self-contained. This data represents examples with varying difficulty levels that the controller will process.

In [None]:
@dataclass
class DKWController:
    """DKW-guided fusion/fission controller.

    Collects error observations and switches between the "fusion" and
    "fission" states by comparing an upper bound on the recent error rate
    (empirical mean plus the DKW epsilon) against ``epsilon_target``, with a
    ``hysteresis`` margin on either side.
    """
    # Target error rate the upper bound is compared against.
    epsilon_target: float = 0.10
    # Failure probability used in the DKW epsilon formula.
    delta: float = 0.05
    # Observations required before the state may change; also the size of the
    # recent window averaged to estimate the error rate.
    min_samples: int = 100
    # Margin around epsilon_target that must be crossed to switch state.
    hysteresis: float = 0.05

    # Every observation recorded so far, oldest first.
    samples: list = field(default_factory=list)
    # Current mode: "fission" or "fusion".
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Compute DKW epsilon for n samples.

        Args:
            n: Number of samples.

        Returns:
            ``sqrt(ln(2 / delta) / (2 * n))`` as a built-in ``float``, or
            ``1.0`` when ``n < 2``.
        """
        if n < 2:
            return 1.0
        # Cast so callers get a plain float, as annotated, not np.float64.
        return float(np.sqrt(np.log(2 / self.delta) / (2 * n)))

    def add_observation(self, error: float) -> None:
        """Add error observation for calibration."""
        self.samples.append(error)

    def decide(self) -> str:
        """Make fusion/fission decision with DKW guarantee.

        Returns:
            The controller state after applying the switching rule. The state
            is returned unchanged while fewer than ``min_samples``
            observations exist or when there is nothing to average.
        """
        n = len(self.samples)
        if n < self.min_samples:
            return self.current_state

        window = self.samples[-self.min_samples:]
        if not window:
            # Reachable only when min_samples <= 0. Averaging an empty window
            # would give NaN and a RuntimeWarning, so hold the current state.
            return self.current_state

        # NOTE(review): epsilon uses the total sample count n, while the mean
        # only covers the last min_samples observations. Confirm whether the
        # bound should use len(window) instead; left unchanged here because it
        # would alter when the controller switches.
        epsilon = self.dkw_epsilon(n)
        empirical_error = np.mean(window)
        error_upper_bound = empirical_error + epsilon

        if self.current_state == "fusion":
            # Leave fusion only once the bound clearly exceeds the target.
            if error_upper_bound > self.epsilon_target + self.hysteresis:
                self.current_state = "fission"
        else:
            # Enter fusion only once the bound is clearly below the target.
            if error_upper_bound < self.epsilon_target - self.hysteresis:
                self.current_state = "fusion"

        return self.current_state

print("DKWController class defined!")

## 2. DKW Controller Class

The core of our implementation is the `DKWController` class. This controller uses the Dvoretzky-Kiefer-Wolfowitz inequality to provide statistical guarantees when deciding between fusion and fission modes.

### Key Parameters:
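- `epsilon_target`: Target error rate that the upper bound on the error is compared against (default: 0.10)
- `delta`: Failure probability of the DKW bound; the bound holds with confidence 1 − `delta` (default: 0.05)
- `min_samples`: Minimum number of observations before the controller may change state; also the size of the recent window used to estimate the error rate (default: 100)
- `hysteresis`: Margin around `epsilon_target` that must be crossed before switching, which prevents oscillation between states (default: 0.05)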

In [None]:
"""DKW Controller Implementation."""
import json
import numpy as np
from dataclasses import dataclass, field

print("Libraries imported successfully!")

## 1. Imports and Dependencies

First, let's import the required libraries:

# DKW Controller Implementation Demo

This notebook demonstrates the **DKW-guided fusion/fission controller** from the experiment_001 artifact (method.py).

## Overview
The DKW (Dvoretzky-Kiefer-Wolfowitz) controller makes adaptive decisions between "fusion" and "fission" modes based on empirical error observations, providing statistical guarantees on the error bounds.

## Key Features
- **Statistical Guarantee**: Uses DKW inequality for confidence bounds
- **Adaptive Decision Making**: Switches between fusion/fission based on error patterns
- **Hysteresis**: Prevents oscillation between states