In [None]:
# Preview the first few records of each method, in the shape they would take
# if written to "method_out.json"
print("Sample results format (first 3 examples):")
for heading, key in (
    ("\nBaseline results:", "baseline"),
    ("\nProposed (DKW) results:", "proposed"),
):
    print(heading)
    for record in results[key][:3]:
        print(f"  {record}")

# Optionally save results to file (uncomment to enable)
# with open("method_out.json", "w") as f:
#     json.dump(results, f, indent=2)
# print("\nResults saved to method_out.json")

# Closing banner followed by the notebook's feature checklist, one item per line
print("\n=== EXPERIMENT COMPLETE ===")
for checklist_item in (
    "✓ Self-contained notebook with inline data",
    "✓ No external file dependencies",
    "✓ Interactive visualizations",
    "✓ Statistical analysis",
    "✓ Reproducible results (fixed random seed)",
):
    print(checklist_item)

## Export Results (Optional)

The original script would save results to a JSON file. Here we can examine the output format and optionally save it:

In [None]:
# Create visualizations: three stacked panels that all use the example index
# as their x values (decisions, error occurrences, running error rate).
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 10))

# Extract time series data; decisions and errors are encoded as 0/1 per example
proposed_records = results["proposed"]
examples = list(range(len(proposed_records)))
proposed_decisions = [1 if r["decision"] == "fusion" else 0 for r in proposed_records]
baseline_decisions = [1 if r["decision"] == "fusion" else 0 for r in results["baseline"]]
errors = [1 if r["error"] else 0 for r in proposed_records]
# Iterate the examples directly instead of indexing through range(len(...))
difficulties = [example["difficulty"] for example in sample_data]

# `controller` is the notebook-level instance (run_experiment builds its own),
# so this is the configured target. Reading it once keeps the dashed line in
# panel 3 and the printed summary below in agreement.
target_error_rate = controller.epsilon_target

# Plot 1: Decision comparison over time
ax1.plot(examples, proposed_decisions, 'b-', label='DKW Controller', linewidth=2, alpha=0.7)
ax1.plot(examples, baseline_decisions, 'r--', label='Baseline (Always Conservative)', linewidth=2, alpha=0.7)
ax1.set_ylabel('Decision\n(1=Fusion, 0=Fission)')
ax1.set_title('Controller Decisions Over Time')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Error occurrences and difficulty
# Keep the scatter handle so the colorbar is tied to this exact artist rather
# than to whatever happens to sit first in ax2.collections.
error_points = ax2.scatter(examples, errors, c=difficulties, cmap='viridis', alpha=0.6, s=20)
ax2.set_ylabel('Error Occurred\n(1=Yes, 0=No)')
ax2.set_title('Error Occurrences Colored by Difficulty')
colorbar = plt.colorbar(error_points, ax=ax2)
colorbar.set_label('Difficulty Level')
ax2.grid(True, alpha=0.3)

# Plot 3: Running error rate
# Trailing mean over at most `window_size` examples; the first points average
# over the shorter prefix that is available.
window_size = 20
running_errors = []
for end in range(1, len(errors) + 1):
    window_errors = errors[max(0, end - window_size):end]
    running_errors.append(sum(window_errors) / len(window_errors))

ax3.plot(examples, running_errors, 'g-', linewidth=2, label=f'Running Error Rate (window={window_size})')
ax3.axhline(y=target_error_rate, color='orange', linestyle='--', linewidth=2,
            label=f'Target Error Rate ({target_error_rate:.2f})')
ax3.set_xlabel('Example Number')
ax3.set_ylabel('Error Rate')
ax3.set_title('Running Error Rate vs Target')
ax3.legend()
ax3.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print some key insights
print("=== KEY INSIGHTS ===")
print("• The DKW controller adapts its decisions based on observed error rates")
print(f"• Target error rate: {controller.epsilon_target}")
print(f"• Actual error rate: {proposed_error:.3f}")
print(f"• The controller made {sum(proposed_decisions)} fusion decisions vs {sum(baseline_decisions)} for baseline")

## Results Visualization

Let's visualize the controller's behavior over time to see how it adapts!

In [None]:
# Run the experiment
# run_experiment returns {"baseline": [...], "proposed": [...]}; `results` is
# kept at notebook scope because the summary, plotting and export cells read it.
results = run_experiment(sample_data)

# Print summary statistics
# The trailing "\n" leaves a blank line between this banner and the first report.
print("=== EXPERIMENT RESULTS ===\n")

def analyze_results(results_data, name):
    """Print and return decision/error summary statistics for one method.

    Args:
        results_data: Sequence of result records. Each record is read through
            its "decision" key (compared against "fission") and its "error"
            key (tested for truthiness).
        name: Heading printed above the statistics.

    Returns:
        Tuple ``(fission_count, fusion_count, error_rate)``. Every record that
        is not a fission decision counts as fusion. For an empty sequence all
        three values are zero.
    """
    total = len(results_data)
    fission_count = sum(1 for r in results_data if r["decision"] == "fission")
    fusion_count = total - fission_count
    error_count = sum(1 for r in results_data if r["error"])

    def fraction(count):
        # One guarded division shared by the rate and both percentages, so an
        # empty result list reports zeros instead of raising ZeroDivisionError.
        return count / total if total > 0 else 0

    error_rate = fraction(error_count)

    print(f"{name}:")
    print(f"  Total examples: {total}")
    print(f"  Fission decisions: {fission_count} ({fraction(fission_count)*100:.1f}%)")
    print(f"  Fusion decisions: {fusion_count} ({fraction(fusion_count)*100:.1f}%)")
    print(f"  Error rate: {error_rate:.3f} ({error_count}/{total})")
    return fission_count, fusion_count, error_rate

# Summarise each method in turn; the bare print() separates the two reports.
# The unpacked names stay at notebook scope for the cells that follow.
baseline_fission, baseline_fusion, baseline_error = analyze_results(
    results["baseline"],
    "Baseline (Always Conservative)",
)
print()
proposed_fission, proposed_fusion, proposed_error = analyze_results(
    results["proposed"],
    "Proposed (DKW Controller)",
)

# Head-to-head view of how often each method chose fusion
print("\n=== COMPARISON ===")
print(f"Fusion decisions - Proposed: {proposed_fusion}, Baseline: {baseline_fusion}")
print(f"The DKW controller made {proposed_fusion - baseline_fusion} more fusion decisions")

## Run the Experiment

Let's run the experiment and examine the results!

In [None]:
def run_experiment(data):
    """Replay ``data`` through a fresh DKW controller and a fixed baseline.

    For each example an error is simulated by comparing one draw from NumPy's
    global RNG against ``example["difficulty"]``. The outcome is handed to the
    controller first and its decision is requested afterwards, so the decision
    recorded for an example already reflects that example's outcome. The
    baseline record always carries the decision "fission" and shares the same
    simulated error.

    Args:
        data: Iterable of mappings providing "id" and "difficulty".

    Returns:
        Dict with the keys "baseline" and "proposed", each a list holding one
        ``{"id", "decision", "error"}`` record per example, in input order.
    """
    controller = DKWController()
    baseline_log = []
    proposed_log = []

    for example in data:
        # Higher difficulty means a higher chance that the draw falls below it
        had_error = np.random.random() < example["difficulty"]
        controller.add_observation(float(had_error))
        chosen = controller.decide()

        proposed_log.append(
            {"id": example["id"], "decision": chosen, "error": had_error}
        )
        # Always conservative, regardless of what has been observed
        baseline_log.append(
            {"id": example["id"], "decision": "fission", "error": had_error}
        )

    return {"baseline": baseline_log, "proposed": proposed_log}

print("Experiment function defined. Ready to run!")

## Experiment Function

The experiment function runs both the proposed DKW controller and a baseline (always conservative) approach on the same data to compare performance.

In [None]:
# Inline stand-in for the dataset the original script read from
# "../dataset_001/data_out.json": no file access is needed here.

# (low, high, count) per tier, drawn in this order: easy, medium, hard.
# A higher difficulty is used later as a higher error probability.
difficulty_tiers = (
    (0.02, 0.08, 50),  # Easy examples
    (0.08, 0.15, 30),  # Medium examples
    (0.15, 0.25, 20),  # Hard examples
)
difficulties = np.concatenate(
    [np.random.uniform(low, high, count) for low, high, count in difficulty_tiers]
)

# One record per drawn difficulty, with a zero-padded sequential id and the
# difficulty converted to a built-in float
sample_data = [
    {"id": f"example_{index:03d}", "difficulty": float(level)}
    for index, level in enumerate(difficulties)
]

print(f"Created {len(sample_data)} sample examples")
print(f"Difficulty range: {min(d['difficulty'] for d in sample_data):.3f} to {max(d['difficulty'] for d in sample_data):.3f}")
print("\nFirst 5 examples:")
for example in sample_data[:5]:
    print(f"  {example['id']}: difficulty = {example['difficulty']:.3f}")

## Sample Data (Self-Contained)

Instead of reading from external files, we'll create sample data inline. This data simulates examples with varying difficulty levels that affect error probability.

In [None]:
@dataclass
class DKWController:
    """DKW-guided fusion/fission controller.

    The controller records error observations and switches between the states
    "fission" and "fusion". A switch is only considered once at least
    ``min_samples`` observations exist; the decision compares an upper bound on
    the error rate (windowed mean plus DKW margin) against ``epsilon_target``
    shifted by ``hysteresis``.

    Attributes:
        epsilon_target: Error-rate threshold the upper bound is compared with.
        delta: Value used as delta in the margin sqrt(ln(2/delta) / (2n)).
        min_samples: Observations required before the state may change; also
            the number of most recent observations that are averaged.
        hysteresis: Offset added to / subtracted from ``epsilon_target`` so the
            two switching thresholds do not coincide.
        samples: Recorded observations, oldest first.
        current_state: Either "fission" (initial) or "fusion".
    """
    epsilon_target: float = 0.10
    delta: float = 0.05
    min_samples: int = 100
    hysteresis: float = 0.05

    samples: list = field(default_factory=list)
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Return the DKW margin sqrt(ln(2/delta) / (2n)) for ``n`` samples.

        For ``n < 2`` the margin is fixed at 1.0 instead of evaluating the
        formula. The result is converted to a built-in ``float`` so it matches
        the annotation rather than leaking a NumPy scalar.
        """
        if n < 2:
            return 1.0
        return float(np.sqrt(np.log(2 / self.delta) / (2 * n)))

    def add_observation(self, error: float) -> None:
        """Append one error observation to the recorded history."""
        self.samples.append(error)

    def decide(self) -> str:
        """Update and return the current state ("fusion" or "fission").

        While fewer than ``min_samples`` observations exist (or none at all)
        the state is returned unchanged. Otherwise the mean of the most recent
        ``min_samples`` observations plus the DKW margin forms an upper bound:

        * in "fusion", switch to "fission" when the bound exceeds
          ``epsilon_target + hysteresis``;
        * in "fission", switch to "fusion" when the bound is below
          ``epsilon_target - hysteresis``.

        The margin is computed for the number of observations that are
        actually averaged. Using the full history length here would shrink the
        margin below what the averaged window supports. With the defaults
        (delta=0.05, min_samples=100) the margin is about 0.136, so fusion
        needs a larger ``min_samples`` or a looser target to become reachable.
        """
        n = len(self.samples)
        # n == 0 can only get past the threshold when min_samples <= 0; there
        # is nothing to average, so keep the state instead of taking an empty mean.
        if n == 0 or n < self.min_samples:
            return self.current_state

        # Most recent observations; the margin below uses this window's size.
        window = self.samples[-self.min_samples:]
        epsilon = self.dkw_epsilon(len(window))
        empirical_error = float(np.mean(window))
        error_upper_bound = empirical_error + epsilon

        if self.current_state == "fusion":
            if error_upper_bound > self.epsilon_target + self.hysteresis:
                self.current_state = "fission"
        else:
            if error_upper_bound < self.epsilon_target - self.hysteresis:
                self.current_state = "fusion"

        return self.current_state

# Test the controller instantiation
controller = DKWController()
print(f"Controller created with target error rate: {controller.epsilon_target}")
print(f"Initial state: {controller.current_state}")

## DKW Controller Class

The `DKWController` class implements the core algorithm with these key parameters:

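- **`epsilon_target`**: Target error rate threshold (default: 0.10)
- **`delta`**: Failure probability of the DKW bound, so the bound holds with confidence 1 − `delta` (default: 0.05)
- **`min_samples`**: Minimum number of observations before the state can change; also the number of most recent observations averaged to estimate the error rate (default: 100)
- **`hysteresis`**: Margin around `epsilon_target` that prevents oscillation between states: the controller switches to fusion only when the error upper bound is below `epsilon_target - hysteresis`, and back to fission only when it is above `epsilon_target + hysteresis` (default: 0.05)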

### Key Methods:
- `dkw_epsilon(n)`: Computes the DKW margin $\varepsilon(n) = \sqrt{\ln(2/\delta) / (2n)}$ for n samples (returns 1.0 when n < 2)
- `add_observation(error)`: Records new error observations
- `decide()`: Makes fusion/fission decision with statistical guarantees

In [None]:
# Required imports
import json
import numpy as np
from dataclasses import dataclass, field
import matplotlib.pyplot as plt

# Set random seed for reproducible results
# This seeds NumPy's global RNG, which the other cells draw from through
# np.random.uniform (sample data) and np.random.random (simulated errors);
# re-run this cell before those cells to reproduce the same numbers.
np.random.seed(42)

print("Setup complete! Ready to run DKW controller demo.")

# DKW Controller Implementation - Interactive Demo

This notebook demonstrates a **DKW-guided fusion/fission controller** that uses the Dvoretzky-Kiefer-Wolfowitz (DKW) inequality to make statistical decisions with confidence guarantees.

## Overview
The DKW controller adaptively switches between:
- **Fusion**: Aggressive optimization mode
- **Fission**: Conservative mode

The controller uses statistical bounds to ensure error rates stay within target thresholds with high confidence.