In [None]:
# Optional export: the original script wrote the results to method_out.json.
# Set SAVE_RESULTS to True to reproduce that; the default keeps the notebook
# free of filesystem side effects.
SAVE_RESULTS = False
RESULTS_PATH = "method_out.json"

if SAVE_RESULTS:
    with open(RESULTS_PATH, "w") as f:
        json.dump(results, f, indent=2)
    print(f"Results saved to {RESULTS_PATH}")

# Display sample results in JSON format (only the first three entries per
# method, so the output stays readable).
print("Sample results structure (first 3 entries each):")
print(json.dumps({
    "baseline": results["baseline"][:3],
    "proposed": results["proposed"][:3]
}, indent=2))

# The two emoji below were previously mojibake (UTF-8 bytes decoded with a
# single-byte encoding); they are restored to the intended characters.
print("\n🎉 Notebook execution completed successfully!")
print("\n💡 Key takeaways:")
print("- The DKW controller adaptively switches between fusion and fission modes")
print("- It provides statistical guarantees on error rates using the DKW inequality")
print("- The controller starts conservatively and becomes more aggressive as it gains confidence")
print("- Hysteresis prevents rapid oscillation between modes")

## Saving Results (Optional)

The original script saved results to `method_out.json`. Here's how you can save the results if needed:

In [None]:
# Visualise how the controller behaves over time: the decision timeline (top
# panel) and the running error rate of both methods against the target
# (bottom panel).
TARGET_ERROR_RATE = 0.10  # same value as DKWController's default epsilon_target

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# Extract decision timeline for proposed method (fusion -> 1, anything else -> 0)
decisions_timeline = [r['decision'] for r in results['proposed']]
decision_numeric = [1 if d == 'fusion' else 0 for d in decisions_timeline]
errors_timeline = [r['error'] for r in results['proposed']]

# Plot 1: Decision timeline
x = range(len(decision_numeric))
ax1.plot(x, decision_numeric, 'b-', linewidth=2, label='Controller Decision')
ax1.fill_between(x, 0, decision_numeric, alpha=0.3, color='blue')
ax1.set_ylabel('Decision\n(0=Fission, 1=Fusion)')
ax1.set_title('DKW Controller Decision Timeline')
ax1.grid(True, alpha=0.3)

# Mark the steps at which an error was observed
error_indices = [i for i, error in enumerate(errors_timeline) if error]
if error_indices:
    ax1.scatter(error_indices, [decision_numeric[i] for i in error_indices],
                color='red', s=50, marker='x', label='Error Occurred', zorder=5)
# Build the legend once, after every labelled artist has been added.
ax1.legend()

# Plot 2: Cumulative error rate comparison
# Running error rate = cumulative error count / number of examples seen so far.
baseline_cumulative_errors = np.cumsum([r['error'] for r in results['baseline']])
proposed_cumulative_errors = np.cumsum([r['error'] for r in results['proposed']])
baseline_error_rate = baseline_cumulative_errors / np.arange(1, len(baseline_cumulative_errors) + 1)
proposed_error_rate = proposed_cumulative_errors / np.arange(1, len(proposed_cumulative_errors) + 1)

# Each curve gets an x axis derived from its own length rather than borrowing
# the one built for the decision timeline.
ax2.plot(range(len(baseline_error_rate)), baseline_error_rate, 'r--', linewidth=2,
         label='Baseline (Always Fission)')
ax2.plot(range(len(proposed_error_rate)), proposed_error_rate, 'g-', linewidth=2,
         label='Proposed (DKW Controller)')
ax2.axhline(y=TARGET_ERROR_RATE, color='black', linestyle=':', alpha=0.7,
            label=f'Target Error Rate ({TARGET_ERROR_RATE:.2f})')
ax2.set_xlabel('Example Number')
ax2.set_ylabel('Cumulative Error Rate')
ax2.set_title('Error Rate Comparison Over Time')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()

print("Final error rates:")
# Indexing [-1] on an empty series would raise, so only report when data exists.
if len(baseline_error_rate) and len(proposed_error_rate):
    print(f"  Baseline: {baseline_error_rate[-1]:.4f}")
    print(f"  Proposed: {proposed_error_rate[-1]:.4f}")
else:
    print("  (no results to summarise)")
print(f"  Target: {TARGET_ERROR_RATE:.4f}")

## Results Visualization

Let's visualize the controller's behavior over time to see how it adapts:

In [None]:
# Run the experiment
results = run_experiment(sample_data)

print("Experiment completed!")
print(f"Total examples processed: {len(results['baseline'])}")

# Show the first few results for comparison. Slicing (instead of indexing
# 0..9) keeps this cell working when fewer than PREVIEW_ROWS examples exist.
PREVIEW_ROWS = 10
preview_pairs = list(zip(results['baseline'][:PREVIEW_ROWS],
                         results['proposed'][:PREVIEW_ROWS]))

print(f"\n--- First {len(preview_pairs)} Results Comparison ---")
print("ID\t\tBaseline\tProposed\tError")
print("-" * 50)
for baseline_result, proposed_result in preview_pairs:
    # The Error column is taken from the baseline entry.
    print(f"{baseline_result['id']}\t{baseline_result['decision']}\t\t{proposed_result['decision']}\t\t{baseline_result['error']}")

# Show summary statistics
baseline_fission_count = sum(1 for r in results['baseline'] if r['decision'] == 'fission')
proposed_fission_count = sum(1 for r in results['proposed'] if r['decision'] == 'fission')
proposed_fusion_count = sum(1 for r in results['proposed'] if r['decision'] == 'fusion')

print("\n--- Summary Statistics ---")
print(f"Baseline (always fission): {baseline_fission_count}/{len(results['baseline'])} fission decisions")
print(f"Proposed method: {proposed_fission_count}/{len(results['proposed'])} fission, {proposed_fusion_count}/{len(results['proposed'])} fusion decisions")

# Calculate error rates
baseline_errors = sum(1 for r in results['baseline'] if r['error'])
proposed_errors = sum(1 for r in results['proposed'] if r['error'])

# An empty result list would otherwise raise ZeroDivisionError; report NaN instead.
baseline_rate = baseline_errors / len(results['baseline']) if results['baseline'] else float("nan")
proposed_rate = proposed_errors / len(results['proposed']) if results['proposed'] else float("nan")

print(f"\nTotal errors encountered: baseline={baseline_errors}, proposed={proposed_errors}")
print(f"Error rate: baseline={baseline_rate:.3f}, proposed={proposed_rate:.3f}")

## Running the Experiment

Now let's run the experiment on our sample data and examine the results:

In [None]:
def run_experiment(data, controller=None):
    """Run the DKW controller experiment against an always-fission baseline.

    For every example an error is simulated with probability
    ``example["difficulty"]`` using NumPy's global random state. The error is
    fed to the controller *before* the controller is asked for a decision, so
    the decision recorded for an example already reflects that example's
    error. The baseline entry records the same simulated error with a fixed
    "fission" decision.

    Args:
        data: Iterable of examples, each a mapping with 'id' and 'difficulty'
            fields.
        controller: Optional object exposing ``add_observation(float)`` and
            ``decide() -> str``. When omitted, a fresh ``DKWController()``
            with default settings is used (the original behaviour).

    Returns:
        Dictionary with 'baseline' and 'proposed' lists; each entry is a dict
        with 'id', 'decision' and 'error' (a plain Python bool).
    """
    if controller is None:
        controller = DKWController()
    results = {"baseline": [], "proposed": []}

    for example in data:
        # Simulate error occurrence based on difficulty. bool() guarantees a
        # plain Python bool even if the difficulty is a NumPy scalar, which
        # keeps the results JSON-serializable.
        error = bool(np.random.random() < example["difficulty"])
        controller.add_observation(float(error))
        decision = controller.decide()

        results["proposed"].append({
            "id": example["id"],
            "decision": decision,
            "error": error,
        })
        results["baseline"].append({
            "id": example["id"],
            "decision": "fission",  # Always conservative
            "error": error,
        })

    return results

print("Experiment function defined successfully!")

## Experiment Function

The `run_experiment` function simulates running both the proposed DKW controller and a baseline approach on the same data:

- **Proposed method**: Uses the DKW controller to adaptively switch between fusion and fission
- **Baseline method**: Always uses conservative fission mode

For each example, we simulate error occurrence based on the difficulty level (higher difficulty = higher probability of error).

In [None]:
# Inlined sample dataset so the notebook runs without external files.
# Every entry pairs an 'id' with a 'difficulty', i.e. the probability that the
# example produces an error. Ids are generated from the position in the base
# list (example_000 ... example_009).
sample_data = [
    {"id": f"example_{position:03d}", "difficulty": probability}
    for position, probability in enumerate((
        0.05,  # easy (5% error probability)
        0.08,  # easy-medium
        0.15,  # medium (15% error probability)
        0.12,  # medium
        0.20,  # hard (20% error probability)
        0.03,  # very easy
        0.25,  # very hard
        0.10,  # medium
        0.18,  # hard
        0.07,  # easy
    ))
] * 12  # 10 base examples x 12 = 120 in total (more than min_samples = 100)

print(f"Created sample dataset with {len(sample_data)} examples")
print("Sample examples:")
# Preview at most the first five entries.
for i, example in zip(range(5), sample_data):
    print(f"  {i}: {example}")

## Sample Data

Since this is a self-contained notebook, we'll inline the sample data directly instead of reading from external files. The original script would read from `../dataset_001/data_out.json`.

In [None]:
@dataclass
class DKWController:
    """Fusion/fission controller driven by a DKW-style confidence margin.

    The controller stores every error observation it is given and, once at
    least ``min_samples`` observations exist, compares an upper bound on the
    recent error rate against ``epsilon_target`` widened by ``hysteresis``.
    """
    # Target error threshold the upper bound is compared against.
    epsilon_target: float = 0.10
    # Confidence parameter used in the DKW margin.
    delta: float = 0.05
    # Observations required before any switch; also the averaging window size.
    min_samples: int = 100
    # Margin added to / subtracted from the target when switching states.
    hysteresis: float = 0.05

    # All observations recorded so far (never trimmed).
    samples: list = field(default_factory=list)
    # Either "fission" or "fusion"; starts in "fission".
    current_state: str = "fission"

    def dkw_epsilon(self, n: int) -> float:
        """Return the DKW margin sqrt(ln(2 / delta) / (2 n)) for ``n`` samples.

        For fewer than two samples the margin is fixed at 1.0.
        """
        if n >= 2:
            return np.sqrt(np.log(2 / self.delta) / (2 * n))
        return 1.0

    def add_observation(self, error: float) -> None:
        """Record one error observation for later decisions."""
        self.samples.append(error)

    def decide(self) -> str:
        """Update the state from the recorded observations and return it.

        With fewer than ``min_samples`` observations the current state is
        returned untouched. Otherwise the mean of the last ``min_samples``
        observations plus the DKW margin forms the upper bound: fusion falls
        back to fission when the bound exceeds ``epsilon_target + hysteresis``
        and fission advances to fusion when the bound drops below
        ``epsilon_target - hysteresis``.
        """
        total = len(self.samples)
        if total >= self.min_samples:
            recent = self.samples[-self.min_samples:]
            # NOTE(review): the margin is computed from the total sample count
            # while the mean covers only the trailing window - confirm this
            # pairing is intended.
            upper_bound = np.mean(recent) + self.dkw_epsilon(total)

            in_fusion = self.current_state == "fusion"
            if in_fusion and upper_bound > self.epsilon_target + self.hysteresis:
                self.current_state = "fission"
            elif not in_fusion and upper_bound < self.epsilon_target - self.hysteresis:
                self.current_state = "fusion"

        return self.current_state

# Smoke check: build a controller with default settings and echo its initial
# state, target error and minimum sample count.
controller = DKWController()
print(
    f"Controller initialized with state: {controller.current_state}",
    f"Target error: {controller.epsilon_target}",
    f"Min samples needed: {controller.min_samples}",
    sep="\n",
)

## DKW Controller Class

The `DKWController` class implements a fusion/fission controller that uses the **Dvoretzky-Kiefer-Wolfowitz (DKW) inequality** to provide statistical guarantees.

### Key Parameters:
- `epsilon_target`: Target error threshold (default: 0.10)
- `delta`: Confidence parameter for DKW bound (default: 0.05)
- `min_samples`: Minimum samples before making decisions (default: 100)
- `hysteresis`: Margin added to or subtracted from `epsilon_target` when switching, which prevents oscillation between states (default: 0.05)

### How it works:
1. Collects error observations over time
2. Uses the DKW inequality to compute an upper bound on the error rate with confidence $1-\delta$
3. Switches to fusion (aggressive) when the error upper bound drops below `epsilon_target - hysteresis`, and back to fission (conservative) when it rises above `epsilon_target + hysteresis`

In [None]:
import json
import numpy as np
from dataclasses import dataclass, field
import matplotlib.pyplot as plt

# Seed NumPy's global random state so the simulated errors are reproducible.
# The seed is a named constant so it can be changed or reused in one place.
SEED = 42
np.random.seed(SEED)
print("Libraries imported successfully!")

## Required Imports

First, let's import the necessary libraries:

# DKW Controller Implementation - method.py

This notebook demonstrates a **DKW-guided fusion/fission controller** implementation. The controller uses the Dvoretzky-Kiefer-Wolfowitz (DKW) inequality to make statistically guaranteed decisions about when to use fusion vs fission modes based on error observations.

## Overview
- **DKW Controller**: A class that maintains error statistics and makes fusion/fission decisions
- **Experiment**: Simulates running the controller on sample data
- **Analysis**: Compares the proposed method against a baseline approach