# Policy Optimisation for Basic Bathtub Scenario

Find optimal parameters for the linear interval policy: `interval = a + b * durability`

In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory on sys.path (once) so the
# `src.*` imports below can resolve. Presumably the notebook is launched from
# a subdirectory of the project root -- confirm if the layout changes.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import numpy as np
import matplotlib.pyplot as plt

from src.scenarios import BasicBathtubScenario
from src.policy import LinearIntervalPolicy, NoOpPolicy, FixedIntervalPolicy
from src.runner import run_scenario, compare_policies
from src.optimisation import optimise_policy, grid_search, sensitivity_analysis

## Setup Scenario

In [None]:
# Simulation horizon (passed as max_time to the runs below) and the subject
# count used for the quick sanity check.
MAX_TIME = 150.0
N_SUBJECTS = 500

# Scenario parameters are kept identical to the exploration notebook.
scenario = BasicBathtubScenario(
    scale1=100.0,
    scale2=200.0,
    service_cost=20.0,
    revenue_per_time=2.0,
)

## Sanity Check: No-Op vs Baseline

First, check whether service actually helps compared to doing nothing.

In [None]:
# Run the do-nothing policy and the hand-picked baseline on the same scenario
# with the same sample size, horizon and seed.
no_op = NoOpPolicy()
baseline = LinearIntervalPolicy(a=15.0, b=10.0)

result_no_op, result_baseline = (
    run_scenario(scenario, policy, N_SUBJECTS, MAX_TIME, seed=42)
    for policy in (no_op, baseline)
)

# One report line per policy; the short heading is padded so that both lines
# match the original layout.
for heading, outcome in (
    ("No-Op Policy:", result_no_op),
    ("Baseline (15+10d):", result_baseline),
):
    print(f"{heading:<15} mean={outcome.mean_net_value:.1f}, std={outcome.std_net_value:.1f}")

# A negative number here means servicing made things worse.
service_gain = result_baseline.mean_net_value - result_no_op.mean_net_value
print(f"\nService improves net value by {service_gain:.1f}")

## Grid Search

Explore the parameter space to understand the landscape.

In [None]:
# Parameter grid: 12 evenly spaced candidates per axis (endpoints included).
a_values = np.linspace(5, 40, num=12)
b_values = np.linspace(0, 20, num=12)

def policy_factory(params):
    """Build a LinearIntervalPolicy from a parameter sequence.

    ``params[0]`` is used as ``a`` and ``params[1]`` as ``b`` in
    ``interval = a + b * durability``.
    """
    base_interval = params[0]
    durability_coef = params[1]
    return LinearIntervalPolicy(a=base_interval, b=durability_coef)

# Use more subjects to reduce noise (simulation variance)
# The result is unpacked as (best parameters, best value, grid of values).
# NOTE(review): results_grid is later plotted transposed against
# (a_values, b_values), so it is presumably indexed [a, b] -- confirm against
# grid_search.
best_params, best_value, results_grid = grid_search(
    scenario,
    policy_factory,
    [a_values, b_values],
    n_subjects=1000,  # More subjects for less noisy estimates
    max_time=MAX_TIME,
    seed=42,
    verbose=True
)

In [None]:
# Contour plot of the grid-search landscape, with the best grid point and the
# hand-picked baseline marked.
best_a, best_b = best_params[0], best_params[1]
best_label = f'Best: a={best_a:.1f}, b={best_b:.1f}'

plt.figure(figsize=(10, 8))
# contourf expects Z with rows along y, hence the transpose
# (assumes results_grid is indexed [a, b]).
plt.contourf(a_values, b_values, results_grid.T, levels=20, cmap='viridis')
plt.colorbar(label='Mean Net Value')
plt.scatter([best_a], [best_b], color='red', s=100, marker='*', label=best_label)
plt.scatter(
    [15], [10],
    color='orange', s=100, marker='o', edgecolors='black',
    label='Baseline: a=15, b=10',
)
plt.title('Net Value Landscape')
plt.xlabel('a (base interval)')
plt.ylabel('b (durability coefficient)')
plt.legend()
plt.show()

# Observation from the recorded run: net value rises towards larger a and b
# (i.e. less frequent service). That is consistent with the no-op policy
# beating the baseline -- service doesn't pay off with the current costs.

## Gradient-Free Optimisation

Use Powell's method (gradient-free), which handles noisy objectives better than gradient-based methods such as L-BFGS-B, because simulation variance makes gradient estimates unreliable. Start from the grid-search best as a warm start.

In [None]:
# Start from grid search best, use gradient-free method
# The bounds are presumably ordered like the parameter vector consumed by
# policy_factory, i.e. (a, b) -- confirm against optimise_policy.
opt_result = optimise_policy(
    scenario,
    policy_factory,
    initial_params=best_params,  # Start from grid search best
    bounds=[(1.0, 50.0), (0.0, 30.0)],
    n_subjects=1000,  # More subjects for stable estimates
    max_time=MAX_TIME,
    method='Powell',  # Gradient-free, supports bounds, handles noise
    seed=42,
    verbose=True
)

In [None]:
# Split the optimiser's history into parameter vectors and objective values
# (each history entry is indexed as entry[0] = params, entry[1] = value).
history = np.array([entry[0] for entry in opt_result.history])
values = np.array([entry[1] for entry in opt_result.history])

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
ax_path, ax_value = axes

# Left panel: path of the evaluated points drawn over the grid-search landscape.
first_point, last_point = history[0], history[-1]
ax_path.contourf(a_values, b_values, results_grid.T, levels=20, cmap='viridis', alpha=0.7)
ax_path.plot(history[:, 0], history[:, 1], 'r.-', markersize=8, linewidth=1)
ax_path.scatter(
    [first_point[0]], [first_point[1]],
    color='yellow', s=100, edgecolors='black', zorder=5, label='Start',
)
ax_path.scatter(
    [last_point[0]], [last_point[1]],
    color='red', s=100, marker='*', zorder=5, label='End',
)
ax_path.set_title('Optimisation Trajectory')
ax_path.set_xlabel('a')
ax_path.set_ylabel('b')
ax_path.legend()

# Right panel: objective value per evaluation, with the starting value as a
# dashed reference line.
ax_value.plot(values, 'b.-')
ax_value.axhline(opt_result.initial_value, color='gray', linestyle='--', label='Initial (grid best)')
ax_value.set_title('Convergence (noise causes deterioration)')
ax_value.set_xlabel('Evaluation')
ax_value.set_ylabel('Net Value')
ax_value.legend()

plt.tight_layout()
plt.show()

# Observation from the recorded run: the optimiser struggles because the
# simulation noise is larger than the signal from parameter changes, so the
# grid search result is the more reliable one for this problem.

## Compare Policies

In [None]:
# Parameters found by the two search strategies.
grid_a, grid_b = best_params[0], best_params[1]
powell_a, powell_b = opt_result.optimal_params[0], opt_result.optimal_params[1]

# Candidate policies, keyed by the label used in the table below.
policies = {
    'No Service': NoOpPolicy(),
    'Fixed (20)': FixedIntervalPolicy(interval=20.0),
    'Baseline (15+10d)': LinearIntervalPolicy(a=15.0, b=10.0),
    'Grid Best': LinearIntervalPolicy(a=grid_a, b=grid_b),
    'Powell': LinearIntervalPolicy(a=powell_a, b=powell_b),
}

comparison = compare_policies(
    scenario,
    policies,
    n_subjects=2000,
    max_time=MAX_TIME,
    n_repeats=5,
    seed=42,
)

# Text for the "Params" column; policies without an (a, b) pair get no entry
# and their row ends after the Std column.
param_notes = {
    'Baseline (15+10d)': 'a=15, b=10',
    'Grid Best': f'a={grid_a:.1f}, b={grid_b:.1f}',
    'Powell': f'a={powell_a:.1f}, b={powell_b:.1f}',
}

print(f"{'Policy':<20} {'Mean':>10} {'Std':>10} {'Params':<20}")
print("-" * 62)
for policy_label in policies:
    stats = comparison[policy_label]
    row = f"{policy_label:<20} {stats['mean']:>10.1f} {stats['std']:>10.1f}"
    if policy_label in param_notes:
        row = f"{row} {param_notes[policy_label]:<20}"
    print(row)

# Reminder that the table and the search results use different settings.
print(f"\nNote: compare_policies re-evaluates with n_subjects=2000, n_repeats=5 (different seeds)")
print(f"Original grid search values (n_subjects=1000, seed=42):")
print(f"  Grid Best: {best_value:.1f}")
print(f"  Powell optimal: {opt_result.optimal_value:.1f}")

In [None]:
# Collect per-policy summary statistics in the comparison's own key order.
names = [*comparison.keys()]
means = [comparison[name]['mean'] for name in names]
stds = [comparison[name]['std'] for name in names]

# Bars show the mean net value; error bars show the reported std.
plt.figure(figsize=(10, 5))
bars = plt.bar(names, means, yerr=stds, capsize=5, alpha=0.7)
plt.axhline(0, color='black', linewidth=0.5)
plt.title('Policy Comparison')
plt.ylabel('Mean Net Value')
plt.xticks(rotation=15)

# Highlight the policy with the highest mean (argmax picks the first on ties).
best_idx = np.argmax(means)
bars[best_idx].set_color('green')

plt.tight_layout()
plt.show()

## Sensitivity Analysis

How sensitive is net value to each parameter?

In [None]:
# Multipliers applied to each parameter: 0.5x to 1.5x in 11 steps.
variation_grid = np.linspace(0.5, 1.5, num=11)

sens = sensitivity_analysis(
    scenario,
    policy_factory,
    base_params=opt_result.optimal_params,
    param_names=['a', 'b'],
    variations=variation_grid,
    n_subjects=1000,  # More subjects for smoother curves
    max_time=MAX_TIME,
    seed=42,
)

# One curve per parameter, read from sens under the parameter's name.
plt.figure(figsize=(8, 5))
for sens_key, line_format, curve_label in (
    ('a', 'b.-', 'a (base interval)'),
    ('b', 'r.-', 'b (durability coef)'),
):
    plt.plot(sens['multipliers'], sens[sens_key], line_format, label=curve_label)

# Dashed vertical line marks the unperturbed parameters (multiplier 1.0).
plt.axvline(1.0, color='gray', linestyle='--', alpha=0.5)
plt.xlabel('Parameter Multiplier')
plt.ylabel('Net Value')
sens_a, sens_b = opt_result.optimal_params[0], opt_result.optimal_params[1]
plt.title(f'Sensitivity around optimal (a={sens_a:.1f}, b={sens_b:.1f})')
plt.legend()
plt.show()

## Summary

In [None]:
# Final report: scenario settings first, then the optimiser's start and end
# parameters and net values as recorded on opt_result.
divider = "=" * 50
print(divider)
print("OPTIMISATION SUMMARY")
print(divider)

print("Scenario: BasicBathtubScenario")
summary_failure_model = scenario.failure_model
print(f"  scale1={summary_failure_model.scale1}, scale2={summary_failure_model.scale2}")
summary_costs = scenario.costs
print(f"  service_cost={summary_costs.service_cost}, revenue={summary_costs.revenue_per_time}")
print()

print("Policy: interval = a + b * durability")
initial_a, initial_b = opt_result.initial_params[0], opt_result.initial_params[1]
print(f"  Initial:  a={initial_a:.1f}, b={initial_b:.1f}")
optimal_a, optimal_b = opt_result.optimal_params[0], opt_result.optimal_params[1]
print(f"  Optimal:  a={optimal_a:.1f}, b={optimal_b:.1f}")
print()

print("Net Value:")
print(f"  Initial:  {opt_result.initial_value:.1f}")
print(f"  Optimal:  {opt_result.optimal_value:.1f}")
print(f"  Improvement: {opt_result.improvement:.1f} ({opt_result.improvement_pct:.1f}%)")

## Alternative Cost Structure

The current economics make service unprofitable. Let's try a scenario where service pays off: cheaper service, higher revenue.

In [None]:
# Alternative economics: same failure scales as the original scenario, but
# service is very cheap and revenue per unit time is higher.
scenario_v2 = BasicBathtubScenario(
    scale1=100.0,
    scale2=200.0,
    service_cost=0.5,
    revenue_per_time=3.0,
)

N_SAMPLES = 2000  # Consistent sample size

# Quick comparison: no-op versus the hand-picked baseline, same seed for both.
no_op_v2 = run_scenario(scenario_v2, NoOpPolicy(), N_SAMPLES, MAX_TIME, seed=42)
baseline_v2 = run_scenario(
    scenario_v2,
    LinearIntervalPolicy(a=15.0, b=10.0),
    N_SAMPLES,
    MAX_TIME,
    seed=42,
)

v2_costs = scenario_v2.costs
print(f"V2 scenario: cost={v2_costs.service_cost}, revenue={v2_costs.revenue_per_time}")
print(f"  No-Op:    {no_op_v2.mean_net_value:.1f}")
print(f"  Baseline: {baseline_v2.mean_net_value:.1f}")
v2_gain = baseline_v2.mean_net_value - no_op_v2.mean_net_value
print(f"  Improvement: {v2_gain:.1f}")

# Same (a, b) grid as the original scenario, so the two landscapes can be
# compared point for point.
print("\nGrid search on v2 scenario in (a, b) space...")
best_v2, value_v2, grid_v2 = grid_search(
    scenario_v2,
    policy_factory,
    [a_values, b_values],
    n_subjects=N_SAMPLES,
    max_time=MAX_TIME,
    seed=42,
    verbose=True,
)

In [None]:
def _draw_ab_panel(ax, tag, panel_scenario, grid, best, best_net_value):
    """Draw one (a, b) landscape with its best grid point and a colorbar.

    Returns the contour set created on ``ax``.
    """
    panel_costs = panel_scenario.costs
    image = ax.contourf(a_values, b_values, grid.T, levels=20, cmap='viridis')
    ax.scatter([best[0]], [best[1]], color='red', s=100, marker='*')
    ax.set_xlabel('a')
    ax.set_ylabel('b')
    ax.set_title(
        f'{tag}: cost={panel_costs.service_cost}, rev={panel_costs.revenue_per_time}\n'
        f'Best: a={best[0]:.0f}, b={best[1]:.0f} -> {best_net_value:.0f}'
    )
    plt.colorbar(image, ax=ax, label='Net Value')
    return image


# Side-by-side landscapes: original economics (left) versus v2 (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
im0 = _draw_ab_panel(axes[0], 'Original', scenario, results_grid, best_params, best_value)
im1 = _draw_ab_panel(axes[1], 'V2', scenario_v2, grid_v2, best_v2, value_v2)

plt.tight_layout()
plt.show()

print("\nInterpretation:")
print("- Original: higher a,b = less service = better (optimal: don't service)")
print("- V2: there's a sweet spot for service frequency (optimal: service helps)")

## Reparameterisation: Service Rates (1/a, 1/b)

View the landscape in rate space, where the no-op policy sits at the origin (0, 0). If the optimum lies near the origin, service doesn't help.

In [None]:
# Rate-space grid over (1/a, 1/b): the origin (0, 0) means no service, and
# moving away from it means more frequent service.
# 1/a runs from 0.01 (a = 100) to 0.15 (a ~ 6.7); 1/b covers a similar range
# but starts at exactly 0.
rate_a_values = np.linspace(0.01, 0.15, num=12)
rate_b_values = np.linspace(0.0, 0.15, num=12)

def policy_factory_rates(params):
    """Create a LinearIntervalPolicy from rate parameters ``(1/a, 1/b)``.

    A rate above 0.001 is inverted to give the coefficient. Any other rate
    (including 0) maps to a coefficient of 1000.0, which stands in for an
    "infinite" interval and avoids dividing by zero.
    """
    rate_a, rate_b = params

    def _coefficient(rate):
        # Invert usable rates; cap everything else at 1000.0.
        if rate > 0.001:
            return 1.0 / rate
        return 1000.0

    return LinearIntervalPolicy(a=_coefficient(rate_a), b=_coefficient(rate_b))

N_SAMPLES_GRID = 2000  # More samples for cleaner landscape

# Keyword settings shared by both rate-space searches.
rate_search_settings = dict(
    n_subjects=N_SAMPLES_GRID,
    max_time=MAX_TIME,
    seed=42,
    verbose=True,
)

print("Grid search in rate space (1/a, 1/b)...")
print("Origin (0,0) = no service, moving away = more frequent service\n")

# Original scenario.
best_rates, value_rates, grid_rates = grid_search(
    scenario,
    policy_factory_rates,
    [rate_a_values, rate_b_values],
    **rate_search_settings,
)

# Same grid and settings for the v2 scenario.
print("\n\nGrid search in rate space for v2 scenario...")
best_rates_v2, value_rates_v2, grid_rates_v2 = grid_search(
    scenario_v2,
    policy_factory_rates,
    [rate_a_values, rate_b_values],
    **rate_search_settings,
)

In [None]:
def _draw_rate_panel(ax, tag, panel_scenario, grid, best, best_net_value):
    """Draw one rate-space landscape with the no-op origin and the best grid point.

    Returns the contour set created on ``ax``.
    """
    panel_costs = panel_scenario.costs
    image = ax.contourf(rate_a_values, rate_b_values, grid.T, levels=20, cmap='viridis')
    # The origin marks the no-service policy.
    ax.scatter([0], [0], color='white', s=150, marker='o', edgecolors='black',
               label='No-Op (0,0)', zorder=5)
    ax.scatter([best[0]], [best[1]], color='red', s=100, marker='*', label='Best', zorder=5)
    ax.set_xlabel('1/a (base service rate)')
    ax.set_ylabel('1/b (durability rate)')
    ax.set_title(
        f'{tag}: cost={panel_costs.service_cost}, rev={panel_costs.revenue_per_time}\n'
        f'Best: ({best[0]:.3f}, {best[1]:.3f}) -> {best_net_value:.0f}'
    )
    ax.legend(loc='upper right')
    plt.colorbar(image, ax=ax, label='Net Value')
    return image


# Side-by-side rate-space landscapes: original economics (left), v2 (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
im0 = _draw_rate_panel(axes[0], 'Original', scenario, grid_rates, best_rates, value_rates)
im1 = _draw_rate_panel(axes[1], 'V2', scenario_v2, grid_rates_v2, best_rates_v2, value_rates_v2)

plt.tight_layout()
plt.show()

print("\nInterpretation in rate space:")
print("- Origin (0,0) = no service (infinite intervals)")
print("- Moving right = more frequent base service")
print("- Moving up = stronger durability effect on frequency")
print("\nOriginal: best near origin -> service not worthwhile")
print("V2: best away from origin -> optimal service frequency exists")

## Better Parameterisation: (c, r) - Effective Interval and Durability Ratio

### The Problem with (a, b)

Looking at the landscape in (a, b) space, we see iso-lines roughly where `a + b = const`. This happens because:

```
interval = a + b * durability
```

For a subject whose durability is close to the mean (**durability ≈ 1**), the interval is approximately `a + b`. So many different (a, b) combinations give similar results if their sum is the same:
- (10, 20) → interval ≈ 30 at durability=1
- (20, 10) → interval ≈ 30 at durability=1
- (15, 15) → interval ≈ 30 at durability=1

This creates a "ridge" in the landscape that's hard to optimise along.

### The (c, r) Parameterisation

Instead, we can use:
- **c = a + b**: the effective interval when durability = 1 (the mean)
- **r = b / (a + b)**: the fraction of the interval that depends on durability (0 to 1)

Inverting:
- a = c × (1 - r)
- b = c × r

This gives:
```
interval = c × (1 - r) + c × r × durability
         = c × (1 - r + r × durability)
         = c × (1 + r × (durability - 1))
```

### Interpretation

| Parameter | Meaning | Range |
|-----------|---------|-------|
| c | Service interval at mean durability | (0, ∞), higher = less service |
| r | How much to adjust for individual durability | [0, 1] |

- When **r = 0**: interval = c for everyone (ignore durability)
- When **r = 1**: interval = c × durability (fully proportional to durability)

### Why This is Better

1. **c directly controls service frequency** - the main decision variable
2. **r is a secondary tuning** - how much to personalise based on durability
3. **Iso-lines become vertical** - varying r at constant c gives similar results
4. **Natural bounds** - c > 0 (just needs to be positive), r ∈ [0, 1]
5. **No-op corresponds to c → ∞** (or 1/c → 0 in rate space)

In [None]:
# (c, r) grid: c is the effective interval at durability = 1 and r is the
# durability ratio.
c_values = np.linspace(10, 150, num=15)  # Effective interval: 10 to 150
r_values = np.linspace(0, 1, num=12)     # Durability ratio: 0 to 1

def policy_factory_cr(params):
    """Create a LinearIntervalPolicy from ``(c, r)`` parameters.

    The pair is mapped back to the linear coefficients as
    ``a = c * (1 - r)`` and ``b = c * r``.
    """
    effective_interval, durability_ratio = params
    return LinearIntervalPolicy(
        a=effective_interval * (1 - durability_ratio),
        b=effective_interval * durability_ratio,
    )

def _print_cr_best(heading, best):
    """Print the best (c, r) pair and the (a, b) coefficients it corresponds to."""
    c_best, r_best = best[0], best[1]
    print(f"\n{heading}: c={c_best:.1f}, r={r_best:.2f}")
    print(f"Corresponds to: a={c_best*(1-r_best):.1f}, b={c_best*r_best:.1f}")


# Keyword settings shared by both (c, r) searches.
cr_search_settings = dict(
    n_subjects=N_SAMPLES_GRID,
    max_time=MAX_TIME,
    seed=42,
    verbose=True,
)

print("Grid search in (c, r) space...")
print("c = effective interval at mean durability")
print("r = fraction of interval depending on durability\n")

# Original scenario.
best_cr, value_cr, grid_cr = grid_search(
    scenario,
    policy_factory_cr,
    [c_values, r_values],
    **cr_search_settings,
)
_print_cr_best("Best", best_cr)

# Same grid and settings for the v2 scenario.
print("\n\nGrid search in (c, r) space for v2 scenario...")
best_cr_v2, value_cr_v2, grid_cr_v2 = grid_search(
    scenario_v2,
    policy_factory_cr,
    [c_values, r_values],
    **cr_search_settings,
)
_print_cr_best("V2 Best", best_cr_v2)

In [None]:
def _draw_cr_panel(ax, tag, panel_scenario, grid, best, best_net_value):
    """Draw one (c, r) landscape with its best grid point and an r = 0.5 guide line.

    Returns the contour set created on ``ax``.
    """
    panel_costs = panel_scenario.costs
    image = ax.contourf(c_values, r_values, grid.T, levels=20, cmap='viridis')
    ax.scatter([best[0]], [best[1]], color='red', s=100, marker='*', label='Best', zorder=5)
    # Faint reference line at r = 0.5, where a and b are equal.
    ax.axhline(0.5, color='white', linestyle='--', alpha=0.3, label='r=0.5 (balanced)')
    ax.set_xlabel('c (effective interval at durability=1)')
    ax.set_ylabel('r (durability ratio)')
    ax.set_title(
        f'{tag}: cost={panel_costs.service_cost}, rev={panel_costs.revenue_per_time}\n'
        f'Best: c={best[0]:.0f}, r={best[1]:.2f} -> {best_net_value:.0f}'
    )
    ax.legend(loc='upper right')
    plt.colorbar(image, ax=ax, label='Net Value')
    return image


# Side-by-side (c, r) landscapes: original economics (left), v2 (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
im0 = _draw_cr_panel(axes[0], 'Original', scenario, grid_cr, best_cr, value_cr)
im1 = _draw_cr_panel(axes[1], 'V2', scenario_v2, grid_cr_v2, best_cr_v2, value_cr_v2)

plt.tight_layout()
plt.show()

print("\nInterpretation:")
print("- Original: high c is best (longer intervals, less service) -> service unprofitable")
print("- V2: there's an optimal c around the sweet spot -> service profitable")
print("\n- Vertical banding (constant value across r) would mean durability personalisation doesn't help")
print("- Sloped contours indicate that r matters: adjusting for durability improves outcomes")