# 02 · Digital Twin Lite

We build a toy digital twin where ball weight perturbations translate into WAMECU β shifts.
The experiment emphasises bias detection, not exploitation, and adheres to the manifesto.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from wamecu import wamecu_probabilities
from wamecu.simulate import simulate_draws
from wamecu.metrics import chi_square_test

plt.style.use('seaborn-v0_8')
RNG = np.random.default_rng(2025)

## Weight → β conversion

Heavier balls induce negative β (harder to draw), lighter ones positive.
We normalise around the mean weight and scale by an interpretable coupling constant *k*.

In [None]:
def weight_based_bias(weights: np.ndarray, k: float = 0.15) -> np.ndarray:
    weights = np.asarray(weights, dtype=float)
    centered = weights - weights.mean()
    beta = -k * centered / (np.abs(centered).max() + 1e-9)
    beta -= beta.mean()
    return np.clip(beta, -0.9, 0.9)

## Monte Carlo scenarios

We explore three machine states: balanced, front-loaded, and tail-heavy.

In [None]:
scenarios = {
    'Balanced': np.array([100, 102, 101, 99, 98, 100]),
    'Front-loaded': np.array([95, 96, 97, 103, 104, 105]),
    'Tail-heavy': np.array([105, 104, 103, 97, 96, 95]),
}

sample_size = 2000
results = []
for name, weights in scenarios.items():
    beta = weight_based_bias(weights, k=0.2)
    probs = wamecu_probabilities(len(weights), beta)
    draws = simulate_draws(probs, sample_size, seed=RNG.integers(0, 2**32 - 1))
    counts = np.bincount(draws, minlength=len(weights))
    expected = probs * sample_size
    stat, pval = chi_square_test(counts, expected)
    df = pd.DataFrame({
        'Outcome': np.arange(len(weights)),
        'Observed': counts,
        'Expected': expected,
        'Scenario': name,
    })
    df['chi2_stat'] = stat
    df['chi2_pvalue'] = pval
    results.append(df)
scenario_df = pd.concat(results, ignore_index=True)

In [None]:
g = sns.catplot(
    data=scenario_df.melt(id_vars=['Outcome', 'Scenario', 'chi2_stat', 'chi2_pvalue'], value_vars=['Observed', 'Expected']),
    x='Outcome', y='value', hue='variable', col='Scenario', kind='bar', height=4, aspect=1
)
g.fig.subplots_adjust(top=0.8)
g.fig.suptitle('Observed vs expected counts across weight scenarios')
plt.show()

scenario_summary = scenario_df.groupby('Scenario')[['chi2_stat', 'chi2_pvalue']].first()
scenario_summary

## Sensitivity analysis

We estimate how many draws are needed to flag a bias (χ² p-value < 0.05) for varying coupling constants.

In [None]:
def detectability_threshold(k_values, sample_grid, trials=60, alpha=0.05):
    thresholds = []
    base_weights = scenarios['Balanced']
    for k in k_values:
        beta = weight_based_bias(base_weights, k=k)
        probs = wamecu_probabilities(len(base_weights), beta)
        detected_at = None
        for sample_size in sample_grid:
            detections = 0
            for _ in range(trials):
                draws = simulate_draws(probs, sample_size, seed=RNG.integers(0, 2**32 - 1))
                counts = np.bincount(draws, minlength=len(base_weights))
                stat, pval = chi_square_test(counts, probs * sample_size)
                detections += pval < alpha
            rate = detections / trials
            if rate >= 0.8:
                detected_at = sample_size
                break
        thresholds.append(detected_at or np.nan)
    return np.array(thresholds)

k_values = np.linspace(0.05, 0.4, 8)
sample_grid = [200, 400, 600, 800, 1000]
thresholds = detectability_threshold(k_values, sample_grid)

plt.figure(figsize=(8, 4))
plt.plot(k_values, thresholds, marker='o')
plt.xlabel('Coupling constant k (weight → β)')
plt.ylabel('Sample size for ≥80% detection rate')
plt.title('Sensitivity of detection to coupling strength')
plt.grid(True)
plt.tight_layout()
plt.show()

pd.DataFrame({'k': k_values, 'sample_size_for_detection': thresholds})

## Reflections

* Moderate coupling (k≈0.2) is detectable with ~600 draws.
* Edge cases (k<0.1) require impractically long runs, underscoring the need for richer sensors.
* The twin stays firmly in the audit domain—no exploitation, just diagnostics.