
# TGTG — **Gamma (Risk Aversion) as Main Channel**
## Full-factorial sweeps + interaction analysis focused on \(\gamma\)

This notebook makes **risk aversion \(\gamma\)** the main hyperparameter channel.

For each hyperparameter combination (including \(\gamma\)) it:
1) trains a **Baseline** policy (forces \(b=0\))  
2) trains a **TGTG-available** policy (optimizes \(q\) and \(b\))  
3) evaluates both **out-of-sample** and stores waste/production/profit outcomes and deltas (TGTG-available minus Baseline).

Then it produces **gamma-centric** plots and fits interaction models including \(\gamma\).

---

### Objective (as implemented)
Fitness uses daily profits \(\pi_t\) over \(D\) days:
\[
\text{fitness} = D(\bar{\pi} - \gamma \sigma_{\pi})
\]
Here \(\bar{\pi}\) is the mean and \(\sigma_{\pi}\) the volatility of daily profit. A higher \(\gamma\) penalizes profit volatility more heavily, so TGTG can act as a risk-mitigation channel.

---

### Note on \(\tau\)
The simulator treats \(\tau\) as a **per-unit** TGTG price (salvage value).


In [None]:

import sys, os, time
from pathlib import Path

sys.path.append("/mnt/data")  # adjust if needed

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed

from tgtg_fast import (
    EnvironmentConfig, BakerAgent,
    generate_preferences,
    make_common_random_draws,
    alpha_path_constant, alpha_path_beta_shocks, alpha_path_logit_ar1,
    simulate_epoch_fast
)

# Display settings: print floats without scientific notation and show wide result tables.
np.set_printoptions(suppress=True)
pd.set_option("display.max_columns", 250)

# Every checkpoint and final export of this notebook is written under this directory.
OUT_DIR = Path("./tgtg_gamma_grid_outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Trailing expression so the notebook cell displays the output location.
OUT_DIR


In [None]:

# --- Optimizer helpers ---
def clip_agent(agent: BakerAgent, q_max: int) -> BakerAgent:
    """Build a new agent whose decision variables lie inside their bounds.

    ``q`` is floored, limited to ``[0, q_max]`` and converted to int64;
    ``b`` is limited to ``[0.0, 1.0]``; ``gamma`` is carried over as a float.
    """
    floored_q = np.floor(agent.q)
    bounded_q = np.clip(floored_q, 0, q_max).astype(np.int64)
    bounded_b = np.clip(agent.b, 0.0, 1.0)
    return BakerAgent(q=bounded_q, b=float(bounded_b), gamma=float(agent.gamma))

def init_population(P: int, L: int, gamma: float, q_max: int, rng: np.random.Generator) -> list[BakerAgent]:
    """Create ``P`` random agents that all share the same ``gamma``.

    Each agent gets ``L`` integer quantities drawn uniformly from
    ``0..q_max`` (inclusive) and a ``b`` drawn uniformly from ``[0, 1)``.
    """

    def _draw_agent():
        # q is drawn before b so the RNG stream is consumed in a fixed order.
        q_draw = rng.integers(low=0, high=q_max + 1, size=L, dtype=np.int64)
        b_draw = float(rng.random())
        return BakerAgent(q=q_draw, b=b_draw, gamma=float(gamma))

    return [_draw_agent() for _ in range(P)]

def mutate(agent: BakerAgent, q_sigma: float, b_sigma: float, q_max: int, rng: np.random.Generator) -> BakerAgent:
    """Return a clipped child of ``agent`` perturbed with Gaussian noise.

    Zero-mean normal noise with scale ``q_sigma`` is added to every entry of
    ``q`` and with scale ``b_sigma`` to ``b``; the result is passed through
    ``clip_agent`` so it is feasible again. ``gamma`` is inherited unchanged.
    """
    base_q = agent.q.astype(np.float64)
    # Noise for q is drawn before noise for b (fixed RNG consumption order).
    q_noise = rng.normal(0.0, q_sigma, size=agent.q.shape[0])
    b_noise = rng.normal(0.0, b_sigma)
    perturbed = BakerAgent(
        q=base_q + q_noise,
        b=float(agent.b + b_noise),
        gamma=agent.gamma,
    )
    return clip_agent(perturbed, q_max=q_max)

def evaluate_population_parallel_threads(
    population: list[BakerAgent],
    prefs: np.ndarray,
    env: EnvironmentConfig,
    alpha_path: np.ndarray,
    perm: np.ndarray,
    visit_u: np.ndarray,
    walk_u: np.ndarray,
    max_workers: int | None = None,
) -> tuple[np.ndarray, list[dict]]:
    """Score every agent in ``population`` on the same simulated epoch.

    All agents are evaluated against identical inputs (``prefs``, ``env``,
    ``alpha_path``, ``perm``, ``visit_u``, ``walk_u``), so differences in
    fitness come from the agents themselves rather than from random draws.

    Args:
        population: Agents to evaluate.
        prefs, env, alpha_path, perm, visit_u, walk_u: Forwarded unchanged to
            ``simulate_epoch_fast``.
        max_workers: Thread count. ``None`` means one thread per CPU. With
            ``1`` the agents are evaluated serially in the calling thread and
            no pool is created.

    Returns:
        ``(fitness, metrics_list)``: a float64 array holding each agent's
        ``"fitness"`` metric, and the full per-agent metric mappings. Both are
        in the same order as ``population``.
    """
    if max_workers is None:
        max_workers = max(1, os.cpu_count() or 1)

    def _eval_one(agent):
        return simulate_epoch_fast(agent, prefs, env, alpha_path, perm, visit_u, walk_u)

    if max_workers == 1:
        # Serial fast path: this function is called once per generation, so
        # building and tearing down a one-thread pool each time is pure overhead.
        metrics_list = [_eval_one(agent) for agent in population]
    else:
        # NOTE(review): threads only help if simulate_epoch_fast releases the
        # GIL; confirm against the tgtg_fast implementation.
        with ThreadPoolExecutor(max_workers=max_workers) as ex:
            # map() yields results in submission order, so no index bookkeeping is needed.
            metrics_list = list(ex.map(_eval_one, population))

    fitness = np.array([m["fitness"] for m in metrics_list], dtype=np.float64)
    return fitness, metrics_list

def make_alpha_path(D: int, demand_spec: dict, rng: np.random.Generator) -> np.ndarray:
    k = demand_spec["kind"]
    if k == "constant":
        return alpha_path_constant(D, demand_spec["alpha"])
    if k == "beta":
        return alpha_path_beta_shocks(D, demand_spec["alpha_mean"], demand_spec["concentration"], rng)
    if k == "ar1":
        return alpha_path_logit_ar1(D, demand_spec["alpha0"], demand_spec["phi"], demand_spec["sigma"], rng)
    raise ValueError(f"Unknown demand spec kind: {k}")

def evolutionary_optimize(
    env: EnvironmentConfig,
    demand_spec: dict,
    *,
    gamma: float,
    allow_tgtg: bool,
    preference_mode: str,
    seed: int,
    D: int,
    P: int,
    G: int,
    elite_frac: float,
    q_max: int,
    q_sigma: float,
    b_sigma: float,
    n_workers_eval: int | None = None,
) -> dict:
    """Search for a high-fitness agent with an elitist mutate-and-select loop.

    Each of the ``G`` generations scores the whole population on one shared
    set of random draws and one demand path, keeps the top ``elite_n`` agents
    unchanged, and refills the population with mutated copies of randomly
    chosen elites.

    Args:
        env: Environment configuration; ``env.N`` and ``env.L`` size the
            preferences, the random draws and the agents' ``q`` vectors.
        demand_spec: Demand description forwarded to ``make_alpha_path``.
        gamma: Risk-aversion value assigned to every agent.
        allow_tgtg: When False, ``b`` is forced to 0.0 for every initial agent
            and every mutated child (baseline policy).
        preference_mode: Forwarded to ``generate_preferences``.
        seed: Seeds the generator used for preferences, initialization and
            mutation; fixed offsets of it seed the per-generation draws.
        D: Epoch length forwarded to the draw and demand-path generators.
        P: Population size.
        G: Number of generations.
        elite_frac: Fraction of the population kept as elites (at least 2 agents).
        q_max: Upper bound for each entry of ``q``.
        q_sigma: Mutation noise scale for ``q``.
        b_sigma: Mutation noise scale for ``b``.
        n_workers_eval: Thread count for population evaluation.

    Returns:
        Dict with keys ``best_agent``, ``best_fit``, ``best_metrics_train``,
        ``prefs``, ``env``, ``demand_spec``, ``seed``, ``D`` and ``gamma``.
        With ``G == 0`` the loop never runs, so ``best_agent`` and
        ``best_metrics_train`` are None and ``best_fit`` is ``-1e18``.
    """
    # One generator drives preferences, initialization and mutation, so the
    # order of the calls below determines the whole random stream.
    rng = np.random.default_rng(seed)
    prefs = generate_preferences(env.N, env.L, mode=preference_mode, rng=rng)

    pop = init_population(P, env.L, gamma=float(gamma), q_max=q_max, rng=rng)
    if not allow_tgtg:
        # b is still drawn above, so baseline and TGTG runs with the same seed
        # consume the generator identically; the draw is then overwritten.
        for a in pop:
            a.b = 0.0

    elite_n = max(2, int(P * elite_frac))

    # warm-up compile
    # (one throw-away evaluation on separately seeded draws; presumably triggers
    # JIT compilation inside simulate_epoch_fast; confirm against tgtg_fast)
    perm0, visit0, walk0 = make_common_random_draws(D, env.N, env.L, seed=seed + 10_000)
    alpha0 = make_alpha_path(D, demand_spec, np.random.default_rng(seed + 20_000))
    _ = simulate_epoch_fast(pop[0], prefs, env, alpha0, perm0, visit0, walk0)

    best_fit = -1e18
    best_agent = None
    best_metrics = None

    for g in range(G):
        # Fresh draws and demand path per generation, shared by all agents in it.
        perm, visit_u, walk_u = make_common_random_draws(D, env.N, env.L, seed=seed + 1_000_000 + g)
        alpha_path = make_alpha_path(D, demand_spec, np.random.default_rng(seed + 2_000_000 + g))

        fitness, metrics_list = evaluate_population_parallel_threads(
            pop, prefs, env, alpha_path, perm, visit_u, walk_u, max_workers=n_workers_eval
        )

        # Indices sorted by descending fitness.
        order = np.argsort(fitness)[::-1]
        elites = [pop[i] for i in order[:elite_n]]

        # NOTE(review): fitness values from different generations come from
        # different random draws, so this running maximum compares scores
        # obtained under different scenarios.
        if float(fitness[order[0]]) > best_fit:
            best_fit = float(fitness[order[0]])
            best_agent = elites[0]
            best_metrics = metrics_list[order[0]]

        # Elites survive unchanged; the rest of the next generation is mutated elites.
        new_pop = elites.copy()
        while len(new_pop) < P:
            parent = elites[rng.integers(0, elite_n)]
            child = mutate(parent, q_sigma=q_sigma, b_sigma=b_sigma, q_max=q_max, rng=rng)
            if not allow_tgtg:
                child.b = 0.0
            new_pop.append(child)
        pop = new_pop

    return {
        "best_agent": best_agent,
        "best_fit": best_fit,
        "best_metrics_train": best_metrics,
        "prefs": prefs,
        "env": env,
        "demand_spec": demand_spec,
        "seed": seed,
        "D": D,
        "gamma": float(gamma),
    }

def out_of_sample_eval(
    agent: BakerAgent,
    prefs: np.ndarray,
    env: EnvironmentConfig,
    demand_spec: dict,
    *,
    seed: int,
    D: int,
    reps: int
) -> pd.DataFrame:
    """Simulate ``agent`` on ``reps`` freshly seeded epochs and tabulate the metrics.

    Replication ``j`` uses ``seed + 100_000 + j`` for the common random draws
    and ``seed + 200_000 + j`` for the demand path.

    Returns:
        A DataFrame with one row per replication: the metrics returned by
        ``simulate_epoch_fast`` plus a ``rep`` column with the replication index.
    """

    def _simulate_rep(rep_idx):
        perm, visit_u, walk_u = make_common_random_draws(
            D, env.N, env.L, seed=seed + 100_000 + rep_idx
        )
        demand_rng = np.random.default_rng(seed + 200_000 + rep_idx)
        alpha_path = make_alpha_path(D, demand_spec, demand_rng)
        # Copy the metrics before tagging them so the simulator's mapping is untouched.
        record = dict(simulate_epoch_fast(agent, prefs, env, alpha_path, perm, visit_u, walk_u))
        record["rep"] = rep_idx
        return record

    return pd.DataFrame([_simulate_rep(j) for j in range(reps)])


In [None]:

# --- Gamma-first grid definition (edit freely) ---
# Environment parameters held fixed across every scenario.
BASE = {"N": 600, "r": 0.35, "chi": 1.0, "alpha_mean": 0.35}

# Swept dimensions. Insertion order is the nesting order of the factorial
# sweep: the first key varies slowest, the last key fastest.
GRID = {
    "gamma": [0.0, 0.4, 0.8, 1.2, 1.6, 2.0],
    "L": [3, 6, 10],
    "margin_rho_over_chi": [1.6, 2.5, 3.6],
    "discount_tau_over_rho": [0.15, 0.35, 0.7],
    "beta_concentration": [5.0, 20.0, 100.0],
}

# Optimizer and out-of-sample evaluation budget used for every scenario.
BUDGET = {
    "D": 30,
    "P": 70,
    "G": 26,
    "elite_frac": 0.20,
    "q_max": 140,
    "q_sigma": 12.0,
    "b_sigma": 0.08,
    "oos_reps": 20,
    "preference_mode": "correlated",
}

# Parallelism: scenarios are spread over the outer pool, while each scenario
# evaluates its population with INNER_EVAL_WORKERS threads.
OUTER_BACKEND = "threads"  # "threads" (portable) or "processes" (often faster on Linux/macOS)
OUTER_WORKERS = max(1, (os.cpu_count() or 1) // 2)
INNER_EVAL_WORKERS = 1

# Checkpoint files, rewritten as the grid run progresses.
RESULTS_CSV = OUT_DIR.joinpath("gamma_grid_results.csv")
RESULTS_PARQUET = OUT_DIR.joinpath("gamma_grid_results.parquet")

def iter_full_factorial(grid: dict) -> list[dict]:
    """Expand ``grid`` into the list of all parameter combinations.

    Args:
        grid: Maps each parameter name to an iterable of candidate values.

    Returns:
        One independent dict per combination, keyed in the same order as
        ``grid``. The first key varies slowest and the last key fastest.
        An empty grid yields ``[{}]``; any parameter with no values yields ``[]``.
    """
    # Function-scope import keeps this notebook cell self-contained.
    from itertools import product

    keys = list(grid)
    # product() snapshots every value iterable up front, so one-shot iterables
    # (generators, iterators) are expanded fully instead of being exhausted
    # after the first pass of a nested loop.
    return [dict(zip(keys, values)) for values in product(*(grid[k] for k in keys))]

# Materialize the full-factorial grid once; the run cell below consumes this list.
combos = iter_full_factorial(GRID)
print("Total combinations:", len(combos))
print("Outer backend:", OUTER_BACKEND, "| workers:", OUTER_WORKERS)
# Trailing expression: preview the first two scenarios in the cell output.
combos[:2]


In [None]:

# --- Scenario runner + checkpointed grid execution ---
def make_env_and_demand(combo: dict):
    """Translate one grid combination into an environment and a demand spec.

    The grid stores ratios: ``rho`` is recovered as
    ``margin_rho_over_chi * BASE["chi"]`` and ``tau`` as
    ``discount_tau_over_rho * rho``. Demand is always of kind ``"beta"`` with
    mean ``BASE["alpha_mean"]`` and the combination's concentration.

    Returns:
        ``(env, demand_spec)``.
    """
    L_value = int(combo["L"])
    margin_ratio = float(combo["margin_rho_over_chi"])
    discount_ratio = float(combo["discount_tau_over_rho"])
    concentration = float(combo["beta_concentration"])

    rho = margin_ratio * BASE["chi"]
    tau = discount_ratio * rho

    demand_spec = {
        "kind": "beta",
        "alpha_mean": float(BASE["alpha_mean"]),
        "concentration": float(concentration),
    }
    env = EnvironmentConfig(
        N=int(BASE["N"]),
        L=L_value,
        r=float(BASE["r"]),
        chi=float(BASE["chi"]),
        rho=float(rho),
        tau=float(tau),
    )
    return env, demand_spec

def scenario_key(combo: dict) -> str:
    """Return the string that identifies ``combo`` in the results table.

    The key is ``tag=value`` for the five swept parameters joined with ``|``,
    using each value's default string formatting.
    """
    tagged_fields = (
        ("g", "gamma"),
        ("L", "L"),
        ("m", "margin_rho_over_chi"),
        ("d", "discount_tau_over_rho"),
        ("c", "beta_concentration"),
    )
    return "|".join(f"{tag}={combo[field]}" for tag, field in tagged_fields)

def run_one_combo(combo: dict, seed: int) -> dict:
    """Train and evaluate the TGTG and baseline policies for one scenario.

    Both policies are optimized with the same ``seed`` and budget and then
    evaluated out-of-sample with the same evaluation seed (``seed + 777``).

    Returns:
        A flat result row: the combination's parameters, ``key``, ``seed``,
        the out-of-sample means for waste / profit / production (``*_base``,
        ``*_tgtg`` and ``delta_*`` = TGTG minus baseline), ``b_star`` (the
        trained TGTG agent's ``b``), and ``*_per_day`` versions of the waste
        and production columns (divided by ``BUDGET["D"]``).
    """
    env, demand_spec = make_env_and_demand(combo)
    gamma = float(combo["gamma"])

    # Everything except allow_tgtg is shared by the two training runs.
    train_kwargs = {
        "gamma": gamma,
        "preference_mode": BUDGET["preference_mode"],
        "seed": seed,
        "D": BUDGET["D"],
        "P": BUDGET["P"],
        "G": BUDGET["G"],
        "elite_frac": BUDGET["elite_frac"],
        "q_max": BUDGET["q_max"],
        "q_sigma": BUDGET["q_sigma"],
        "b_sigma": BUDGET["b_sigma"],
        "n_workers_eval": INNER_EVAL_WORKERS,
    }
    out_t = evolutionary_optimize(env, demand_spec, allow_tgtg=True, **train_kwargs)
    out_b = evolutionary_optimize(env, demand_spec, allow_tgtg=False, **train_kwargs)

    oos_kwargs = {"seed": seed + 777, "D": BUDGET["D"], "reps": BUDGET["oos_reps"]}
    oos_t = out_of_sample_eval(out_t["best_agent"], out_t["prefs"], env, demand_spec, **oos_kwargs)
    oos_b = out_of_sample_eval(out_b["best_agent"], out_b["prefs"], env, demand_spec, **oos_kwargs)

    row = dict(combo)
    row["key"] = scenario_key(combo)
    row["seed"] = seed

    # (row label, metric column) pairs; insertion order fixes the column order.
    for label, metric in (("waste", "waste"), ("profit", "mean_profit"), ("prod", "production")):
        base_mean = float(oos_b[metric].mean())
        tgtg_mean = float(oos_t[metric].mean())
        row[f"{label}_base"] = base_mean
        row[f"{label}_tgtg"] = tgtg_mean
        row[f"delta_{label}"] = tgtg_mean - base_mean

    row["b_star"] = float(out_t["best_agent"].b)

    # Per-day versions of the waste and production columns.
    days = float(BUDGET["D"])
    for label in ("waste", "prod"):
        for stem in (f"{label}_base", f"{label}_tgtg", f"delta_{label}"):
            row[f"{stem}_per_day"] = row[stem] / days
    return row

def load_existing_results(path_csv: Path) -> pd.DataFrame:
    """Load a results checkpoint, or return an empty frame when none is usable.

    Args:
        path_csv: Location of the CSV checkpoint.

    Returns:
        The stored results, or an empty DataFrame when the file does not
        exist or contains no data (for example a zero-byte file left behind
        by an interrupted write).
    """
    try:
        # round_trip parsing restores floats bit-for-bit, so grid values read
        # back from a checkpoint still compare equal to the in-memory grid.
        return pd.read_csv(path_csv, float_precision="round_trip")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return pd.DataFrame()

def save_results(df: pd.DataFrame):
    """Persist ``df`` as the CSV checkpoint and, best-effort, as Parquet.

    The CSV is the authoritative checkpoint read back on resume, so it is
    written atomically. The Parquet copy is optional: a failure there is
    reported as a warning and never interrupts the run.
    """
    # Function-scope import keeps this notebook cell self-contained.
    import warnings

    # Write next to the target and rename over it, so an interruption during
    # the write cannot leave a truncated checkpoint behind.
    tmp_csv = RESULTS_CSV.with_name(RESULTS_CSV.name + ".tmp")
    df.to_csv(tmp_csv, index=False)
    tmp_csv.replace(RESULTS_CSV)

    try:
        df.to_parquet(RESULTS_PARQUET, index=False)
    except Exception as exc:  # e.g. no Parquet engine installed
        warnings.warn(f"Parquet checkpoint skipped: {exc!r}", RuntimeWarning, stacklevel=2)

def run_grid(combos: list[dict], seed_base: int = 3000, resume: bool = True, flush_every: int = 5) -> pd.DataFrame:
    """Run every scenario in ``combos`` that is not already checkpointed.

    Args:
        combos: The full list of scenarios (always pass the complete grid, also
            when resuming, because a scenario's seed comes from its position here).
        seed_base: Seed of the first scenario; scenario ``i`` uses
            ``seed_base + 17 * i``.
        resume: When True, scenarios whose key is already in the CSV
            checkpoint are skipped and their stored rows are kept.
        flush_every: Save a checkpoint after every this many completed
            scenarios. Values <= 0 disable intermediate checkpoints.

    Returns:
        All results (previously stored plus newly computed), one row per key.

    Raises:
        Whatever a scenario raises. Rows completed before the failure are
        checkpointed first, and scenarios that have not started are cancelled.
    """
    existing = load_existing_results(RESULTS_CSV) if resume else pd.DataFrame()
    done = set(existing["key"].astype(str).tolist()) if (resume and len(existing) > 0) else set()

    # Keep each scenario's index in the FULL grid: seeding from the position in
    # the pending list would give a resumed scenario a different seed than the
    # same scenario gets in an uninterrupted run.
    pending = [(grid_idx, c) for grid_idx, c in enumerate(combos) if scenario_key(c) not in done]
    print(f"Total combos: {len(combos)} | already done: {len(done)} | pending: {len(pending)}")
    if not pending:
        return existing

    def _merge_with_existing(new_rows: list[dict]) -> pd.DataFrame:
        # Stored rows come first, so keep="first" prefers the checkpointed result.
        df_new = pd.DataFrame(new_rows)
        frames = [existing, df_new] if len(existing) > 0 else [df_new]
        return pd.concat(frames, ignore_index=True).drop_duplicates(subset=["key"], keep="first")

    rows = []
    t0 = time.time()
    Executor = ThreadPoolExecutor if OUTER_BACKEND == "threads" else ProcessPoolExecutor

    with Executor(max_workers=OUTER_WORKERS) as ex:
        futures = {
            ex.submit(run_one_combo, combo, seed_base + grid_idx * 17): combo
            for grid_idx, combo in pending
        }
        try:
            for j, f in enumerate(as_completed(futures), 1):
                rows.append(f.result())
                if flush_every > 0 and j % flush_every == 0:
                    save_results(_merge_with_existing(rows))
                    elapsed = time.time() - t0
                    print(f"{j}/{len(pending)} completed | elapsed {elapsed:.1f}s | checkpoint saved")
        except BaseException:
            # Keep finished work and do not start scenarios whose results
            # would be thrown away; then let the error propagate.
            for fut in futures:
                fut.cancel()
            if rows:
                save_results(_merge_with_existing(rows))
            raise

    df_all = _merge_with_existing(rows)
    save_results(df_all)
    return df_all


In [None]:

# RUN (resumable)
# Scenarios already present in the CSV checkpoint are skipped; a checkpoint is
# written after every 3 newly completed scenarios.
df = run_grid(combos, seed_base=3000, resume=True, flush_every=3)
# Trailing expression: show the table size and the first rows in the cell output.
df.shape, df.head()


In [None]:

# --- Gamma-centric plots ---
def plot_gamma_curves(df: pd.DataFrame, y_col: str, group_col: str, title: str):
    """Plot the mean of ``y_col`` against gamma, one line per ``group_col`` value.

    For every distinct value of ``group_col`` (in sorted order) the rows are
    averaged per gamma and drawn as a marked line. A horizontal line at 0 is
    added as a reference, and the figure is shown immediately.
    """
    fig, ax = plt.subplots(figsize=(8.2, 4))

    for level in sorted(df[group_col].unique()):
        level_rows = df.loc[df[group_col] == level]
        per_gamma = level_rows.groupby("gamma")[y_col].mean()
        curve = per_gamma.reset_index().sort_values("gamma")
        ax.plot(curve["gamma"], curve[y_col], marker="o", label=f"{group_col}={level}")

    ax.axhline(0, linewidth=1)
    ax.set(xlabel="gamma", ylabel=y_col, title=title)
    ax.legend()
    plt.tight_layout()
    plt.show()

# Outcome deltas against gamma, one line per demand-concentration level.
for _y_col, _title in (
    ("delta_waste_per_day", "Δ waste/day vs γ (lines by volatility)"),
    ("delta_prod_per_day", "Δ production/day vs γ (lines by volatility)"),
):
    plot_gamma_curves(df, _y_col, "beta_concentration", _title)

# Mean optimized b per gamma, averaged over all other grid dimensions.
sub = df.groupby("gamma")["b_star"].mean().reset_index().sort_values("gamma")

fig, ax = plt.subplots(figsize=(8.2, 4))
ax.plot(sub["gamma"], sub["b_star"], marker="o")
ax.set(
    xlabel="gamma",
    ylabel="mean b*",
    title="Mean b* vs γ (averaged over other parameters)",
)
plt.tight_layout()
plt.show()


In [None]:

# --- Conditional gamma slice ---
# Hold three grid dimensions fixed and look at gamma x concentration only.
L_fix = 6
margin_fix = 2.5
disc_fix = 0.35

# The float-valued grid columns are matched with a tolerance: rows restored
# from a CSV checkpoint are not guaranteed to be bit-identical to the literals
# above, and an exact == would then silently drop them from the slice.
slice_mask = (
    (df["L"] == L_fix)
    & np.isclose(df["margin_rho_over_chi"], margin_fix)
    & np.isclose(df["discount_tau_over_rho"], disc_fix)
)
sl = df[slice_mask].copy()
print("Slice rows:", len(sl))

plot_gamma_curves(sl, "delta_waste_per_day", "beta_concentration", f"Δ waste/day vs γ | L={L_fix}, m={margin_fix}, d={disc_fix}")

# Optimized b against gamma within the slice, one line per concentration level.
fig, ax = plt.subplots(figsize=(8.2, 4))
for conc in sorted(sl["beta_concentration"].unique()):
    sub = sl[sl["beta_concentration"] == conc].groupby("gamma")["b_star"].mean().reset_index().sort_values("gamma")
    ax.plot(sub["gamma"], sub["b_star"], marker="o", label=f"conc={conc}")
ax.set_xlabel("gamma")
ax.set_ylabel("b*")
ax.set_title(f"b* vs γ | L={L_fix}, m={margin_fix}, d={disc_fix}")
ax.legend()
plt.tight_layout()
plt.show()


In [None]:

# --- Interaction models including gamma ---
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Predictors are the five swept grid dimensions; the target is one per-scenario outcome column.
FEATURES = ["gamma", "L", "margin_rho_over_chi", "discount_tau_over_rho", "beta_concentration"]
TARGET = "delta_waste_per_day"  # or "waste_tgtg_per_day" or "b_star"

X = df[FEATURES].copy()
y = df[TARGET].copy()

# Hold out 25% of the scenarios for evaluation; the split is reused by the random-forest cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Main effects plus pairwise products (interaction_only=True, so no squared
# terms), standardized, then fitted with ridge regularization.
lin = Pipeline(steps=[
    ("poly", PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)),
    ("scaler", StandardScaler(with_mean=True, with_std=True)),
    ("ridge", Ridge(alpha=1.0))
])
lin.fit(X_train, y_train)
pred_lin = lin.predict(X_test)
print("Linear interactions R2:", r2_score(y_test, pred_lin))
print("Linear interactions MAE:", mean_absolute_error(y_test, pred_lin))

# Rank terms by absolute coefficient. The ridge step sees standardized
# features, so the coefficients are on the standardized scale.
poly = lin.named_steps["poly"]
ridge = lin.named_steps["ridge"]
terms = poly.get_feature_names_out(FEATURES)
coef_df = pd.DataFrame({"term": terms, "coef": ridge.coef_}).sort_values("coef", key=lambda s: np.abs(s), ascending=False)
# Trailing expression: show the 30 largest terms in the cell output.
coef_df.head(30)


In [None]:

# Non-linear benchmark fitted on the same train/test split as the linear interaction model.
rf = RandomForestRegressor(n_estimators=700, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)
pred_rf = rf.predict(X_test)
print("RF R2:", r2_score(y_test, pred_rf))
print("RF MAE:", mean_absolute_error(y_test, pred_rf))

# Feature importances as reported by the fitted forest, largest first.
imp = pd.DataFrame({"feature": FEATURES, "importance": rf.feature_importances_}).sort_values("importance", ascending=False)
# Trailing expression: display the importance table in the cell output.
imp


In [None]:

# Export
# The CSV is the authoritative export; the Parquet copy is best-effort.
df.to_csv(OUT_DIR / "gamma_grid_results_FINAL.csv", index=False)
try:
    df.to_parquet(OUT_DIR / "gamma_grid_results_FINAL.parquet", index=False)
except Exception as exc:
    # Typically no Parquet engine is installed. Report it instead of failing
    # silently, so a missing .parquet file is not a surprise later.
    print(f"Parquet export skipped: {exc!r}")

# Trailing expression: show where the exports were written.
OUT_DIR
