In [None]:
import os
import time
import numpy as np
import pandas as pd
from gurobipy import Model, GRB, quicksum

from Input_generator import generate_input


In [None]:
# ------------------------- User knobs / constants -------------------------

# Discount factor; stage objectives are weighted by GAMMA ** t when summed along a path.
GAMMA = 1.0
# Default number of training iterations.
MAX_ITERS = 30
# Filename prefix for every CSV written by this notebook.
OUT_PREFIX = "sddp_alg6_mb"
# Directory (created on demand) that receives the CSV outputs.
OUTPUT_DIR = "results_sddp"
# Base seed for the train/test shuffle and for the per-iteration minibatch draws.
SEED_TRAIN = 0

# Minibatch settings for training forward pass
# When False, every training path is used in every iteration.
USE_MINIBATCH = True
BATCH_SIZE = 200        # per-iteration number of training paths to use (<= N_train)

# Optional logging knobs
# When True, per-stage rows are recorded only during the final iteration.
LOG_LAST_ITER_ONLY = True
# Only paths whose position within the batch is below this value are logged.
LOG_MAX_PATHS = 200


In [None]:


# Stepsize schedule α_n (Robbins–Monro style)
def alpha(n, alpha0=0.5, power=0.6):
    """Return the stepsize for iteration ``n``, i.e. ``alpha0 / n**power``.

    Args:
        n: Iteration counter; must be non-zero (the value is divided by
            ``n ** power``).
        alpha0: Scale of the stepsize (the value returned for ``n == 1``).
        power: Decay exponent applied to ``n``.

    Returns:
        The stepsize as computed by ``alpha0 / (n ** power)``.
    """
    decay = n ** power
    return alpha0 / decay

# ------------------------- Helpers ----------------------------------------

def add_theta_constraints(m, l_next_vars, cuts_a, cuts_b):
    """Add the free variable ``theta_next`` to ``m`` and bound it from above.

    With no cuts, the single constraint ``theta <= 0`` is added. Otherwise one
    constraint ``theta <= sum_j a[j] * l_next_vars[j] + b`` is added for every
    ``(a, b)`` pair obtained by zipping ``cuts_a`` with ``cuts_b``.

    Args:
        m: Gurobi model that receives the variable and constraints.
        l_next_vars: Sequence of model variables, indexed like each cut's
            coefficient vector.
        cuts_a: Sequence of coefficient vectors (one per cut).
        cuts_b: Sequence of scalar intercepts (one per cut).

    Returns:
        The newly created ``theta_next`` variable.
    """
    theta = m.addVar(lb=-GRB.INFINITY, name="theta_next")

    # Guard clause: without any cut, theta is only capped at zero.
    if len(cuts_a) == 0:
        m.addConstr(theta <= 0.0, name="theta_le_0")
        return theta

    for cut_idx, (slope, intercept) in enumerate(zip(cuts_a, cuts_b)):
        n_terms = len(slope)
        linear_part = quicksum(
            float(slope[j]) * l_next_vars[j] for j in range(n_terms)
        )
        m.addConstr(theta <= linear_part + float(intercept), name=f"theta_cut_{cut_idx}")

    return theta

def stage_forward_lp(
    t, l_t, nu_t, rho_t, R, pi_min, pi_max, l_min, l_max, g,
    cuts_next_a, cuts_next_b, silent=True,
    soft_bounds=True, level_penalty=1e6
):
    """Build and solve the LP of stage ``t``; the model is disposed on exit.

    The LP maximizes
        sum_j rho_t[j] * g[j] * pi_next[j] + theta - penalty
    subject to, for every j,
        l_next[j] = l_t[j] + nu_t[j] + sum_k R[j, k] * pi_next[k]
        pi_min[j] <= pi_next[j] <= pi_max[j]
    and ``theta`` bounded above by the cuts in ``cuts_next_a`` / ``cuts_next_b``
    (see ``add_theta_constraints``).

    Level bounds are handled in one of two ways:
      * ``soft_bounds=True``: ``l_next`` is free, and violations of
        ``l_min`` / ``l_max`` are absorbed by non-negative slacks whose sum is
        multiplied by ``level_penalty`` and subtracted from the objective.
      * ``soft_bounds=False``: ``l_min`` / ``l_max`` are hard variable bounds.

    Args:
        t: Stage index; only used in the model name and the error message.
        l_t, nu_t, rho_t, g: Per-index data read as ``x[j]`` for j in
            ``range(l_t.size)``.
        R: Coefficient matrix read as ``R[j, k]``.
        pi_min, pi_max, l_min, l_max: Bound arrays; ``.tolist()`` is called on
            them, so they are presumably 1-D numpy arrays of length J.
        cuts_next_a, cuts_next_b: Cut coefficients / intercepts for the next
            stage's value approximation.
        silent: If True, Gurobi output is switched off.
        soft_bounds: Selects soft (penalized) or hard level bounds.
        level_penalty: Per-unit penalty on soft-bound violations.

    Returns:
        ``(obj, pi_next, l_next, omega, theta_val)`` where ``obj`` is the
        optimal objective value, ``pi_next`` and ``l_next`` are float arrays of
        length J, ``omega`` holds the ``Pi`` attribute (dual value) of each
        balance constraint, and ``theta_val`` is the value of theta.

    Raises:
        RuntimeError: If the solve status is neither OPTIMAL nor SUBOPTIMAL.
    """
    J = l_t.size

    # Context manager ensures the model is disposed as soon as we leave the block.
    with Model(f"fwd_t{t}") as m:
        if silent:
            m.Params.OutputFlag = 0
        m.Params.Threads = 1           # keep memory/CPU stable
        # m.Params.Presolve = 2        # optional speed
        # m.Params.NumericFocus = 1    # optional robustness

        # Decision variables.
        pi_next = m.addVars(J, lb=pi_min.tolist(), ub=pi_max.tolist(), name="pi_next")
        if soft_bounds:
            l_next = m.addVars(J, lb=-GRB.INFINITY, ub=GRB.INFINITY, name="l_next")
        else:
            l_next = m.addVars(J, lb=l_min.tolist(), ub=l_max.tolist(), name="l_next")

        # Balance equalities; kept in a list so their duals can be read back.
        bal_con = []
        for j in range(J):
            expr = l_next[j] - (float(l_t[j]) + float(nu_t[j]))
            expr -= quicksum(float(R[j, k]) * pi_next[k] for k in range(J))
            bal_con.append(m.addConstr(expr == 0.0, name=f"balance_{j}"))

        # Soft bound slacks + penalty.
        pen_term = 0.0
        if soft_bounds:
            s_low  = m.addVars(J, lb=0.0, name="s_low")
            s_high = m.addVars(J, lb=0.0, name="s_high")
            for j in range(J):
                # Coerce the bounds to plain Python floats, like every other
                # coefficient in this model, so that numpy scalars (e.g.
                # float32) are never the left operand of arithmetic with a
                # Gurobi variable.
                lo_j = float(l_min[j])
                hi_j = float(l_max[j])
                m.addConstr(lo_j - l_next[j] <= s_low[j],  name=f"soft_min_{j}")
                m.addConstr(l_next[j] - hi_j <= s_high[j], name=f"soft_max_{j}")
            pen_term = level_penalty * (quicksum(s_low[j] for j in range(J)) +
                                        quicksum(s_high[j] for j in range(J)))

        # Future-value variable bounded by the next-stage cuts.
        theta = add_theta_constraints(m, [l_next[j] for j in range(J)],
                                      cuts_next_a, cuts_next_b)

        # Objective: immediate revenue + future value - bound-violation penalty.
        rev = quicksum(float(rho_t[j]) * float(g[j]) * pi_next[j] for j in range(J))
        m.setObjective(rev + theta - pen_term, GRB.MAXIMIZE)

        m.optimize()
        if m.Status not in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
            raise RuntimeError(f"Stage LP infeasible/unbounded at t={t} (status {m.Status})")

        # Copy everything out of the model before it is disposed.
        pi_sol = np.array([pi_next[j].X for j in range(J)], dtype=float)
        l_sol  = np.array([l_next[j].X  for j in range(J)], dtype=float)
        omega  = np.array([c.Pi for c in bal_con], dtype=float)
        theta_val = float(theta.X)

        return float(m.ObjVal), pi_sol, l_sol, omega, theta_val

def evaluate_policy_on_paths(
    cuts_a, cuts_b, l0, prices, inflow, R, pi_min, pi_max, l_min, l_max, g,
    gamma=1.0, soft_bounds=True, level_penalty=1e6
):
    """Roll the cut-defined policy forward along every path and tabulate results.

    For each path, the stage LP is solved for t = 0..T-1 starting from a copy
    of ``l0``; the level returned by one stage is the starting level of the
    next. Stage ``t`` uses the single cut ``(cuts_a[t+1], cuts_b[t+1])``.

    Args:
        cuts_a, cuts_b: Per-stage cut coefficients / intercepts, indexed by
            stage (index ``t + 1`` is read at stage ``t``).
        l0: Initial level vector (copied for each path).
        prices, inflow: 3-D arrays indexed ``[path, t, :]``; the shape of
            ``prices`` defines the number of paths and stages.
        R, pi_min, pi_max, l_min, l_max, g: Forwarded to ``stage_forward_lp``.
        gamma: Discount factor; stage ``t`` is weighted by ``gamma ** t``.
        soft_bounds, level_penalty: Forwarded to ``stage_forward_lp``.

    Returns:
        DataFrame with one row per path and columns ``path_id``,
        ``total_profit`` (discounted sum of the stage objective values) and
        ``first_hour_theta`` (theta value from the stage-0 solve).
    """
    N_paths, T, J = prices.shape
    path_totals = np.zeros(N_paths)
    theta_at_start = np.zeros(N_paths)

    for path in range(N_paths):
        level = l0.copy()
        discounted_sum = 0.0

        for t in range(T):
            stage_obj, _pi, level_after, _duals, theta_val = stage_forward_lp(
                t=t,
                l_t=level,
                nu_t=inflow[path, t, :],
                rho_t=prices[path, t, :],
                R=R,
                pi_min=pi_min, pi_max=pi_max,
                l_min=l_min, l_max=l_max,
                g=g,
                cuts_next_a=[cuts_a[t+1]],
                cuts_next_b=[cuts_b[t+1]],
                silent=True,
                soft_bounds=soft_bounds,
                level_penalty=level_penalty,
            )
            # Remember theta from the very first stage of this path only.
            if t == 0:
                theta_at_start[path] = theta_val

            discounted_sum += (gamma ** t) * stage_obj
            level = level_after

        path_totals[path] = discounted_sum

    columns = {
        "path_id": np.arange(N_paths, dtype=int),
        "total_profit": path_totals,
        "first_hour_theta": theta_at_start,
    }
    return pd.DataFrame(columns)

# ------------------------- Core runner ------------------------------------

def run_sddp_and_export(
    N,
    max_iters=MAX_ITERS,
    gamma=GAMMA,
    seed_train=SEED_TRAIN,
    out_prefix=OUT_PREFIX,
):
    """Train single-cut-per-stage value approximations on N scenarios and export CSVs.

    Steps:
      1. Call ``generate_input(N)`` and normalize its result (dict or 16-tuple)
         into float32 arrays.
      2. Shuffle the scenarios with ``seed_train`` and split 80% / 20% into
         train / test.
      3. For ``max_iters`` iterations: run a forward pass over a minibatch of
         training paths, then blend each stage's cut towards the batch-average
         dual / intercept with stepsize ``alpha(n)``.
      4. Evaluate the final cuts on all train and test paths.

    Files written into ``OUTPUT_DIR`` (all prefixed with ``out_prefix`` and
    suffixed with ``_N{N}.csv``): ``history``, ``train_perstage``,
    ``train_eval_perpath``, ``test_perpath`` and ``cuts``.

    Args:
        N: Number of scenarios requested from ``generate_input``.
        max_iters: Number of training iterations.
        gamma: Discount factor used in the training forward pass and in the
            final train/test evaluation.
        seed_train: Seed for the train/test shuffle; iteration ``n`` draws its
            minibatch with seed ``seed_train + n``.
        out_prefix: Filename prefix for the exported CSV files.

    Returns:
        ``({"a": cuts_a, "b": cuts_b}, {"history": df_hist}, df_sum)`` where
        ``df_sum`` is a one-row summary DataFrame.
    """
    t_start = time.time()
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ------------------------ Generate N scenarios ------------------------
    gen = generate_input(N)

    if isinstance(gen, dict):
        J      = int(gen["J"]); T = int(gen["T"])
        l_max  = np.asarray(gen["l_max"], dtype=float)
        l_min  = np.asarray(gen["l_min"], dtype=float)
        l0     = np.asarray(gen["l0"], dtype=float)
        pi_max = np.asarray(gen["pi_max"], dtype=float)
        pi_min = np.asarray(gen["pi_min"], dtype=float)
        R      = np.asarray(gen["R"], dtype=float)
        g      = np.asarray(gen.get("alpha_energy", np.ones(J)), dtype=float)
        prices = np.asarray(gen["prices_t"], dtype=float)       # (N, T) or (N, T, J)
        inflow = np.asarray(gen["inflows_tj"], dtype=float)     # (N, T, J)
    else:
        # Positional form; several entries are unpacked but not used here.
        (J, T, l_max, l_min, l0, pi_max, pi_min,
         price_samples, inflow_samples, nu0, rho0,
         R, a_t0, b_t0, l_bar0, alpha_energy) = gen
        J      = int(J); T = int(T)
        l_max  = np.asarray(l_max, dtype=float)
        l_min  = np.asarray(l_min, dtype=float)
        l0     = np.asarray(l0, dtype=float)
        pi_max = np.asarray(pi_max, dtype=float)
        pi_min = np.asarray(pi_min, dtype=float)
        R      = np.asarray(R, dtype=float)
        g      = np.asarray(alpha_energy, dtype=float)
        prices = np.asarray(price_samples, dtype=float)
        inflow = np.asarray(inflow_samples, dtype=float)

    # (optional) halve memory by storing the data in single precision
    prices = prices.astype(np.float32, copy=False)
    inflow = inflow.astype(np.float32, copy=False)
    l_max  = l_max.astype(np.float32,  copy=False)
    l_min  = l_min.astype(np.float32,  copy=False)
    l0     = l0.astype(np.float32,     copy=False)
    pi_max = pi_max.astype(np.float32, copy=False)
    pi_min = pi_min.astype(np.float32, copy=False)
    g      = g.astype(np.float32,      copy=False)
    R      = R.astype(np.float32,      copy=False)

    # A per-time price series is broadcast to every index j.
    if prices.ndim == 2:
        prices = np.repeat(prices[:, :, None], J, axis=2)

    assert inflow.shape == (N, T, J)
    assert prices.shape == (N, T, J)

    # --- Split into train/test (80% / 20%) ---
    N_total = N
    N_train = int(0.8 * N_total)
    N_test  = N_total - N_train

    # A private RandomState yields the same stream as seeding the legacy
    # global generator, without touching (and having to restore) global state.
    idx_all = np.arange(N_total)
    np.random.RandomState(seed_train).shuffle(idx_all)

    idx_train = idx_all[:N_train]
    idx_test  = idx_all[N_train:]

    prices_train = prices[idx_train, :, :]
    inflow_train = inflow[idx_train, :, :]
    prices_test  = prices[idx_test, :, :]
    inflow_test  = inflow[idx_test, :, :]

    M = N_train

    # Initialize current (single) cut per stage; stage T keeps the zero cut.
    cuts_a = [np.zeros(J, dtype=np.float32) for _ in range(T+1)]
    cuts_b = [0.0 for _ in range(T+1)]

    history = {"iter": [], "avg_forward_value": [], "cum_abs_delta": []}
    perstage_rows = []
    cum_abs = 0.0

    # ------------------------ SDDP iterations ------------------------
    for n in range(1, max_iters + 1):
        an = alpha(n)

        # ---- choose minibatch for this iteration ----
        if USE_MINIBATCH and M > 0:
            # Seed derived from the caller's seed_train (not the module
            # constant) so that different seeds give different batches.
            batch_rng = np.random.RandomState(seed_train + n)
            batch_idx = batch_rng.choice(M, size=min(BATCH_SIZE, M), replace=False)
        else:
            batch_idx = np.arange(M)

        B = len(batch_idx)

        # Pre-allocate for backward updates (just for batch)
        omega_tm = np.zeros((T, B, J), dtype=np.float32)
        Vhat_tm  = np.zeros((T, B),    dtype=np.float32)
        l_t_mat  = np.zeros((B, T, J), dtype=np.float32)

        path_objs = np.zeros(B, dtype=np.float64)

        # Per-stage logging is enabled either always or on the last iteration.
        log_this_iter = (not LOG_LAST_ITER_ONLY) or (n == max_iters)

        for local_i, m_idx in enumerate(batch_idx):
            l_curr = l0.copy()
            for t in range(T):
                nu_t  = inflow_train[m_idx, t, :]
                rho_t = prices_train[m_idx, t, :]
                l_t_mat[local_i, t, :] = l_curr

                obj, pi_next, l_next, omega, theta_val = stage_forward_lp(
                    t=t, l_t=l_curr, nu_t=nu_t, rho_t=rho_t, R=R,
                    pi_min=pi_min, pi_max=pi_max, l_min=l_min, l_max=l_max, g=g,
                    cuts_next_a=[cuts_a[t+1]], cuts_next_b=[cuts_b[t+1]],
                    silent=True, soft_bounds=True, level_penalty=1e6
                )

                # Logging (limited number of paths); path_id is the position
                # within the batch, not the training-set index.
                if log_this_iter and local_i < LOG_MAX_PATHS:
                    perstage_rows.append({
                        "N": N, "iter": n, "path_id": int(local_i), "t": int(t),
                        "obj": float(obj), "theta": float(theta_val)
                    })

                # NOTE(review): obj is the full stage objective returned by
                # stage_forward_lp (it includes the theta term), so this sum is
                # not a pure sum of stage revenues -- confirm this is intended.
                path_objs[local_i] += (gamma ** t) * obj
                Vhat_tm[t, local_i] = obj
                omega_tm[t, local_i, :] = omega.astype(np.float32, copy=False)
                l_curr = l_next

        avg_forward = float(np.mean(path_objs)) if B > 0 else 0.0
        # The first iteration has no predecessor and contributes no delta.
        if history["avg_forward_value"]:
            cum_abs += abs(avg_forward - history["avg_forward_value"][-1])

        history["iter"].append(n)
        history["avg_forward_value"].append(avg_forward)
        history["cum_abs_delta"].append(cum_abs)

        # ---- Backward updates using the batch averages ----
        if B > 0:
            for t in reversed(range(T)):
                omega_bar = np.mean(omega_tm[t, :, :], axis=0)  # (J,)
                # adjust_bar = mean_m ( Vhat_t^m - omega_t^m^T * l_t^m )
                adjust_bar = float(
                    np.mean(
                        Vhat_tm[t, :] - np.sum(omega_tm[t, :, :] * l_t_mat[:, t, :], axis=1)
                    )
                )

                # Stepsize-weighted blend of the old cut and the new estimate.
                cuts_a[t] = (1.0 - an) * cuts_a[t] + an * omega_bar
                cuts_b[t] = (1.0 - an) * cuts_b[t] + an * adjust_bar

        print(f"  Iter {n:02d}: avg forward value = {avg_forward:,.3f} (cumΔ={cum_abs:,.3f})")

    # ------------------------ Save training logs ------------------------
    df_hist = pd.DataFrame(history)
    df_hist["N"] = N
    df_hist.to_csv(os.path.join(OUTPUT_DIR, f"{out_prefix}_history_N{N}.csv"), index=False)

    df_train_perstage = pd.DataFrame(perstage_rows)
    df_train_perstage.to_csv(os.path.join(OUTPUT_DIR, f"{out_prefix}_train_perstage_N{N}.csv"), index=False)

    # ------------------------ Train/Test evaluation with final cuts ------------------------
    # Use the caller's gamma so evaluation matches the training discounting.
    df_eval_train = evaluate_policy_on_paths(
        cuts_a, cuts_b, l0, prices_train, inflow_train,
        R, pi_min, pi_max, l_min, l_max, g,
        gamma=gamma, soft_bounds=True, level_penalty=1e6
    )
    df_eval_train.to_csv(os.path.join(OUTPUT_DIR, f"{out_prefix}_train_eval_perpath_N{N}.csv"), index=False)

    df_eval_test = evaluate_policy_on_paths(
        cuts_a, cuts_b, l0, prices_test, inflow_test,
        R, pi_min, pi_max, l_min, l_max, g,
        gamma=gamma, soft_bounds=True, level_penalty=1e6
    )
    df_eval_test.to_csv(os.path.join(OUTPUT_DIR, f"{out_prefix}_test_perpath_N{N}.csv"), index=False)

    # In-sample stats (on evaluation rollouts); std needs at least two rows.
    in_mean = float(df_eval_train["total_profit"].mean()) if not df_eval_train.empty else 0.0
    in_std  = float(df_eval_train["total_profit"].std(ddof=1)) if len(df_eval_train) > 1 else 0.0
    first_mean = float(df_eval_train["first_hour_theta"].mean()) if not df_eval_train.empty else 0.0
    first_std  = float(df_eval_train["first_hour_theta"].std(ddof=1)) if len(df_eval_train) > 1 else 0.0

    # Test stats
    test_mean = float(df_eval_test["total_profit"].mean()) if not df_eval_test.empty else 0.0
    test_std  = float(df_eval_test["total_profit"].std(ddof=1)) if len(df_eval_test) > 1 else 0.0

    runtime_sec = time.time() - t_start

    # Save final cuts per stage
    rows = []
    for t in range(T+1):
        rows.append({"stage": t, **{f"a_{j}": float(cuts_a[t][j]) for j in range(J)}, "b": float(cuts_b[t]), "N": N})
    pd.DataFrame(rows).to_csv(os.path.join(OUTPUT_DIR, f"{out_prefix}_cuts_N{N}.csv"), index=False)

    # One-row summary
    df_sum = pd.DataFrame([{
        "N": N, "train_size": N_train, "test_size": N_test, "iters": max_iters,
        "runtime_sec": runtime_sec,
        "final_avg_forward_value": history["avg_forward_value"][-1] if history["avg_forward_value"] else 0.0,
        "cum_abs_delta": history["cum_abs_delta"][-1] if history["cum_abs_delta"] else 0.0,
        "in_sample_mean": in_mean, "in_sample_std": in_std,
        "test_mean": test_mean, "test_std": test_std,
        "first_hour_theta_mean": first_mean, "first_hour_theta_std": first_std
    }])

    return ({"a": cuts_a, "b": cuts_b}, {"history": df_hist}, df_sum)



In [None]:
# ------------------------- Driver: loop over N ----------------------------

# Driver: runs the training/export routine for one or several sample sizes
# and writes the concatenated one-row summaries to a single CSV.
if __name__ == "__main__":
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    all_summaries = []
    # N is hard-coded to None, so as written only the sweep branch below runs.
    # Set it to an integer to run a single sample size instead.
    N = None

    if N is None:
        for N in range(100, 1001, 100):  # 100, 200, ..., 1000
            print(f"\n=== Running SDDP with N={N} samples ===")
            # Only the summary frame is kept; cuts and history are already on disk.
            _, _, df_sum = run_sddp_and_export(
                N=N, max_iters=MAX_ITERS, gamma=GAMMA,
                seed_train=SEED_TRAIN, out_prefix=OUT_PREFIX
            )
            all_summaries.append(df_sum)
    else:
        print(f"\n=== Running SDDP with N={N} samples ===")
        _, _, df_sum = run_sddp_and_export(
            N=N, max_iters=MAX_ITERS, gamma=GAMMA,
            seed_train=SEED_TRAIN, out_prefix=OUT_PREFIX
        )
        all_summaries.append(df_sum)

    # One row per N, stacked into a single summary table.
    df_all = pd.concat(all_summaries, ignore_index=True)
    df_all.to_csv(os.path.join(OUTPUT_DIR, f"{OUT_PREFIX}_summary_allN.csv"), index=False)
    print("\nDone. Summaries written to", os.path.abspath(OUTPUT_DIR))
