In [1]:
import os, time
import numpy as np
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
# Load data (assumes you've generated these already)
from Input_generator import generate_input


In [2]:
# ===== learning rate =====
def alpha_schedule(n, alpha0=0.5, power=0.6):
    """
    Polynomially decaying step size for iteration ``n``.

    Returns ``alpha0 / n**power``. With the defaults, n=1 gives 0.5 and the
    value shrinks as n grows. ``n`` is not validated here: n == 0 raises
    ZeroDivisionError when ``power`` is positive.
    """
    decay = n ** power
    return alpha0 / decay

# ===== helpers to coerce shapes =====
def coerce_vec(x, J):
    """
    Coerce ``x`` into a 1-D float vector of length ``J``.

    - ``None``                      -> zeros of length J
    - any array-like with J entries -> flattened float array
    - a single value                -> that value broadcast to length J
    - any other size                -> zeros of length J (silent fallback)
    """
    if x is not None:
        flat = np.asarray(x, dtype=float).reshape(-1)
        if flat.size == J:
            return flat
        if flat.size == 1:
            # broadcast a lone scalar to every component
            return np.full(J, float(flat[0]))
    # None, or a size that is neither J nor 1
    return np.zeros(J)

# ensure b_t is a scalar per stage
def coerce_scalar(x):
    """
    Collapse ``x`` to a single Python float.

    ``None`` and empty inputs give 0.0. A true scalar passes through
    unchanged; a vector/array (e.g. from legacy code) is collapsed to the
    mean of all its entries.
    """
    values = np.asarray(x, dtype=float).reshape(-1) if x is not None else np.empty(0)
    return float(values.mean()) if values.size else 0.0

# evaluate linear value function at next stage (t_next is 1-based for V̄_{t_next+1})
def eval_vbar_linear(a_list, b_list, t_next, x_post):
    """
    Evaluate the linear value approximation stored at index ``t_next``.

    Computes ``a_list[t_next] . x_post + b_list[t_next]`` and returns it as a
    Python float.

    a_list: sequence of slope vectors; entry ``t_next`` is flattened to 1-D
    b_list: sequence of scalar intercepts; entry ``t_next`` is cast to float
    t_next: int index into both lists (e.g. 1 for V̄2)
    x_post: post-decision state, flattened to 1-D; must match the slope length
    """
    slope = np.asarray(a_list[t_next], dtype=float).reshape(-1)
    intercept = float(b_list[t_next])
    state = np.asarray(x_post, float).reshape(-1)
    value = slope @ state + intercept
    return float(value)


## Step 1

- a) Sample the post-decision value

In [3]:
# ===== single-stage lookahead with linearized future =====
def solve_stage_step1a_gurobi_report(
    t, n, l_t, rho_t, nu_t, R, l_min, l_max, pi_min, pi_max,
    a_next_prev, Vhat_next_prev_scalar, l_next_prev,
    gen_coeff=None, verbose=False, soft_bounds=True, bound_penalty_scale=1000.0
):
    """
    Build and solve one single-stage lookahead LP with Gurobi and report the result.

    The model maximizes

        rho_t * sum_j gen_coeff[j] * pi[j]  +  sum_j a_next_prev[j] * l_next[j]
            - pen_coeff * sum_j (s_lo[j] + s_hi[j])        (penalty only if soft_bounds)

    subject to the dynamics  l_next[j] == l_t[j] + nu_t[j] + sum_k R[j, k] * pi[k]
    and  pi_min <= pi <= pi_max.

    Parameters
    ----------
    t, n : used only in the model name and in the error message.
    l_t : array with a ``.shape``; its first dimension defines J. Current state.
    rho_t : scalar multiplying ``gen_coeff[j] * pi[j]`` in the revenue term.
    nu_t : indexable of length J, added to ``l_t`` in the dynamics.
    R : 2-D array indexed ``R[j, k]``; also used as ``R.dot(pi_opt)``.
    l_min, l_max : bounds on ``l_next``. Hard variable bounds when
        ``soft_bounds`` is False (``.tolist()`` is called on them); otherwise
        enforced through non-negative slacks that are penalized in the objective.
    pi_min, pi_max : arrays (``.tolist()`` is called) with hard bounds on ``pi``.
    a_next_prev : indexable of length J; linear coefficients on ``l_next``.
    Vhat_next_prev_scalar, l_next_prev : accepted but never read in this function.
    gen_coeff : per-component revenue coefficients; defaults to ones.
    verbose : when False, Gurobi output is suppressed (OutputFlag = 0).
    soft_bounds : selects slack-penalized vs. hard bounds on ``l_next``.
    bound_penalty_scale : penalty per unit of slack is
        ``bound_penalty_scale * (abs(rho_t) + 1.0)``.

    Returns
    -------
    (pi_opt, lnext_opt, lam, rev_stage, penalty_stage, obj_stage, xpost) where
    ``lam`` holds the ``Pi`` attribute of each dynamics constraint,
    ``obj_stage = rev_stage - penalty_stage`` (the ``a_next_prev`` term is NOT
    included), and ``xpost = l_t + R.dot(pi_opt)`` (state without ``nu_t``).

    Raises
    ------
    RuntimeError if the Gurobi status after ``optimize()`` is not GRB.OPTIMAL.
    """
    J = l_t.shape[0]
    if gen_coeff is None:
        gen_coeff = np.ones(J)

    m = gp.Model(f"stage_{n}_{t}")
    if not verbose:
        m.Params.OutputFlag = 0

    # Decision variables pi[j], always hard-bounded.
    pi = m.addVars(J, lb=pi_min.tolist(), ub=pi_max.tolist(), name="pi")

    if soft_bounds:
        # l_next is free; violations of [l_min, l_max] are absorbed by slacks >= 0.
        lnext = m.addVars(J, lb=-GRB.INFINITY, name="l_next")
        s_lo  = m.addVars(J, lb=0.0, name="s_lo")
        s_hi  = m.addVars(J, lb=0.0, name="s_hi")
    else:
        lnext = m.addVars(J, lb=l_min.tolist(), ub=l_max.tolist(), name="l_next")
        s_lo = s_hi = None

    # Dynamics: l_next[j] == l_t[j] + nu_t[j] + sum_k R[j, k] * pi[k].
    # The constraint handles are kept so their Pi values can be read after solving.
    dyn_constr = []
    for j in range(J):
        expr = gp.LinExpr(l_t[j] + nu_t[j])
        for k in range(J):
            c = R[j, k]
            if c != 0.0:
                expr.addTerms(c, pi[k])
        dyn_constr.append(m.addConstr(lnext[j] == expr, name=f"dyn[{j}]"))

    if soft_bounds:
        for j in range(J):
            m.addConstr(lnext[j] + s_lo[j] >= l_min[j], name=f"min[{j}]")
            m.addConstr(lnext[j] - s_hi[j] <= l_max[j], name=f"max[{j}]")

    # decision objective: revenue + linearized future - slack penalty
    rev = gp.LinExpr()
    for j in range(J):
        if gen_coeff[j] != 0.0:
            rev.addTerms(rho_t * gen_coeff[j], pi[j])

    # Linearized future value: sum_j a_next_prev[j] * l_next[j] (zero coefficients skipped).
    fut = gp.LinExpr()
    for j in range(J):
        a = a_next_prev[j]
        if a != 0.0:
            fut.addTerms(a, lnext[j])

    if soft_bounds:
        # Penalty per unit of slack grows with |rho_t|; the +1.0 keeps it positive when rho_t == 0.
        pen_coeff = bound_penalty_scale * (abs(rho_t) + 1.0)
        pen_term = pen_coeff * (gp.quicksum(s_lo[j] for j in range(J)) +
                                gp.quicksum(s_hi[j] for j in range(J)))
        m.setObjective(rev + fut - pen_term, GRB.MAXIMIZE)
    else:
        pen_coeff = 0.0
        m.setObjective(rev + fut, GRB.MAXIMIZE)

    m.optimize()
    if m.status != GRB.OPTIMAL:
        raise RuntimeError(f"Gurobi not OPTIMAL at (n={n}, t={t}). Status: {m.status}")

    # Extract the primal solution and the Pi attribute of each dynamics constraint.
    pi_opt    = np.array([pi[j].X for j in range(J)])
    lnext_opt = np.array([lnext[j].X for j in range(J)])
    lam       = np.array([dyn_constr[j].Pi for j in range(J)])


    # Recompute the reported stage quantities from the solution values.
    rev_stage = float(sum(rho_t * gen_coeff[j] * pi_opt[j] for j in range(J)))
    penalty_stage = 0.0
    if soft_bounds:
        s_lo_sum = float(sum(s_lo[j].X for j in range(J)))
        s_hi_sum = float(sum(s_hi[j].X for j in range(J)))
        penalty_stage = pen_coeff * (s_lo_sum + s_hi_sum)

    # Reported stage objective excludes the linearized-future term.
    obj_stage = rev_stage - penalty_stage

    # --- post-decision state (AFTER decision, BEFORE inflow): x̄_t = l_t + R π_t
    xpost = l_t + R.dot(pi_opt)

    return pi_opt, lnext_opt, lam, rev_stage, penalty_stage, obj_stage, xpost




- b) Update the value-function slopes with the stage multipliers
- c) Determine the next pre-decision state

In [4]:
# ===== main ADP exports =====
def run_adp_exports_sddp_format(
    N, out_dir,
    alpha0=0.5, power=0.6, verbose=False
):
    """
    Run N forward ADP passes and export the results as CSV files.

    Inputs are obtained from ``generate_input(N)``. For every sample path
    n = 1..N and every stage t = 0..T-1 the single-stage problem is solved
    with ``solve_stage_step1a_gurobi_report`` (soft bounds enabled), the slope
    vector ``a_t[t]`` is smoothed towards the returned multipliers for t > 0
    using step size ``alpha_schedule(n, alpha0, power)``, and the state moves
    to the returned next state.

    Files written into ``out_dir`` (created if missing):
      - ``run_N{N}_first_hour.csv`` : one row per path (first-stage price and
        inflows, plus V̄2 evaluated at the first post-decision state)
      - ``run_N{N}_paths.csv``      : one row per (path, stage) plus a
        ``stage == "total"`` row per path carrying ``obj_total``
      - ``run_N{N}_summary.csv``    : a single summary row

    Parameters
    ----------
    N : number of sample paths; also passed to ``generate_input``.
    out_dir : output directory.
    alpha0, power : step-size schedule parameters.
    verbose : forwarded to the stage solver.

    Returns
    -------
    dict with keys N, paths, avg_obj_total, p10_obj_total, p50_obj_total,
    p90_obj_total, runtime_hours. ``N`` and ``paths`` are returned exactly as
    passed in (not upcast to float); the statistics are Python floats and are
    0.0 when no path was run.
    """
    # Load inputs for this N
    (J, T, l_max, l_min, l0, pi_max, pi_min,
     price_samples, inflow_samples, nu0, rho0, R,
     a_t_init, b_t_init, l_bar, alpha_energy) = generate_input(N)

    l_min = np.asarray(l_min, dtype=float); l_max = np.asarray(l_max, dtype=float)
    l0    = np.asarray(l0, dtype=float)
    pi_min = np.asarray(pi_min, dtype=float); pi_max = np.asarray(pi_max, dtype=float)
    R = np.asarray(R, dtype=float)
    price_samples = np.asarray(price_samples, dtype=float)
    inflow_samples = np.asarray(inflow_samples, dtype=float)
    gen_coeff = np.asarray(alpha_energy if alpha_energy is not None else np.ones(J), dtype=float)

    # The coercion helpers accept None per entry; also accept None for the whole
    # list so that "no initial coefficients" does not crash on len(None).
    a_init = [] if a_t_init is None else a_t_init
    b_init = [] if b_t_init is None else b_t_init

    # supergradients a_t : length T+1 (index 0..T). Keep a_0 unused; a_{t} used with stage t.
    a_t = [coerce_vec(a_init[t] if t < len(a_init) else None, J) for t in range(T+1)]

    # scalar intercepts b_t per stage (length T+1). 0.0 where nothing was provided.
    b_t = [coerce_scalar(b_init[t] if t < len(b_init) else None) for t in range(T+1)]

    # Baseline placeholders: only forwarded to the solver for interface parity,
    # never updated here (entries at index >= 1 stay zero), so no reset is needed.
    Vhat_prev  = np.zeros(T+1)
    l_prev_pre = np.zeros((T+1, J)); l_prev_pre[0] = l0.copy()

    first_hour_rows = []
    paths_rows = []
    obj_totals = []

    os.makedirs(out_dir, exist_ok=True)
    t0 = time.perf_counter()

    for n in range(1, N+1):
        alpha_n = alpha_schedule(n, alpha0=alpha0, power=power)
        price_path  = price_samples[n-1]
        inflow_path = inflow_samples[n-1]

        l_t = l0.copy()

        # Prepare the first-hour row; vbar2 is filled in after the t == 0 solve.
        fh_row = {
            "path": n-1,
            "theta1": 0.0,                    # keep as-is unless you add cuts
            "vbar2_at_xpost1": 0.0,           # will be overwritten after t==0 solve
            "price1": float(price_path[0]),
        }
        for j in range(J):
            fh_row[f"inflow1_res{j+1}"] = float(inflow_path[0, j])

        obj_total = 0.0
        for t in range(T):
            rho_t = float(price_path[t])
            nu_t  = inflow_path[t].astype(float)   # inflow (exogenous)

            a_next_prev = a_t[t+1]                 # this is a_{t+1}
            Vhat_next_prev_scalar = Vhat_prev[t+1] # unused but kept for interface parity
            l_next_prev = l_prev_pre[t+1]          # unused placeholder

            (pi_opt, lnext_opt, lam,
             rev_stage, penalty_stage, obj_stage, xpost) = solve_stage_step1a_gurobi_report(
                t, n, l_t, rho_t, nu_t, R, l_min, l_max, pi_min, pi_max,
                a_next_prev, Vhat_next_prev_scalar, l_next_prev,
                gen_coeff=gen_coeff, verbose=verbose, soft_bounds=True
            )

            # After solving stage 0, evaluate V̄2(x̄1) = a_2^T x̄1 + b_2 and store it.
            if t == 0:
                vbar2 = eval_vbar_linear(a_t, b_t, t_next=1, x_post=xpost)
                fh_row["vbar2_at_xpost1"] = float(vbar2)

            row = {
                "path": n-1,
                "stage": t+1,
                "price": rho_t,
                "obj_stage": obj_stage,
                "rev_stage": rev_stage,
                "penalty_stage": penalty_stage,
                "util_alg": obj_stage,  # same for export
                "theta": 0.0,
                "v_est": 0.0,
            }
            for j in range(J):
                row[f"pi{j+1}"]     = float(pi_opt[j])
                row[f"l{j+1}"]      = float(l_t[j])
                row[f"xpost{j+1}"]  = float(xpost[j])
                row[f"inflow{j+1}"] = float(nu_t[j])

            obj_total += obj_stage
            paths_rows.append(row)

            # Step 1(b) stochastic gradient update using multipliers (skipped at t=0).
            # Rebinding a_t[t] creates a new array, so the initial inputs are not mutated.
            if t > 0:
                a_t[t] = (1.0 - alpha_n) * a_t[t] + alpha_n * lam
                # If you also learn an intercept, update b_t[t] here using your chosen rule.

            # Step 1(c) move to next state
            l_t = lnext_opt.copy()

        # vbar2 is filled by now (when T >= 1); record the first-hour row
        first_hour_rows.append(fh_row)

        # add total row
        paths_rows.append({"path": n-1, "stage": "total", "obj_total": obj_total})
        obj_totals.append(obj_total)

    runtime_hours = (time.perf_counter() - t0) / 3600.0

    # write per-N CSVs (all into the SAME folder)
    pd.DataFrame(first_hour_rows).to_csv(os.path.join(out_dir, f"run_N{N}_first_hour.csv"), index=False)

    base_cols = ["path","stage","price","obj_stage","rev_stage","penalty_stage","util_alg","theta","v_est"]
    per_res_cols = []
    for j in range(J):
        per_res_cols += [f"pi{j+1}", f"l{j+1}", f"xpost{j+1}", f"inflow{j+1}"]
    ordered_cols = base_cols + per_res_cols + ["obj_total"]
    df_paths = pd.DataFrame(paths_rows).reindex(columns=ordered_cols)
    df_paths.to_csv(os.path.join(out_dir, f"run_N{N}_paths.csv"), index=False)

    obj_arr = np.array(obj_totals, dtype=float)
    has_paths = obj_arr.size > 0
    # Build the summary once as a plain dict and return that same dict.
    # Round-tripping through DataFrame.iloc[0] would coerce the whole row to a
    # single float dtype and turn the integer N / paths into 100.0-style floats.
    summary = {
        "N": N,
        "paths": N,
        "avg_obj_total": float(obj_arr.mean()) if has_paths else 0.0,
        "p10_obj_total": float(np.percentile(obj_arr, 10)) if has_paths else 0.0,
        "p50_obj_total": float(np.percentile(obj_arr, 50)) if has_paths else 0.0,
        "p90_obj_total": float(np.percentile(obj_arr, 90)) if has_paths else 0.0,
        "runtime_hours": runtime_hours
    }
    pd.DataFrame([summary]).to_csv(os.path.join(out_dir, f"run_N{N}_summary.csv"), index=False)

    return summary

# ---------- sweep N=100..1000 and aggregate ----------
def run_all_N_sddp_format(out_dir="adp_results", N_values=None):
    """
    Run the ADP export for a sweep of sample counts and aggregate the summaries.

    For every N in ``N_values`` this calls
    ``run_adp_exports_sddp_format(N, out_dir=out_dir)`` and collects the
    returned summary dicts into ``<out_dir>/run_summary_allN.csv`` (one row
    per N, in sweep order).

    Parameters
    ----------
    out_dir : directory for all outputs; created if it does not exist.
    N_values : iterable of sample counts to run. Defaults to
        100, 200, ..., 1000 (the original hard-coded sweep) when None.

    Returns
    -------
    None. Results are written to disk.
    """
    if N_values is None:
        N_values = range(100, 1001, 100)  # default sweep: N = 100..1000 in steps of 100
    os.makedirs(out_dir, exist_ok=True)
    all_rows = [run_adp_exports_sddp_format(N, out_dir=out_dir) for N in N_values]
    pd.DataFrame(all_rows).to_csv(os.path.join(out_dir, "run_summary_allN.csv"), index=False)




In [5]:
if __name__ == "__main__":
    # Entry point: run the full N sweep, then report where the CSV files were written.
    run_all_N_sddp_format("adp_results")
    print("Done. All files saved in: adp_results/")

Set parameter Username
Academic license - for non-commercial use only - expires 2026-03-26
Done. All files saved in: adp_results/
