In [None]:
import os, time
import numpy as np
import pandas as pd
import gurobipy as gp
from gurobipy import GRB, quicksum
from Input_generator import generate_input


In [None]:
# ------------------------- Config -------------------------

# Discount factor: stage objectives are weighted by GAMMA ** t when accumulated.
# Default for the `gamma` argument of run_adp_like_sddp.
GAMMA = 1.0
# Number of training iterations; default for `max_iters` of run_adp_like_sddp.
MAX_ITERS = 30
# Prefix shared by every CSV file name written by this notebook.
OUT_PREFIX = "adp_alg6"
# Directory that receives all CSV outputs (created on demand with os.makedirs).
OUTPUT_DIR = "results_sddp"
# Seed for the shuffle that splits scenario indices into train and test sets.
SEED_TRAIN = 0

# logging controls (keep RAM/files small)
# When True, per-stage rows are recorded only during the final training iteration.
LOG_LAST_ITER_ONLY = True
# Per-stage rows are recorded only for training paths whose index is below this cap.
LOG_MAX_PATHS = 200

# stepsize α_n
def alpha_schedule(n, alpha0=0.5, power=0.6):
    """Return the smoothing stepsize for iteration ``n``: ``alpha0 / n**power``.

    Parameters
    ----------
    n : number
        Iteration counter; the training loop in this notebook passes 1, 2, 3, ...
    alpha0 : float
        Stepsize returned for ``n == 1``.
    power : float
        Decay exponent applied to ``n``.
    """
    decay = n ** power
    return alpha0 / decay


In [None]:


# ------------------------- Small helpers -------------------------

def eval_vbar_linear(a_list, b_list, t_next, x_post):
    """Evaluate the linear value approximation ``a^T x_post + b`` as a Python float.

    The slope and intercept are read from ``a_list[t_next + 1]`` and
    ``b_list[t_next + 1]``; both the slope and ``x_post`` are flattened to 1-D
    before the dot product.

    NOTE(review): the lookup is offset by one from ``t_next``. If callers already
    pass the index of the next stage this reads one stage too far -- confirm the
    intended convention before relying on this helper.
    """
    stage_idx = t_next + 1
    slope = np.asarray(a_list[stage_idx], dtype=float).reshape(-1)
    intercept = float(b_list[stage_idx])
    state = np.asarray(x_post, float).reshape(-1)
    return float(slope @ state + intercept)

# ------------------------- Single-stage solve -------------------------

def solve_stage(
    t, l_t, rho_t, nu_t, R, l_min, l_max, pi_min, pi_max,
    a_next, b_next, gen_coeff=None, soft_bounds=True, penalty_scale=1e6, silent=True
):
    """
    Solve one forward stage with a linearized next-stage value:

      maximize  sum_j rho_t * g_j * pi_j  +  a_{t+1}^T l_{t+1} + b_{t+1}  - penalty
      s.t.      l_{t+1} = l_t + nu_t + R @ pi
                pi_min <= pi <= pi_max
                l_min <= l_{t+1} <= l_max   (hard bounds, or softened with slacks)

    Parameters
    ----------
    t : int
        Stage index; only used to name the model and in the error message.
    l_t, nu_t : array-like, length J
        Current state and the additive term of the balance equation.
    rho_t : float
        Price multiplying every unit's output in the revenue term.
    R : array-like, shape (J, J)
        Matrix mapping the decision ``pi`` into the state change.
    l_min, l_max, pi_min, pi_max : array-like, length J
        Bounds on the next state and on the decision.
    a_next, b_next : array-like (length J), float
        Slope and intercept of the linear next-stage value approximation.
    gen_coeff : array-like, length J, optional
        Per-unit revenue coefficients ``g_j``; defaults to all ones.
    soft_bounds : bool
        If True the state bounds are relaxed with non-negative slacks that are
        charged ``penalty_scale`` per unit in the objective; otherwise they are
        imposed as hard variable bounds.
    penalty_scale : float
        Objective weight on the slack variables.
    silent : bool
        If True Gurobi's solver output is disabled.

    Returns
    -------
    tuple
        ``(obj, pi, l_next, lam, theta_est, x_post)``: the optimal objective, the
        optimal decision and next state, the duals of the balance constraints,
        ``theta_est = a_next^T l_next + b_next`` and ``x_post = l_t + R @ pi``.

    Raises
    ------
    RuntimeError
        If the solver status is neither OPTIMAL nor SUBOPTIMAL.
    """
    # Coerce every input once: plain lists/tuples become usable, and every
    # coefficient handed to Gurobi is a native float rather than a numpy scalar
    # (the soft-bound rows previously passed raw array elements).
    l_t = np.asarray(l_t, dtype=float).reshape(-1)
    nu_t = np.asarray(nu_t, dtype=float).reshape(-1)
    R = np.asarray(R, dtype=float)
    l_min = np.asarray(l_min, dtype=float).reshape(-1)
    l_max = np.asarray(l_max, dtype=float).reshape(-1)
    pi_min = np.asarray(pi_min, dtype=float).reshape(-1)
    pi_max = np.asarray(pi_max, dtype=float).reshape(-1)
    a_next = np.asarray(a_next, dtype=float).reshape(-1)
    b_next = float(b_next)
    rho_t = float(rho_t)

    J = l_t.size
    if gen_coeff is None:
        gen_coeff = np.ones(J, dtype=float)
    else:
        gen_coeff = np.asarray(gen_coeff, dtype=float).reshape(-1)

    with gp.Model(f"stage_t{t}") as m:
        if silent:
            m.Params.OutputFlag = 0
        m.Params.Threads = 1

        pi    = m.addVars(J, lb=pi_min.tolist(), ub=pi_max.tolist(), name="pi")
        # With soft bounds the state variable is free; the bounds are enforced
        # through the penalized slack rows added below.
        lnext = m.addVars(J, lb=-GRB.INFINITY if soft_bounds else l_min.tolist(),
                             ub= GRB.INFINITY if soft_bounds else l_max.tolist(),
                             name="l_next")

        # mass balance: l_next - R @ pi == l_t + nu_t (Gurobi moves the constant
        # to the right-hand side, so the dual is the sensitivity to l_t + nu_t)
        bal = []
        for j in range(J):
            expr = lnext[j] - (float(l_t[j]) + float(nu_t[j]))
            expr -= quicksum(float(R[j, k]) * pi[k] for k in range(J))
            bal.append(m.addConstr(expr == 0.0, name=f"bal_{j}"))

        # soft bounds
        pen_term = 0.0
        if soft_bounds:
            s_lo  = m.addVars(J, lb=0.0, name="s_lo")
            s_hi  = m.addVars(J, lb=0.0, name="s_hi")
            for j in range(J):
                m.addConstr(lnext[j] + s_lo[j] >= float(l_min[j]), name=f"lb_{j}")
                m.addConstr(lnext[j] - s_hi[j] <= float(l_max[j]), name=f"ub_{j}")
            pen_term = penalty_scale * (quicksum(s_lo[j] for j in range(J)) +
                                        quicksum(s_hi[j] for j in range(J)))

        # objective = revenue + a^T l_next + b - penalty
        rev = quicksum(rho_t * float(gen_coeff[j]) * pi[j] for j in range(J))
        fut = quicksum(float(a_next[j]) * lnext[j] for j in range(J)) + b_next
        m.setObjective(rev + fut - pen_term, GRB.MAXIMIZE)

        m.optimize()
        if m.Status not in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
            raise RuntimeError(f"Stage LP infeasible/unbounded at t={t} (status {m.Status})")

        # Copy everything out of the model before the `with` block disposes it.
        pi_sol = np.array([pi[j].X for j in range(J)], dtype=float)
        ln_sol = np.array([lnext[j].X for j in range(J)], dtype=float)
        lam    = np.array([c.Pi for c in bal], dtype=float)
        theta_est = float(np.dot(a_next, ln_sol) + b_next)
        x_post = l_t + R.dot(pi_sol)

        return float(m.ObjVal), pi_sol, ln_sol, lam, theta_est, x_post

# ------------------------- Policy evaluation -------------------------

def evaluate_policy(a_t, b_t, l0, prices, inflow, R, l_min, l_max, pi_min, pi_max, g, gamma=1.0):
    """Roll out the policy induced by (a_t, b_t) on the given paths.

    For every path the stages are solved in order with ``solve_stage`` (soft
    bounds, penalty 1e6), feeding each stage's optimal next state into the
    following stage.

    Parameters
    ----------
    a_t, b_t : sequences indexed by stage
        Value-approximation slopes and intercepts; stage ``t`` is solved with
        ``a_t[t + 1]`` and ``b_t[t + 1]``.
    l0 : numpy.ndarray
        Initial state; copied for each path, never modified.
    prices : numpy.ndarray, shape (N_paths, T, J)
        Prices; only ``prices[m, t, 0]`` is used (same price for every unit).
    inflow : numpy.ndarray
        ``inflow[m, t, :]`` is passed as ``nu_t`` of stage ``t`` on path ``m``.
    R, l_min, l_max, pi_min, pi_max, g
        Forwarded unchanged to ``solve_stage`` (``g`` as ``gen_coeff``).
    gamma : float
        Discount factor; stage ``t`` is weighted by ``gamma ** t``.

    Returns
    -------
    pandas.DataFrame
        One row per path with columns ``path_id``, ``total_profit`` and
        ``first_hour_theta`` (the ``theta`` returned by the first stage).

    Notes
    -----
    ``solve_stage`` returns the full stage objective, which already contains the
    look-ahead term ``theta = a^T l_next + b``. Summing that objective over the
    stages would re-add the future-value estimate at every stage, so the realised
    stage reward accumulated here is ``obj - theta`` (revenue minus any
    soft-bound penalty).
    """
    N_paths, T, _ = prices.shape
    totals = np.zeros(N_paths, dtype=float)
    first_theta = np.zeros(N_paths, dtype=float)

    for m in range(N_paths):
        l_curr = l0.copy()
        acc = 0.0
        for t in range(T):
            obj, _pi, l_next, _lam, theta, _x_post = solve_stage(
                t=t, l_t=l_curr, rho_t=float(prices[m, t, 0]),  # price same across j
                nu_t=inflow[m, t, :], R=R, l_min=l_min, l_max=l_max,
                pi_min=pi_min, pi_max=pi_max, a_next=a_t[t+1], b_next=b_t[t+1],
                gen_coeff=g, soft_bounds=True, penalty_scale=1e6, silent=True
            )
            if t == 0:
                first_theta[m] = theta
            # Strip the value-function estimate: only the immediate reward is profit.
            acc += (gamma ** t) * (obj - theta)
            l_curr = l_next
        totals[m] = acc

    return pd.DataFrame({"path_id": np.arange(N_paths), "total_profit": totals, "first_hour_theta": first_theta})

# ------------------------- Main runner -------------------------

def run_adp_like_sddp(
    N,
    max_iters=MAX_ITERS,
    gamma=GAMMA,
    seed_train=SEED_TRAIN,
    out_prefix=OUT_PREFIX
):
    """
    Train a per-stage linear value approximation on generated scenarios and evaluate it.

    Steps:
      1. Request ``N`` scenarios from ``generate_input(N)`` and split them 80/20
         into training and test paths with a shuffle seeded by ``seed_train``.
      2. For ``max_iters`` iterations, roll every training path forward with
         ``solve_stage`` and smooth ``(a_t, b_t)`` towards the path-averaged
         balance duals / intercepts using the stepsize ``alpha_schedule(it)``.
      3. Roll the final ``(a_t, b_t)`` out on the training and test paths with
         ``evaluate_policy``.
      4. Write the history, per-stage log, per-path evaluations and cuts as CSV
         files named ``<out_prefix>_<kind>_N<N>.csv`` inside ``OUTPUT_DIR``.

    Parameters
    ----------
    N : int
        Number of scenarios; also embedded in every output file name.
    max_iters : int
        Number of training iterations (0 skips training and evaluates the
        all-zero approximation).
    gamma : float
        Discount factor used for the training diagnostic and for evaluation.
    seed_train : int
        Seed of the train/test shuffle. The global numpy RNG is left untouched.
    out_prefix : str
        Prefix of the CSV files written by this call.

    Returns
    -------
    tuple
        ``({"a": a_t, "b": b_t}, {"history": df_hist}, df_sum)`` where ``df_sum``
        is a one-row summary DataFrame (it is not written to disk here).
    """
    t0 = time.time()
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    def _csv_path(kind):
        # Every artefact shares the caller-supplied prefix and the sample size.
        return os.path.join(OUTPUT_DIR, f"{out_prefix}_{kind}_N{N}.csv")

    # --- generate scenarios ---
    # generate_input is expected to return a 16-tuple; the underscore-prefixed
    # entries are not used by this routine.
    (J, T, l_max, l_min, l0, pi_max, pi_min,
     price_samples, inflow_samples, _nu0, _rho0, R,
     _a_t0, _b_t0, _l_bar0, alpha_energy) = generate_input(N)

    # enforce shapes/dtypes
    l_min = np.asarray(l_min, dtype=np.float32)
    l_max = np.asarray(l_max, dtype=np.float32)
    l0    = np.asarray(l0,    dtype=np.float32)
    pi_min= np.asarray(pi_min,dtype=np.float32)
    pi_max= np.asarray(pi_max,dtype=np.float32)
    R     = np.asarray(R,     dtype=np.float32)
    g     = np.asarray(alpha_energy, dtype=np.float32)

    prices = np.asarray(price_samples, dtype=np.float32)  # (N,T)
    inflow = np.asarray(inflow_samples, dtype=np.float32) # (N,T,J)
    # broadcast prices to (N,T,J) with same price for each unit
    prices = np.repeat(prices[:, :, None], J, axis=2)

    # --- split 80/20 ---
    N_total = N
    N_train = int(0.8 * N_total)
    N_test  = N_total - N_train

    # A private RandomState yields the same permutation as seeding the global
    # generator, without having to save and restore the global state.
    idx = np.arange(N_total)
    np.random.RandomState(seed_train).shuffle(idx)

    id_train = idx[:N_train]
    id_test  = idx[N_train:]

    prices_train = prices[id_train]
    inflow_train = inflow[id_train]
    prices_test  = prices[id_test]
    inflow_test  = inflow[id_test]

    # --- initialize (a_t, b_t) as single cut per stage ---
    # Index T is never updated below, so the terminal approximation stays zero.
    a_t = [np.zeros(J, dtype=np.float32) for _ in range(T+1)]
    b_t = [0.0 for _ in range(T+1)]

    # --- logs ---
    history = {"iter": [], "avg_forward_value": [], "cum_abs_delta": []}
    perstage_rows = []  # per-stage logs (last iteration only unless configured otherwise)
    cum_abs = 0.0

    # ------------------------ training iterations ------------------------
    for it in range(1, max_iters + 1):
        an = alpha_schedule(it)
        log_this_iter = (not LOG_LAST_ITER_ONLY) or (it == max_iters)

        # accumulators for Algorithm-6-style updates
        sum_omega = np.zeros((T, J), dtype=np.float64)
        sum_adjust = np.zeros(T, dtype=np.float64)

        path_totals = np.zeros(N_train, dtype=np.float64)

        for m in range(N_train):
            l_curr = l0.copy()
            for t in range(T):
                # NOTE: we use one price per stage (same across j)
                rho_t = float(prices_train[m, t, 0])
                nu_t  = inflow_train[m, t, :]

                obj, pi_next, l_next, lam, theta, x_post = solve_stage(
                    t=t, l_t=l_curr, rho_t=rho_t, nu_t=nu_t, R=R,
                    l_min=l_min, l_max=l_max, pi_min=pi_min, pi_max=pi_max,
                    a_next=a_t[t+1], b_next=b_t[t+1], gen_coeff=g,
                    soft_bounds=True, penalty_scale=1e6, silent=True
                )

                # Algorithm-6 averages:
                # Vhat_t = obj; adjust = Vhat_t - lam^T l_t
                sum_omega[t, :] += lam
                sum_adjust[t]   += (obj - float(lam @ l_curr))

                # Discounted sum of stage objectives. Each objective includes the
                # look-ahead term, so this is a convergence diagnostic rather
                # than a realised profit.
                path_totals[m] += (gamma ** t) * obj

                # per-stage logging (capped)
                if log_this_iter and m < LOG_MAX_PATHS:
                    perstage_rows.append({
                        "N": N, "iter": it, "path_id": m, "t": t,
                        "obj": obj, "theta": theta
                    })

                l_curr = l_next

        avg_forward = float(np.mean(path_totals))
        if history["avg_forward_value"]:
            # The first iteration has no predecessor and contributes no change.
            cum_abs += abs(avg_forward - history["avg_forward_value"][-1])

        history["iter"].append(it)
        history["avg_forward_value"].append(avg_forward)
        history["cum_abs_delta"].append(cum_abs)

        # update a_t,b_t with averages across training paths
        omega_bar = (sum_omega / max(N_train, 1)).astype(np.float32)
        adjust_bar = (sum_adjust / max(N_train, 1)).astype(np.float32)

        for t in range(T-1, -1, -1):
            a_t[t] = (1.0 - an) * a_t[t] + an * omega_bar[t, :]
            b_t[t] = (1.0 - an) * b_t[t] + an * float(adjust_bar[t])

        print(f"  Iter {it:02d}: avg forward value = {avg_forward:,.3f} (cumΔ={cum_abs:,.3f})")

    # ------------------------ save training logs ------------------------
    df_hist = pd.DataFrame(history)
    df_hist["N"] = N
    df_hist.to_csv(_csv_path("history"), index=False)

    df_train_perstage = pd.DataFrame(perstage_rows)
    df_train_perstage.to_csv(_csv_path("train_perstage"), index=False)

    # ------------------------ evaluation ------------------------
    # Use the caller's discount factor, not the module-level default.
    df_eval_train = evaluate_policy(a_t, b_t, l0, prices_train, inflow_train,
                                    R, l_min, l_max, pi_min, pi_max, g, gamma=gamma)
    df_eval_train.to_csv(_csv_path("train_eval_perpath"), index=False)

    df_eval_test = evaluate_policy(a_t, b_t, l0, prices_test, inflow_test,
                                   R, l_min, l_max, pi_min, pi_max, g, gamma=gamma)
    df_eval_test.to_csv(_csv_path("test_perpath"), index=False)

    # stats
    in_mean  = float(df_eval_train["total_profit"].mean()) if not df_eval_train.empty else 0.0
    in_std   = float(df_eval_train["total_profit"].std(ddof=1)) if len(df_eval_train) > 1 else 0.0
    test_mean= float(df_eval_test["total_profit"].mean()) if not df_eval_test.empty else 0.0
    test_std = float(df_eval_test["total_profit"].std(ddof=1)) if len(df_eval_test) > 1 else 0.0

    first_mean = float(df_eval_train["first_hour_theta"].mean()) if not df_eval_train.empty else 0.0
    first_std  = float(df_eval_train["first_hour_theta"].std(ddof=1)) if len(df_eval_train) > 1 else 0.0

    # ------------------------ save cuts & summary ------------------------
    rows = []
    for t in range(T+1):
        row = {"stage": t, "N": N, "b": float(b_t[t])}
        for j in range(J):
            row[f"a_{j}"] = float(a_t[t][j])
        rows.append(row)
    pd.DataFrame(rows).to_csv(_csv_path("cuts"), index=False)

    # With max_iters == 0 there is no history entry to report.
    if history["avg_forward_value"]:
        final_avg_forward = history["avg_forward_value"][-1]
    else:
        final_avg_forward = float("nan")

    runtime_sec = time.time() - t0
    df_sum = pd.DataFrame([{
        "N": N,
        "train_size": N_train, "test_size": N_test, "iters": max_iters,
        "runtime_sec": runtime_sec,
        "final_avg_forward_value": final_avg_forward,
        "cum_abs_delta": cum_abs,
        "in_sample_mean": in_mean, "in_sample_std": in_std,
        "test_mean": test_mean, "test_std": test_std,
        "first_hour_theta_mean": first_mean, "first_hour_theta_std": first_std
    }])

    return ({"a": a_t, "b": b_t}, {"history": df_hist}, df_sum)



In [None]:
# ------------------------- Driver -------------------------

if __name__ == "__main__":
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    all_summaries = []
    # Leave N as None to sweep the sample sizes 100, 200, ..., 1000;
    # set it to an integer to run that single sample size instead.
    N = None

    for N in (range(100, 1001, 100) if N is None else (N,)):
        print(f"\n=== Running ADP-like SDDP with N={N} samples ===")
        _, _, df_sum = run_adp_like_sddp(
            N=N, max_iters=MAX_ITERS, gamma=GAMMA,
            seed_train=SEED_TRAIN, out_prefix=OUT_PREFIX
        )
        all_summaries.append(df_sum)

    # One row per sample size, stacked into a single summary file.
    df_all = pd.concat(all_summaries, ignore_index=True)
    df_all.to_csv(os.path.join(OUTPUT_DIR, f"{OUT_PREFIX}_summary_allN.csv"), index=False)
    print("\nDone. Summaries written to", os.path.abspath(OUTPUT_DIR))
