# In [None]:
import os, time
import numpy as np
import pandas as pd
import gurobipy as gp
from gurobipy import GRB, quicksum
from Input_generator import generate_input

# ------------------------- Config -------------------------

# Discount factor; default `gamma` of run_uc_like_adp_with_duals.
GAMMA = 1.0
# Default number of training iterations.
MAX_ITERS = 30
# Filename prefix shared by all CSV outputs.
OUT_PREFIX = "adp_uc_alg6"
# Directory the CSV outputs are written to (created on demand).
OUTPUT_DIR = "results_adp_unit"
# Seed for the shuffle that splits scenarios into train/test sets.
SEED_TRAIN = 0

# >>>>>>> Set your startup costs here (one per reservoir/unit) <<<<<<<
# None means a zero startup cost for every unit. A single number is broadcast
# to all units; a longer list is truncated to the number of units and a
# shorter one is padded with its last value.
STARTUP_COSTS = None  # e.g., set to [200.0, 300.0] for (Upper, Lower)

# logging controls (keep RAM/files small)
# When True, per-stage rows are recorded only during the final iteration.
LOG_LAST_ITER_ONLY = True
# Only training paths with index below this value are logged per stage.
LOG_MAX_PATHS = 200

# stepsize α_n
def alpha_schedule(n, alpha0=0.5, power=0.6):
    """Return the smoothing step size alpha_n = alpha0 / n**power.

    Args:
        n: Iteration counter; must be strictly positive (the training loop
            starts at 1).
        alpha0: Step size at n == 1.
        power: Decay exponent applied to n.

    Raises:
        ValueError: If n is not strictly positive. Without this guard n == 0
            fails with a bare ZeroDivisionError and a negative n with a
            fractional power silently yields a complex step size.
    """
    if n <= 0:
        raise ValueError(f"alpha_schedule requires n > 0, got {n!r}")
    return alpha0 / (n ** power)

# ------------------------- Helpers -------------------------

def coerce_vec(x, J):
    """Coerce *x* into a flat float vector of length *J*.

    A flattened input that already has J entries is returned as is, a
    single value is broadcast to all J entries, and anything else
    (None, empty, or a mismatching size) falls back to a zero vector.
    """
    if x is not None:
        flat = np.asarray(x, dtype=float).reshape(-1)
        count = flat.size
        if count == J:
            return flat
        if count == 1:
            return np.full(J, float(flat[0]), dtype=float)
    # Best-effort fallback: missing or incompatible input becomes zeros.
    return np.zeros(J, dtype=float)

def coerce_scalar(x):
    """Collapse *x* to a single float: the mean of its entries.

    None and empty inputs map to 0.0.
    """
    if x is None:
        return 0.0
    values = np.asarray(x, dtype=float).reshape(-1)
    if values.size == 0:
        return 0.0
    return float(values.mean())

def eval_vbar_linear(a_list, b_list, t_next, x_post):
    """Evaluate the linear value approximation a^T x_post + b.

    The coefficients are read from position ``t_next + 1`` of ``a_list``
    and ``b_list``; ``t_next`` itself is 0-based.
    """
    stage = t_next + 1
    slope = np.asarray(a_list[stage], dtype=float).reshape(-1)
    intercept = float(b_list[stage])
    state = np.asarray(x_post, float).reshape(-1)
    return float(slope @ state + intercept)

# ------------------------- Stage solvers -------------------------

def solve_stage_uc_mip(
    t, l_t, u_prev, rho_t, nu_t, R, l_min, l_max, pi_min, pi_max,
    a_next, b_next, gen_coeff, lambda_startup,
    penalty_scale=1e6, silent=True
):
    """
    Solve one stage of the unit-commitment problem as a Gurobi MIP.

    UC MIP stage with startup costs and soft bounds:
      max  sum_j rho_t * g_j * pi_j  + a_{t+1}^T l_{t+1} + b_{t+1}
           - sum_j λ_j * ν_j  - penalty
      s.t. l_{t+1} = l_t + nu_t + R*pi
           u binaries, startup binaries ν with:
                ν >= u - u_prev,  ν <= u,  ν <= 1 - u_prev
           u * pi_min <= pi <= u * pi_max
           soft bounds on l_{t+1}

    Args:
      t: stage index; only used in the model name and the error message.
      l_t: current level vector (NumPy array); its ``.size`` defines J.
      u_prev: previous-stage commitment per unit (0/1 values).
      rho_t: scalar price multiplying the generation term.
      nu_t: exogenous term added to l_t in the balance (callers in this
        file pass the inflow sample).
      R: J x J matrix, indexed ``R[j, k]``, mapping flows pi to level changes.
      l_min, l_max: soft lower/upper bounds on l_{t+1}.
      pi_min, pi_max: flow bounds that apply when a unit is committed.
      a_next, b_next: slope vector and intercept of the linear value term.
      gen_coeff: per-unit coefficient multiplying rho_t * pi_j.
      lambda_startup: per-unit startup cost.
      penalty_scale: cost per unit of soft-bound violation.
      silent: when true, Gurobi output is switched off.

    Returns:
      obj, pi*, l_next*, theta_est, x_post, revenue, penalty, startup_cost, u*
      where theta_est = a_next . l_next* + b_next, x_post = l_t + R pi*
      (nu_t is not included), and u* is rounded to exactly 0.0/1.0.

    Raises:
      RuntimeError: if the solve ends with a status other than OPTIMAL or
        SUBOPTIMAL.
    """
    J = l_t.size

    with gp.Model(f"uc_mip_t{t}") as m:
        if silent:
            m.Params.OutputFlag = 0
        m.Params.Threads = 1

        # Decision variables: flows, commitment and startup binaries, the
        # (free) next level, and non-negative slacks for the soft bounds.
        pi    = m.addVars(J, lb=0.0, ub=[float(pi_max[j]) for j in range(J)], name="pi")
        u     = m.addVars(J, vtype=GRB.BINARY, name="u")
        nuBin = m.addVars(J, vtype=GRB.BINARY, name="nu_start")
        lnext = m.addVars(J, lb=-GRB.INFINITY, name="l_next")
        s_lo  = m.addVars(J, lb=0.0, name="s_lo")
        s_hi  = m.addVars(J, lb=0.0, name="s_hi")

        # balance: l_next[j] = l_t[j] + nu_t[j] + sum_k R[j, k] * pi[k]
        for j in range(J):
            expr = lnext[j] - (float(l_t[j]) + float(nu_t[j]))
            expr -= quicksum(float(R[j, k]) * pi[k] for k in range(J))
            m.addConstr(expr == 0.0, name=f"bal_{j}")

        # soft bounds: violations are absorbed by s_lo / s_hi and penalised
        # in the objective instead of making the model infeasible
        for j in range(J):
            m.addConstr(lnext[j] + s_lo[j] >= float(l_min[j]), name=f"lb_{j}")
            m.addConstr(lnext[j] - s_hi[j] <= float(l_max[j]), name=f"ub_{j}")

        # commitment bounds: u[j] = 0 forces pi[j] = 0
        for j in range(J):
            m.addConstr(pi[j] >= float(pi_min[j]) * u[j], name=f"minflow_{j}")
            m.addConstr(pi[j] <= float(pi_max[j]) * u[j], name=f"maxflow_{j}")

        # startup logic with u_prev: nuBin[j] is 1 only for an off -> on switch
        for j in range(J):
            up = float(u_prev[j])
            m.addConstr(nuBin[j] >= u[j] - up, name=f"start_ge_{j}")
            m.addConstr(nuBin[j] <= u[j],      name=f"start_cap1_{j}")
            m.addConstr(nuBin[j] <= 1.0 - up,  name=f"start_cap2_{j}")

        # objective: revenue + linear value term - startup cost - slack penalty
        rev  = quicksum(float(rho_t) * float(gen_coeff[j]) * pi[j] for j in range(J))
        fut  = quicksum(float(a_next[j]) * lnext[j] for j in range(J)) + float(b_next)
        su   = quicksum(float(lambda_startup[j]) * nuBin[j] for j in range(J))
        pen  = penalty_scale * (quicksum(s_lo[j] for j in range(J)) + quicksum(s_hi[j] for j in range(J)))
        m.setObjective(rev + fut - su - pen, GRB.MAXIMIZE)

        m.optimize()
        # Any status other than OPTIMAL/SUBOPTIMAL is treated as a failure.
        if m.Status not in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
            raise RuntimeError(f"UC MIP infeasible/unbounded at t={t} (status {m.Status})")

        # Extract the solution while the model is still alive.
        pi_sol = np.array([pi[j].X for j in range(J)], dtype=float)
        ln_sol = np.array([lnext[j].X for j in range(J)], dtype=float)
        u_sol  = np.array([u[j].X  for j in range(J)], dtype=float)
        nu_sol = np.array([nuBin[j].X for j in range(J)], dtype=float)
        theta_est = float(np.dot(a_next, ln_sol) + b_next)
        # x_post leaves out nu_t, unlike l_next.
        x_post = l_t + R.dot(pi_sol)

        # Objective components, recomputed from the solution values.
        revenue = float(rho_t) * float(np.dot(gen_coeff, pi_sol))
        penalty = penalty_scale * float(sum(s_lo[j].X + s_hi[j].X for j in range(J)))
        # Uses the raw (unrounded) startup values returned by the solver.
        startup_cost = float(np.dot(lambda_startup, nu_sol))
        obj_val = float(m.ObjVal)

        # round u to 0/1
        u_sol = (u_sol > 0.5).astype(float)

        return obj_val, pi_sol, ln_sol, theta_est, x_post, revenue, penalty, startup_cost, u_sol

def solve_stage_uc_duals_lp(
    t, l_t, u_prev, rho_t, nu_t, R, l_min, l_max, pi_min, pi_max,
    a_next, b_next, gen_coeff, lambda_startup, u_fixed,
    penalty_scale=1e6, silent=True
):
    """
    LP pass with u fixed to u_fixed to get duals on mass-balance constraints.
    Startup constraints honored with u_prev and u_fixed; startup cost included.

    The model has the same balance, soft-bound, flow and startup constraints
    and the same objective as the stage MIP, but the commitment is the
    constant u_fixed and the startup indicator is a continuous variable in
    [0, 1], so the problem is a pure LP.

    Args:
      t: stage index; only used in the model name and the error message.
      l_t: current level vector (NumPy array); its ``.size`` defines J.
      u_prev: previous-stage commitment per unit (0/1 values).
      rho_t: scalar price multiplying the generation term.
      nu_t: exogenous term added to l_t in the balance.
      R: J x J matrix, indexed ``R[j, k]``.
      l_min, l_max: soft lower/upper bounds on l_{t+1}.
      pi_min, pi_max: flow bounds that apply when a unit is committed.
      a_next, b_next: slope vector and intercept of the linear value term.
      gen_coeff: per-unit coefficient multiplying rho_t * pi_j.
      lambda_startup: per-unit startup cost.
      u_fixed: commitment per unit to impose (0/1 values).
      penalty_scale: cost per unit of soft-bound violation.
      silent: when true, Gurobi output is switched off.

    Returns:
      (lam, lp_obj): lam holds Gurobi's ``Pi`` value of each balance
      constraint, in unit order; lp_obj is the LP objective value.

    Raises:
      RuntimeError: if the solve ends with a status other than OPTIMAL or
        SUBOPTIMAL.
    """
    J = l_t.size
    with gp.Model(f"uc_duals_lp_t{t}") as m:
        if silent:
            m.Params.OutputFlag = 0
        m.Params.Threads = 1

        pi    = m.addVars(J, lb=0.0, ub=[float(pi_max[j]) for j in range(J)], name="pi")
        lnext = m.addVars(J, lb=-GRB.INFINITY, name="l_next")
        s_lo  = m.addVars(J, lb=0.0, name="s_lo")
        s_hi  = m.addVars(J, lb=0.0, name="s_hi")
        nuBin = m.addVars(J, lb=0.0, ub=1.0, name="nu_start_relaxed")  # continuous [0,1] is fine for LP

        # balance + dual capture: keep the constraint handles so that their
        # Pi values can be read after the solve
        bal = []
        for j in range(J):
            expr = lnext[j] - (float(l_t[j]) + float(nu_t[j]))
            expr -= quicksum(float(R[j, k]) * pi[k] for k in range(J))
            c = m.addConstr(expr == 0.0, name=f"bal_{j}")
            bal.append(c)

        # soft bounds
        for j in range(J):
            m.addConstr(lnext[j] + s_lo[j] >= float(l_min[j]), name=f"lb_{j}")
            m.addConstr(lnext[j] - s_hi[j] <= float(l_max[j]), name=f"ub_{j}")

        # commitment bounds with u fixed: u_fixed[j] = 0 forces pi[j] = 0
        for j in range(J):
            uj = float(u_fixed[j])
            m.addConstr(pi[j] >= float(pi_min[j]) * uj, name=f"minflow_fix_{j}")
            m.addConstr(pi[j] <= float(pi_max[j]) * uj, name=f"maxflow_fix_{j}")

        # startup constraints with u_prev, u_fixed (all right-hand sides are
        # constants here)
        for j in range(J):
            up = float(u_prev[j]); uj = float(u_fixed[j])
            m.addConstr(nuBin[j] >= uj - up, name=f"start_ge_fix_{j}")
            m.addConstr(nuBin[j] <= uj,      name=f"start_cap1_fix_{j}")
            m.addConstr(nuBin[j] <= 1.0 - up,name=f"start_cap2_fix_{j}")

        # objective: same expression as in the stage MIP
        rev  = quicksum(float(rho_t) * float(gen_coeff[j]) * pi[j] for j in range(J))
        fut  = quicksum(float(a_next[j]) * lnext[j] for j in range(J)) + float(b_next)
        su   = quicksum(float(lambda_startup[j]) * nuBin[j] for j in range(J))
        pen  = penalty_scale * (quicksum(s_lo[j] for j in range(J)) + quicksum(s_hi[j] for j in range(J)))
        m.setObjective(rev + fut - su - pen, GRB.MAXIMIZE)

        m.optimize()
        # Any status other than OPTIMAL/SUBOPTIMAL is treated as a failure.
        if m.Status not in (GRB.OPTIMAL, GRB.SUBOPTIMAL):
            raise RuntimeError(f"UC LP (fixed u) infeasible/unbounded at t={t} (status {m.Status})")

        # Dual values of the balance constraints; the training loop uses them
        # as the slope of the stage value with respect to l_t.
        lam = np.array([c.Pi for c in bal], dtype=float)
        return lam, float(m.ObjVal)

# ------------------------- Policy evaluation (UC) -------------------------

def evaluate_policy_uc(a_t, b_t, l0, prices, inflow, R, l_min, l_max, pi_min, pi_max, g, lambda_startup, gamma=1.0):
    """Simulate the policy implied by (a_t, b_t) on every supplied path.

    Each stage is solved with the UC MIP stage solver (startup costs
    included); the resulting level and commitment are carried to the next
    stage. All units are assumed to be off before the first stage.

    Returns a DataFrame with one row per path and the columns ``path_id``,
    ``total_profit`` (sum over stages of gamma**t times the stage objective)
    and ``first_hour_theta`` (theta reported by the first stage).

    NOTE(review): the stage objective returned by the solver contains the
    value-function term a_{t+1}^T l_{t+1} + b_{t+1}, so ``total_profit`` is a
    sum of stage objectives rather than of revenue minus costs -- confirm
    that this is the intended metric.
    """
    N_paths, T, J = prices.shape
    profit_by_path = np.zeros(N_paths, dtype=float)
    theta_at_start = np.zeros(N_paths, dtype=float)

    for path in range(N_paths):
        level = l0.copy()
        # assume all-off initially; change if you have a known u0
        commitment = np.zeros(J, dtype=float)
        discounted_sum = 0.0
        for t in range(T):
            stage_obj, _, level_next, theta, _, _, _, _, u_star = solve_stage_uc_mip(
                t=t, l_t=level, u_prev=commitment, rho_t=float(prices[path, t, 0]),
                nu_t=inflow[path, t, :], R=R, l_min=l_min, l_max=l_max,
                pi_min=pi_min, pi_max=pi_max, a_next=a_t[t+1], b_next=b_t[t+1],
                gen_coeff=g, lambda_startup=lambda_startup, penalty_scale=1e6, silent=True
            )
            if t == 0:
                theta_at_start[path] = theta
            discounted_sum += (gamma ** t) * stage_obj
            # carry state and commitment forward
            level = level_next
            commitment = u_star
        profit_by_path[path] = discounted_sum

    return pd.DataFrame({
        "path_id": np.arange(N_paths),
        "total_profit": profit_by_path,
        "first_hour_theta": theta_at_start,
    })

# ------------------------- Main runner -------------------------

def run_uc_like_adp_with_duals(
    N,
    max_iters=MAX_ITERS,
    gamma=GAMMA,
    seed_train=SEED_TRAIN,
    out_prefix=OUT_PREFIX
):
    """
    Train stage-wise linear value approximations (a_t, b_t) for the UC
    problem and evaluate the resulting policy.

    Steps:
      1. Generate N scenarios with ``generate_input`` and split them 80/20
         into training and test paths (shuffle seeded with ``seed_train``).
      2. For ``max_iters`` iterations, roll forward over every training path:
         solve the stage MIP, re-solve it as an LP with the commitment fixed
         to read duals on the balance constraints, then smooth
             a_t[t] <- (1 - alpha) a_t[t] + alpha * mean(dual)
             b_t[t] <- (1 - alpha) b_t[t] + alpha * mean(obj - dual . l_t)
         with alpha = alpha_schedule(iteration).
      3. Evaluate the final (a_t, b_t) on the training and test paths.

    Files written to OUTPUT_DIR (all named ``{out_prefix}_<kind>_N{N}.csv``):
    history, train_perstage, train_eval_perpath, test_eval_perpath, cuts.

    Args:
      N: number of scenarios to generate.
      max_iters: number of training iterations.
      gamma: discount factor used for training totals and for evaluation.
      seed_train: seed of the train/test shuffle.
      out_prefix: filename prefix of every CSV written by this call.

    Returns:
      ({"a": a_t, "b": b_t}, {"history": df_hist}, df_sum) where df_sum is a
      one-row summary DataFrame.
    """
    t0 = time.time()
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    def out_path(kind):
        # Honour the out_prefix argument rather than the module constant.
        return os.path.join(OUTPUT_DIR, f"{out_prefix}_{kind}_N{N}.csv")

    # --- generate scenarios (your 16-tuple) ---
    (J, T, l_max, l_min, l0, pi_max, pi_min,
     price_samples, inflow_samples, nu0, rho0, R,
     a_t0, b_t0, l_bar0, alpha_energy) = generate_input(N)

    # Shapes/dtypes
    l_min = np.asarray(l_min, dtype=np.float32)
    l_max = np.asarray(l_max, dtype=np.float32)
    l0    = np.asarray(l0,    dtype=np.float32)
    pi_min= np.asarray(pi_min,dtype=np.float32)
    pi_max= np.asarray(pi_max,dtype=np.float32)
    R     = np.asarray(R,     dtype=np.float32)
    g     = np.asarray(alpha_energy, dtype=np.float32)

    prices = np.asarray(price_samples, dtype=np.float32)  # (N,T)
    inflow = np.asarray(inflow_samples, dtype=np.float32) # (N,T,J)
    prices = np.repeat(prices[:, :, None], J, axis=2)     # (N,T,J) same price per unit

    # Startup costs vector: None -> zeros, scalar -> broadcast,
    # too long -> truncated, too short -> padded with the last value.
    if STARTUP_COSTS is None:
        lambda_startup = np.zeros(J, dtype=np.float32)
    else:
        ls = np.asarray(STARTUP_COSTS, dtype=np.float32).reshape(-1)
        if ls.size == 1:
            lambda_startup = np.full(J, float(ls[0]), dtype=np.float32)
        elif ls.size >= J:
            lambda_startup = ls[:J].astype(np.float32)
        else:
            lambda_startup = np.pad(ls, (0, J-ls.size), constant_values=ls[-1]).astype(np.float32)

    # --- split 80/20 like ADP script ---
    N_total = N
    N_train = int(0.8 * N_total)
    N_test  = N_total - N_train

    # A private legacy RandomState yields the same permutation as seeding the
    # global generator, without touching the global random state.
    idx = np.arange(N_total)
    np.random.RandomState(seed_train).shuffle(idx)

    id_train = idx[:N_train]
    id_test  = idx[N_train:]

    prices_train = prices[id_train]
    inflow_train = inflow[id_train]
    prices_test  = prices[id_test]
    inflow_test  = inflow[id_test]

    # --- initialize a_t, b_t from generator (coerced), length T+1 ---
    # Generator values are only used when they arrive as a list or tuple.
    a_t = [np.zeros(J, dtype=np.float32) for _ in range(T+1)]
    b_t = [0.0 for _ in range(T+1)]
    if isinstance(a_t0, (list, tuple)):
        for t in range(min(T+1, len(a_t0))):
            a_t[t] = coerce_vec(a_t0[t], J).astype(np.float32)
    if isinstance(b_t0, (list, tuple)):
        for t in range(min(T+1, len(b_t0))):
            b_t[t] = coerce_scalar(b_t0[t])

    # --- logs (same keys as ADP script) ---
    history = {"iter": [], "avg_forward_value": [], "cum_abs_delta": []}
    perstage_rows = []  # last-iteration per-stage logs
    cum_abs = 0.0

    # ------------------------ training iterations (MIP + LP for duals) ------------------------
    for it in range(1, max_iters + 1):
        an = alpha_schedule(it)
        # Compare against the max_iters argument so that the final iteration
        # is logged even when the caller overrides the default.
        log_this_iter = (not LOG_LAST_ITER_ONLY) or (it == max_iters)

        # accumulators for Algorithm-6-style updates
        sum_omega  = np.zeros((T, J), dtype=np.float64)
        sum_adjust = np.zeros(T, dtype=np.float64)

        path_totals = np.zeros(N_train, dtype=np.float64)

        for m in range(N_train):
            l_curr = l0.copy()
            u_prev = np.zeros(J, dtype=float)  # assume all-off initially; change if you know u0
            for t in range(T):
                rho_t = float(prices_train[m, t, 0])
                nu_t  = inflow_train[m, t, :]

                # 1) UC MIP forward pass (with startup costs)
                (obj, _pi, l_next, theta, _x_post, _revenue,
                 _penalty, _startup_cost, u_star) = solve_stage_uc_mip(
                    t=t, l_t=l_curr, u_prev=u_prev, rho_t=rho_t, nu_t=nu_t, R=R,
                    l_min=l_min, l_max=l_max, pi_min=pi_min, pi_max=pi_max,
                    a_next=a_t[t+1], b_next=b_t[t+1], gen_coeff=g,
                    lambda_startup=lambda_startup, penalty_scale=1e6, silent=True
                )

                # 2) LP pass for duals with u fixed = u_star (and startup constraints respected)
                lam, _ = solve_stage_uc_duals_lp(
                    t=t, l_t=l_curr, u_prev=u_prev, rho_t=rho_t, nu_t=nu_t, R=R,
                    l_min=l_min, l_max=l_max, pi_min=pi_min, pi_max=pi_max,
                    a_next=a_t[t+1], b_next=b_t[t+1], gen_coeff=g,
                    lambda_startup=lambda_startup, u_fixed=u_star, penalty_scale=1e6, silent=True
                )

                # Algorithm-6 averages: slope from the duals, intercept from
                # the part of the objective not explained by the slope.
                sum_omega[t, :] += lam
                sum_adjust[t]   += (obj - float(lam @ l_curr))

                # discounted accumulation (forward value)
                path_totals[m] += (gamma ** t) * obj

                # per-stage logging (capped to the first LOG_MAX_PATHS paths)
                if log_this_iter and m < LOG_MAX_PATHS:
                    perstage_rows.append({
                        "N": N, "iter": it, "path_id": m, "t": t,
                        "obj": obj, "theta": theta
                    })

                l_curr = l_next
                u_prev = u_star  # carry commitment forward

        avg_forward = float(np.mean(path_totals))
        # The first iteration has no predecessor and contributes no delta.
        if history["avg_forward_value"]:
            cum_abs += abs(avg_forward - history["avg_forward_value"][-1])

        history["iter"].append(it)
        history["avg_forward_value"].append(avg_forward)
        history["cum_abs_delta"].append(cum_abs)

        # 3) Update a_t,b_t with averages across training paths (backward style)
        omega_bar  = (sum_omega / max(N_train, 1)).astype(np.float32)
        adjust_bar = (sum_adjust / max(N_train, 1)).astype(np.float32)
        for t in range(T-1, -1, -1):
            a_t[t] = (1.0 - an) * a_t[t] + an * omega_bar[t, :]
            b_t[t] = (1.0 - an) * b_t[t] + an * float(adjust_bar[t])

        print(f"  Iter {it:02d}: avg forward value = {avg_forward:,.3f} (cumΔ={cum_abs:,.3f})")

    # ------------------------ save training logs ------------------------
    df_hist = pd.DataFrame(history)
    df_hist["N"] = N
    df_hist.to_csv(out_path("history"), index=False)

    df_train_perstage = pd.DataFrame(perstage_rows)
    df_train_perstage.to_csv(out_path("train_perstage"), index=False)

    # ------------------------ evaluation ------------------------
    # Evaluate with the caller's gamma, matching the training loop.
    df_eval_train = evaluate_policy_uc(a_t, b_t, l0, prices_train, inflow_train,
                                       R, l_min, l_max, pi_min, pi_max, g, lambda_startup, gamma=gamma)
    df_eval_train.to_csv(out_path("train_eval_perpath"), index=False)

    df_eval_test = evaluate_policy_uc(a_t, b_t, l0, prices_test, inflow_test,
                                      R, l_min, l_max, pi_min, pi_max, g, lambda_startup, gamma=gamma)
    df_eval_test.to_csv(out_path("test_eval_perpath"), index=False)

    # stats (std needs at least two rows; otherwise report 0.0)
    in_mean  = float(df_eval_train["total_profit"].mean()) if not df_eval_train.empty else 0.0
    in_std   = float(df_eval_train["total_profit"].std(ddof=1)) if len(df_eval_train) > 1 else 0.0
    test_mean= float(df_eval_test["total_profit"].mean()) if not df_eval_test.empty else 0.0
    test_std = float(df_eval_test["total_profit"].std(ddof=1)) if len(df_eval_test) > 1 else 0.0

    first_mean = float(df_eval_train["first_hour_theta"].mean()) if not df_eval_train.empty else 0.0
    first_std  = float(df_eval_train["first_hour_theta"].std(ddof=1)) if len(df_eval_train) > 1 else 0.0

    # ------------------------ save cuts & summary ------------------------
    rows = []
    for t in range(T+1):
        row = {"stage": t, "N": N, "b": float(b_t[t])}
        for j in range(J):
            row[f"a_{j}"] = float(a_t[t][j])
        rows.append(row)
    pd.DataFrame(rows).to_csv(out_path("cuts"), index=False)

    # With max_iters <= 0 there is no history; report NaN instead of crashing.
    has_history = bool(history["avg_forward_value"])
    final_avg = history["avg_forward_value"][-1] if has_history else float("nan")
    final_cum = history["cum_abs_delta"][-1] if has_history else float("nan")

    runtime_sec = time.time() - t0
    df_sum = pd.DataFrame([{
        "N": N,
        "train_size": N_train, "test_size": N_test, "iters": max_iters,
        "runtime_sec": runtime_sec,
        "final_avg_forward_value": final_avg,
        "cum_abs_delta": final_cum,
        "in_sample_mean": in_mean, "in_sample_std": in_std,
        "test_mean": test_mean, "test_std": test_std,
        "first_hour_theta_mean": first_mean, "first_hour_theta_std": first_std
    }])

    return ({"a": a_t, "b": b_t}, {"history": df_hist}, df_sum)

# ------------------------- Driver -------------------------

if __name__ == "__main__":
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Leave N as None to sweep 100, 200, ..., 1000 samples; set it to an int
    # to run that single sample size instead.
    N = None
    sample_sizes = range(100, 1001, 100) if N is None else [N]

    all_summaries = []
    for N in sample_sizes:
        print(f"\n=== Running UC Algorithm-6 (MIP+LP duals, with startups) with N={N} samples ===")
        _, _, df_sum = run_uc_like_adp_with_duals(
            N=N, max_iters=MAX_ITERS, gamma=GAMMA,
            seed_train=SEED_TRAIN, out_prefix=OUT_PREFIX
        )
        all_summaries.append(df_sum)

    # One combined summary file across all sample sizes.
    summary_path = os.path.join(OUTPUT_DIR, f"{OUT_PREFIX}_summary_allN.csv")
    pd.concat(all_summaries, ignore_index=True).to_csv(summary_path, index=False)
    print("\nDone. Summaries written to", os.path.abspath(OUTPUT_DIR))
