# In [None]:
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd

from src.experiment import run_scheduler_experiment
from config import *

# =====================  FILEPATHS  =====================
# Carbon-intensity CSV inputs, one per grid region. Judging by the file names
# these are 2024 traces at 5-minute resolution -- TODO confirm against the data.
CY_carbon_csv_path = "./CI_data/CY_2024_5_minute.csv"
UK_carbon_csv_path = "./CI_data/GB_2024_5_minute.csv"
BRA_carbon_csv_path = "./CI_data/BR_2024_5_minute.csv"
AU_carbon_csv_path = "./CI_data/AU_2024_5_minute.csv"

# Trace actually used: run_once() reads this module-level name for every
# experiment, so re-point it at one of the paths above to switch region.
carbon_csv_path = UK_carbon_csv_path

# =====================  SCHEDULERS  =====================

# NOTE(review): GreedyLowCarbonDeferScheduler, SEARCH_HOURS and
# candidate_step_slots are not defined or explicitly imported in this file;
# presumably they come from `from config import *` -- confirm.
greedy_defer_order_scheduler = GreedyLowCarbonDeferScheduler(
    name="Greedy Delay",
    search_hours=SEARCH_HOURS,
    candidate_step_slots=candidate_step_slots,
    show_progress=True,
)

# Schedulers that main() iterates over; each one is run twice (vf=1.00 and the
# margin variance factor).
scheds_to_run = [greedy_defer_order_scheduler]

# =====================  VALUES REGIME  =====================

# Passed to select_values_paths() by main() and recorded in every result row.
# Matching there is case-insensitive and ignores surrounding whitespace; any
# value other than "baseline" or "local" raises ValueError.
VALUES_REGIME = "baseline"  # or "local"


def get_total_emissions_gco2(result: Any) -> float:
    """Return ``result.carbon_total_gco2`` coerced to a ``float``.

    Raises:
        AttributeError: if *result* has no ``carbon_total_gco2`` attribute.
        TypeError, ValueError: if the attribute cannot be converted by ``float()``.
    """
    total = result.carbon_total_gco2
    return float(total)


def _as_optional_float(value: Any) -> Optional[float]:
    """Convert *value* with ``float()``; return ``None`` if it is ``None`` or unconvertible."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # These are the errors float() raises for unsupported types, malformed
        # strings and out-of-range integers; treat them all as "no value".
        return None


def maybe_get(result: Any, key: str) -> Optional[float]:
    """Best-effort lookup of a numeric metric on an experiment result.

    Lookup order:

    1. If *result* has an attribute named *key*, that attribute is used and
       ``summary()`` is never consulted -- even when the attribute is ``None``
       or cannot be converted.
    2. Otherwise, if *result* exposes a callable ``summary``, its return value
       is checked with ``key in ...`` and indexed with ``[key]``.

    Args:
        result: Object carrying the metric as an attribute and/or via ``summary()``.
        key: Name of the metric to fetch.

    Returns:
        The metric as a ``float``, or ``None`` when it is absent, ``None``, or
        not convertible to ``float``. A result with neither the attribute nor a
        ``summary()`` method also yields ``None`` rather than raising.
    """
    if hasattr(result, key):
        return _as_optional_float(getattr(result, key))

    # Not every result type is guaranteed to provide summary(); a missing
    # method simply means the metric is unavailable.
    summary_fn = getattr(result, "summary", None)
    if not callable(summary_fn):
        return None

    summary = summary_fn()
    if key in summary:
        return _as_optional_float(summary[key])
    return None


def select_values_paths(regime: str) -> Dict[str, str]:
    """Pick the ``*_values_csv_path`` settings that belong to a values regime.

    The regime name is lower-cased and stripped of surrounding whitespace
    before matching.

    Args:
        regime: ``"baseline"`` or ``"local"``.

    Returns:
        A dict with keys ``"GNN"`` and ``"RF"`` holding the module-level
        ``<regime>_GNN_values_csv_path`` / ``<regime>_RF_values_csv_path`` values.

    Raises:
        ValueError: if the normalised name is neither ``"baseline"`` nor ``"local"``.
    """
    normalized = regime.lower().strip()

    # Only the globals of the selected regime are looked up, so an unused
    # regime's settings do not have to exist.
    if normalized == "local":
        gnn_path, rf_path = local_GNN_values_csv_path, local_RF_values_csv_path
    elif normalized == "baseline":
        gnn_path, rf_path = baseline_GNN_values_csv_path, baseline_RF_values_csv_path
    else:
        raise ValueError("VALUES_REGIME must be 'baseline' or 'local'.")

    return {"GNN": gnn_path, "RF": rf_path}


def cv_pct_to_variance_factor(cv_pct: float, decimals: int = 2) -> float:
    """Convert a percentage into the multiplicative factor ``1 + cv_pct / 100``.

    Args:
        cv_pct: Percentage value (e.g. ``98.652`` for 98.652 %).
        decimals: Number of decimal places the factor is rounded to.

    Returns:
        The rounded factor, e.g. ``1.99`` for ``cv_pct=98.652``.
    """
    fraction = cv_pct / 100.0
    return round(1.0 + fraction, decimals)


def relative_reduction_pct(reference: float, candidate: float) -> float:
    """Return how far *candidate* lies below *reference*, in percent.

    Computed as ``(reference - candidate) / reference * 100``; the result is
    negative when *candidate* exceeds *reference*. A zero *reference* yields
    NaN instead of raising ``ZeroDivisionError``.
    """
    if reference != 0:
        return (reference - candidate) / reference * 100.0
    return float("nan")


def run_once(
    scheduler,
    *,
    vf: float,
    values_csv_path: str,
) -> Tuple[float, Dict[str, Any]]:
    """Run a single scheduler experiment and condense it into one result row.

    Apart from the arguments below, every experiment setting is read from
    module-level names (``carbon_csv_path``, ``NUM_QUERIES``, ``USE_LIFECYCLE``,
    ``START_DATE``, ``END_DATE``, ``TIME``, ``SLOT_SEC``, ``POWER_CONSUMPTION``).
    The run is always labelled ``"GNN"`` and executed with ``oracle=False``.

    Args:
        scheduler: Scheduler instance; its ``name`` attribute names the run.
        vf: Variance factor forwarded as ``add_variance``.
        values_csv_path: Values CSV forwarded to the experiment.

    Returns:
        ``(total_emissions_gco2, row)`` where *row* is a flat dict suitable for
        building a DataFrame.
    """
    experiment_kwargs = {
        "name": f"{scheduler.name}",
        "label": "GNN",
        "scheduler": scheduler,
        "carbon_csv_path": carbon_csv_path,
        "values_csv_path": values_csv_path,
        "query_limit": NUM_QUERIES,
        "use_lifecycle_ci": USE_LIFECYCLE,
        "start_date": START_DATE,
        "end_date": END_DATE,
        # The same TIME constant is used for both ends of the window.
        "start_time": TIME,
        "end_time": TIME,
        "upsample_to_sec": SLOT_SEC,
        "power_kw": POWER_CONSUMPTION,
        "oracle": False,
        "add_variance": vf,
    }
    outcome = run_scheduler_experiment(**experiment_kwargs)

    total_gco2 = get_total_emissions_gco2(outcome)
    summary_row: Dict[str, Any] = {
        "scheduler": scheduler.name,
        "values_regime": VALUES_REGIME,
        "ci_csv": Path(carbon_csv_path).name,
        "model": "GNN",
        "variance_factor": vf,
        # Safety margin implied by the factor, as a percentage above 1.0.
        "margin_pct": (vf - 1.0) * 100.0,
        "total_emissions_gco2": total_gco2,
        "makespan_seconds": maybe_get(outcome, "makespan_seconds"),
        "carbon_per_query_gco2": maybe_get(outcome, "carbon_per_query_gco2"),
    }
    return total_gco2, summary_row


def main() -> pd.DataFrame:
    """Compare point-forecast runs against runs with a P99-CV variance margin.

    Every scheduler in ``scheds_to_run`` is executed twice on the GNN values of
    the configured ``VALUES_REGIME``: once with ``vf=1.00`` and once with the
    variance factor derived from the high-variance P99 CV. The relative
    emission reduction of the second run versus the first is printed per
    scheduler.

    Returns:
        A DataFrame with two rows per scheduler (point run first, margin run
        second), as produced by ``run_once``.
    """
    gnn_values_csv = select_values_paths(VALUES_REGIME)["GNN"]

    # --- Higher-variance regime P99 CV (%) from the table
    P99_CV_HIGH_VARIANCE_PCT = 98.652

    vf_point = 1.00
    vf_target = cv_pct_to_variance_factor(P99_CV_HIGH_VARIANCE_PCT, decimals=2)  # -> 1.99

    rows: List[Dict[str, Any]] = []

    for scheduler in scheds_to_run:
        point_emissions, point_row = run_once(
            scheduler,
            vf=vf_point,
            values_csv_path=gnn_values_csv,
        )
        margin_emissions, margin_row = run_once(
            scheduler,
            vf=vf_target,
            values_csv_path=gnn_values_csv,
        )
        rows.extend((point_row, margin_row))

        reduction_pct = relative_reduction_pct(point_emissions, margin_emissions)
        message = (
            f"[{scheduler.name} | GNN | {Path(carbon_csv_path).name} | {VALUES_REGIME}] "
            f"vf={vf_target:.2f} (P99 CV {P99_CV_HIGH_VARIANCE_PCT:.3f}%) yields "
            f"{reduction_pct:.2f}% lower emissions than point forecasts (vf=1.00)."
        )
        print(message)

    return pd.DataFrame(rows)


# Runs the full comparison as soon as this cell/module executes and keeps the
# resulting DataFrame in a module-level name for later inspection.
results_df = main()
