In [12]:
from __future__ import annotations

from pathlib import Path
import numpy as np
import pandas as pd
import yaml

# YAML configuration file for this notebook. The path is relative, so it is
# resolved against the current working directory when the file is opened.
CONFIG_PATH = Path("refinery_config.yaml")




In [13]:


# -------------------------
# Helpers
# -------------------------
def read_refinery_output(path: Path) -> pd.DataFrame:
    """
    Load the refinery-output CSV and return a cleaned, typed DataFrame.

    The file may either carry a header row containing at least ``country``,
    ``year`` and ``refinery_output``, or be header-less, in which case the
    columns are assumed to be ``country, year, scenario, refinery_output``
    in that order. A ``scenario`` column is renamed to ``base_scenario``
    when no ``base_scenario`` column exists.

    Parameters
    ----------
    path : Path
        Location of the CSV file.

    Returns
    -------
    pd.DataFrame
        Contains at least ``country`` (upper-cased, stripped str),
        ``base_scenario`` (stripped str), ``year`` (int) and
        ``refinery_output`` (float). Rows whose year or output is not
        numeric, or whose base scenario is missing, are removed.

    Raises
    ------
    ValueError
        If the required columns cannot be found.
    """
    df = pd.read_csv(path)

    # Header-less mode: the first read consumed a data row as the header,
    # so re-read with explicit column names.
    if not {"country", "year", "refinery_output"}.issubset(df.columns):
        df = pd.read_csv(
            path,
            header=None,
            names=["country", "year", "scenario", "refinery_output"],
        )

    # Column-name normalisation: scenario -> base_scenario
    if "base_scenario" not in df.columns and "scenario" in df.columns:
        df = df.rename(columns={"scenario": "base_scenario"})

    needed = {"country", "base_scenario", "year", "refinery_output"}
    if not needed.issubset(df.columns):
        raise ValueError(f"Colonnes attendues {sorted(needed)} ; trouvé {sorted(df.columns)}")

    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df["refinery_output"] = pd.to_numeric(df["refinery_output"], errors="coerce")

    # Drop incomplete rows BEFORE any astype(str): casting to str turns a
    # missing value into the literal "nan", which dropna can no longer see.
    # The copy keeps the assignments below from targeting a derived frame.
    df = df.dropna(subset=["year", "refinery_output", "base_scenario"]).copy()

    df["country"] = df["country"].astype(str).str.strip().str.upper()
    df["base_scenario"] = df["base_scenario"].astype(str).str.strip()
    df["year"] = df["year"].astype(int)
    df["refinery_output"] = df["refinery_output"].astype(float)
    return df


def assert_non_negative(df: pd.DataFrame, cols: list[str]) -> None:
    """
    Raise if any of the requested columns holds a negative value.

    Columns listed in ``cols`` but absent from ``df`` are ignored. Values
    down to -1e-9 are tolerated to absorb floating-point noise.

    Raises
    ------
    ValueError
        If at least one value is below the tolerance; the message lists the
        per-column minimum.
    """
    present = [name for name in cols if name in df.columns]
    if present:
        subset = df[present]
        below_tolerance = subset < -1e-9
        if below_tolerance.to_numpy().any():
            raise ValueError("Valeurs négatives détectées:\n" + str(subset.min()))


def check_coverage(df: pd.DataFrame, y0: int, y1: int) -> None:
    """
    Verify that every (country, base_scenario, scenario, unit) group spans
    each year from ``y0`` to ``y1`` inclusive.

    Raises
    ------
    ValueError
        For the first group whose distinct years do not start at ``y0``,
        end at ``y1`` and number exactly ``y1 - y0 + 1``.
    """
    n_expected = y1 - y0 + 1
    group_cols = ["country", "base_scenario", "scenario", "unit"]

    for key, group in df.groupby(group_cols):
        seen = np.sort(group["year"].unique())
        first, last, n_seen = seen[0], seen[-1], len(seen)

        if first == y0 and last == y1 and n_seen == n_expected:
            continue

        raise ValueError(
            f"Couverture années KO pour {key}: "
            f"{first}..{last} n={n_seen} (attendu {n_expected})."
        )
def piecewise_linear_annual(years: np.ndarray, anchors: dict[int, float]) -> np.ndarray:
    """
    Annual linear interpolation from anchor points ``{year: value}``.

    Outside ``[min_anchor, max_anchor]`` the series is held flat at the
    nearest anchor value.

    Parameters
    ----------
    years : array-like of int
        Years to evaluate; any array-like is accepted and cast to int.
    anchors : dict[int, float]
        Anchor values keyed by year. Keys are normalised with ``int()`` and
        values with ``float()``; the dict does not need to be ordered.

    Returns
    -------
    np.ndarray
        Float array with one interpolated value per entry of ``years``.

    Raises
    ------
    ValueError
        If ``anchors`` is empty.
    """
    if not anchors:
        raise ValueError("anchors ne peut pas être vide.")

    y = np.asarray(years).astype(int)
    points = {int(k): float(v) for k, v in anchors.items()}
    xs = np.array(sorted(points), dtype=int)
    vs = np.array([points[int(x)] for x in xs], dtype=float)

    # np.interp already returns vs[0] left of xs[0] and vs[-1] right of
    # xs[-1], which is exactly the flat extrapolation wanted here.
    return np.interp(y, xs, vs)

In [14]:

# -------------------------
# Main
# -------------------------
# Load the YAML configuration that drives the whole notebook.
with CONFIG_PATH.open("r", encoding="utf-8") as f:
    CFG = yaml.safe_load(f)

YEAR_START = int(CFG["years"]["start"])
YEAR_END = int(CFG["years"]["end"])
# An inverted range would yield an empty YEARS array and, downstream, empty
# output files with no error; fail early instead.
if YEAR_END < YEAR_START:
    raise ValueError(
        f"years.end ({YEAR_END}) doit être >= years.start ({YEAR_START})."
    )
YEARS = np.arange(YEAR_START, YEAR_END + 1)

INPUT_CSV = Path(CFG["input"]["csv_path"])
BASE_YEAR = int(CFG["input"]["base_year"])
BASE_SCENARIO = str(CFG["input"]["scenario"]).strip()

OUT_BASE = Path(CFG["output"]["base_dir"])
# One output directory per scenario name, nested under OUT_BASE.
SCENARIO_DIRS = {k: OUT_BASE / v for k, v in CFG["output"]["scenario_dirs"].items()}

REF = CFG["refinery"]
# Multiplier applied on top of hydrogen demand as (1 + INEFF).
INEFF = float(REF.get("inefficiency_share", 0.0))
if INEFF < 0:
    raise ValueError("refinery.inefficiency_share doit être >= 0.")

CONCAWE = REF["scenarios"]

# 1) Read the input and extract the base-year rows
df_in = read_refinery_output(INPUT_CSV)

_countries_cfg = CFG["input"]["countries"]
if _countries_cfg == "ALL":
    # Every country that has a row for the base year and base scenario.
    COUNTRIES = sorted(
        df_in.loc[
            (df_in["year"] == BASE_YEAR) & (df_in["base_scenario"] == BASE_SCENARIO),
            "country"
        ].unique().tolist()
    )
elif isinstance(_countries_cfg, str):
    # A single country given as a bare string: wrap it, otherwise the
    # comprehension below would iterate over its characters.
    COUNTRIES = [_countries_cfg.strip().upper()]
else:
    COUNTRIES = [str(c).strip().upper() for c in _countries_cfg]

df_base = df_in.loc[
    (df_in["country"].isin(COUNTRIES)) &
    (df_in["base_scenario"] == BASE_SCENARIO) &
    (df_in["year"] == BASE_YEAR)
].copy()

if df_base.empty:
    raise ValueError(
        f"Aucune base trouvée pour year={BASE_YEAR} et base_scenario={BASE_SCENARIO}."
    )

# Keep one row per country in case of duplicates. The sort is explicitly
# stable so that the first occurrence in file order is the one retained
# (the default sort algorithm gives no such guarantee).
df_base = (
    df_base.sort_values(["country"], kind="mergesort")
           .drop_duplicates(subset=["country", "base_scenario"], keep="first")
)

# (country, base_scenario) -> base-year refinery output
base_output = {
    (country, base_sc): float(value)
    for country, base_sc, value in zip(
        df_base["country"], df_base["base_scenario"], df_base["refinery_output"]
    )
}
BASE_KEYS = sorted(base_output.keys())


def build_refinery_scenario(scn_name: str, scn_payload: dict) -> pd.DataFrame:
    """
    Build the long-format hydrogen-demand table for one refinery scenario.

    For every (country, base_scenario) in ``BASE_KEYS`` and every unit of the
    scenario, the base-year refinery output is scaled by the evolution of
    total utilised capacity, split between units according to their capacity
    share, and converted into hydrogen demand as
    ``unit_feed * spec_cons_wt / 100 * (1 + INEFF)``.

    Parameters
    ----------
    scn_name : str
        Scenario label, written to the ``scenario`` column.
    scn_payload : dict
        Must contain ``"units"``: a mapping ``unit -> {"utilized_capacity_mton":
        {year: value}, "spec_cons_wt": number}``. All units must share the
        same set of anchor years.
        NOTE(review): ``spec_cons_wt`` is treated as a percentage (divided by
        100) -- presumably a weight-percent hydrogen consumption; confirm.

    Returns
    -------
    pd.DataFrame
        One row per (country, base_scenario, unit, year) with columns
        ``country, base_scenario, scenario, unit, year,
        refinery_output_total, unit_share, unit_feed, spec_cons_wt,
        h2_demand``.

    Raises
    ------
    ValueError
        If no unit or anchor is defined, if units disagree on anchor years,
        if the reference total capacity is not positive, or if one of the
        post-build consistency checks fails.
    """
    units = scn_payload["units"]
    unit_names = list(units.keys())
    if not unit_names:
        raise ValueError(f"Aucune unité définie pour le scénario {scn_name}.")

    # Capacity anchors per unit, with keys normalised to int years.
    unit_anchors = {
        u: {int(y): float(v) for y, v in units[u]["utilized_capacity_mton"].items()}
        for u in unit_names
    }

    # The first unit defines the anchor years; every other unit must match,
    # otherwise extra anchors would be silently ignored and missing ones
    # would only surface as a bare KeyError.
    anchor_years = sorted(unit_anchors[unit_names[0]])
    if not anchor_years:
        raise ValueError(f"Aucune ancre utilized_capacity_mton pour {scn_name}.")
    for u in unit_names:
        if sorted(unit_anchors[u]) != anchor_years:
            raise ValueError(
                f"Années d'ancrage incohérentes pour {scn_name}/{u}: "
                f"{sorted(unit_anchors[u])} (attendu {anchor_years})."
            )

    # --- Total capacity at anchor years ---
    # total_cap(y) = sum of the units' utilized_capacity_mton
    total_cap_anchors = {
        y: float(sum(unit_anchors[u][y] for u in unit_names))
        for y in anchor_years
    }

    # --- Annual capacity series per unit, and their total ---
    cap_annual = {u: piecewise_linear_annual(YEARS, unit_anchors[u]) for u in unit_names}

    total_cap_annual = np.zeros_like(YEARS, dtype=float)
    for u in unit_names:
        total_cap_annual += cap_annual[u]

    # shares = cap_u / total_cap ; where total_cap == 0 the share stays 0
    shares_annual = {}
    for u in unit_names:
        s = np.zeros_like(YEARS, dtype=float)
        np.divide(cap_annual[u], total_cap_annual, out=s, where=(total_cap_annual > 0))
        shares_annual[u] = s

    # Scale the overall level by total_cap(t) / total_cap(first anchor year),
    # using refinery_output as a proxy for feed.
    y_ref = min(anchor_years)
    cap_ref = float(total_cap_anchors[int(y_ref)])
    if cap_ref <= 0:
        raise ValueError(f"Total capacity de référence <=0 pour {scn_name} (year={y_ref}).")

    level_factor = total_cap_annual / cap_ref  # ~1 at the reference year
    # Before the first anchor year the factor is pinned to 1 (plateau) so
    # that no trend is invented for years the scenario does not cover.
    level_factor[YEARS < y_ref] = 1.0
    level_factor = np.clip(level_factor, 0.0, None)

    # --- Build long output ---
    out = []
    for (country, base_sc) in BASE_KEYS:
        base = base_output[(country, base_sc)]
        total_feed = base * level_factor  # proxy for refinery_output(t)

        for u in unit_names:
            spec_wt = float(units[u]["spec_cons_wt"])
            feed_u = total_feed * shares_annual[u]
            h2_u = feed_u * (spec_wt / 100.0) * (1.0 + INEFF)

            out.append(pd.DataFrame({
                "country": country,
                "base_scenario": base_sc,
                "scenario": scn_name,
                "unit": u,
                "year": YEARS,
                "refinery_output_total": total_feed,
                "unit_share": shares_annual[u],
                "unit_feed": feed_u,
                "spec_cons_wt": spec_wt,
                "h2_demand": h2_u,
            }))

    df = pd.concat(out, ignore_index=True)

    # Sanity checks
    assert_non_negative(df, ["refinery_output_total", "unit_share", "unit_feed", "h2_demand", "spec_cons_wt"])
    check_coverage(df, YEAR_START, YEAR_END)

    # Identity: sum of unit_feed == refinery_output_total (numerical tolerance)
    chk = (
        df.groupby(["country", "base_scenario", "scenario", "year"], as_index=False)
          .agg(total_feed=("refinery_output_total", "first"), sum_unit_feed=("unit_feed", "sum"))
    )
    err = (chk["total_feed"] - chk["sum_unit_feed"]).abs().max()
    if err > 1e-6:
        raise ValueError(f"Incohérence allocation feed (max abs err={err}) pour {scn_name}.")

    return df

In [15]:
def write_outputs(scn_name: str, df: pd.DataFrame) -> None:
    """
    Write one scenario table to ``refinery_eu_demand.csv`` in the directory
    registered for ``scn_name`` in ``SCENARIO_DIRS``, creating the directory
    (and its parents) when needed. The DataFrame index is not written.
    """
    target_dir = SCENARIO_DIRS[scn_name]
    target_dir.mkdir(parents=True, exist_ok=True)

    csv_path = target_dir / "refinery_eu_demand.csv"
    df.to_csv(csv_path, index=False)


# Make sure the root output directory exists before anything is written.
OUT_BASE.mkdir(parents=True, exist_ok=True)

# Build each configured scenario and write its CSV.
for scn_name, payload in CONCAWE.items():
    df_s = build_refinery_scenario(scn_name, payload)
    write_outputs(scn_name, df_s)

# Dump the loaded configuration next to the outputs, preserving key order
# and writing non-ASCII characters as-is.
with (OUT_BASE / "config_effective.yaml").open("w", encoding="utf-8") as f:
    yaml.safe_dump(CFG, f, sort_keys=False, allow_unicode=True)

print("OK: scénarios refinery-eu-demand générés dans", OUT_BASE)

OK: scénarios refinery-eu-demand générés dans /Users/simonbrigode/Desktop/tp_pommes_kraft/data_country/industry/pommes-h2-network/data-pommes/scenario-refinery
