In [1]:
from __future__ import annotations

from pathlib import Path
import numpy as np
import pandas as pd
import yaml

In [3]:
CONFIG_PATH = Path("steel_config.yaml")

with CONFIG_PATH.open("r", encoding="utf-8") as f:
    CFG = yaml.safe_load(f)

CFG.keys()

dict_keys(['years', 'input', 'output', 'steel_cagr', 'scenarios', 'demand_factors', 'recycling'])

In [4]:
YEAR_START = int(CFG["years"]["start"])
YEAR_END = int(CFG["years"]["end"])
YEARS = np.arange(YEAR_START, YEAR_END + 1)

INPUT_CSV = Path(CFG["input"]["csv_path"])
COUNTRIES = list(CFG["input"]["countries"])
BASE_YEAR = int(CFG["input"]["base_year"])

OUT_BASE = Path(CFG["output"]["base_dir"])
SCENARIO_DIRS = {
    sc: OUT_BASE / rel
    for sc, rel in CFG["output"]["scenario_dirs"].items()
}

STEEL_CAGR = CFG.get("steel_cagr", {})

SC_PARAMS = CFG["scenarios"]

DEMAND_CFG = CFG.get("demand_factors", {})
DEMAND_ENABLED = bool(DEMAND_CFG.get("enabled", False))

H2_KG_PER_T = float(DEMAND_CFG.get("h2_kg_per_t_steel", 0.0))
CH4_KG_PER_T = float(DEMAND_CFG.get("ch4_kg_per_t_steel", 0.0))

H2_COL = str(DEMAND_CFG.get("h2_column_name", "h2_demand"))
CH4_COL = str(DEMAND_CFG.get("ch4_column_name", "ch4_demand"))

REC_CFG = CFG.get("recycling", {})
REC_ENABLED = bool(REC_CFG.get("enabled", True))
REC_TARGET_2050 = float(REC_CFG.get("target_share_2050", 0.50))
REC_RAMP_START = int(REC_CFG.get("ramp_start", BASE_YEAR))
REC_RAMP_END = int(REC_CFG.get("ramp_end", YEAR_END))

YEAR_START, YEAR_END, COUNTRIES, INPUT_CSV, OUT_BASE

(2019,
 2050,
 ['A', 'L', 'L'],
 PosixPath('/Users/simonbrigode/Desktop/tp_pommes_kraft/data_country/industry/pommes-h2-network/industry/data/raw/iron_and_steel.csv'),
 PosixPath('/Users/simonbrigode/Desktop/tp_pommes_kraft/data_country/industry/pommes-h2-network/data-pommes/scenarios-steel'))

In [5]:
def add_fuel_demands(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ajoute des colonnes de demande (kg) dérivées des productions DRI (en tonnes acier).

    Convention :
    - h2_demand  = dri_hydrogen_production * h2_kg_per_t_steel
    - ch4_demand = dri_network_gas_production * ch4_kg_per_t_steel

    Les facteurs viennent du YAML.
    """
    if not DEMAND_ENABLED:
        return df

    out = df.copy()

    # Sécurité : colonnes attendues
    for col in ["dri_hydrogen_production", "dri_network_gas_production"]:
        if col not in out.columns:
            raise ValueError(f"Colonne manquante pour calculer la demande : {col}")

    out[H2_COL] = out["dri_hydrogen_production"] * H2_KG_PER_T
    out[CH4_COL] = out["dri_network_gas_production"] * CH4_KG_PER_T

    # Contrôle de non-négativité
    if (out[[H2_COL, CH4_COL]] < -1e-9).any(axis=None):
        raise ValueError("Demande H2/CH4 négative détectée (problème upstream).")

    return out

In [6]:
import numpy as np
import pandas as pd


def linear_ramp(years: np.ndarray, start: int, end: int) -> np.ndarray:
    """
    Construit une rampe temporelle (fonction d'adoption) entre 0 et 1.

    Objectif
    --------
    Cette fonction sert à modéliser une pénétration progressive d'une technologie
    (ex : part DRI dans la production d'acier) ou l'évolution d'un paramètre
    (ex : taux de capture CCS) sur une période.

    Définition
    ----------
    - Pour years <= start : la rampe vaut 0.0 (pas d'adoption)
    - Pour years >= end   : la rampe vaut 1.0 (adoption complète)
    - Entre start et end  : interpolation linéaire continue de 0 à 1

    Paramètres
    ----------
    years : np.ndarray
        Vecteur d'années (typiquement un range annuel: [2019, ..., 2050]).
        On attend un tableau numérique (int/float).
    start : int
        Année de début de rampe (inclus).
    end : int
        Année de fin de rampe (inclus au sens "à partir de end => 1").

    Retour
    ------
    np.ndarray
        Tableau de même taille que 'years', avec des valeurs dans [0, 1].

    Cas limites / robustesse
    ------------------------
    - Si end <= start : on évite une division par zéro (ou une rampe inversée).
      On choisit une rampe "step" :
        * 0 avant start
        * 1 à partir de start
      Cela permet de représenter un basculement instantané.
    """
    if end <= start:
        # Step function : adoption instantanée à partir de start
        return (years >= start).astype(float)

    # Interpolation linéaire : x = (t - start) / (end - start)
    x = (years - start) / (end - start)

    # Clamp pour garantir [0, 1] même si years sort de l'intervalle
    return np.clip(x, 0.0, 1.0)


def apply_cagr(value_base: float, years: np.ndarray, cagr: float, base_year: int) -> np.ndarray:
    """
    Projette une grandeur en appliquant un taux de croissance annuel composé (CAGR).

    Objectif
    --------
    Produire une trajectoire de production totale d'acier (steel_production)
    à partir d'une valeur de référence (ex : 2019) et d'un CAGR supposé constant.

    Formule
    -------
    value(t) = value_base * (1 + cagr)^(t - base_year)

    Paramètres
    ----------
    value_base : float
        Valeur au year = base_year (ex : production acier en 2019).
    years : np.ndarray
        Tableau d'années pour lesquelles on veut une valeur.
    cagr : float
        Taux de croissance annuel (ex : 0.01 pour +1%/an).
        Peut être négatif (décroissance).
    base_year : int
        Année de référence associée à value_base.

    Retour
    ------
    np.ndarray
        Tableau des valeurs projetées, de même taille que 'years'.

    Remarques
    ---------
    - Si cagr = 0.0 : trajectoire constante (= value_base).
    - Le modèle suppose un CAGR constant : c'est volontairement simple
      (et transparent) pour scénarisation initiale.
    """
    dt = years - base_year  # écart temporel (en années)
    return value_base * np.power(1.0 + cagr, dt)


def assert_non_negative(df: pd.DataFrame, cols: list[str]) -> None:
    """
    Vérifie qu'un ensemble de colonnes ne contient pas de valeurs négatives.

    Objectif
    --------
    Dans ce notebook, les colonnes représentent des productions (volumes).
    Elles ne doivent pas être négatives. On tolère une très faible négativité
    due aux arrondis flottants (ex : -1e-12).

    Paramètres
    ----------
    df : pd.DataFrame
        Table contenant les trajectoires.
    cols : list[str]
        Colonnes à tester (ex : routes de production).

    Comportement
    ------------
    - Si une valeur < -1e-9 existe : on lève une erreur.
    - Sinon : silence (assertion passée).

    Tolérance
    ---------
    -1e-9 est une tolérance arbitraire mais raisonnable pour des volumes.
    Elle évite de déclencher une erreur sur du bruit numérique.
    """
    if (df[cols] < -1e-9).any(axis=None):
        # On fournit les minima par colonne pour debug rapide
        raise ValueError("Valeurs négatives détectées:\n" + str(df[cols].min()))


def assert_route_sum(df: pd.DataFrame) -> None:
    """
    Vérifie la cohérence comptable : steel_production = somme des routes.

    Objectif
    --------
    Dans votre schéma, la production totale d'acier (steel_production) doit être
    exactement répartie entre trois routes:
      - dri_network_gas_production (DRI avec CH4 / gaz réseau)
      - dri_hydrogen_production    (DRI avec H2)
      - blastfurnace_bof_production (BF-BOF, éventuellement avec CCUS etc.)

    Test
    ----
    On calcule :
      s = dri_network_gas + dri_hydrogen + blastfurnace_bof
    puis on vérifie que l'écart max |steel_production - s| reste très faible.

    Tolérance
    ---------
    err <= 1e-6
    Cette tolérance tient compte de la précision flottante
    (et évite les faux positifs dus aux arrondis).

    Remarque importante
    -------------------
    Cette assertion garantit une "masse" conservée :
    aucune production n'est perdue ni créée entre routes.
    """
    s = (
        df["dri_network_gas_production"]
        + df["dri_hydrogen_production"]
        + df["blastfurnace_bof_production"]
    )

    err = (df["steel_production"] - s).abs().max()
    if err > 1e-6:
        raise ValueError(
            "Incohérence steel_production vs somme routes "
            f"(max abs err={err})."
        )


def check_coverage(df: pd.DataFrame, y0: int, y1: int) -> None:
    """
    Vérifie la couverture temporelle complète et sans trous pour chaque série.

    Objectif
    --------
    On veut un continuum annuel strict entre y0 et y1 (inclus),
    pour chaque combinaison (country, scenario).

    Ce contrôle est crucial parce que :
    - POMMES attend généralement des séries cohérentes sur tout l'horizon,
    - des années manquantes peuvent produire des bugs silencieux
      (joins, interpolations implicites, etc.).

    Paramètres
    ----------
    df : pd.DataFrame
        Données au format long, avec colonnes:
          - country
          - scenario
          - year
    y0 : int
        Année de début (incluse).
    y1 : int
        Année de fin (incluse).

    Test
    ----
    Pour chaque (country, scenario) :
      - année min == y0
      - année max == y1
      - nombre d'années == (y1 - y0 + 1)
    Cela garantit l'absence de trous (pas seulement la min/max).

    Remarque
    --------
    Si vous avez des données trimestrielles ou mensuelles un jour,
    ce contrôle devra être adapté (fréquence différente).
    """
    for (c, sc), g in df.groupby(["country", "scenario"]):
        years = np.sort(g["year"].unique())

        expected_n = (y1 - y0 + 1)
        if years[0] != y0 or years[-1] != y1 or len(years) != expected_n:
            raise ValueError(
                f"Couverture années KO pour {c}/{sc}: "
                f"{years[0]}..{years[-1]} n={len(years)} (attendu {expected_n})."
            )
def anchored_dri_total_on_primary(
    primary_total: np.ndarray,
    dri_total_2019_primary: float,
    dri_share_target: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
    """
    - cible : dri_total_target = primary_total * dri_share_target
    - ancrage : dri_total = max(dri_total_2019_primary, dri_total_target)
    - BF = primary_total - dri_total
    """
    dri_total_target = primary_total * dri_share_target
    dri_total = np.maximum(dri_total_2019_primary, dri_total_target)
    bf_total = np.clip(primary_total - dri_total, 0.0, None)
    return dri_total, bf_total

In [7]:
df_in = pd.read_csv(INPUT_CSV)

# normalisation minimale
df_in["country"] = df_in["country"].astype(str).str.strip().str.upper()
df_in["scenario"] = df_in["scenario"].astype(str).str.strip()

BASE_YEAR = int(CFG["input"]["base_year"])
BASE_SCENARIOS = list(CFG["input"]["base_scenarios"])

# --- sélection des pays ---
if CFG["input"]["countries"] == "ALL":
    COUNTRIES = sorted(
        df_in.loc[df_in["year"] == BASE_YEAR, "country"].unique().tolist()
    )
else:
    COUNTRIES = [c.upper() for c in CFG["input"]["countries"]]

print(f"{len(COUNTRIES)} pays détectés à l'année {BASE_YEAR}")
print(COUNTRIES)

print("Scénarios TYNDP disponibles :", sorted(df_in["scenario"].unique()))
print("Scénarios TYNDP utilisés :", BASE_SCENARIOS)

# --- sélection base ---
df_base = df_in.loc[
    (df_in["country"].isin(COUNTRIES)) &
    (df_in["scenario"].isin(BASE_SCENARIOS)) &
    (df_in["year"] == BASE_YEAR)
].copy()

if df_base.empty:
    raise ValueError(
        "Aucune ligne trouvée pour la combinaison "
        f"(year={BASE_YEAR}, scenarios={BASE_SCENARIOS})."
    )

# une ligne par (country, scenario)
df_base = (
    df_base.sort_values(["country", "scenario"])
           .groupby(["country", "scenario"], as_index=False)
           .head(1)
)

df_base.head()

27 pays détectés à l'année 2019
['AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK']
Scénarios TYNDP disponibles : ['DE', 'GA']
Scénarios TYNDP utilisés : ['GA']


Unnamed: 0,country,scenario,year,steel_production,dri_network_gas_production,dri_hydrogen_production,blastfurnace_bof_production
4,AT,GA,2019,7592214.0,0.0,0.0,6863361.456
12,BE,GA,2019,7759543.0,0.0,0.0,5237691.525
20,BG,GA,2019,565900.0,0.0,0.0,0.0
28,CY,GA,2019,0.0,0.0,0.0,0.0
36,CZ,GA,2019,4436832.0,0.0,0.0,4201679.904


In [8]:
base_rows = {
    (r["country"], r["scenario"]): r
    for _, r in df_base.iterrows()
}

# --- EAF (scrap) : défini comme résiduel en année de base ---
# Hypothèse utilisateur : EAF_2019 = steel_production_2019 - blastfurnace_bof_production_2019
# => on le conserve a minima constant jusqu'en 2050.

eaf_2019 = {}
for key, row in base_rows.items():  # key = (country, base_scenario)
    steel0 = float(row["steel_production"])
    bf0 = float(row["blastfurnace_bof_production"])
    eaf0 = steel0 - bf0

    if eaf0 < -1e-6:
        raise ValueError(f"EAF_2019 négatif pour {key}: steel({steel0}) - bf({bf0}) = {eaf0}")

    eaf_2019[key] = max(eaf0, 0.0)  # clip de sécurité

# --- EAF-scrap (acier secondaire) : résiduel en année de base ---
# Convention (data-driven) :
#   eaf_scrap_2019 = steel_2019 - (BF_2019 + DRIgas_2019 + DRIh2_2019)
# Puis convergence linéaire de la part EAF vers 50% en 2050
# (sans diminuer un pays déjà >50% en base).

eaf_scrap_2019 = {}
eaf_scrap_share_2019 = {}

for key, row in base_rows.items():  # key = (country, base_scenario)
    steel0 = float(row["steel_production"])
    bf0 = float(row["blastfurnace_bof_production"])
    dri0 = float(row["dri_network_gas_production"]) + float(row["dri_hydrogen_production"])
    eaf0 = steel0 - (bf0 + dri0)

    if eaf0 < -1e-6:
        raise ValueError(
            f"EAF-scrap_2019 négatif pour {key}: steel({steel0}) - (bf({bf0}) + dri({dri0})) = {eaf0}"
        )

    eaf0 = max(eaf0, 0.0)
    eaf_scrap_2019[key] = eaf0
    eaf_scrap_share_2019[key] = (eaf0 / steel0) if steel0 > 0 else 0.0


def build_eaf_scrap_series(
    key: tuple[str, str],
    years: np.ndarray,
    steel_total: np.ndarray
) -> np.ndarray:
    """
    EAF-scrap(t) = share(t) * steel_total(t)
    share(t) évolue linéairement vers max(share_2019, target_2050).
    """
    if not REC_ENABLED:
        return np.full_like(years, eaf_scrap_2019[key], dtype=float)

    s0 = float(eaf_scrap_share_2019[key])
    sT = max(s0, REC_TARGET_2050)  # pas de baisse forcée
    r = linear_ramp(years, REC_RAMP_START, REC_RAMP_END)

    share_t = s0 + r * (sT - s0)
    share_t = np.clip(share_t, 0.0, 1.0)

    eaf = steel_total * share_t
    eaf = np.clip(eaf, 0.0, steel_total)

    return eaf

total_steel = {}
for (country, sc_ty), row in base_rows.items():
    base = float(row["steel_production"])
    cagr = float(CFG.get("steel_cagr", {}).get(country, 0.0))
    total_steel[(country, sc_ty)] = apply_cagr(
        base, YEARS, cagr=cagr, base_year=BASE_YEAR
    )

In [9]:
def make_effective_dri_mix(
    dri_mix: pd.DataFrame,
    dri_total: np.ndarray,
    steel_total: np.ndarray
) -> pd.DataFrame:
    """
    Mix DRI "effectif" :
    - si dri_total == 0 : shares CH4/H2 -> NaN
    - ajoute dri_total, steel_production, dri_share_of_steel
    - calcule dri_share_of_steel sans warnings (np.divide with where)
    """
    out = dri_mix.copy()
    out["dri_total"] = dri_total
    out["steel_production"] = steel_total

    # Calcul robuste : évite le warning "invalid value encountered in divide"
    dri_share = np.full_like(steel_total, np.nan, dtype=float)
    np.divide(
        dri_total,
        steel_total,
        out=dri_share,
        where=(steel_total > 0)
    )
    out["dri_share_of_steel"] = dri_share

    # Shares CH4/H2 n'ont de sens que si DRI > 0
    mask = dri_total > 0
    out["ch4_share"] = np.where(mask, out["ch4_share"].to_numpy(), np.nan)
    out["h2_share"]  = np.where(mask, out["h2_share"].to_numpy(), np.nan)

    return out

def split_total_into_eaf_and_primary(total: np.ndarray, eaf: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Décompose steel_total en :
      - eaf (scrap) : fixé (ou trajectoire dédiée)
      - primary_total = steel_total - eaf : masse à répartir entre BF-BOF et DRI

    On impose primary_total >= 0.
    """
    primary = total - eaf
    if (primary < -1e-6).any():
        raise ValueError("EAF dépasse steel_production sur certaines années (primary < 0).")
    return eaf, np.clip(primary, 0.0, None)

def build_dri_mix(years: np.ndarray, ramp_start: int, ch4_only_years: int, h2_ramp_years: int) -> pd.DataFrame:
    h2_start = ramp_start + int(ch4_only_years)
    h2_end = h2_start + int(h2_ramp_years)

    h2_share = linear_ramp(years, h2_start, h2_end)
    ch4_share = 1.0 - h2_share

    return pd.DataFrame({"year": years, "ch4_share": ch4_share, "h2_share": h2_share})


def build_bf_fuel_mix(
    years: np.ndarray,
    ramp_start: int,
    ramp_end: int,
    biomass_max: float,
    h2_max: float,
    ccs_capture_rate_start: float,
    ccs_capture_rate_end: float,
    ccs_ramp_start: int,
    ccs_ramp_end: int,
) -> pd.DataFrame:
    r = linear_ramp(years, ramp_start, ramp_end)

    biomass_share = r * float(biomass_max)
    h2_share = r * float(h2_max)

    coal_share = 1.0 - biomass_share - h2_share
    coal_share = np.clip(coal_share, 0.0, 1.0)

    ccs_r = linear_ramp(years, ccs_ramp_start, ccs_ramp_end)
    ccs_capture_rate = float(ccs_capture_rate_start) + ccs_r * (float(ccs_capture_rate_end) - float(ccs_capture_rate_start))

    return pd.DataFrame({
        "year": years,
        "coal_share": coal_share,
        "biomass_share": biomass_share,
        "h2_share": h2_share,
        "ccs_capture_rate": ccs_capture_rate
    })

In [10]:
def build_full_dri_eaf(country: str, base_scenario: str, base_row: pd.Series,
                       years: np.ndarray, total: np.ndarray, p: dict):

    key = (country, base_scenario)
    eaf = build_eaf_scrap_series(key, years, steel_total=total)
    _, primary_total = split_total_into_eaf_and_primary(total, eaf)
    # Base 2019 : on garde le DRI existant, mais sur la partie "primary"
    dri_total_2019_primary = float(base_row["dri_network_gas_production"]) + float(base_row["dri_hydrogen_production"])

    dri_r = linear_ramp(years, int(p["dri_ramp_start"]), int(p["dri_ramp_end"]))
    dri_share_target = dri_r  # 100% du primary en 2050

    dri_total, bf_total = anchored_dri_total_on_primary(primary_total, dri_total_2019_primary, dri_share_target)

    dri_mix = build_dri_mix(
    years,
    ramp_start=int(p["dri_ramp_start"]),
    ch4_only_years=int(p["dri_ch4_only_years"]),
    h2_ramp_years=int(p["dri_h2_ramp_years"]),
    )
    dri_mix = make_effective_dri_mix(dri_mix, dri_total=dri_total, steel_total=total)
    dri_h2 = dri_total * dri_mix["h2_share"].to_numpy()
    dri_ch4 = dri_total * dri_mix["ch4_share"].to_numpy()

    df = pd.DataFrame({
        "country": country,
        "scenario": f"{base_scenario}|full-dri-eaf",
        "year": years,
        "steel_production": total,
        "eaf_production": eaf,
        "dri_network_gas_production": dri_ch4,
        "dri_hydrogen_production": dri_h2,
        "blastfurnace_bof_production": bf_total,
    })
    return df, dri_mix, None


def build_import_dri(country: str, base_scenario: str, base_row: pd.Series,
                     years: np.ndarray, total: np.ndarray, p: dict):

    key = (country, base_scenario)
    eaf = build_eaf_scrap_series(key, years, steel_total=total)
    _, primary_total = split_total_into_eaf_and_primary(total, eaf)
    dri_gas_2019 = float(base_row["dri_network_gas_production"])
    dri_h2_2019 = float(base_row["dri_hydrogen_production"])
    dri_total_2019_primary = dri_gas_2019 + dri_h2_2019

    dri_r = linear_ramp(years, int(p["dri_ramp_start"]), int(p["dri_ramp_end"]))
    dri_share_target = dri_r  # 100% du primary en 2050

    dri_total, bf_total = anchored_dri_total_on_primary(primary_total, dri_total_2019_primary, dri_share_target)

    dri_increment = np.clip(dri_total - dri_total_2019_primary, 0.0, None)

    # L'incrément est importé, donc non-H2 (proxy gaz)
    dri_h2 = np.full_like(primary_total, dri_h2_2019)
    dri_ch4 = np.full_like(primary_total, dri_gas_2019) + dri_increment

    eff_total = np.clip(dri_ch4 + dri_h2, 1e-12, None)

    dri_mix = pd.DataFrame({
        "year": years,
        "ch4_share": dri_ch4 / eff_total,
        "h2_share": dri_h2 / eff_total,
    })

    dri_mix = make_effective_dri_mix(
        dri_mix,
        dri_total=(dri_ch4 + dri_h2),
        steel_total=total
    )

    df = pd.DataFrame({
        "country": country,
        "scenario": f"{base_scenario}|import-dri",
        "year": years,
        "steel_production": total,
        "eaf_production": eaf,
        "dri_network_gas_production": dri_ch4,
        "dri_hydrogen_production": dri_h2,
        "blastfurnace_bof_production": bf_total,
    })
    return df, dri_mix, None

def build_intermediary(country: str, base_scenario: str, base_row: pd.Series,
                       years: np.ndarray, total: np.ndarray, p: dict):

    key = (country, base_scenario)
    eaf = build_eaf_scrap_series(key, years, steel_total=total)
    _, primary_total = split_total_into_eaf_and_primary(total, eaf)
    dri_total_2019_primary = float(base_row["dri_network_gas_production"]) + float(base_row["dri_hydrogen_production"])

    dri_r = linear_ramp(years, int(p["dri_ramp_start"]), int(p["dri_ramp_end"]))
    dri_share_target = dri_r * float(p["dri_max_share_2050"])

    dri_total, bf_total = anchored_dri_total_on_primary(primary_total, dri_total_2019_primary, dri_share_target)

    dri_mix = build_dri_mix(
    years,
    ramp_start=int(p["dri_ramp_start"]),
    ch4_only_years=int(p["dri_ch4_only_years"]),
    h2_ramp_years=int(p["dri_h2_ramp_years"]),
)
    dri_mix = make_effective_dri_mix(dri_mix, dri_total=dri_total, steel_total=total)
    dri_h2 = dri_total * dri_mix["h2_share"].to_numpy()
    dri_ch4 = dri_total * dri_mix["ch4_share"].to_numpy()

    df = pd.DataFrame({
        "country": country,
        "scenario": f"{base_scenario}|intermerdiary",
        "year": years,
        "steel_production": total,
        "eaf_production": eaf,
        "dri_network_gas_production": dri_ch4,
        "dri_hydrogen_production": dri_h2,
        "blastfurnace_bof_production": bf_total,
    })
    return df, dri_mix, None


def build_bf_bof_ccus(country: str, base_scenario: str, base_row: pd.Series,
                      years: np.ndarray, total: np.ndarray, p: dict):

    key = (country, base_scenario)
    eaf = build_eaf_scrap_series(key, years, steel_total=total)
    _, primary_total = split_total_into_eaf_and_primary(total, eaf)
    dri_total_2019_primary = float(base_row["dri_network_gas_production"]) + float(base_row["dri_hydrogen_production"])

    dri_r = linear_ramp(years, int(p["dri_ramp_start"]), int(p["dri_ramp_end"]))
    dri_share_target = dri_r * float(p["dri_max_share_2050"])

    dri_total, bf_total = anchored_dri_total_on_primary(primary_total, dri_total_2019_primary, dri_share_target)

    dri_mix = build_dri_mix(
    years,
    ramp_start=int(p["dri_ramp_start"]),
    ch4_only_years=int(p["dri_ch4_only_years"]),
    h2_ramp_years=int(p["dri_h2_ramp_years"]),
    )
    dri_mix = make_effective_dri_mix(dri_mix, dri_total=dri_total, steel_total=total)
    dri_h2 = dri_total * dri_mix["h2_share"].to_numpy()
    dri_ch4 = dri_total * dri_mix["ch4_share"].to_numpy()

    bf_mix = build_bf_fuel_mix(
        years=years,
        ramp_start=int(p["bf_fuel_ramp_start"]),
        ramp_end=int(p["bf_fuel_ramp_end"]),
        biomass_max=float(p["bf_biomass_max_share"]),
        h2_max=float(p["bf_h2_injection_max_share"]),
        ccs_capture_rate_start=float(p["ccs_capture_rate_start"]),
        ccs_capture_rate_end=float(p["ccs_capture_rate_end"]),
        ccs_ramp_start=int(p["ccs_ramp_start"]),
        ccs_ramp_end=int(p["ccs_ramp_end"]),
    )

    df = pd.DataFrame({
        "country": country,
        "scenario": f"{base_scenario}|full-bf-bof-ccus",
        "year": years,
        "steel_production": total,
        "eaf_production": eaf,
        "dri_network_gas_production": dri_ch4,
        "dri_hydrogen_production": dri_h2,
        "blastfurnace_bof_production": bf_total,
    })
    return df, dri_mix, bf_mix


BUILDERS = {
    "full-dri-eaf": build_full_dri_eaf,
    "full-bf-bof-ccus": build_bf_bof_ccus,
    "import-dri": build_import_dri,
    "intermerdiary": build_intermediary,
}

In [11]:
def write_outputs(scenario_name: str, df_main: pd.DataFrame, dri_mix: pd.DataFrame, bf_mix: pd.DataFrame | None):
    out_dir = SCENARIO_DIRS[scenario_name]
    out_dir.mkdir(parents=True, exist_ok=True)

    # principal (format attendu par POMMES / votre schéma)
    df_main.to_csv(out_dir / "iron_and_steel.csv", index=False)

    # annexes : traçabilité des ratios
    dri_mix.to_csv(out_dir / "dri_mix.csv", index=False)
    if bf_mix is not None:
        bf_mix.to_csv(out_dir / "bf_fuel_mix.csv", index=False)


all_outputs = {}

for sc_name, builder in BUILDERS.items():
    p = SC_PARAMS[sc_name]

    df_list = []
    dri_list = []
    bf_list = []

    for (country, base_scenario), total in total_steel.items():
        base_row = base_rows[(country, base_scenario)]
        df_sc, dri_mix, bf_mix = builder(country, base_scenario, base_row, YEARS, total, p)

        # contrôles
        assert_non_negative(df_sc, [
            "steel_production",
            "eaf_production",
            "dri_network_gas_production",
            "dri_hydrogen_production",
            "blastfurnace_bof_production",
        ])

        # nouvelle identité de masse : steel = eaf + dri_gas + dri_h2 + bf
        s = (
            df_sc["eaf_production"]
            + df_sc["dri_network_gas_production"]
            + df_sc["dri_hydrogen_production"]
            + df_sc["blastfurnace_bof_production"]
        )
        err = (df_sc["steel_production"] - s).abs().max()
        if err > 1e-6:
            raise ValueError(f"Incohérence masse (steel vs somme routes incl. EAF) pour {country}/{base_scenario}/{sc_name}: {err}")

        check_coverage(df_sc, YEAR_START, YEAR_END)

        # mix DRI annoté
        dri_mix2 = dri_mix.copy()
        dri_mix2.insert(0, "country", country)
        dri_mix2.insert(1, "base_scenario", base_scenario)
        dri_mix2.insert(2, "tech_scenario", sc_name)

        df_list.append(df_sc)
        dri_list.append(dri_mix2)

        if bf_mix is not None:
            bf_mix2 = bf_mix.copy()
            bf_mix2.insert(0, "country", country)
            bf_mix2.insert(1, "base_scenario", base_scenario)
            bf_mix2.insert(2, "tech_scenario", sc_name)
            bf_list.append(bf_mix2)

    df_main = pd.concat(df_list, ignore_index=True)

# ajout des demandes H2 / CH4 pilotées par YAML
    df_main = add_fuel_demands(df_main)

    dri_out = pd.concat(dri_list, ignore_index=True)
    bf_out = pd.concat(bf_list, ignore_index=True) if bf_list else None
    write_outputs(sc_name, df_main, dri_out, bf_out)
    all_outputs[sc_name] = (df_main, dri_out, bf_out)

# on écrit aussi une copie de la config utilisée (traçabilité)
OUT_BASE.mkdir(parents=True, exist_ok=True)
with (OUT_BASE / "config_effective.yaml").open("w", encoding="utf-8") as f:
    yaml.safe_dump(CFG, f, sort_keys=False, allow_unicode=True)

print("OK: scénarios générés dans", OUT_BASE)

OK: scénarios générés dans /Users/simonbrigode/Desktop/tp_pommes_kraft/data_country/industry/pommes-h2-network/data-pommes/scenarios-steel


In [49]:
all_outputs["full-dri-eaf"][0].head(12)

Unnamed: 0,country,scenario,year,steel_production,dri_network_gas_production,dri_hydrogen_production,blastfurnace_bof_production
0,AT,GA|full-dri-eaf,2019,7592214.0,0.0,0.0,7592214.0
1,AT,GA|full-dri-eaf,2020,7592214.0,0.0,0.0,7592214.0
2,AT,GA|full-dri-eaf,2021,7592214.0,0.0,0.0,7592214.0
3,AT,GA|full-dri-eaf,2022,7592214.0,0.0,0.0,7592214.0
4,AT,GA|full-dri-eaf,2023,7592214.0,0.0,0.0,7592214.0
5,AT,GA|full-dri-eaf,2024,7592214.0,0.0,0.0,7592214.0
6,AT,GA|full-dri-eaf,2025,7592214.0,0.0,0.0,7592214.0
7,AT,GA|full-dri-eaf,2026,7592214.0,303688.56,0.0,7288525.44
8,AT,GA|full-dri-eaf,2027,7592214.0,607377.12,0.0,6984836.88
9,AT,GA|full-dri-eaf,2028,7592214.0,911065.68,0.0,6681148.32


In [50]:
all_outputs["full-bf-bof-ccus"][2].head(12)  # bf_fuel_mix.csv (si présent)

Unnamed: 0,country,scenario,year,coal_share,biomass_share,h2_share,ccs_capture_rate
0,AT,full-bf-bof-ccus,2019,1.0,0.0,0.0,0.0
1,AT,full-bf-bof-ccus,2020,1.0,0.0,0.0,0.0
2,AT,full-bf-bof-ccus,2021,1.0,0.0,0.0,0.0
3,AT,full-bf-bof-ccus,2022,1.0,0.0,0.0,0.0
4,AT,full-bf-bof-ccus,2023,1.0,0.0,0.0,0.0
5,AT,full-bf-bof-ccus,2024,1.0,0.0,0.0,0.0
6,AT,full-bf-bof-ccus,2025,1.0,0.0,0.0,0.0
7,AT,full-bf-bof-ccus,2026,1.0,0.0,0.0,0.0
8,AT,full-bf-bof-ccus,2027,1.0,0.0,0.0,0.0
9,AT,full-bf-bof-ccus,2028,1.0,0.0,0.0,0.0
