In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

# Column names (and their order) written to the output file by default.
DEFAULT_COLS = [
    "electricity_pricing",
    "electricity_pricing_predicted_1",
    "electricity_pricing_predicted_2",
    "electricity_pricing_predicted_3",
]


def _parse_price_column(values):
    """Return *values* as a float Series; unparseable entries become NaN.

    ``read_csv(decimal=',')`` leaves a column as text whenever a single cell
    cannot be parsed (a '-' placeholder, a thousands separator, ...).  In that
    case the remaining cells still look like "95,32" or "1.234,50" and have to
    be converted from German notation by hand before ``pd.to_numeric`` can
    read them.
    """
    if pd.api.types.is_numeric_dtype(values):
        return pd.to_numeric(values, errors="coerce")

    text = values.astype(str).str.strip()
    # Only rewrite cells that actually use a decimal comma, so that text such
    # as "95.32" keeps its meaning.
    has_comma = text.str.contains(",", regex=False, na=False)
    german = (
        text.str.replace(".", "", regex=False)   # drop thousands separators
            .str.replace(",", ".", regex=False)  # decimal comma -> point
    )
    return pd.to_numeric(text.where(~has_comma, german), errors="coerce")


def smard_to_citylearn(
    smard_in,
    out_csv,
    src_col=None,
    template_csv=None,
    expected_len=None,
    columns=None
):
    """Convert a SMARD price export into a four-column pricing CSV.

    The source price column (given in €/MWh) is divided by 1000 and written
    as ``electricity_pricing``; the three ``electricity_pricing_predicted_*``
    columns are plain copies of it.

    Args:
        smard_in: Path of the semicolon-separated SMARD CSV (decimal comma).
        out_csv: Path of the CSV file to write (without index column).
        src_col: Name of the price column in ``smard_in``.  If None, the
            first column whose name contains '€/MWh' is used.
        template_csv: Optional CSV whose row count supplies ``expected_len``
            (unless that is given explicitly) and whose header supplies the
            output column names (unless ``columns`` is given).
        expected_len: If not None, the output is truncated to this many rows
            and gaps are forward- then backward-filled.
        columns: Optional output column names.  Either the default names in
            any order, or four distinct names that replace the default names
            position by position.

    Raises:
        KeyError: If no '€/MWh' column is found, or ``src_col`` is missing.
        ValueError: If the source has fewer rows than ``expected_len``, or
            the requested column names cannot be mapped to the four outputs.
    """
    smard_in = Path(smard_in)
    out_csv = Path(out_csv)

    tpl = None
    if template_csv is not None:
        tpl = pd.read_csv(template_csv)
        if expected_len is None:
            expected_len = len(tpl)

    # Precedence for the output header: explicit columns > template > default.
    if columns is not None:
        cols_expected = list(columns)
    elif tpl is not None:
        cols_expected = list(tpl.columns)
    else:
        cols_expected = list(DEFAULT_COLS)

    df_raw = pd.read_csv(smard_in, sep=';', decimal=',')

    if src_col is None:
        candidates = [c for c in df_raw.columns if '€/MWh' in c]
        if not candidates:
            raise KeyError(
                "Keine Spalte mit '€/MWh' gefunden. Übergib src_col explizit."
            )
        src_col = candidates[0]
    elif src_col not in df_raw.columns:
        raise KeyError(
            f"Spalte '{src_col}' nicht in {smard_in} gefunden. "
            f"Verfügbare Spalten: {list(df_raw.columns)}"
        )

    # €/MWh -> €/kWh
    p = _parse_price_column(df_raw[src_col]) / 1000.0

    df_out = pd.DataFrame({name: p for name in DEFAULT_COLS})

    if expected_len is not None:
        if len(df_out) < expected_len:
            raise ValueError(
                f"SMARD hat {len(df_out)} Zeilen, erwartet sind {expected_len}. Zeitraum/Quelle angleichen."
            )
        df_out = df_out.iloc[:expected_len].ffill().bfill()

    if set(cols_expected) == set(DEFAULT_COLS):
        # Same names: honour the requested order, ignoring repeated names.
        df_out = df_out[list(dict.fromkeys(cols_expected))]
    elif len(cols_expected) == 4 and len(set(cols_expected)) == 4:
        # Different names: map them onto the default columns by position.
        rename_map = dict(zip(DEFAULT_COLS, cols_expected))
        df_out = df_out.rename(columns=rename_map)[cols_expected]
    else:
        raise ValueError(
            f"Unerwartete Spaltenvorgabe: {cols_expected}. Erwartet 4 Spalten."
        )

    df_out.to_csv(out_csv, index=False)
    print(f"Wrote {out_csv} with shape {df_out.shape} (src_col='{src_col}')")

if __name__ == "__main__":
    # Folder holding the raw SMARD export, the template file and the result.
    data_dir = Path("Bachelorthesis_DQN_Agent/data/datasets/citylearn_challenge_2023_phase_3_1")

    TEMPLATE = data_dir.joinpath("pricing.csv")
    SMARD_IN = data_dir.joinpath("pricing_germany_2023_june_to_august_raw.csv")
    OUT_CSV = data_dir.joinpath("pricing_germany_2023_june_to_august.csv")

    # Row count and header come from the template; expected_len and columns
    # are left at their defaults (None).
    smard_to_citylearn(
        SMARD_IN,
        OUT_CSV,
        src_col="Deutschland/Luxemburg [€/MWh] Originalauflösungen",
        template_csv=TEMPLATE,
    )

Wrote Bachelorthesis_DQN_Agent/data/datasets/citylearn_challenge_2023_phase_3_1/pricing_germany_2023_june_to_august.csv with shape (2208, 4) (src_col='Deutschland/Luxemburg [€/MWh] Originalauflösungen')
