In [4]:
from pathlib import Path
import pandas as pd
import numpy as np

# Directory that main() searches for input files.
DATA_DIR = Path(".")
# Glob pattern used to select the input files inside DATA_DIR.
PATTERN = "*.csv"
# Name of the timestamp column that every input file must contain.
TIME_COL = "time"          # or "local_time"
# File name under which main() writes the averaged profile.
OUTFILE = "mean_profile_8760.csv"

def load_csv(fp: Path) -> pd.DataFrame:
    """Read one CSV file and tag every row with a month-day-hour key.

    Lines starting with ``#`` are treated as comments by the CSV reader.
    The ``TIME_COL`` column is parsed as datetimes, rows falling on
    February 29 are removed, and a string column ``key`` of the form
    ``MM-DD-HH`` is added.

    Args:
        fp: Path of the CSV file to read.

    Returns:
        A copy of the file's rows (without Feb 29 and without rows whose
        timestamp is missing) with the parsed ``TIME_COL`` and the extra
        ``key`` column.

    Raises:
        ValueError: If the file has no ``TIME_COL`` column.
    """
    df = pd.read_csv(fp, comment="#")
    if TIME_COL not in df.columns:
        raise ValueError(f"{fp.name}: Spalte '{TIME_COL}' nicht gefunden. Spalten: {list(df.columns)}")

    df[TIME_COL] = pd.to_datetime(df[TIME_COL])
    stamps = df[TIME_COL]

    # Remove the leap day, and also drop rows without a timestamp: a missing
    # value (NaT) cannot be assigned to any hour of the year and would
    # otherwise end up in a malformed key.
    is_feb29 = (stamps.dt.month == 2) & (stamps.dt.day == 29)
    df = df.loc[stamps.notna() & ~is_feb29].copy()

    # Key: MM-DD-HH (zero-padded), built in one formatting pass.
    df["key"] = df[TIME_COL].dt.strftime("%m-%d-%H")
    return df

def main():
    """Average all input CSV files into one hourly profile and write it to OUTFILE.

    Every file in ``DATA_DIR`` matching ``PATTERN`` is loaded with
    ``load_csv``. Rows are grouped by their ``key`` (month-day-hour):
    numeric columns are averaged, and for all other columns the first value
    per group is kept. The result gets a synthetic time axis in the non-leap
    year 2001, is sorted by time and written as CSV without the index.

    Raises:
        FileNotFoundError: If no input file is found.
    """
    # OUTFILE may itself match PATTERN inside DATA_DIR. Skip it so that the
    # result of an earlier run is not averaged in as if it were another input.
    out_path = Path(OUTFILE).resolve()
    files = sorted(f for f in DATA_DIR.glob(PATTERN) if f.resolve() != out_path)
    if not files:
        raise FileNotFoundError(f"Keine CSV-Dateien in {DATA_DIR.resolve()} mit Pattern '{PATTERN}' gefunden")

    big = pd.concat([load_csv(f) for f in files], ignore_index=True)

    # Numeric columns are averaged; everything else except the grouping key
    # and the timestamp is carried over via the first value per group.
    numeric_cols = big.select_dtypes(include=[np.number]).columns.tolist()
    excluded = {*numeric_cols, "key", TIME_COL}
    non_numeric_cols = [c for c in big.columns if c not in excluded]

    grouped = big.groupby("key")
    mean_num = grouped[numeric_cols].mean()
    first_non_num = grouped[non_numeric_cols].first()

    mean_df = pd.concat([mean_num, first_non_num], axis=1).reset_index()

    # Build a dummy time axis in a non-leap year (2001) from the MM-DD-HH key.
    parts = mean_df["key"].str.split("-", expand=True)
    mean_df[TIME_COL] = pd.to_datetime(
        {
            "year": 2001,
            "month": parts[0].astype(int),
            "day": parts[1].astype(int),
            "hour": parts[2].astype(int),
        }
    )

    mean_df = mean_df.sort_values(TIME_COL).drop(columns=["key"])

    # Column order: TIME_COL first, the rest in their existing order.
    cols = [TIME_COL] + [c for c in mean_df.columns if c != TIME_COL]
    mean_df = mean_df[cols]

    mean_df.to_csv(OUTFILE, index=False)
    print(f"✅ Mittleres Profil erstellt: {OUTFILE} ({len(mean_df)} Stunden)")

# Run the averaging only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


✅ Mittleres Profil erstellt: mean_profile_8760.csv (8760 Stunden)
