# Maybe final form for deriving UW fields (builds/updates the CSVs of derived `uw_field` nodes)

In [48]:
# ===== Konfiguration ===========================================================
from pathlib import Path
import json
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Optional

# Adjust these paths to the local environment:
#   GRAPH_PATH - JSON file holding the graph elements (read by read_graph).
#   CSV_DIR    - directory containing one "<node_id>.csv" time-series file per node.
GRAPH_PATH = Path(r"C:\Users\M97947\OneDrive - E.ON\Dokumente\Thesis\Code\fca_leistungsbaender\exploratory\graph\graph_with_jubo_e01.json")
CSV_DIR    = Path(r"C:\Users\M97947\OneDrive - E.ON\Dokumente\Thesis\Code\fca_leistungsbaender\exploratory\handling_graph\out\nodes")

# Column names / physics
FEATURE_P     = "P_Datapoint_ID"   # CSV column summed over the source nodes (P)
FEATURE_Q     = "Q_Datapoint_ID"   # CSV column derived from P via the fixed power factor (Q)
# Weather columns copied unchanged from one suitable source node
WEATHER_COLS  = ["Windgeschw_Datapoint", "Globale_Strahlung_Datapoint", "Aussentemp_Datapoint"]
COS_PHI       = 0.95     # fixed power factor
GRID_FREQ     = "15min"  # fixed time grid all indices are rounded to
TIMEZONE      = None     # e.g. "Europe/Berlin"; None keeps timestamps tz-naive
FORCE_REBUILD = False    # True = rebuild the CSVs of the derived nodes from scratch
VERBOSE       = True     # gates the output of log()

def log(msg: str):
    """Print ``msg`` to stdout; does nothing while the module-level VERBOSE flag is falsy."""
    if not VERBOSE:
        return
    print(msg)

# ===== Helpers: Zeitindex normalisieren & Duplikate zusammenfassen ============
def _standardize_index(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize the time index of ``df`` and collapse duplicate timestamps.

    Steps, in order:
      1. Parse the index with ``pd.to_datetime``.
      2. If the index is tz-aware: convert to TIMEZONE (when set), then drop
         the timezone so the result is always tz-naive.
      3. Round every timestamp to GRID_FREQ (when set).
      4. For timestamps that occur more than once, keep the last non-null
         value of each column (NaN if the column has no valid value there).

    Args:
        df: Frame whose index holds timestamps (or values parseable as such).

    Returns:
        A new frame sorted by its index. An empty ``df`` is returned unchanged
        (same object, no normalization).
    """
    if df.empty:
        return df

    idx = pd.to_datetime(df.index)

    # Unify timezone handling: the output index is always tz-naive.
    # NOTE(review): dropping the tz after converting to a DST zone can yield
    # repeated wall-clock times, which step 4 then merges - confirm this is wanted.
    if getattr(idx, "tz", None) is not None:
        if TIMEZONE:
            idx = idx.tz_convert(TIMEZONE)
        idx = idx.tz_localize(None)

    # Snap to the fixed grid; this is what can create duplicate timestamps.
    if GRID_FREQ:
        idx = idx.round(GRID_FREQ)

    out = df.copy()
    out.index = idx

    # GroupBy.last() returns the last non-null entry per column and group,
    # which is what the former per-group Python callback did, but it runs
    # vectorized instead of one Python call per (group, column).
    return out.groupby(out.index).last().sort_index()

def _dedupe_merge(old: Optional[pd.DataFrame], new: pd.DataFrame) -> pd.DataFrame:
    """Combine previously stored rows with new rows and standardize the result.

    Args:
        old: Existing data, or None / an empty frame if there is none.
        new: Rows to add.

    Returns:
        The output of ``_standardize_index`` applied to ``old`` followed by
        ``new``. For a timestamp present in both, the value from ``new`` wins
        wherever ``new`` has a non-null value.
    """
    if old is None or old.empty:
        return _standardize_index(new)
    # Deliberately no sort_index() before standardizing: its default sort is
    # not stable, so it could reorder rows sharing a timestamp and make the
    # "last valid value wins" rule pick old over new. Concatenation order
    # (old first, new last) is what guarantees that new data takes precedence;
    # _standardize_index sorts the final result anyway.
    return _standardize_index(pd.concat([old, new]))

# ===== Graph laden & derived-Spezifikationen ==================================
def read_graph(path: Path) -> list[dict]:
    """Load the graph element list from a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed top-level JSON array.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    with path.open(encoding="utf-8") as handle:
        parsed = json.load(handle)
    if isinstance(parsed, list):
        return parsed
    raise ValueError("graph.json muss eine Liste sein.")

def split_nodes_edges(elements: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
    """Partition graph elements into nodes and edges based on their ``data`` keys.

    An element is an edge when its ``data`` has both ``source`` and ``target``;
    it is a node when ``data`` has ``id`` and no ``source``. Elements matching
    neither rule (e.g. ``source`` without ``target``) are dropped. Input order
    is preserved within each list.

    Returns:
        ``(nodes, edges)``.
    """
    node_list: List[Dict] = []
    edge_list: List[Dict] = []
    for element in elements:
        data = element.get("data", {})
        if "source" in data:
            # Anything carrying "source" can never count as a node.
            if "target" in data:
                edge_list.append(element)
        elif "id" in data:
            node_list.append(element)
    return node_list, edge_list

# Load the graph once when this cell/module runs and split it into nodes and edges.
elements = read_graph(GRAPH_PATH)
nodes, edges = split_nodes_edges(elements)
# Node lookup by id; if two nodes share an id, the later one in the file wins.
nodes_by_id: Dict[str, dict] = {n["data"]["id"]: n for n in nodes}

def ntype(nid: str) -> str:
    """Return the stripped ``data.type`` of node ``nid``, or "" if the node or its type is missing/empty."""
    node = nodes_by_id.get(nid, {})
    raw_type = node.get("data", {}).get("type")
    if not raw_type:
        return ""
    return raw_type.strip()

def feats(nid: str) -> dict:
    """Return ``data.features`` of node ``nid``; an empty dict if the node or its features are missing/empty."""
    data = nodes_by_id.get(nid, {}).get("data", {})
    features = data.get("features")
    return features if features else {}

def collect_derived_specs() -> Dict[str, dict]:
    """Collect the ``field_sum`` derivation specs of all ``uw_field`` nodes.

    Looks at ``features.derived`` of every node in ``nodes_by_id`` whose type
    is ``uw_field`` and whose ``derived.method`` equals ``"field_sum"``.
    Each entry of ``derived.terms`` may be either

      * a dict ``{"node": <id>, "coeff": <number>}`` (``coeff`` optional), or
      * a plain string node id (coefficient 1.0).

    Entries of any other shape are ignored, and nodes that end up with no
    usable term are skipped.

    Returns:
        ``{node_id: {"terms": [(source_id, coeff), ...], "bias": float}}``.
        A missing or null ``coeff`` becomes 1.0; a missing or null ``bias``
        becomes 0.0.
    """
    specs: Dict[str, dict] = {}
    for nid in nodes_by_id:
        if ntype(nid) != "uw_field":
            continue
        derived = feats(nid).get("derived")
        # A non-dict value (e.g. a stray string) cannot describe a derivation.
        if not isinstance(derived, dict) or derived.get("method") != "field_sum":
            continue

        terms = []
        for term in derived.get("terms") or []:
            if isinstance(term, dict) and "node" in term:
                raw_coeff = term.get("coeff")
                # Only a missing/null coefficient falls back to 1.0; an
                # explicit 0 must stay 0, so no `or 1.0` shortcut here.
                coeff = 1.0 if raw_coeff is None else float(raw_coeff)
                terms.append((term["node"], coeff))
            elif isinstance(term, str):
                terms.append((term, 1.0))
        if not terms:
            continue

        specs[nid] = {"terms": terms, "bias": float(derived.get("bias") or 0.0)}
    return specs

# Derivation specs keyed by target node id; read by pick_weather_source and
# update_or_build_derived_csv. Only a log message is emitted if none exist.
derived_specs = collect_derived_specs()
if not derived_specs:
    log("Keine derived UW-Felder gefunden (features.derived).")

# ===== CSV I/O pro Node ========================================================
# In-memory cache of already loaded and standardized node CSVs, keyed by node id
# (filled by load_node_csv; nodes without a CSV file are not cached).
_csv_cache: Dict[str, pd.DataFrame] = {}

def load_node_csv(node_id: str) -> Optional[pd.DataFrame]:
    """Load ``CSV_DIR/<node_id>.csv`` as a frame with a standardized time index.

    Results are memoized in ``_csv_cache``; the cached object itself is
    returned, so callers should not mutate it in place.

    The file is expected to have a ``timestamp`` column. If it does not, the
    first column whose lower-cased name contains "time" or "ts" is used
    instead.
    NOTE(review): the "ts" substring test is loose and may pick an unrelated
    column - confirm against the real CSV headers.

    Returns:
        The standardized frame, or None when the file does not exist.

    Raises:
        ValueError: If no timestamp-like column can be found.
    """
    try:
        return _csv_cache[node_id]
    except KeyError:
        pass

    csv_path = CSV_DIR / f"{node_id}.csv"
    if not csv_path.exists():
        return None

    frame = pd.read_csv(csv_path)
    if "timestamp" not in frame.columns:
        ts_col = None
        for name in frame.columns:
            lowered = name.lower()
            if "time" in lowered or "ts" in lowered:
                ts_col = name
                break
        if ts_col is None:
            raise ValueError(f"{csv_path} hat keine 'timestamp'-Spalte.")
        frame = frame.rename(columns={ts_col: "timestamp"})

    # Unparseable timestamps are coerced to NaT instead of raising.
    frame["timestamp"] = pd.to_datetime(frame["timestamp"], errors="coerce")
    frame = _standardize_index(frame.set_index("timestamp").sort_index())

    _csv_cache[node_id] = frame
    return frame

def save_node_csv(node_id: str, df: pd.DataFrame):
    """Write ``df`` to ``CSV_DIR/<node_id>.csv`` with the index stored as ``timestamp``.

    The output directory is created if it does not exist yet. Any cached copy
    of this node is dropped from ``_csv_cache`` afterwards so that a later
    ``load_node_csv(node_id)`` re-reads the freshly written file instead of
    returning the pre-write data.

    Args:
        node_id: Node whose CSV is (over)written.
        df: Data to write; its index becomes the ``timestamp`` column.
    """
    path = CSV_DIR / f"{node_id}.csv"
    path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(path, index_label="timestamp")
    # Invalidate rather than store df: the next load goes through the normal
    # read + standardize path and therefore matches what is on disk.
    _csv_cache.pop(node_id, None)

# ===== Wetterquelle bestimmen ==================================================
def pick_weather_source(target_id: str) -> Optional[str]:
    """Choose the source node whose CSV provides the weather columns for ``target_id``.

    Walks the target's terms in order and returns the first source whose CSV
    exists and contains every column of WEATHER_COLS. If no source qualifies,
    the first term's node id is returned as a fallback (its CSV may lack the
    weather columns or not exist at all).
    """
    term_list = derived_specs[target_id]["terms"]
    for candidate, _coeff in term_list:
        frame = load_node_csv(candidate)
        if frame is None:
            continue
        missing = [col for col in WEATHER_COLS if col not in frame.columns]
        if not missing:
            return candidate
    # Fallback: first source in the spec.
    return term_list[0][0]

# ===== Kern: CSV für derived UW-Feld erzeugen/aktualisieren ====================
def update_or_build_derived_csv(target_id: str, force_rebuild: bool = False):
    """Create or incrementally extend the CSV of one derived UW field.

    The rows are computed on the time index of the reference source (the
    first term of the spec):

      * P (FEATURE_P) = sum over sources of ``coeff * P_source`` plus bias.
        Timestamps a source does not have count as missing for that source;
        a row for which no source has a value stays NaN instead of
        becoming ``0 + bias``.
      * Q (FEATURE_Q) = P * tan(arccos(COS_PHI)).
      * Weather columns are copied from the source chosen by
        ``pick_weather_source`` (NaN where unavailable).
      * ``Strom_Limit_in_A`` and ``DAB_ID`` are copied as constant columns
        when present in the target node's features.

    With ``force_rebuild=False`` and an existing non-empty target CSV, only
    reference timestamps later than the last stored one are computed and
    appended. Otherwise the file is written from scratch; the old file is not
    even parsed in that case.

    Args:
        target_id: Id of the derived node; must be a key of ``derived_specs``.
        force_rebuild: Ignore any existing target CSV and rebuild all rows.

    Returns:
        None. Progress and problems are reported through ``log``; the
        function returns early when the reference source is missing/empty or
        the target is already up to date.
    """
    spec = derived_specs[target_id]
    terms = spec["terms"]
    bias = float(spec.get("bias", 0.0))

    # Reference source = first term; it defines the time index of the output.
    ref_id = terms[0][0]
    ref_df = load_node_csv(ref_id)
    if ref_df is None or ref_df.empty:
        log(f"⚠️ {target_id}: Referenzquelle {ref_id} nicht gefunden oder leer.")
        return

    # Existing target data. A forced rebuild discards it anyway, so skip the
    # load - that also lets a rebuild recover from an unreadable old file.
    tgt_old = None if force_rebuild else load_node_csv(target_id)
    existed = tgt_old is not None and not tgt_old.empty

    idx = ref_df.index
    if existed:
        idx = idx[idx > tgt_old.index.max()]
        if idx.empty:
            log(f"= {target_id}: bereits aktuell ({len(tgt_old)} Zeilen).")
            return

    # Weighted contribution of every source, aligned to idx.
    # NOTE(review): the per-source columns stay in the written CSV (as before);
    # confirm downstream readers expect them.
    df_P = pd.DataFrame(index=idx)
    contributions = []
    for src_id, coeff in terms:
        src_df = load_node_csv(src_id)
        if src_df is None or FEATURE_P not in src_df.columns:
            log(f"⚠️ {target_id}: Quelle {src_id} ohne {FEATURE_P} – setze NaN.")
            part = pd.Series(np.nan, index=idx)
        else:
            # reindex instead of .loc: timestamps missing in this source
            # become NaN rather than raising KeyError.
            part = src_df[FEATURE_P].reindex(idx).astype(float) * coeff
        contributions.append(part)
        df_P[src_id] = part

    # Sum the collected contributions (not the frame columns) so that a node
    # listed twice in the spec is counted twice. min_count=1 keeps rows
    # without any valid source value as NaN.
    stacked = pd.concat(contributions, axis=1, keys=range(len(contributions)))
    df_P[FEATURE_P] = stacked.sum(axis=1, min_count=1) + bias

    # Q from P via the fixed power factor.
    tan_phi = np.tan(np.arccos(COS_PHI))
    df_P[FEATURE_Q] = df_P[FEATURE_P] * tan_phi

    # Weather columns, copied 1:1 from the chosen source.
    wdf = load_node_csv(pick_weather_source(target_id))
    for col in WEATHER_COLS:
        if wdf is not None and col in wdf.columns:
            df_P[col] = wdf[col].reindex(idx)
        else:
            df_P[col] = np.nan

    # Static fields from the node features (optional).
    f = feats(target_id)
    if "Strom_Limit_in_A" in f:
        df_P["Strom_Limit_in_A"] = f["Strom_Limit_in_A"]
    if "DAB_ID" in f:
        df_P["DAB_ID"] = f["DAB_ID"]

    df_P = _standardize_index(df_P)

    # Append to the existing data or write a fresh file.
    if existed:
        out_df = _dedupe_merge(tgt_old, df_P)
        save_node_csv(target_id, out_df)
        log(f"✅ {target_id}: angehängt → +{len(df_P)} / gesamt {len(out_df)} Zeilen.")
    else:
        save_node_csv(target_id, df_P)
        log(f"✅ {target_id}: neu erstellt → {len(df_P)} Zeilen.")

    # The target may itself be a source of another derived field processed
    # later; make sure that load sees the file just written, not the old copy.
    _csv_cache.pop(target_id, None)

# ===== Ausführen: alle derived UW-Felder aktualisieren =========================
# Process every derived node in sorted id order; report when there is nothing to do.
if derived_specs:
    for target in sorted(derived_specs):
        update_or_build_derived_csv(target, force_rebuild=FORCE_REBUILD)
    log("Fertig.")
else:
    log("Nichts zu tun (keine derived-Nodes gefunden).")


✅ JUBO_E01: neu erstellt → 1344 Zeilen.
Fertig.
