In [None]:
import pandas as pd 
from pvlib.location import Location
from functools import lru_cache
from datetime import date
from backend_leistung import get_day_and_update
import polars as pl
from calendar import monthrange
import numpy as np
from itertools import combinations
import re




PATH_META = "allgemein.csv"
PATH_ERTRAG = "app/data/ertrag.parquet"
PATH_DELTA = "./delta-table/"
CACHE_SIZE=32

class Standort:
    def __init__(self,standort:str,):
        self.standort=standort
        self.meta = self.__get_meta_data()
        return None
    
    def __get_meta_data(self):
        allgemein = pd.read_csv(PATH_META)
        meta = allgemein.loc[allgemein.id==self.standort].iloc[0].to_dict()
        return meta
        
    @lru_cache(maxsize=CACHE_SIZE)
    def calculate_sunrise_times(self, datum: date):
        """Berechne Sonnenaufgang und -untergang."""
        location = Location(self.meta["lat"], self.meta["lon"], tz=self.meta["tz"], altitude=self.meta["alt"])
        times_for_sun = pd.DatetimeIndex([datum + pd.Timedelta(hours=24)], tz=self.meta["tz"])
        sun_df = location.get_sun_rise_set_transit(times_for_sun, method='spa')
        display(sun_df)
        return sun_df['sunrise'].iloc[0], sun_df['sunset'].iloc[0]
    
    ##############################################################################################################
    ## Leistungs-Daten:
    @lru_cache(maxsize=CACHE_SIZE)
    def load_total_power_of_day(self, datum: date,ttl_hash=None) -> pd.DataFrame:
        return (
            get_day_and_update(self.standort,datum)
            .filter(
                (pl.col("string") == -1) &
                (pl.col("sensor") == "P")
            )
            .group_by("Datetime")
            .agg(pl.col("value").sum().alias("P_gesamt"))
            .sort("Datetime")
            .collect(engine="streaming")
            .to_pandas()
        )
    
    @lru_cache(maxsize=CACHE_SIZE)
    def load_wr_power_of_day(self, datum: date,ttl_hash=None) -> pd.DataFrame:
        df_polars = get_day_and_update(self.standort,datum)
        return (
            df_polars
            .filter(
                (pl.col("string") == -1) &
                (pl.col("sensor") == "P")
            )
            #.pivot(on="wr",on_columns=df_polars.select("wr").unique().sort(by="wr").collect(),index="Datetime",values="value")
            .sort("Datetime")
            .collect(engine="streaming")
            .to_pandas()
        )
        
    @lru_cache(maxsize=CACHE_SIZE)
    def load_string_power_of_day(self, datum: date,ttl_hash=None) -> pd.DataFrame:
        df_polars = get_day_and_update(self.standort,datum)
        return (
            df_polars
            .filter(
                (pl.col("string") != -1) &
                (pl.col("sensor") == "P")
            )
            .sort("Datetime")
            .collect(engine="streaming")
            .to_pandas()
        )

    ##############################################################################################################
    ## Ertrags-Daten:
    
    @lru_cache
    def load_daily_yield_this_month(self):
        data_polars = pl.scan_parquet(PATH_ERTRAG)

        heute = date.today()
        akt_jahr = heute.year
        akt_monat = heute.month
        # Anzahl Tage im aktuellen Monat
        days_in_month = monthrange(akt_jahr, akt_monat)[1]
        
        df_monat = (
            data_polars
            .filter(
                (pl.col("standort").str.to_lowercase() == self.standort) &
                (pl.col("date").dt.year() == akt_jahr) &
                (pl.col("date").dt.month() == akt_monat)
            )
            .group_by(["standort", "date"])
            .agg(
                pl.col("value").sum().alias("value_sum")/1000
            )
            .sort("date")   # optional: nach Datum sortieren
            .collect(engine="streaming")
        )

        numpy_array =  df_monat.to_pandas()["value_sum"].values
        # Array auffüllen bis zur Länge des Monats
        if len(numpy_array) < days_in_month:
            fill_length = days_in_month - len(numpy_array)
            numpy_array = np.pad(numpy_array, (0, fill_length), 'constant', constant_values=0)

        # Als Liste zurückgeben
        return np.round(numpy_array,1)
    
    @lru_cache
    def load_monthly_yield_this_year(self):
        data_polars = pl.scan_parquet(PATH_ERTRAG)

        heute = date.today()
        akt_jahr = heute.year
        
        # Daten für das aktuelle Jahr filtern
        df_jahr = (
            data_polars
            .filter(
                (pl.col("standort").str.to_lowercase() == self.standort.lower()) &
                (pl.col("date").dt.year() == akt_jahr)
            )
            .with_columns([
                pl.col("date").dt.month().alias("month")  # Monat extrahieren
            ])
            .group_by("month")
            .agg(
                (pl.col("value").sum()/1000).alias("value_sum")  # kWh
            )
            .sort("month")
            .collect(engine="streaming")
        )

        # In ein dict für schnelles Auffüllen
        month_dict = {m: v for m, v in zip(df_jahr["month"], df_jahr["value_sum"])}
        
        # Array für 12 Monate erstellen
        ertrag_liste = [month_dict.get(i, 0) for i in range(1, 13)]
        
        # Runde auf 1 Nachkommastelle und als numpy Array zurückgeben
        return np.round(ertrag_liste, 1)
    
    @lru_cache
    def load_total_yield(self):
        data_polars = pl.scan_parquet(PATH_ERTRAG)

        # Filter nur nach Standort
        df = (
            data_polars
            .filter(pl.col("standort").str.to_lowercase() == self.standort.lower())
            .select(
                (pl.col("value").sum() / 1000).alias("total_sum")  # Summe in kWh
            )
            .collect(engine="streaming")
        )

        total = df["total_sum"][0] if len(df) > 0 else 0
        return int(round(total, 0))  # gerundet als Integer
    
    @lru_cache
    def load_daily_yield_last_year(self):
        data_polars = pd.scan_parquet(PATH_ERTRAG)
        
            .assign(date=lambda df: pd.to_datetime(df["date"]).dt.date)
            .loc[lambda df: df["standort"] == selected_standort]
        )
    ##############################################################################################################
    ## Fehler-analyse:
    @lru_cache
    def calculate_error_statistics(self) -> pl.DataFrame:
        dl = pl.scan_delta(PATH_DELTA)
        filtered = dl.filter(
            (pl.col("standort") == self.standort)
            & (pl.col("string") == -1)
            & (pl.col("sensor") == "P")
        )

        unique_wr = (
            filtered.select(pl.col("wr")).unique().collect().to_series().to_list()
        )

        pivot_like = (
            filtered.group_by(pl.col("Datetime"))
            .agg(
                [
                    pl.col("value").filter(pl.col("wr") == v).max().alias(str(v))
                    for v in unique_wr
                ]
            )
            .sort("Datetime")
            .with_columns(pl.col("Datetime").dt.date().alias("date"))
        )

        columns = [str(v) for v in unique_wr]

        # Korrelationen zwischen allen Spalten
        corr_exprs = [
            pl.corr(pl.col(c1), pl.col(c2)).alias(f"corr_{c1}_{c2}")
            for c1, c2 in combinations(columns, 2)
        ]

        # Zero-Count nur, wenn andere Spalten ungleich 0 sind
        zero_count_exprs = []
        for c in columns:
            other_cols = [pl.col(col) for col in columns if col != c]
            zero_count_exprs.append(
                ((pl.col(c) == 0) & (pl.sum_horizontal(other_cols) != 0)).sum().alias(f"{c}_zero_count")
            )

        # total_count nur für Zeilen, bei denen mindestens ein Wert != 0
        total_count_expr = (pl.any_horizontal([pl.col(c) != 0 for c in columns])).sum().alias("total_count")

        # Gruppieren nach Datum: Korrelationen + Zero-Counts + total_count
        date_groups = (
            pivot_like.group_by("date")
            .agg(
                corr_exprs
                + zero_count_exprs
                + [total_count_expr]
            )
        )

        # Zero-Fraction berechnen
        for c in columns:
            date_groups = date_groups.with_columns(
                (1-pl.col(f"{c}_zero_count") / pl.col("total_count")).fill_nan(0).alias(f"{c}_availability")
            )

        # total_count skalieren (0 bis Maximum)
        date_groups = date_groups.with_columns(
            (pl.col("total_count") / pl.col("total_count").max()).fill_nan(0).alias("total_availability")
        )

        # Alle Spaltennamen für spätere Verarbeitung
        cols = date_groups.collect_schema().names()

        # Matching-Spalten für Fehlerberechnung
        new_exprs = []
        for t in unique_wr:
            # nur Korrelationen, die die Nummer t enthalten
            matching = [c for c in cols if c.startswith("corr_") and re.search(rf"_{t}(_|$)", c)]
            if matching:
                new_exprs.append((t, matching))
        error_cols = [f"{t}_correlation" for t, _ in new_exprs]

        # Finale Berechnungen: Fehler-Median und Mittelwert
        final = (
            date_groups.with_columns(
                [
                    pl.max_horizontal([pl.col(c).fill_nan(0) for c in matching]).alias(
                        f"{t}_correlation"
                    )
                    for t, matching in new_exprs
                ]
            )
            .with_columns(
                pl.mean_horizontal([pl.col(c) for c in error_cols]).alias("mean_correlation")
            )
            .collect()
        )

        return final


In [66]:
from ui_tagesleistung import create_pv_plot
DH = Standort("muensingen")
dt = dt_aware.date()
create_pv_plot(DH.load_total_power_of_day(dt),DH.load_wr_power_of_day(dt),*DH.calculate_sunrise_times(dt)).show()

Unnamed: 0,sunrise,sunset,transit
2025-12-13 00:00:00+01:00,2025-12-12 08:04:31.734929664+01:00,2025-12-12 16:27:04.649338880+01:00,2025-12-12 12:15:52.624835840+01:00



Discarding nonzero nanoseconds in conversion.



In [73]:
from calendar_plot import plot_calendar_heatmap
Yellows = [[0.0, 'rgb(255, 250, 220)'],[1.0, 'rgb(255, 180, 0)']]

fig_heatmap = plot_calendar_heatmap(DH.load_electric_yield_this_year(), 
                                    date_col='date', 
                                    value_col='value', 
                                    formatting_colorscale=Yellows, 
                                    formatting_locale='de_DE', 
                                    formatting_scale=30,
                                    grid_width=4,
                                    formatting_value_formatter= lambda value: round(value/1_000),
                                    highlight_date=date.today())
    

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [74]:
DH.load_electric_yield_this_year()

array([ 718. , 2388.5, 5798.3, 8633.6, 9223.2, 9656.5, 8375.4, 8205.2,
       5358.9, 3810.7, 1925.3,  687.7])

In [46]:
DH.calculate_sunrise_times(dt)

(Timestamp('2025-12-11 08:03:36.665531136+0100', tz='Europe/Berlin'),
 Timestamp('2025-12-11 16:27:02.982362112+0100', tz='Europe/Berlin'))