In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from darts import TimeSeries

from aare.constants import LOC_BERN, LOC_THUN, TIME, TEMP
from aare.remote_existenz_store import RemoteExistenzStore

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are reloaded
# before each cell is executed instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Data store that the hydro measurements are queried from further down.
store = RemoteExistenzStore()

In [None]:
# Aggregation frequency passed to the query; the same value is reused below for
# resampling and for building the darts TimeSeries.
freq = "1h"
# Period start to use when querying influx; starting at 0 simply returns all the data.
ANYTIME = "0"

In [None]:
# Query the hydro data for Bern from the beginning of the record, aggregated to `freq`.
df = store.query_hydro(ANYTIME, LOC_BERN, agg_freq=freq)
# Keep an untouched copy so the cleaning steps below can be re-run without querying again.
o_df = df.copy()
df

In [None]:
# Restore the original df for iterative development; re-run this cell whenever the
# working copy needs to be reset.
df = o_df.copy()

In [None]:
# Count the rows for each combination of missing / non-missing flags across the columns.
df.isna().value_counts()

In [None]:
# Number of rows and columns of the raw data.
df.shape

In [None]:
# Column dtypes of the raw data.
df.dtypes

In [None]:
# Summary statistics, transposed so that each column of df becomes one row.
df.describe().T

In [None]:
# Sanity check before resampling: within one `freq` time-step no column may hold
# more than one data point. The single tolerated exception is the trailing
# time-step at the end of the series, which may contain an extra data point.
# `.any(axis=1)` collapses the per-column counts into one flag per time-step, so
# the check also works when df has several value columns (a plain `.sum().item()`
# raises as soon as there is more than one column).
_crowded_steps = (df.set_index(TIME).resample(freq).count() > 1).any(axis=1)
# Every time-step except the last one must be clean; this enforces both "at most
# one" and "only at the end of the series".
assert not _crowded_steps.iloc[:-1].any(), (
    "Has more than one data point within one time-step (other than the trailing one) "
    "according to frequency"
)

In [None]:
# Put the data on a regular `freq` grid. Time-steps without data show up as NaN rows,
# and because only the first value of each time-step is kept, an extra trailing data
# point (e.g. 18:55 following 18:00) is dropped as well.
df = (
    df.set_index(TIME)
    .resample(freq)
    .first()
    .reset_index(level=TIME)
)

In [None]:
# Missing-value combinations again, now including the NaN rows added by resampling.
df.isna().value_counts()

In [None]:
# Plot the temperature over time to inspect the resampled series visually.
px.line(df, TIME, TEMP)

In [None]:
# Replace every temperature reading that is zero or negative with NaN.
# NOTE(review): presumably such readings are measurement errors — confirm.
df[TEMP] = df[TEMP].mask(df[TEMP] <= 0)

In [None]:
# Shape after resampling and blanking the non-positive temperatures.
df.shape

In [None]:
# Summary statistics of the cleaned data, one row per column.
df.describe().T

In [None]:
# Missing-value combinations after the non-positive temperatures were set to NaN.
df.isna().value_counts()

In [None]:
# Plot the cleaned temperature series.
px.line(df, TIME, TEMP)

In [None]:
# Absolute change of the temperature between consecutive rows (NaN for the first row
# and wherever one of the two neighbouring values is missing).
df["temp_diff"] = df[TEMP].diff().abs()

In [None]:
# Show the rows where the temperature jumped by more than 10 from one row to the next.
df.query("temp_diff > 10")

In [None]:
# Index the frame by time and strip the timezone information, because darts wants
# timezone-naive timestamps. All the data is in UTC anyway, so a conversion is
# necessary on display no matter what.
df = df.set_index(TIME).tz_localize(None)

In [None]:
# Build a darts TimeSeries with frequency `freq` from the time-indexed frame.
# NOTE(review): df still carries the helper column "temp_diff" at this point, so it
# presumably ends up as an extra component of the series — confirm this is intended.
ts = TimeSeries.from_dataframe(df, freq=freq)
ts

In [None]:
# Inspect the gaps that darts reports for the series.
ts.gaps()