In [None]:
import os
import sys
sys.path.append("../")

import math

import pandas as pd
import numpy as np
import datetime as dt
import re

import pymc3 as pm

import altair as alt
alt.data_transformers.disable_max_rows()

from src.preprocess.preprocess import load_data, split_last
from src.plot.altair import plot_total
from src.plot.formatting import dummy_forecast
from src.utils.preprocess import MinMaxScaler
from src.model.model import det_dot, drift_model, seasonality_model

In [None]:
# Transformer boxes this notebook has been run against. The index on the last
# line selects the single box that every following cell works on.
_boxid_candidates = (
    "ESD.000088-1",
    "063.623-1",
    "VRY.CHOPS-1",
    "HVT.111153-1",
    "TTR.251049-1",
    "BGL.CROLA-1",
)
boxid = _boxid_candidates[0]

In [None]:
# Load the measurements and the metadata of the selected box, then split the
# measurements into a training part and a test part.
# NOTE(review): what exactly `split_last` holds out is not visible here —
# presumably the most recent observations; confirm in src.preprocess.preprocess.
df_data, df_meta = load_data(boxid=boxid)
df_train, df_test = split_last(df_data)

In [None]:
# value_vars = ["max", "min"]
# df_data = df_data.melt(
#     id_vars=df_data.columns.difference(value_vars),
#     value_vars=value_vars,
#     var_name="extreme",
#     value_name="value",
# ).assign(period="history", model_var="observed")
# df_data

In [None]:
plot_total(df_data=df_data, df_meta=df_meta)

In [None]:
df_data

In [None]:
# NOTE(review): this repeats the split already done in the cell that calls
# load_data; one of the two calls can probably be removed.
df_train, df_test = split_last(df_data)

In [None]:
# Model a single side of the load envelope.
extreme = "max"
_observed_filter = f"extreme=='{extreme}' & model_var=='observed'"
df_observed = df_train.query(_observed_filter)

# Time and load get their own scaler so each can be inverted independently
# when the model output is converted back for plotting.
t_scaler = MinMaxScaler(lower=0)
t = t_scaler.fit_transform(X=df_observed["date"])

y_scaler = MinMaxScaler(lower=0)
y_observed = y_scaler.fit_transform(X=df_observed["value"])

# Seasonal period in scaled-time units: `t_scaler.min` shifted by 52.1775
# weeks (= 365.2425 days, one mean calendar year) and pushed through the
# time scaler.
# NOTE(review): assumes `t_scaler.min` is the earliest fitted date — confirm
# in src.utils.preprocess.
_one_year = dt.timedelta(weeks=52.1775)
p = t_scaler.transform(t_scaler.min + _one_year)

In [None]:
# Additive model on the scaled data: observation = drift + yearly + Gaussian
# noise with unknown standard deviation σ_ε.
with pm.Model() as m:
    
    # Trend and seasonal components are built by project helpers.
    # NOTE(review): the meaning of n=2 is not visible here — confirm in
    # src.model.model. A later cell requests "drift" and "yearly" by name from
    # the posterior predictive, so the helpers presumably register variables
    # under exactly those names.
    drift = drift_model(t, n=2)
    yearly = seasonality_model(t, p=p)
    
    # Noise scale with a Uniform(0, 1) prior, followed by the Normal
    # likelihood that ties the component sum to the scaled observations.
    σ_ε = pm.Uniform('σ_ε', lower=0, upper=1)
    Σ = pm.Normal("Σ", mu = drift + yearly, sd=σ_ε, observed=y_observed)
    
# Last expression of the cell: render the model graph as the cell output.
pm.model_to_graphviz(m)

In [None]:
# Draw posterior samples. The fixed seed (arbitrary value) makes the trace,
# and every table and figure derived from it, reproducible on a fresh
# "Restart Kernel -> Run All".
with m:
    trace = pm.sample(draws=500, tune=500, init="adapt_diag", random_seed=42)

In [None]:
# Posterior predictive draws for the two model components and the observed
# variable. The fixed seed (arbitrary value) keeps the quantile bands plotted
# further down identical between runs.
with m:
    ppc = pm.sample_posterior_predictive(
        trace,
        samples=1000,
        var_names=["drift", "yearly", "Σ"],
        random_seed=42,
    )

In [None]:
# samples = ppc["Σ"]
#  df_forecast[]

def format_trace_samples(t, samples, quantiles=(5, 15, 50, 85, 95)):
    """
    Summarise sample draws as quantile bands in long format.

    Parameters
    ----------
    t : array-like
        One entry per column of ``samples``; stored in the ``date`` column.
        A ``pd.Series`` is used positionally: its index is ignored, so a
        Series taken from a filtered DataFrame does not turn into NaT/NaN
        through index alignment.
    samples : array-like
        Draws with the sample dimension on axis 0 (quantiles are taken over
        that axis) and one column per entry of ``t``.
    quantiles : sequence of numbers
        Percentiles (0-100). They are paired outside-in (first with last,
        second with second-to-last, ...), so the sequence is expected to be
        sorted ascending; with an odd length the middle entry is paired with
        itself. A "Q50-Q50" pair is labelled "median".

    Returns
    -------
    pd.DataFrame
        Columns ``band``, ``date``, ``boundary`` ("upper" / "lower") and
        ``value``; all "upper" rows come first, then all "lower" rows.

    Raises
    ------
    ValueError
        If ``quantiles`` is empty.
    """
    quantiles = list(quantiles)
    if not quantiles:
        raise ValueError("quantiles must contain at least one value")

    # Drop the index of a Series so that `.assign(date=...)` below matches the
    # rows by position instead of aligning on index labels.
    if isinstance(t, pd.Series):
        t = t.reset_index(drop=True)

    # get quantiles (one row per requested quantile)
    q_data = np.quantile(samples, [q / 100 for q in quantiles], axis=0)
    boundaries = ["upper", "lower"]

    # create bands from two quantile boundaries (median: upper=lower);
    # collect them in a list and concatenate once instead of growing a frame
    bands = []
    for ci in range(math.ceil(len(quantiles) / 2)):
        upper_i, lower_i = -ci - 1, ci
        band_range = f"Q{quantiles[lower_i]}-Q{quantiles[upper_i]}".replace(
            "Q50-Q50", "median"
        )
        bands.append(
            pd.DataFrame(
                data=q_data[[upper_i, lower_i]].T, columns=boundaries
            ).assign(date=t, band=band_range)
        )
    df_Q = pd.concat(bands, axis=0)

    # in long format
    return df_Q.melt(
        id_vars=["band", "date"],
        value_vars=boundaries,
        var_name="boundary",
        value_name="value",
    )

In [None]:
def format_ppc(date, ppc):
    """
    Convert posterior predictive samples into one long-format band table.

    Relies on the notebook globals ``y_scaler`` (fitted value scaler),
    ``extreme`` (label of the modelled side) and ``format_trace_samples``.

    Parameters
    ----------
    date : array-like
        Dates passed on to ``format_trace_samples`` as ``t``.
    ppc : dict
        Maps a model variable name to its array of samples (scaled values).

    Returns
    -------
    pd.DataFrame
        The band tables of all variables stacked on top of each other, with
        the extra columns ``period`` ("history"), ``extreme`` and
        ``model_var``. An empty DataFrame when ``ppc`` is empty.
    """
    frames = []
    for var, data in ppc.items():
        samples = y_scaler.inverse_transform(data)
        if var == "yearly":
            # The yearly term is an offset around zero, so the scaler minimum
            # added by the inverse transform is taken out again. A new array
            # is built instead of `-=` so the scaler output is never mutated.
            samples = samples - y_scaler.min
        frames.append(
            format_trace_samples(t=date, samples=samples).assign(
                period="history", extreme=extreme, model_var=var
            )
        )

    if not frames:
        return pd.DataFrame()
    # Concatenate once: no quadratic re-copying and no concat with an empty
    # placeholder frame.
    return pd.concat(frames, axis=0)

In [None]:
df_estimates = format_ppc(df_observed["date"], ppc)

In [None]:
# plot_total(df_data=pd.concat([df_observed, df_estimates]))
# NOTE(review): this overwrites `df_data` (originally the full frame returned
# by load_data) with the training observations plus the model estimates.
# Every later cell that reads `df_data` sees this combined frame, and
# re-running earlier cells gives different inputs. A separate name
# (e.g. `df_plot`) would avoid the hidden state, but later cells would need
# checking first.
df_data = pd.concat([df_observed, df_estimates])


# One chart per model variable; they are stacked in the next cell.
total_plot = plot_total(
    df_data=df_data.query("model_var in ('observed', 'Σ')"), df_meta=df_meta
).properties(title="history & forecast")
drift_plot = plot_total(df_data.query("model_var=='drift'")).properties(
    title="drift / trend", height=100
)
yearly_plot = plot_total(df_data.query("model_var=='yearly'")).properties(
    title="yearly pattern", height=100
)

In [None]:
# Stack the three charts vertically (`&` is Altair's vertical concatenation).
# NOTE(review): `selection` is created but not used, because the
# `.add_selection(selection)` call below is commented out.
selection = alt.selection_interval(bind='scales', encodings=['x'])
(total_plot & drift_plot & yearly_plot)#.add_selection(selection)

In [None]:
# def format_trace_samples(t, samples, quantiles=[5, 15, 50, 85, 95]):

#     q_data = np.quantile(samples, [q / 100 for q in quantiles], axis=0)
#     boundaries = ["upper", "lower"]
#     df_Q = pd.DataFrame(columns=boundaries)
#     for ci in range(math.ceil(len(quantiles) / 2)):
#         upper_i, lower_i = -ci - 1, ci
#         forecast_range = f"Q{quantiles[lower_i]}-Q{quantiles[upper_i]}".replace("Q50-Q50", "median")
#         df_interval = pd.DataFrame(
#             data=q_data[[upper_i, lower_i]].T, columns=boundaries
#         ).assign(date=t, forecast=forecast_range)

#         df_Q = pd.concat([df_Q, df_interval], axis=0)

#     return df_Q

In [None]:
# NOTE(review): this cell looks stale. With the format_trace_samples defined
# in this notebook the result is long format (columns band / date / boundary /
# value), so the "lower" and "upper" lookups below raise KeyError. It matches
# the older wide-format version that is kept as commented-out code above.
# Intent: band table for Σ, values and dates converted back from scaled units
# (`t` is the scaled time axis), then plotted together with the data.
df_forecast = format_trace_samples(t, ppc["Σ"]).assign(extreme=extreme)
df_forecast["lower"] = y_scaler.inverse_transform(df_forecast["lower"])
df_forecast["upper"] = y_scaler.inverse_transform(df_forecast["upper"])
df_forecast["date"] = t_scaler.inverse_transform(df_forecast["date"])
total = plot_total(df_data=df_data, df_forecast=df_forecast, df_meta=None)
total

In [None]:
# NOTE(review): stale in the same way as the Σ cell: format_trace_samples as
# defined in this notebook returns long format without "lower"/"upper"
# columns, so the lookups below raise KeyError.
# NOTE(review): the name `drift` assigned at the end rebinds the variable of
# the same name created inside the model cell.
df_forecast = format_trace_samples(t, ppc["drift"]).assign(extreme=extreme)
df_forecast["lower"] = y_scaler.inverse_transform(df_forecast["lower"])
df_forecast["upper"] = y_scaler.inverse_transform(df_forecast["upper"])
df_forecast["date"] = t_scaler.inverse_transform(df_forecast["date"])
drift = plot_total(df_forecast=df_forecast).properties(width=800, height = 100)
drift

In [None]:
# NOTE(review): third copy of the same cell pattern (Σ, drift, yearly), and
# stale in the same way: format_trace_samples as defined in this notebook
# returns long format without "lower"/"upper" columns, so the lookups below
# raise KeyError. Unlike format_ppc, this cell does not subtract
# `y_scaler.min` for the yearly component.
df_forecast = format_trace_samples(t, ppc["yearly"]).assign(extreme=extreme)
df_forecast["lower"] = y_scaler.inverse_transform(df_forecast["lower"])
df_forecast["upper"] = y_scaler.inverse_transform(df_forecast["upper"])
df_forecast["date"] = t_scaler.inverse_transform(df_forecast["date"])
plot_total(df_forecast=df_forecast).properties(width=800, height=100)

## forecast

In [None]:
# value_vars = ["max", "min"]
# df_data.melt(
#     id_vars=df_data.columns.difference(value_vars),
#     value_vars=value_vars,
#     var_name="history",
#     value_name="extreme",
# )
# Reshape wide "max" / "min" columns into long format: one row per extreme,
# with the measurement in "value", tagged as observed history.
# NOTE(review): probably stale. Earlier cells already query `extreme` and
# `model_var` columns on data derived from `df_data`, and `df_data` has been
# reassigned by the plotting cell above, so at this point the frame
# presumably has no "max" / "min" columns left to melt. The cell also
# overwrites `df_data` again. Confirm before relying on it.
value_vars = ["max", "min"]
df_data = df_data.melt(
    id_vars=df_data.columns.difference(value_vars),
    value_vars=value_vars,
    var_name="extreme",
    value_name="value",
).assign(period="history", model_var="observed")

In [None]:
df_data

In [None]:
def extrapolate_data(df, horizon=dt.timedelta(weeks=26)):
    """
    Build an empty weekly forecast frame that continues ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Needs the columns ``date``, ``boxid``, ``l`` and ``extreme``. The
        values of ``boxid``, ``l`` and ``extreme`` are copied from the first
        row (by position, so any index is fine).
    horizon : datetime.timedelta
        Length of the forecast window, counted from the latest date in ``df``.

    Returns
    -------
    pd.DataFrame
        One row per week, starting AT the latest date of ``df`` and running
        up to and including ``latest + horizon``, with the columns ``date``,
        ``boxid``, ``l``, ``extreme``, ``period`` ("forecast"), ``year`` and
        ``week`` (ISO calendar).

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    if df.empty:
        raise ValueError("cannot extrapolate from an empty DataFrame")

    step = dt.timedelta(weeks=1)
    t_start = df["date"].max()
    # Same number of points as a half-open range [t_start, t_start + horizon
    # + step) with a one-week step.
    n_steps = max(math.ceil((horizon + step) / step), 0)
    dates = pd.date_range(start=t_start, periods=n_steps, freq=step)

    # `.iloc[0]` takes the first row by position; `df[col][0]` would look up
    # the index LABEL 0 and fail (or return several rows) on a non-default
    # index.
    df_extra = pd.DataFrame({"date": dates}).assign(
        boxid=df["boxid"].iloc[0],
        l=df["l"].iloc[0],
        extreme=df["extreme"].iloc[0],
        period="forecast",
    )
    iso = df_extra["date"].dt.isocalendar()
    df_extra[["year", "week"]] = iso[["year", "week"]]

    return df_extra

In [None]:
df_forecast = extrapolate_data(df_data)

In [None]:
df_forecast

In [None]:
# def plot_history(df):
#     """
#     Plot the historic load of a transformer.

#     Parameters
#     ----------
#     df : pd.DataFrame
#         DataFrame with the columns: date, week, year, power, history(min, max)

#     Returns
#     -------
#         Altair chart
#     """
#     alt_history = (
#         alt.Chart(df.query("model_var == 'observed'"))
#         .mark_point(color="black")
#         .encode(
#             x=alt.X("date:T", title="date"),
#             y=alt.Y("value:Q", title="power [kW]"),
#             shape=alt.Shape(
#                 "extreme:N", title="history",
#                 scale=alt.Scale(
#                     domain=["max", "min"],
#                     range=["triangle-up", "triangle-down"],
#                 ),
#             ),
#             tooltip=[
#                 alt.Tooltip("power:Q", format=".2f"),
#                 alt.Tooltip("extreme:N"),
#                 alt.Tooltip("year:Q"),
#                 alt.Tooltip("week:Q"),
#             ],
#         )
#         .properties(width=800)
#     ).interactive()
#     return alt_history

In [None]:
# plot_history(df=df_plot)

In [None]:
df_forecast = format_ppc(df_data["date"], ppc)
df_forecast

In [None]:
df_estimate = df_forecast.query("model_var == 'Σ'")
df_estimate

In [None]:
def plot_estimates(df):
    """
    Plot the quantile bands of a model estimate as layered areas.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format estimates with the columns ``boundary`` ("lower" /
        "upper"), ``value``, ``band`` (e.g. "Q5-Q95", "Q15-Q85", "median"),
        ``date`` and ``extreme``. All columns other than ``boundary`` and
        ``value`` are used as the pivot index.

    Returns
    -------
        Altair chart
    """
    # to solve: using longformat for y and y2, till then reformat boundaries
    # so every row carries both a "lower" and an "upper" column
    df = (
        df.pivot(
            index=df.columns.difference(["boundary", "value"]),
            columns="boundary",
            values="value",
        )
        .reset_index()
        .rename_axis(columns=None)
    )

    # get ranges and coloring correct: order the band labels by length and
    # then alphabetically, move "median" to the end, and reverse so that the
    # median comes first in the legend.
    ranges = list(df["band"].unique())
    ranges.sort(key=lambda item: (len(item), item))
    if "median" in ranges:
        ranges.remove("median")
        ranges.append("median")
    ranges = ranges[::-1]

    # Colour factor per band: twice the lower quantile as a fraction
    # (Q5 -> 0.1, Q15 -> 0.3); labels without a leading "Q<number>-", such as
    # "median", get 1. A raw string keeps `\d` a regex escape instead of an
    # invalid string escape.
    parsed = [re.match(r"^Q(\d{1,2})-", s) for s in ranges]
    factors = [1 if res is None else (2 * int(res.group(1)) / 100) for res in parsed]
    # NOTE(review): `lightness_scale` is neither defined nor imported in the
    # visible part of this notebook — confirm where it comes from (possibly
    # src.plot.formatting); on a fresh kernel this line raises NameError.
    colors = [lightness_scale(f) for f in factors]

    return (
        alt.Chart(df)
        .mark_area(line=True)
        .encode(
            x=alt.X("date:T"),
            y=alt.Y("lower:Q", stack=None, title=""),
            y2=alt.Y2("upper:Q", title=""),
            color=alt.Color(
                "band:N",
                title="estimate",
                scale=alt.Scale(domain=ranges, range=colors),
            ),
            detail="extreme:N",
        )
        .properties(width=800)
        .interactive()
    )

In [None]:
plot_estimates(df_estimate)

In [None]:
df_data

In [None]:
# NOTE(review): fails on a fresh kernel as the notebook stands. `plot_history`
# exists only as commented-out code in an earlier cell, and `df_fit` is only a
# local variable inside format_ppc, never a notebook-level name.
plot_history(pd.concat([df_data, df_fit], axis=0))

In [None]:
t_fc = t_scaler.transform(df_forecast["date"])
t_hist = t_scaler.transform(df_data["date"])

In [None]:
# # :

#     q_data = np.quantile(samples, [q / 100 for q in quantiles], axis=0)
#     boundaries = ["upper", "lower"]
#     df_Q = pd.DataFrame(columns=boundaries)
#     for ci in range(math.ceil(len(quantiles) / 2)):
#         upper_i, lower_i = -ci - 1, ci
#         forecast_range = f"Q{quantiles[lower_i]}-Q{quantiles[upper_i]}".replace("Q50-Q50", "median")
#         df_interval = pd.DataFrame(
#             data=q_data[[upper_i, lower_i]].T, columns=boundaries
#         ).assign(date=t, forecast=forecast_range)

#         df_Q = pd.concat([df_Q, df_interval], axis=0)

#     return df_Q

In [None]:
# def extrapolate_time(t = df["date"], horizon=dt.timedelta(weeks=26)):
#     t_start = t.max()
#     t_end = t_start + horizon + dt.timedelta(weeks=1)
#     t_forecast = np.arange(t_start, t_end, dt.timedelta(weeks=1))
# #     df_forecast = pd.DataFrame(data=data, columns=["date"])
# #     df_forecast[["year", "week"]] = df_forecast["date"].dt.isocalendar().iloc[:,:-1]
#     return t

In [None]:
# NOTE(review): fails on a fresh kernel as the notebook stands.
# `extrapolate_time` exists only as commented-out code in the cell above;
# `extrapolate_data` is the defined helper that builds a weekly forecast
# frame with a "date" column.
df_t_forecast = extrapolate_time(df_data)
df_t_forecast["date"]