In [None]:
import os
import sys
sys.path.append("../")

import math

import pandas as pd
import numpy as np
import datetime as dt
import re

import pymc3 as pm

import altair as alt
alt.data_transformers.disable_max_rows()

from src.preprocess.preprocess import load_data
from src.plot.altair import plot_total
from src.plot.formatting import split_last, dummy_forecast
from src.utils.preprocessing import MinMaxScaler
from src.model.model import det_dot, drift_model, seasonality_model

In [None]:
# Box identifiers that can be analysed with this notebook; the trailing
# subscript chooses the one every later cell works on.
boxid = (
    "ESD.000088-1",
    "063.623-1",
    "VRY.CHOPS-1",
    "HVT.111153-1",
    "TTR.251049-1",
    "BGL.CROLA-1",
)[0]

In [None]:
# load_data returns a pair for the selected box, bound here as the data frame
# and its metadata.
df_data, df_meta = load_data(boxid=boxid)
# NOTE(review): split_last presumably holds out the last part of the series as
# the test set -- confirm in src.plot.formatting.
df_train, df_test = split_last(df_data)

In [None]:
# Build a placeholder forecast from the held-out data and plot it together with
# the observations. df_train / df_test are already bound by the data-loading
# cell, so the split is not recomputed here.
df_focast = dummy_forecast(df_test)
plot_total(df_data=df_data, df_meta=df_meta, df_forecast=df_focast)

In [None]:
# Display the loaded frame as the cell output for a quick visual check.
df_data

In [None]:

# select one side
extreme = "max"  # name of the df_data column to model; also used to tag forecasts

# scale t, y
# NOTE(review): MinMaxScaler is the project class from src.utils.preprocessing;
# lower=0 presumably sets the lower bound of the scaled range -- confirm.
t_scaler = MinMaxScaler(lower=0)
t = t_scaler.fit_transform(X=df_data["date"])

# A separate scaler is kept for y so that forecasts can be mapped back to the
# original units later with y_scaler.inverse_transform.
y_scaler = MinMaxScaler(lower=0)
y = y_scaler.fit_transform(X=df_data[extreme])





In [None]:
# Additive model: the scaled observations are described as a drift term plus a
# yearly term, with Gaussian observation noise around their sum.
with pm.Model() as m:

    # Both components are built by project helpers from the scaled time axis.
    # Later cells read trace["drift"] and trace["yearly"], so the helpers
    # presumably register variables under those names -- TODO confirm.
    drift = drift_model(t)
    yearly = seasonality_model(t)
    # yearly = 0  # uncomment to drop the yearly term and fit the drift only

    # Noise scale with a flat prior on [0, 1].
    σ_ε = pm.Uniform('σ_ε', lower=0, upper=1)
    # Registered as "total" so the summed signal is stored in the trace.
    total = pm.Deterministic("total", drift + yearly)

    # Likelihood of the scaled observations y.
    y_obs = pm.Normal('y_obs', mu=total, sd=σ_ε, observed=y)

# Render the model graph as the cell output.
pm.model_to_graphviz(m)

In [None]:
# Draw 500 posterior samples after 500 tuning steps. The seed is fixed so that
# Restart & Run All reproduces the same trace; change RANDOM_SEED for a
# different draw.
RANDOM_SEED = 42
with m:
    trace = pm.sample(500, tune=500, init="adapt_diag", random_seed=RANDOM_SEED)

In [None]:
# Names of all variables stored in the trace, shown as a plain list.
list(trace.varnames)

In [None]:
# Show what dummy_forecast produces for the full data set; the result is only
# displayed as the cell output, not stored.
dummy_forecast(df_data)

In [None]:
# Average the "total" samples over the first axis (assumed to be the draw
# axis -- TODO confirm) and plot the resulting curve.
pd.DataFrame(np.mean(trace["total"], axis=0)).plot()

In [None]:
# Q = [5, 15, 50, 85, 95]
# q = [q/100 for q in Q]
# # q_names
# Q

In [None]:
def format_trace_samples(t, samples, quantiles=(5, 15, 50, 85, 95)):
    """Summarise samples as nested quantile bands in long format.

    Quantiles are paired from the outside in (first with last, second with
    second-to-last, ...). With an odd number of quantiles the middle one is
    paired with itself; the label "Q50-Q50" is rendered as "median".

    Parameters
    ----------
    t : array-like of length n
        Values written to the ``date`` column of every band. They are matched
        by position with the second axis of ``samples``.
    samples : array-like of shape (n_draws, n)
        Samples to summarise; quantiles are taken over axis 0.
    quantiles : sequence of numbers, default (5, 15, 50, 85, 95)
        Percentiles on a 0-100 scale, expected in ascending order.

    Returns
    -------
    pandas.DataFrame
        Columns ``upper``, ``lower``, ``date`` and ``forecast``, holding one
        row per element of ``t`` for each band, with the bands stacked in
        pairing order. The index restarts at 0 for every band. An empty
        ``quantiles`` yields an empty frame with these columns.
    """
    boundaries = ["upper", "lower"]
    if len(quantiles) == 0:
        return pd.DataFrame(columns=boundaries + ["date", "forecast"])

    # Plain array so `assign` matches values by position, not by index label.
    dates = np.asarray(t)
    q_data = np.quantile(samples, [q / 100 for q in quantiles], axis=0)

    # Collect one frame per band and concatenate once at the end instead of
    # growing a frame inside the loop.
    bands = []
    for ci in range(math.ceil(len(quantiles) / 2)):
        upper_i, lower_i = -ci - 1, ci
        forecast_range = f"Q{quantiles[lower_i]}-Q{quantiles[upper_i]}".replace(
            "Q50-Q50", "median"
        )
        bands.append(
            pd.DataFrame(
                data=q_data[[upper_i, lower_i]].T, columns=boundaries
            ).assign(date=dates, forecast=forecast_range)
        )

    return pd.concat(bands, axis=0)

In [None]:
# Quantile bands of the "total" term, tagged with the modelled column and
# mapped back from scaled units with the fitted scalers.
df_forecast = format_trace_samples(t, trace["total"]).assign(
    extreme=extreme,
    lower=lambda frame: y_scaler.inverse_transform(frame["lower"]),
    upper=lambda frame: y_scaler.inverse_transform(frame["upper"]),
    date=lambda frame: t_scaler.inverse_transform(frame["date"]),
)

In [None]:
# Plot the forecast bands only (no observations or metadata are passed).
plot_total(df_forecast=df_forecast)

In [None]:
# Quantile bands of the "drift" term, mapped back with the fitted scalers and
# plotted on their own.
# NOTE(review): y_scaler.inverse_transform is applied to a single component
# here; if the scaler adds an offset, the plotted level includes it -- confirm
# this is intended.
df_forecast = format_trace_samples(t, trace["drift"]).assign(
    extreme=extreme,
    lower=lambda frame: y_scaler.inverse_transform(frame["lower"]),
    upper=lambda frame: y_scaler.inverse_transform(frame["upper"]),
    date=lambda frame: t_scaler.inverse_transform(frame["date"]),
)
plot_total(df_forecast=df_forecast)

In [None]:
# Quantile bands of the "yearly" term, mapped back with the fitted scalers and
# plotted on their own.
# NOTE(review): y_scaler.inverse_transform is applied to a single component
# here; if the scaler adds an offset, the plotted level includes it -- confirm
# this is intended.
df_forecast = format_trace_samples(t, trace["yearly"]).assign(
    extreme=extreme,
    lower=lambda frame: y_scaler.inverse_transform(frame["lower"]),
    upper=lambda frame: y_scaler.inverse_transform(frame["upper"]),
    date=lambda frame: t_scaler.inverse_transform(frame["date"]),
)
plot_total(df_forecast=df_forecast)