## imports

In [1]:
import os
import sys
# Put the parent directory on the module search path; the `src.*` imports
# below presumably rely on this when the notebook is run from its own
# folder — TODO confirm the expected working directory.
sys.path.append("../")

# NOTE(review): os, math, numpy, datetime, re and pymc3 are not referenced by
# any cell visible in this notebook — confirm before pruning them.
import math

import pandas as pd
import numpy as np
import datetime as dt
import re

import pymc3 as pm

import altair as alt
# Lift Altair's row-count limit so larger frames can be passed to the charts.
alt.data_transformers.disable_max_rows()

# Project-local helpers: data loading/splitting, plotting and forecasting.
from src.preprocess.preprocess import load_data, split_last
from src.plot.altair import plot_decompose, plot_all
from src.forecast.forecast import determine_estimates_minmax, forecast

In [2]:
# Box ids that are handy to experiment with; change the index on the last
# line to pick a different one.
# NOTE(review): a later cell reassigns `boxid` to a hard-coded value, so this
# selection is overridden when the notebook is run top to bottom.
_boxid_candidates = (
    "ESD.000088-1",
    "063.623-1",
    "VRY.CHOPS-1",
    "HVT.111153-1",
    "TTR.251049-1",
    "BGL.CROLA-1",
)
boxid = _boxid_candidates[3]  # -> "HVT.111153-1"

## load data

In [3]:
# Hard-coded box id used for the rest of the notebook. This replaces whatever
# `boxid` was selected from the candidate list in the previous cell; the
# captured load log below shows this id ("063.691-1") being read.
boxid = "063.691-1"

In [4]:
%%time
# Load the data frame and its metadata for the selected box.
# NOTE(review): the captured log mentions snowflake reads and several
# preprocess checks, so load_data presumably queries a remote source and
# filters the series — confirm in src.preprocess.
df_data, df_meta = load_data(boxid=boxid)
# Plot the freshly loaded data (no metadata passed here).
plot_all(df_data=df_data)

[Fri, 19 Nov 2021 10:58:13] INFO [snowflake.py.read_meta] reading meta preprocess
[Fri, 19 Nov 2021 10:58:13] INFO [snowflake.py.read_meta] for boxids: 063.691-1
[Fri, 19 Nov 2021 10:58:17] INFO [snowflake.py.read_week_extremes] reading extremes table
[Fri, 19 Nov 2021 10:58:21] INFO [preprocess.py.too_short] checking number of preprocess points (<=104)


INFO:SPARK:checking number of preprocess points (<=104)


[Fri, 19 Nov 2021 10:58:21] INFO [preprocess.py.too_small] checking absolute values (<0.5)


INFO:SPARK:checking absolute values (<0.5)


[Fri, 19 Nov 2021 10:58:21] INFO [preprocess.py.remove_leading_idling] removing leading low values (<0.01)


INFO:SPARK:removing leading low values (<0.01)


[Fri, 19 Nov 2021 10:58:21] INFO [preprocess.py.too_short] checking number of preprocess points (<=104)


INFO:SPARK:checking number of preprocess points (<=104)


CPU times: user 796 ms, sys: 125 ms, total: 921 ms
Wall time: 7.4 s


## split in train + test

In [5]:
# Split into a train and a test frame. A copy is handed to split_last, so
# df_data itself is left untouched by whatever split_last does.
# NOTE(review): where the split point lies is decided inside split_last
# (presumably the last part becomes the test set — confirm).
df_train, df_test = split_last(df_data.copy())

## estimate parameters + forecast

In [6]:
%%time
# Fit on the training frame only and collect the estimates.
# NOTE(review): the captured sampler log for this cell reports divergences
# after tuning (up to 114 in a single chain) and a low effective sample size
# for both runs; PyMC3's own hint is to increase `target_accept` or
# reparameterize. Treat the resulting estimates with caution until resolved.
df_estimates = determine_estimates_minmax(df_train)
# Stack the loaded data and the estimates row-wise (axis=0) into one frame
# that the plotting cells below consume.
df_total=pd.concat([df_data, df_estimates], axis=0)

[Fri, 19 Nov 2021 10:58:21] INFO [forecast.py.determine_estimates_minmax] forecast for weekly min


INFO:SPARK:forecast for weekly min


[Fri, 19 Nov 2021 10:58:21] INFO [forecast.py.determine_estimates] add forecast horizon


INFO:SPARK:add forecast horizon


[Fri, 19 Nov 2021 10:58:21] INFO [forecast.py.determine_estimates] scale data


INFO:SPARK:scale data


[Fri, 19 Nov 2021 10:58:21] INFO [forecast.py.determine_estimates] setup model


INFO:SPARK:setup model


[Fri, 19 Nov 2021 10:58:21] INFO [model.py.create_model] creating PYMC3 model


INFO:SPARK:creating PYMC3 model


[Fri, 19 Nov 2021 10:58:21] INFO [model.py.create_model] polynomial order = 2 for drift/trend


INFO:SPARK:polynomial order = 2 for drift/trend


[Fri, 19 Nov 2021 10:58:21] INFO [model.py.create_model] fourier order = 5 for seasonality


INFO:SPARK:fourier order = 5 for seasonality
  x = np.power(t[:, None], p)
  x = x * t[:, None]


[Fri, 19 Nov 2021 10:58:51] INFO [forecast.py.determine_estimates] tune and sample model


INFO:SPARK:tune and sample model
  trace = pm.sample(
Auto-assigning NUTS sampler...
INFO:pymc3:Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
INFO:pymc3:Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [Σ_missing, σ_ε, β_yearly, β_drift]
INFO:pymc3:NUTS: [Σ_missing, σ_ε, β_yearly, β_drift]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 73 seconds.
INFO:pymc3:Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 73 seconds.
There were 2 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc3:There were 2 divergences after tuning. Increase `target_accept` or reparameterize.
There were 99 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc3:There were 99 divergences after tuning. Increase `target_accept` or reparameterize.
There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc3:There were 6 divergences after tuning. Increase `target_accept` or reparameterize.
The number of effective samples is smaller than 25% for some parameters.
INFO:pymc3:The number of effective samples is smaller than 25% for some parameters.


[Fri, 19 Nov 2021 11:00:20] INFO [forecast.py.determine_estimates] scale ouput data back


INFO:SPARK:scale ouput data back


[Fri, 19 Nov 2021 11:00:20] INFO [forecast.py.determine_estimates] format output data


INFO:SPARK:format output data


[Fri, 19 Nov 2021 11:00:20] INFO [format.py.format_model_estimates] calculating bands for quantiles: [1, 5, 50, 95, 99]


INFO:SPARK:calculating bands for quantiles: [1, 5, 50, 95, 99]


[Fri, 19 Nov 2021 11:00:20] INFO [format.py.format_model_estimates] calculating bands for variable: drift


INFO:SPARK:calculating bands for variable: drift


[Fri, 19 Nov 2021 11:00:20] INFO [format.py.format_model_estimates] calculating bands for variable: yearly


INFO:SPARK:calculating bands for variable: yearly


[Fri, 19 Nov 2021 11:00:20] INFO [format.py.format_model_estimates] calculating bands for variable: Σ


INFO:SPARK:calculating bands for variable: Σ


[Fri, 19 Nov 2021 11:00:20] INFO [forecast.py.determine_estimates_minmax] forecast for weekly max


INFO:SPARK:forecast for weekly max


[Fri, 19 Nov 2021 11:00:20] INFO [forecast.py.determine_estimates] add forecast horizon


INFO:SPARK:add forecast horizon


[Fri, 19 Nov 2021 11:00:20] INFO [forecast.py.determine_estimates] scale data


INFO:SPARK:scale data


[Fri, 19 Nov 2021 11:00:20] INFO [forecast.py.determine_estimates] setup model


INFO:SPARK:setup model


[Fri, 19 Nov 2021 11:00:20] INFO [model.py.create_model] creating PYMC3 model


INFO:SPARK:creating PYMC3 model


[Fri, 19 Nov 2021 11:00:20] INFO [model.py.create_model] polynomial order = 2 for drift/trend


INFO:SPARK:polynomial order = 2 for drift/trend


[Fri, 19 Nov 2021 11:00:20] INFO [model.py.create_model] fourier order = 5 for seasonality


INFO:SPARK:fourier order = 5 for seasonality
  x = np.power(t[:, None], p)
  x = x * t[:, None]


[Fri, 19 Nov 2021 11:00:21] INFO [forecast.py.determine_estimates] tune and sample model


INFO:SPARK:tune and sample model
  trace = pm.sample(
Auto-assigning NUTS sampler...
INFO:pymc3:Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
INFO:pymc3:Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [Σ_missing, σ_ε, β_yearly, β_drift]
INFO:pymc3:NUTS: [Σ_missing, σ_ε, β_yearly, β_drift]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 62 seconds.
INFO:pymc3:Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 62 seconds.
There were 114 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc3:There were 114 divergences after tuning. Increase `target_accept` or reparameterize.
There were 19 divergences after tuning. Increase `target_accept` or reparameterize.
ERROR:pymc3:There were 19 divergences after tuning. Increase `target_accept` or reparameterize.
The number of effective samples is smaller than 25% for some parameters.
INFO:pymc3:The number of effective samples is smaller than 25% for some parameters.


[Fri, 19 Nov 2021 11:01:27] INFO [forecast.py.determine_estimates] scale ouput data back


INFO:SPARK:scale ouput data back


[Fri, 19 Nov 2021 11:01:27] INFO [forecast.py.determine_estimates] format output data


INFO:SPARK:format output data


[Fri, 19 Nov 2021 11:01:27] INFO [format.py.format_model_estimates] calculating bands for quantiles: [1, 5, 50, 95, 99]


INFO:SPARK:calculating bands for quantiles: [1, 5, 50, 95, 99]


[Fri, 19 Nov 2021 11:01:27] INFO [format.py.format_model_estimates] calculating bands for variable: drift


INFO:SPARK:calculating bands for variable: drift


[Fri, 19 Nov 2021 11:01:27] INFO [format.py.format_model_estimates] calculating bands for variable: yearly


INFO:SPARK:calculating bands for variable: yearly


[Fri, 19 Nov 2021 11:01:27] INFO [format.py.format_model_estimates] calculating bands for variable: Σ


INFO:SPARK:calculating bands for variable: Σ


CPU times: user 20.1 s, sys: 16.7 s, total: 36.8 s
Wall time: 3min 6s


## plot results

In [10]:
# Plot the combined frame together with the metadata, at a fixed width.
# NOTE(review): the saved output of the neighbouring plot_decompose cell is a
# SchemaValidationError stating that 'width' is not allowed on a
# VConcatChart. If plot_all returns a concatenated chart,
# `.properties(width=800)` would hit the same error — confirm the chart type
# plot_all returns.
plot_all(df_data=df_total, df_meta=df_meta).properties(width=800)

In [9]:
# Decomposition plot restricted to the rows where extreme == 'min'.
# NOTE(review): the saved output of this cell is a SchemaValidationError
# ("'width' was unexpected" on altair VConcatChart). No width is set in this
# cell, so it presumably originates inside plot_decompose or from an earlier
# version of the cell — TODO confirm and re-run.
# NOTE(review): execution counts are out of order (this cell ran as [9], the
# cell above as [10]); re-run with Restart & Run All before relying on the
# outputs.
plot_decompose(df=df_total.query("extreme == 'min'"))

SchemaValidationError: Invalid specification

        altair.vegalite.v4.api.VConcatChart, validating 'additionalProperties'

        Additional properties are not allowed ('width' was unexpected)
        

alt.VConcatChart(...)

In [None]:
# Display the combined frame (loaded data + estimates) built with pd.concat
# in the estimation cell. This cell has not been executed in the saved state.
df_total