In [2]:
%matplotlib inline
import mxnet as mx
from mxnet import gluon
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

import project_path
from shorelineforecasting.utils.forecasting_metrics import evaluate 
from shorelineforecasting.utils.configs import GluonConfigs

In [3]:
# Load the shoreline table through the project's GluonConfigs helper.
# NOTE(review): the returned schema is not visible here; the next cell expects a
# `transect_id` column — confirm against GluonConfigs.load_data().
tf = GluonConfigs.load_data()

In [4]:
# Index the table by transect and keep only rows with at least 33 non-missing values.
# NOTE: consumes the `transect_id` column, so this cell cannot be re-run on its own output.
tf = (
    tf
    .set_index('transect_id')
    .dropna(thresh=33)
)
print(f"Transects included in dataset: {tf.shape[0]}; timesteps: {tf.shape[1]}")
tf.head()

Transects included in dataset: 37111; timesteps: 33


Unnamed: 0_level_0,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
transect_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BOX_051_151_15,677.32043,695.6637,713.29486,682.1743,688.23425,672.13007,700.43286,693.4346,699.3856,687.9795,...,700.664,712.4174,708.45123,740.3324,680.4251,758.3515,754.49695,763.04297,743.2968,779.4157
BOX_051_151_18,768.938,769.23883,762.443,755.8622,761.79663,760.5171,763.30505,761.8136,769.221,765.4706,...,763.1057,759.726,766.2315,770.7713,798.2683,797.99615,803.9095,797.9078,798.1883,803.68256
BOX_051_151_21,711.41626,684.2054,695.49817,701.6695,705.2999,703.6147,707.69403,692.9036,704.6828,704.99945,...,825.8698,820.5172,820.6868,842.18097,850.4546,775.967,883.0424,867.92426,877.37415,874.50244
BOX_051_151_30,795.81573,820.637,799.7238,799.8664,823.5626,822.073,823.568,823.733,824.584,824.3336,...,823.782,823.15674,823.6579,821.3186,818.85803,817.3122,822.9297,818.7335,822.1848,818.7348
BOX_051_151_32,242.70204,238.05159,229.93718,244.062,247.68105,257.499,302.69217,301.27722,301.12033,316.40414,...,301.1231,313.63553,313.09814,312.90347,306.18658,294.18326,297.23654,302.8865,323.8584,304.36823


In [5]:
# Work on a random subset of transects to keep training fast.
# A fixed seed makes the subset (and every downstream metric) reproducible, and
# the min() guard avoids a ValueError when fewer rows than requested remain.
N_TRANSECTS = 1000
SAMPLE_SEED = 42
tf = tf.sample(n=min(N_TRANSECTS, len(tf)), random_state=SAMPLE_SEED)

In [6]:
# Per-transect site attributes, restricted to the transects kept in `tf`.
# NOTE: rows stay in CSV order, which is generally NOT the (shuffled) order of `tf`.
sites = pd.read_csv("/media/storage/data/shorelines/sites-gluonts-prepared-37k.csv")
# .copy() gives an independent frame, so later column assignments such as
# `sites['fcluster'] = ...` do not write into a slice of the full table
# (which would emit SettingWithCopyWarning).
sites = sites.loc[sites['transect_id'].isin(tf.index)].copy()


In [6]:
from scipy.cluster.hierarchy import ward, fcluster
from scipy.spatial.distance import pdist

# Ward-linkage hierarchical clustering of the transect intersection coordinates,
# flattened into cluster labels with a distance threshold of 0.1.
# NOTE(review): pdist is called with its default metric on raw lon/lat values —
# confirm that this is the intended notion of distance.
site_coords = sites[['Intersect_lon', 'Intersect_lat']].values
condensed_distances = pdist(site_coords)
Z = ward(condensed_distances)
sites['fcluster'] = fcluster(Z, t=0.1, criterion='distance')

In [7]:
# Dataset-level settings read by get_gluon_ds() and the evaluation code.
metadata = dict(
    num_series=tf.shape[0],
    num_steps=tf.shape[1],
    prediction_length=7,
    freq="AS",
    # one (identical) start timestamp per series
    start=[pd.Timestamp("01-01-1984", freq='AS')] * tf.shape[0],
    item_id=tf.index.values,
)

In [8]:
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName


def get_gluon_ds():
    """Build the GluonTS train and test datasets from the notebook globals.

    Reads the module-level `tf` (one row per series), `metadata`
    ('prediction_length', 'start', 'item_id', 'freq') and `sites` (columns
    'transect_id', 'fcluster', 'dbscan_cluster', 'changerate_unc').

    The test dataset holds the full series; the train dataset holds the same
    series with the last `prediction_length` steps removed.

    Static features are looked up by item id rather than by row position,
    because `sites` is not guaranteed to be in the same row order as `tf`.
    Cluster labels are re-coded to 0..n_unique-1 so that they are valid
    categorical indices for a cardinality of `n_unique` (raw labels need not
    start at 0 or be contiguous).

    Returns:
        (train_ds, test_ds): two `ListDataset` objects.

    Raises:
        ValueError: if the metadata lengths disagree with `tf`, if the
            prediction length does not leave any training history, or if
            `sites` has duplicate or missing transect ids.
    """
    item_ids = metadata['item_id']
    starts = metadata['start']
    horizon = int(metadata['prediction_length'])

    # zip() would silently truncate on a length mismatch, so check up front.
    if not (len(tf) == len(item_ids) == len(starts)):
        raise ValueError(
            "metadata['item_id'] / metadata['start'] are out of sync with tf "
            f"({len(item_ids)} / {len(starts)} entries vs {len(tf)} rows)"
        )
    if not 0 < horizon < tf.shape[1]:
        raise ValueError(
            f"prediction_length must be in [1, {tf.shape[1] - 1}], got {horizon}"
        )

    # Align the site attributes with the series by transect id.
    site_features = sites.set_index('transect_id')
    if not site_features.index.is_unique:
        raise ValueError("sites contains duplicate transect_id values")
    missing = pd.Index(item_ids).difference(site_features.index)
    if len(missing) > 0:
        raise ValueError(
            f"{len(missing)} transects have no row in sites, e.g. {list(missing[:5])}"
        )
    site_features = site_features.loc[item_ids]

    # Contiguous zero-based codes for the two static categorical features.
    fclust_codes = pd.factorize(site_features['fcluster'].values, sort=True)[0]
    dbclust_codes = pd.factorize(site_features['dbscan_cluster'].values, sort=True)[0]
    static_real = site_features['changerate_unc'].values

    def build(targets):
        # One entry per series; only the target window differs between train and test.
        return ListDataset(
            [
                {
                    FieldName.TARGET: target,
                    FieldName.START: start,
                    FieldName.ITEM_ID: item_id,
                    FieldName.FEAT_STATIC_CAT: [fclust, dbclust],
                    FieldName.FEAT_STATIC_REAL: [fsr],
                }
                for target, start, item_id, fclust, dbclust, fsr in zip(
                    targets, starts, item_ids,
                    fclust_codes, dbclust_codes, static_real,
                )
            ],
            freq=metadata['freq'],
        )

    train_ds = build(tf.values[:, :-horizon])
    test_ds = build(tf.values)

    return train_ds, test_ds

In [9]:
# Build both datasets from the current `metadata`, then open an iterator over each.
train_ds, test_ds = get_gluon_ds()
train_it, test_it = iter(train_ds), iter(test_ds)

In [10]:
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.model.deepstate import DeepStateEstimator

from gluonts.transform import (
    AddAgeFeature,
    AddObservedValuesIndicator,
    Chain,
    ExpectedNumInstanceSampler,
    InstanceSplitter,
    SetFieldIfNotPresent,
)

def _age_feature_transformation(prediction_length, context_length):
    """Build the input pipeline shared by the custom estimators below.

    The chain (1) adds an observed-values indicator for the target, (2) adds a
    log-scaled age feature, and (3) splits each series into a
    `context_length` past window and a `prediction_length` future window,
    carrying the two added features along as time-series fields.

    A new `Chain` is created on every call.
    """
    return Chain(
        [
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
            ),
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=prediction_length,
                log_scale=True,
            ),
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=context_length,
                future_length=prediction_length,
                time_series_fields=[
                    FieldName.FEAT_AGE,
                    FieldName.OBSERVED_VALUES,
                ],
            ),
        ]
    )


class MySimpleFeedForward(SimpleFeedForwardEstimator):
    """SimpleFeedForwardEstimator with the age/observed-values input pipeline.

    NOTE(review): whether the parent's network actually consumes the added
    fields is not visible in this notebook — confirm.
    """

    def create_transformation(self):
        return _age_feature_transformation(
            self.prediction_length, self.context_length
        )


class MyMQCNN(MQCNNEstimator):
    """MQCNNEstimator with the age/observed-values input pipeline.

    NOTE(review): whether the parent's network actually consumes the added
    fields is not visible in this notebook — confirm.
    """

    def create_transformation(self):
        return _age_feature_transformation(
            self.prediction_length, self.context_length
        )

In [11]:
import pprint
from functools import partial


from gluonts.distribution.piecewise_linear import PiecewiseLinearOutput
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.trainer import Trainer

from shorelineforecasting.utils.configs import get_predictor_id


# Training hyper-parameters for the estimator configurations in this cell.
batch_size, epochs, num_batches_per_epoch = 32, 10, 300
# Number of distinct values of each static categorical column, in the order
# get_gluon_ds() emits them: [fcluster, dbscan_cluster].
cardinality = [
    len(sites[column].unique()) for column in ('fcluster', 'dbscan_cluster')
]
print(cardinality)

# Estimator factories consumed by the evaluation loop in this cell. Each entry
# is a `partial` that still needs `prediction_length`. Alternative
# configurations are kept commented out so they can be re-enabled one at a time.
estimators = [
#     partial(
#         SimpleFeedForwardEstimator,
#         freq=metadata["freq"],
#         trainer=Trainer(
#             epochs=epochs, num_batches_per_epoch=num_batches_per_epoch
#         ),
#     ),
#     partial(
#         MySimpleFeedForward,
#         freq=metadata["freq"],
#         trainer=Trainer(
#             epochs=epochs, num_batches_per_epoch=num_batches_per_epoch
#         ),
#     ),
#     partial(
#         DeepAREstimator,
#         freq="12M",
#         use_feat_static_real=True,
#         use_feat_static_cat=True,
#         cardinality=cardinality,
#         time_features=[],
#         trainer=Trainer(
#             epochs=epochs,
#             num_batches_per_epoch=num_batches_per_epoch,
#             batch_size=batch_size
#         ),
#     ),
#     partial(
#         DeepAREstimator,
#         freq="12M",
#         use_feat_static_real=True,
#         use_feat_static_cat=True,
#         cardinality=cardinality,
#         distr_output=PiecewiseLinearOutput(8),
#         trainer=Trainer(
#             epochs=epochs, num_batches_per_epoch=num_batches_per_epoch
#         ),
#     ),
#     partial(
#         DeepStateEstimator,
#         freq=metadata["freq"],
#         cardinality=cardinality,
#         trainer=Trainer(
#             epochs=epochs, num_batches_per_epoch=num_batches_per_epoch
#         ),
#     ),
    partial(
        MQCNNEstimator,
        freq=metadata["freq"],
        trainer=Trainer(
            epochs=epochs,
            num_batches_per_epoch=num_batches_per_epoch,
            # Pass the notebook-level batch size explicitly; previously the
            # `batch_size` variable had no effect on the active estimator.
            batch_size=batch_size,
        ),
    ),
#     partial(
#         MyMQCNN,
#         freq=metadata["freq"],
#         trainer=Trainer(
#             epochs=epochs, num_batches_per_epoch=num_batches_per_epoch
#         ),
#     ),
]


def evaluate(estimator):
    """Instantiate, train and back-test one estimator; return per-series metrics.

    NOTE: this definition rebinds the name `evaluate` that the first cell
    imports from `shorelineforecasting.utils.forecasting_metrics`.

    Args:
        estimator: callable that accepts `prediction_length` and returns an
            object with a `train(dataset)` method (the entries of `estimators`).

    Returns:
        The item-level metrics frame from `Evaluator`, with the extra columns
        `prediction_length`, `predictor` and `predictor_id`.

    Reads the globals `metadata`, `train_ds` and `test_ds`. The context length
    is left at the estimator's default.
    """
    estimator = estimator(prediction_length=metadata['prediction_length'])
    print(f"evaluating {estimator}")

    predictor = estimator.train(train_ds)
    forecast_it, ts_it = make_evaluation_predictions(
        test_ds, predictor=predictor, num_samples=100
    )

    scorer = Evaluator()
    agg_metrics, item_metrics = scorer(
        ts_it, forecast_it, num_series=len(test_ds)
    )

    predictor_name = type(estimator).__name__
    item_metrics["prediction_length"] = metadata['prediction_length']
    item_metrics["predictor"] = predictor_name
    item_metrics["predictor_id"] = get_predictor_id()
    # The aggregate metrics are tagged too, but only the item metrics are returned.
    agg_metrics["predictor"] = predictor_name

    return item_metrics


# Train and back-test every configured estimator for each prediction length,
# collecting the per-series metric frames in `metrics`.
metrics = []
for prediction_length in np.arange(7, 8, 1):
    # np.arange yields NumPy integers; store a plain int in the shared metadata.
    metadata['prediction_length'] = int(prediction_length)
    train_ds, test_ds = get_gluon_ds() # update according to metadata
    for estimator in estimators:
        # catch exceptions that are happening during training to avoid failing the whole evaluation
        try:
            metrics.append(evaluate(estimator))
        except Exception as e:
            # Say which estimator failed and with what error type, so a
            # missing result can be traced afterwards.
            estimator_name = getattr(estimator, 'func', estimator).__name__
            print(f"evaluation of {estimator_name} failed: {type(e).__name__}: {e}")
#     pd.concat(metrics).to_csv("/media/storage/data/shorelines/dl-metrics-9.csv", index=False, header=True)

# pd.concat raises on an empty list, which happens when every estimator failed.
if metrics:
    for gr, frame in pd.concat(metrics).groupby('predictor_id'):
        print(f"Group {gr}: MAPE {frame['MAPE'].mean()}")
else:
    print("No estimator finished evaluation; nothing to summarise.")

  0%|          | 0/300 [00:00<?, ?it/s]

[946, 931]
evaluating gluonts.model.seq2seq._mq_dnn_estimator.MQCNNEstimator(context_length=None, freq="AS", mlp_final_dim=20, mlp_hidden_dimension_seq=[], prediction_length=7, quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], trainer=gluonts.trainer._base.Trainer(batch_size=32, clip_gradient=10.0, ctx=None, epochs=10, hybridize=True, init="xavier", learning_rate=0.001, learning_rate_decay_factor=0.5, minimum_learning_rate=5e-05, num_batches_per_epoch=300, patience=10, weight_decay=1e-08))
learning rate from ``lr_scheduler`` has been overwritten by ``learning_rate`` in optimizer.


100%|██████████| 300/300 [00:07<00:00, 41.06it/s, epoch=1/10, avg_epoch_loss=13.5]
100%|██████████| 300/300 [00:07<00:00, 39.85it/s, epoch=2/10, avg_epoch_loss=1.53]
100%|██████████| 300/300 [00:07<00:00, 42.24it/s, epoch=3/10, avg_epoch_loss=1.52]
100%|██████████| 300/300 [00:07<00:00, 41.75it/s, epoch=4/10, avg_epoch_loss=1.5]
100%|██████████| 300/300 [00:07<00:00, 42.44it/s, epoch=5/10, avg_epoch_loss=1.51]
100%|██████████| 300/300 [00:07<00:00, 42.69it/s, epoch=6/10, avg_epoch_loss=1.49]
100%|██████████| 300/300 [00:07<00:00, 42.08it/s, epoch=7/10, avg_epoch_loss=1.49]
100%|██████████| 300/300 [00:07<00:00, 38.20it/s, epoch=8/10, avg_epoch_loss=1.48]
100%|██████████| 300/300 [00:07<00:00, 39.24it/s, epoch=9/10, avg_epoch_loss=1.47]
100%|██████████| 300/300 [00:08<00:00, 37.01it/s, epoch=10/10, avg_epoch_loss=1.48]
Running evaluation: 100%|██████████| 1000/1000 [00:01<00:00, 743.93it/s]


Group 157: MAPE 0.07408936366202057


In [8]:
# Mean of every numeric metric per predictor run. numeric_only=True skips the
# string columns (e.g. `predictor`), which newer pandas refuses to average.
pd.concat(metrics).groupby('predictor_id').mean(numeric_only=True)

Unnamed: 0_level_0,MSE,abs_error,abs_target_sum,abs_target_mean,seasonal_error,MASE,MAPE,sMAPE,OWA,MSIS,...,Coverage[0.5],QuantileLoss[0.6],Coverage[0.6],QuantileLoss[0.7],Coverage[0.7],QuantileLoss[0.8],Coverage[0.8],QuantileLoss[0.9],Coverage[0.9],prediction_length
predictor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
147,19001.444272,831.351187,5148.241266,735.463038,13.435316,12.828042,0.219557,0.151702,,65.170413,...,0.975895,740.274627,0.980402,626.318318,0.98389,481.371367,0.987193,298.975908,0.990727,7
148,3672.1571,198.530911,5148.241266,735.463038,13.435316,2.462395,0.071973,0.044381,,29.522021,...,0.755698,199.604428,0.806992,193.042139,0.851211,175.566079,0.888412,142.054784,0.926745,7
149,3748.577692,206.461076,5148.241266,735.463038,13.435316,2.558043,0.070498,0.045473,,28.635919,...,0.203074,214.343514,0.257013,212.508028,0.333213,203.000373,0.4301,165.960534,0.647417,7
150,,222.11176,5148.241266,735.463038,13.435316,3.17105,0.065219,0.047424,,,...,0.498062,183.263779,0.602601,149.402961,0.606986,160.980377,0.703084,140.450943,0.726662,7


In [10]:
# Mean of every numeric metric per predictor run. numeric_only=True skips the
# string columns (e.g. `predictor`), which newer pandas refuses to average.
pd.concat(metrics).groupby('predictor_id').mean(numeric_only=True)

Unnamed: 0_level_0,MSE,abs_error,abs_target_sum,abs_target_mean,seasonal_error,MASE,MAPE,sMAPE,OWA,MSIS,...,Coverage[0.5],QuantileLoss[0.6],Coverage[0.6],QuantileLoss[0.7],Coverage[0.7],QuantileLoss[0.8],Coverage[0.8],QuantileLoss[0.9],Coverage[0.9],prediction_length
predictor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
143,1871.505826,139.265537,5148.241266,735.463038,13.435316,1.77299,0.058572,0.031126,,22.283065,...,0.72782,139.38879,0.791548,133.423694,0.844836,118.476903,0.888304,90.717742,0.931045,7
144,2150.574743,145.686622,5148.241266,735.463038,13.435316,1.812748,0.049397,0.03273,,22.232659,...,0.719101,144.879474,0.77319,138.559651,0.822817,124.510421,0.867517,99.161096,0.914992,7
145,2103.366009,141.338182,5148.241266,735.463038,13.435316,1.746742,0.047908,0.031775,,22.095838,...,0.713959,141.211175,0.769121,134.827595,0.814445,120.9089,0.853624,98.519893,0.909869,7
146,,117.713779,5148.241266,735.463038,13.435316,1.412177,0.041971,0.027286,,,...,0.352333,114.11782,0.565558,111.784716,0.675868,103.464447,0.750101,83.767415,0.861866,7


In [68]:
# Mean MAPE of each predictor run, one value per line.
runs = pd.concat(metrics).groupby('predictor_id')
for predictor_id, run_frame in runs:
    print(run_frame['MAPE'].mean())

0.05697571455006599
0.05013267189624887
0.04456121455097609
0.06555314856955873


In [70]:
# Mean MAPE of each predictor run stored in a previously saved metrics file.
saved_runs = pd.read_csv("/media/storage/data/shorelines/dl-metrics-1.csv").groupby('predictor_id')
for predictor_id, run_frame in saved_runs:
    print(run_frame['MAPE'].mean())

0.0570972067798468
0.045237669125545554
0.05134579319014857
0.04160705504096912


In [65]:
# Scratch check: the values the evaluation loop's `np.arange(7, 8, 1)` iterates over.
np.arange(7, 8, 1)

array([7])

In [25]:
# Load the previously saved robustness metrics; a later cell concatenates them
# with `metrics_2` and writes the result back to this same path.
metrics_1 = pd.read_csv("/media/storage/data/shorelines/dl-metrics-robustness.csv")

In [34]:
# Append this session's metrics to the saved ones and OVERWRITE the CSV that
# `metrics_1` was read from.
# NOTE: `metrics_2` is assigned in a cell further down (`metrics_2 = pd.concat(metrics)`),
# so this cell fails on a fresh top-to-bottom run; re-running it after re-reading
# the file appends the same rows again.
pd.concat([metrics_1, metrics_2]).to_csv("/media/storage/data/shorelines/dl-metrics-robustness.csv", index=False, header=True)

In [35]:
# Read the robustness metrics file back into `test`.
test = pd.read_csv("/media/storage/data/shorelines/dl-metrics-robustness.csv")

In [27]:
# Stack the per-estimator metric frames collected in `metrics` into one frame.
# NOTE: an earlier cell (the write to dl-metrics-robustness.csv) already uses `metrics_2`.
metrics_2 = pd.concat(metrics)

In [16]:
# Headline error metrics per predictor run. numeric_only=True skips the string
# columns, which newer pandas refuses to average.
pd.concat(metrics).groupby('predictor_id').mean(numeric_only=True)[['MAPE','sMAPE', 'prediction_length']]

Unnamed: 0_level_0,MAPE,sMAPE,prediction_length
predictor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
121,0.105766,0.067504,5
122,0.067642,0.038131,5
123,0.060585,0.034388,5
124,0.038419,0.028056,5
125,0.222379,0.155472,9
126,0.07285,0.038663,9
127,0.064954,0.048274,9
128,1.223889,0.716871,9
129,0.811598,0.481986,13
130,0.060478,0.037674,13


In [16]:
# Scratch check: previews the values produced by these arange bounds.
np.arange(16, 17, 1)

array([16])

In [17]:
# Scratch check: previews the values produced by these arange bounds.
np.arange(5, 28, 4)

array([ 5,  9, 13, 17, 21, 25])

In [18]:
# Scratch check: previews the values produced by these arange bounds.
np.arange(17, 28, 4)

array([17, 21, 25])

In [None]:
from gluonts.dataset.util import to_pandas


# Take the first entry of each dataset and convert both to pandas Series.
train_entry = next(iter(train_ds))
train_entry.keys()

test_entry = next(iter(test_ds))
test_entry.keys()

train_series, test_series = to_pandas(train_entry), to_pandas(test_entry)

# Two stacked panels with shared axes: train series on top, test series below.
fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True, figsize=(10, 7))
train_ax, test_ax = ax

train_series.plot(ax=train_ax)
train_ax.grid(which="both")
train_ax.legend(["train series"], loc="upper left")

test_series.plot(ax=test_ax)
# red vertical line at the last training time step
test_ax.axvline(train_series.index[-1], color='r')
test_ax.grid(which="both")
test_ax.legend(["test series", "end of train series"], loc="upper left")

In [None]:
# Display the combined metrics frame.
# NOTE: `item_metrics_5` is only assigned in the following cell, so this raises
# NameError on a fresh top-to-bottom run.
item_metrics_5

In [None]:
# Combine the frames in `item_metrics_list` into one frame.
# NOTE: `item_metrics_list` is not assigned in any cell shown in this notebook.
# item_metrics_1 = pd.concat(item_metrics_list)
# item_metrics_2 = pd.concat(item_metrics_list)
item_metrics_5 = pd.concat(item_metrics_list)
# item_metrics_5.to_pickle('/media/storage/data/shorelines/dl-metrics-2.pkl')

In [None]:
# Print the mean of every float column of `item_metrics_5`, one block per estimator.
for estimator_id in item_metrics_5["estimator_id"].unique():
    tmp = item_metrics_5.loc[item_metrics_5["estimator_id"]==estimator_id]
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from NumPy; the builtin selects the same columns.
    for c in list(tmp.select_dtypes(include=[float])):
        print(f"{c}: {tmp[c].mean()}")
    print("-"*100)

In [None]:
# Mean of the `mape` column of the pickled `lr-metrics` table.
# NOTE(review): presumably a baseline to compare the models above against — confirm.
# read_pickle unpickles arbitrary objects; only load files you trust.
pd.read_pickle("/media/storage/data/shorelines/lr-metrics.pkl")['mape'].mean()

In [None]:
# Print the mean of every float column of `item_metrics_2`, one block per estimator.
# The loop now filters the same frame it takes the estimator ids from; it
# previously iterated the ids of `item_metrics_2` but filtered `item_metrics_1`.
for estimator_id in item_metrics_2["estimator_id"].unique():
    tmp = item_metrics_2.loc[item_metrics_2["estimator_id"]==estimator_id]
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from NumPy; the builtin selects the same columns.
    for c in list(tmp.select_dtypes(include=[float])):
        print(f"{c}: {tmp[c].mean()}")
    print("-"*100)

In [None]:

# Print the mean of every float column of `item_metrics_1`, one block per estimator.
for estimator_id in item_metrics_1["estimator_id"].unique():
    tmp = item_metrics_1.loc[item_metrics_1["estimator_id"]==estimator_id]
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from NumPy; the builtin selects the same columns.
    for c in list(tmp.select_dtypes(include=[float])):
        print(f"{c}: {tmp[c].mean()}")
    print("-"*100)
    


In [None]:
# Print the mean of every float column of `item_metrics`, one block per estimator.
# A dedicated local name is used for the per-estimator slice so the
# notebook-level `metrics` list (collected by the evaluation loop) is not overwritten.
for estimator in item_metrics["estimator"].unique():
    estimator_metrics = item_metrics.loc[item_metrics["estimator"]==estimator]
    print("-"*100)
    print(estimator)
    print("-"*100)
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from NumPy; the builtin selects the same columns.
    for i in list(estimator_metrics.select_dtypes(include=[float])):
        print(f"{i}: {estimator_metrics[i].mean()}")

# item_metrics.loc[item_metrics["estimator"]==]

In [None]:
# item_metrics.to_pickle("/media/storage/data/shorelines/metrics-gluonts.pkl")

In [None]:
# Mean MAPE over the item metrics of the first entry.
# NOTE: `eval_item_dict_list` is not assigned in any cell shown in this notebook.
eval_item_dict_list[0]["item_metrics"]['MAPE'].mean()

In [None]:
# Mean, maximum and minimum MAPE for every entry of `eval_item_dict_list`.
for entry in eval_item_dict_list:
    # A local name keeps the notebook-level `metrics` list intact.
    mape = entry["item_metrics"]["MAPE"]
    print(mape.mean())
    # Series.max()/.min() skip NaN; the builtins' result depends on where NaNs sit.
    print(mape.max())
    print(mape.min())
    print("---"*10)

In [None]:
# Overlay the MSE histogram of every entry of `eval_item_dict_list` on one axes.
fig, ax = plt.subplots(figsize=(12,12))
for entry in eval_item_dict_list:
    # Draw explicitly on `ax`, and avoid overwriting the notebook-level `metrics` list.
    entry["item_metrics"]["MSE"].plot(kind='hist', ax=ax)
ax.set_xlabel("MSE")

plt.show()



In [None]:
# Overall mean MAPE. The stray `ff` token in front of the comment below was
# removed; it raised NameError.
# NOTE: `all_metrics` must already exist; re-enable the next line to rebuild it.
# all_metrics = pd.concat(item_metrics_list)
all_metrics['MAPE'].mean()

## Sample dataset

In [None]:
from gluonts.dataset.artificial import ConstantDataset


# Settings for the synthetic dataset.
# NOTE: this replaces the `metadata` of the real dataset (and has no 'item_id' key).
metadata = {'num_series': 1000,
            'num_steps': 33,
            'prediction_length': 7,
            'freq': '365D',   # not 1Y for DeepAR freq requirements
            }
# One start timestamp per synthetic series: sized by `num_series`, not by the
# length of the unrelated real table `tf`.
metadata['start'] = [pd.Timestamp("01-01-1984", freq=metadata['freq'])
                     for _ in range(metadata['num_series'])]

# Generate the synthetic dataset and take its train/test parts.
# NOTE: this rebinds the notebook-level `train_ds` and `test_ds`.
ds_generator = ConstantDataset(
    num_timeseries=metadata['num_series'],
    num_steps=metadata['num_steps'],
    freq=metadata['freq'],
    start='1984-01-01 00:00:00',
)

constant_ds = ds_generator.generate()
train_ds, test_ds = constant_ds.train, constant_ds.test

In [None]:
# from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
# from gluonts.trainer import Trainer

# estimator = SimpleFeedForwardEstimator(
#     num_hidden_dimensions=[10],
#     prediction_length=metadata['prediction_length'],
#     context_length=metadata['prediction_length'] * 2,
#     freq=metadata['freq'],
#     trainer=Trainer(ctx="cpu", 
#                     epochs=20, 
#                     learning_rate=1e-3, 
#                     hybridize=False, 
#                     num_batches_per_epoch=100
#                    )
# )

# predictor = estimator.train(train_ds)




from gluonts.model.deepar import DeepAREstimator
from gluonts.distribution.neg_binomial import NegativeBinomialOutput
from gluonts.trainer import Trainer

# DeepAR with a negative-binomial output, trained on the synthetic dataset.
estimator = DeepAREstimator(
    prediction_length=metadata['prediction_length'],
    # Use the frequency the dataset was generated with; the hard-coded "M"
    # disagreed with metadata['freq'] ('365D').
    freq=metadata['freq'],
    distr_output = NegativeBinomialOutput(),
    use_feat_dynamic_real=False,
    use_feat_static_cat=False,
    # cardinality=stat_cat_cardinalities,
    trainer=Trainer(
        learning_rate=1e-3,
        epochs=10,
        num_batches_per_epoch=50,
        batch_size=32
    )
)

predictor = estimator.train(train_ds)



In [None]:
# Save the trained model directly into /tmp/ (no dedicated sub-directory).
# The next cell reads from the same path, so change both together.
from pathlib import Path
predictor.serialize(Path("/tmp/"))

In [None]:
# Load the predictor back from the directory the previous cell serialized to.
# NOTE: `Path` is imported in the previous cell, not here.
from gluonts.model.predictor import Predictor
predictor_deserialized = Predictor.deserialize(Path("/tmp/"))

In [None]:
from gluonts.evaluation.backtest import make_evaluation_predictions

# Back-test `predictor` on the test dataset and materialise both iterators.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

forecasts = list(forecast_it)
tss = list(ts_it)

# first entry of the time series list
ts_entry = tss[0]
forecast_entry =  forecasts[0]
test_ds_entry = next(iter(test_ds))

# Only the last expression of a cell is displayed, so print the two previews.
# first 5 values of the time series (convert from pandas to numpy)
print(np.array(ts_entry[:5]).reshape(-1,))

# first 5 values
print(test_ds_entry['target'][:5])

print(f"Number of sample paths: {forecast_entry.num_samples}")
print(f"Dimension of samples: {forecast_entry.samples.shape}")
print(f"Start date of the forecast window: {forecast_entry.start_date}")
print(f"Frequency of the time series: {forecast_entry.freq}")

In [None]:
# Point summaries of `forecast_entry`: its `mean` and its 0.5 quantile.
print(f"Mean of the future window:\n {forecast_entry.mean}")
print(f"0.5-quantile (median) of the future window:\n {forecast_entry.quantile(0.5)}")

In [None]:
def plot_prob_forecasts(ts_entry, forecast_entry, plot_length=100,
                        prediction_intervals=(50.0, 90.0)):
    """Plot the tail of an observed series together with its forecast.

    Args:
        ts_entry: observed series; its last `plot_length` points are plotted.
        forecast_entry: forecast object exposing
            `plot(prediction_intervals=..., color=...)`.
        plot_length: number of trailing observations to show (default 100).
        prediction_intervals: interval widths, in percent, passed to the
            forecast plot and used for the legend labels (default 50 and 90).

    The figure is shown with `plt.show()`; nothing is returned.
    """
    # Interval labels are listed in reverse order of `prediction_intervals`.
    interval_labels = [f"{k}% prediction interval" for k in prediction_intervals][::-1]
    legend = ["observations", "median prediction"] + interval_labels

    _, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.show()

# Plot the first 30 (series, forecast) pairs.
first_pairs = zip(tss[:30], forecasts[:30])
for ts_entry, forecast_entry in first_pairs:
    plot_prob_forecasts(ts_entry, forecast_entry)

In [None]:
# Sanity check: number of series, forecasts and test entries, one per line.
for collection in (tss, forecasts, test_ds):
    print(len(collection))



In [None]:
# Scratch cell: prints four blank lines while counting the iterations.
count = 0
for i in range(4):
    print()
    count = i + 1

In [None]:
from gluonts.evaluation import Evaluator
# Score at most the first 10000 forecasts at the 0.1 / 0.5 / 0.9 quantiles.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
# Report the number of series actually evaluated instead of a hard-coded 10000.
n_eval = min(len(tss), len(forecasts), 10000)
agg_metrics, item_metrics = evaluator(iter(tss[:n_eval]), iter(forecasts[:n_eval]), num_series=n_eval)
print(json.dumps(agg_metrics, indent=4))

In [None]:
# Preview the first rows of the per-series metrics frame.
item_metrics.head()

In [None]:
# Smallest per-series MAPE. Series.min() skips NaN, whereas the builtin min()
# gives a result that depends on where NaNs sit in the column.
item_metrics['MAPE'].min()