In [1]:
%matplotlib inline
import mxnet as mx
from mxnet import gluon, nd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json

from utils import GluonConfigs

In [2]:
# Load the raw table through the project helper. The following cell indexes it by
# a 'transect_id' column, so the result is presumably a pandas DataFrame with one
# row per transect -- TODO confirm against utils.GluonConfigs.
tf = GluonConfigs.load_data()

In [3]:
# Index the frame by transect and keep only rows that have at least 33
# non-missing values, then report the resulting shape and preview a few rows.
tf = tf.set_index('transect_id').dropna(thresh=33)
print(f"Transects included in dataset: {tf.shape[0]}; timesteps: {tf.shape[1]}")
tf.head()

Transects included in dataset: 37111; timesteps: 33


Unnamed: 0_level_0,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
transect_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BOX_051_151_15,677.32043,695.6637,713.29486,682.1743,688.23425,672.13007,700.43286,693.4346,699.3856,687.9795,...,700.664,712.4174,708.45123,740.3324,680.4251,758.3515,754.49695,763.04297,743.2968,779.4157
BOX_051_151_18,768.938,769.23883,762.443,755.8622,761.79663,760.5171,763.30505,761.8136,769.221,765.4706,...,763.1057,759.726,766.2315,770.7713,798.2683,797.99615,803.9095,797.9078,798.1883,803.68256
BOX_051_151_21,711.41626,684.2054,695.49817,701.6695,705.2999,703.6147,707.69403,692.9036,704.6828,704.99945,...,825.8698,820.5172,820.6868,842.18097,850.4546,775.967,883.0424,867.92426,877.37415,874.50244
BOX_051_151_30,795.81573,820.637,799.7238,799.8664,823.5626,822.073,823.568,823.733,824.584,824.3336,...,823.782,823.15674,823.6579,821.3186,818.85803,817.3122,822.9297,818.7335,822.1848,818.7348
BOX_051_151_32,242.70204,238.05159,229.93718,244.062,247.68105,257.499,302.69217,301.27722,301.12033,316.40414,...,301.1231,313.63553,313.09814,312.90347,306.18658,294.18326,297.23654,302.8865,323.8584,304.36823


In [4]:
# Work on a random subset of 1000 transects. The seed is fixed so that
# "Restart Kernel -> Run All" draws the same transects (in the same order)
# on every run; without it every downstream result changes between runs.
tf = tf.sample(n=1000, random_state=42)

In [5]:
# Per-transect site attributes; keep only the transects that are present in `tf`.
sites = pd.read_csv("/media/storage/data/shorelines/sites-gluonts-prepared.csv")
# .copy() gives an independent frame, so the column assignment below does not
# write into a filtered selection of the CSV frame (avoids SettingWithCopyWarning).
# NOTE: rows stay in CSV order here, which is not the row order of `tf`.
sites = sites.loc[sites['transect_id'].isin(tf.index)].copy()

# Bin the log of 'changerate_unc' into ten equal-frequency classes labelled 0..9.
# NOTE(review): np.log assumes strictly positive 'changerate_unc' values -- confirm.
logs = np.log(sites['changerate_unc'].values)
sites['changerate_cat'] = pd.qcut(logs, q=10, labels=range(10))

In [6]:
def get_timestamp_first_obs(series: pd.Series, freq: str = 'AS') -> pd.Timestamp:
    """Return 1 January of the year of the first non-missing observation.

    Parameters
    ----------
    series : pd.Series
        Observations whose index labels are calendar years (``int`` or
        numeric ``str``, e.g. ``1984`` or ``'1984'``).
    freq : str, default 'AS'
        Frequency attached to the timestamp where pandas supports it. The
        default is the year-start alias used for the rest of the dataset
        metadata in this notebook, so the 1 January stamp and its frequency
        agree (a year-end frequency would contradict the date).

    Returns
    -------
    pd.Timestamp
        ``<first valid year>-01-01``.

    Raises
    ------
    ValueError
        If ``series`` contains no valid (non-NaN) observation.
    """
    first_year = series.first_valid_index()
    if first_year is None:
        # first_valid_index() returns None for an empty / all-NaN series;
        # fail with a clear message instead of trying to parse "01-01-None".
        raise ValueError("series contains no valid observations")

    # Build the date from components rather than from a "01-01-YYYY" string,
    # so no date-string parsing is involved.
    stamp = pd.Timestamp(year=int(first_year), month=1, day=1)
    try:
        return pd.Timestamp(stamp, freq=freq)
    except TypeError:
        # pandas >= 2.0 removed the ``freq`` argument of Timestamp; the plain
        # timestamp carries the same point in time.
        return stamp

# Dataset description consumed by the dataset-building cell below:
# one entry per row of `tf`, every series starting on 1 January 1984.
metadata = dict(
    num_series=tf.shape[0],
    num_steps=tf.shape[1],
    prediction_length=7,
    freq="AS",
    start=[pd.Timestamp("01-01-1984", freq='AS') for _ in range(tf.shape[0])],
    item_id=tf.index.values,
    context_length=26,
)

In [7]:
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.common import ListDataset

def _make_list_dataset(targets: np.ndarray) -> ListDataset:
    """Build a ``ListDataset`` with one entry per row of ``targets``.

    Static features are looked up in ``sites`` by ``transect_id`` instead of
    being paired positionally: ``tf`` was shuffled by ``.sample()`` while
    ``sites`` keeps its CSV order, so a plain positional ``zip`` attaches the
    features of one transect to the target of another (and silently truncates
    when the two frames differ in length).

    Parameters
    ----------
    targets : np.ndarray
        2-D array whose rows are in the same order as ``metadata['item_id']``.

    Raises
    ------
    ValueError
        If ``sites`` has more than one row for a ``transect_id``.
    KeyError
        If a transect in ``metadata['item_id']`` has no row in ``sites``.
    """
    item_ids = metadata['item_id']

    site_features = sites.set_index('transect_id')
    if not site_features.index.is_unique:
        raise ValueError("sites contains duplicate transect_id values")
    missing = pd.Index(item_ids).difference(site_features.index)
    if len(missing) > 0:
        raise KeyError(
            f"{len(missing)} transect(s) have no row in sites, e.g. {missing[0]!r}"
        )
    # Reorder the site rows so that row i describes item_ids[i].
    site_features = site_features.loc[item_ids]

    return ListDataset(
        [
            {
                FieldName.TARGET: target,
                FieldName.START: start,
                FieldName.ITEM_ID: item_id,
                FieldName.FEAT_STATIC_CAT: [fsc1, fsc2],
                FieldName.FEAT_STATIC_REAL: [fsr],
            }
            for (target, start, item_id, fsc1, fsc2, fsr) in zip(
                targets,
                metadata['start'],
                item_ids,
                site_features['coastline_idint'].values,
                site_features['changerate_cat'].values,
                site_features['changerate_unc'].values,
            )
        ],
        freq=metadata['freq'],
    )


# Training series stop `prediction_length` steps before the end;
# the test series contain the full history.
train_ds = _make_list_dataset(tf.values[:, :-metadata['prediction_length']])
test_ds = _make_list_dataset(tf.values)

In [8]:
# Display the first training entry to sanity-check the fields that were
# passed to ListDataset (target, start, item_id and the static features).
next(iter(train_ds))

{'target': array([787.0619 , 789.99536, 791.3418 , 784.9836 , 799.69147, 793.4877 ,
        788.6033 , 788.66046, 784.8622 , 780.5118 , 805.3251 , 790.579  ,
        809.4227 , 774.3561 , 817.7444 , 788.6603 , 809.219  , 809.20526,
        809.2215 , 805.1269 , 788.94464, 809.21765, 809.1172 , 812.491  ,
        821.6146 , 809.219  ], dtype=float32),
 'start': Timestamp('1984-01-01 00:00:00', freq='AS-JAN'),
 'item_id': 'BOX_074_041_17',
 'feat_static_cat': array([7427,    6], dtype=int32),
 'feat_static_real': array([0.2793744], dtype=float32),
 'source': SourceContext(source='list_data', row=0)}

In [None]:
class MyNetwork(gluon.HybridBlock):
    """Base block holding a three-layer dense network in ``self.nn``.

    The network has two ReLU layers of ``num_cells`` units followed by a
    ``softrelu`` layer with ``prediction_length`` units. This class defines
    no forward pass itself; subclasses provide ``hybrid_forward``.
    """

    def __init__(self, prediction_length, num_cells, **kwargs):
        super().__init__(**kwargs)
        self.prediction_length = prediction_length
        self.num_cells = num_cells

        with self.name_scope():
            # Layers are created in network order inside the name scope:
            # two hidden ReLU layers, then the output layer.
            self.nn = gluon.nn.HybridSequential()
            hidden_layers = [
                gluon.nn.Dense(units=num_cells, activation='relu')
                for _ in range(2)
            ]
            output_layer = gluon.nn.Dense(units=prediction_length, activation='softrelu')
            self.nn.add(*hidden_layers, output_layer)


class MyTrainNetwork(MyNetwork):
    """Training network: returns the loss instead of the forecast."""

    def hybrid_forward(self, F, past_target, future_target):
        """Return the mean absolute error over the last axis.

        The network output for ``past_target`` is compared with
        ``future_target``; an L1 loss is used to learn the median.
        """
        forecast = self.nn(past_target)
        absolute_error = F.abs(forecast - future_target)
        return F.mean(absolute_error, axis=-1)


class MyPredNetwork(MyTrainNetwork):
    """Prediction network: receives only ``past_target`` and returns predictions."""

    def hybrid_forward(self, F, past_target):
        """Return the network output with a new axis inserted at position 1."""
        forecast = self.nn(past_target)
        return F.expand_dims(forecast, axis=1)

