In [7]:
import mxnet as mx
from gluonts.mx.trainer import Trainer
from gluonts.dataset.common import ListDataset
import os
from pathlib import Path
import shutil
import pandas as pd
import numpy as np
from My_DeepAREstimator import my_DeepAREstimator

# Create train / dev sample

In [8]:
 #Create train / dev sample
def creat_dict_data(df_input, data_type, data_config):
    """Build the list of per-series entry dicts for one data split.

    For every (start, end) pair configured under
    ``data_config[data_type + '_period']`` and for every target variable,
    one dict is produced containing:

    * ``"start"``: the period start label,
    * ``"target"``: that target column restricted to ``.loc[start:end]``,
    * ``"feat_static_cat"``: ``[position of the target in target_var]``,
      only when more than one target variable is configured,
    * ``"feat_dynamic_real"``: the covariate columns over the same rows,
      transposed so each covariate is one row, only when covariates are
      configured.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame holding the target and covariate columns. It is sliced by
        label with ``.loc[start:end]``.
    data_type : str
        Prefix of the period key to read, e.g. ``'train'`` or ``'dev'``.
    data_config : dict
        Reads ``'target_var'``, ``'covariate'``,
        ``'<data_type>_period'`` (a dict with ``'start'`` and ``'end'``
        lists) and ``'show_train_valid_dict_data'``.

    Returns
    -------
    list of dict
        One entry per (period, target variable); empty when no period or
        no target variable is configured.
    """
    target_var = data_config['target_var']
    covariate = data_config['covariate']
    period = data_config[data_type + '_period']

    use_static_cat = len(target_var) > 1
    use_covariate = len(covariate) != 0

    Data = []
    # NOTE: zip stops at the shorter list, so a start without a matching end
    # (or the reverse) is ignored.
    for s, e in zip(period['start'], period['end']):
        # The row window is the same for every target of this period.
        window = df_input.loc[s:e]
        for idx, var in enumerate(target_var):
            d = {"start": s, "target": window[var]}
            if use_static_cat:
                d['feat_static_cat'] = [idx]
            if use_covariate:
                d['feat_dynamic_real'] = window[covariate].T.values
            Data.append(d)

    if data_config['show_train_valid_dict_data']:
        if Data:
            print(f'create {len(Data)} dict_data, The first sampe  dict_data:')
            print(Data[0])
        else:
            # Nothing to preview; indexing Data[0] here would raise IndexError.
            print('create 0 dict_data')
    return Data

# DeepAR Model

In [9]:
# DeepAR Model
def DeepAR_model(dataset, data_config, model_config):
    """Build the train/dev datasets, fit the DeepAR estimator, optionally save it.

    Parameters
    ----------
    dataset : sliceable table
        Full data table; every ``freq``-th row (``dataset[::freq]``) is
        handed to ``creat_dict_data``.
    data_config : dict
        Reads ``'freq'``, ``'target_var'``, ``'covariate'``,
        ``'context_length'`` and ``'prediction_length'``; it is also passed
        through to ``creat_dict_data``.
    model_config : dict
        Reads ``'num_layers'``, ``'num_cells'``, ``'cell_type'``,
        ``'epochs'``, ``'ctx'``, ``'r_seed'``, ``'continue_train'``,
        ``'continue_train_model'``, ``'model_name'`` and ``'save_model'``.

    Returns
    -------
    tuple
        ``(predictor, training_data, dev_data)``.

    Side effects
    ------------
    Creates the ``DeepAR_Model`` directory if it is missing (even when the
    model is not saved). When ``model_config['save_model']`` is truthy, any
    existing ``DeepAR_Model/<model_name>`` directory is removed and the
    predictor is serialized into a fresh one.
    """
    freq = data_config['freq']
    target_var = data_config['target_var']
    covariate = data_config['covariate']

    freq_label = str(freq) + "min"
    separator = "=" * 40
    has_multiple_targets = len(target_var) > 1
    has_covariates = len(covariate) != 0

    def _build_split(split_name):
        # Sub-sample the table, then wrap the resulting entries for GluonTS.
        entries = creat_dict_data(dataset[::freq], split_name, data_config)
        return ListDataset(entries, freq=freq_label)

    print()
    print(separator)
    print('Create Training Set')
    training_data = _build_split('train')

    print(separator)
    print('Create Development Set')
    dev_data = _build_split('dev')
    print(separator)

    # Configure the estimator; static-category and dynamic-real features are
    # switched on only when the configuration actually provides them.
    print('Build and Train the DeepAR model')
    if has_multiple_targets:
        cardinality = [len(target_var)]
    else:
        cardinality = None
    trainer = Trainer(epochs=model_config['epochs'],
                      ctx=model_config['ctx'])
    estimator = my_DeepAREstimator(
        freq=freq_label,
        context_length=data_config['context_length'],
        prediction_length=data_config['prediction_length'],
        use_feat_dynamic_real=has_covariates,
        use_feat_static_cat=has_multiple_targets,
        cardinality=cardinality,
        num_layers=model_config['num_layers'],
        num_cells=model_config['num_cells'],
        cell_type=model_config['cell_type'],
        trainer=trainer,
    )

    # Seed MXNet right before fitting.
    mx.random.seed(model_config['r_seed'])
    predictor = estimator.train(
        training_data=training_data,
        validation_data=dev_data,
        continue_train=model_config['continue_train'],
        model_path=model_config['continue_train_model'],
    )

    # Persist the trained predictor if requested.
    model_name = model_config['model_name']
    if not os.path.exists('DeepAR_Model'):
        os.mkdir('DeepAR_Model')
    if model_config['save_model']:
        print('Saving the model ...')
        model_dir = f'DeepAR_Model/{model_name}'
        if os.path.exists(model_dir):
            # Drop the previously saved model of the same name.
            shutil.rmtree(model_dir)
        os.mkdir(model_dir)
        predictor.serialize(Path(model_dir))

    return (predictor, training_data, dev_data)

# run_model

In [10]:
def run_model(data_config, model_config):
    """Load the station and feature CSVs, min-max scale them and train DeepAR.

    One CSV per entry of ``data_config['target_var']`` is read from the
    ``clean data`` directory (``<station>.csv``, first column as index), and
    the extra features are read from ``new features.csv``. All columns are
    stacked side by side by row position (not aligned on index labels), so
    the files are expected to have the same number of rows; the index of the
    first station file is used for the combined frame.

    Parameters
    ----------
    data_config : dict
        Reads ``'target_var'``; passed on unchanged to ``DeepAR_model``.
    model_config : dict
        Passed on unchanged to ``DeepAR_model``.

    Returns
    -------
    tuple
        ``(predictor, training_data, dev_data)`` as returned by
        ``DeepAR_model``.
    """
    # Format data
    PATH_1 = 'clean data'
    Features_Path = 'new features.csv'

    target_var = data_config['target_var']

    # Read every station file. os.path.join keeps the path valid on any OS
    # (a hard-coded backslash only works on Windows).
    station_frames = [
        pd.read_csv(os.path.join(PATH_1, f'{station}.csv'), index_col=0)
        for station in target_var
    ]
    wc_index = station_frames[0].index

    # Stack the raw values first so the columns are *named* after the
    # stations. Passing a DataFrame together with ``columns=`` would instead
    # select columns by label and yield NaN when the CSV header differs from
    # the station name.
    wc_values = np.hstack([np.array(frame) for frame in station_frames])
    df_wc = pd.DataFrame(wc_values, index=wc_index, columns=target_var)

    # get other features into df_features
    df_features = pd.read_csv(Features_Path, index_col=0)

    # Concatenate water consumption series and other features into one frame
    df = pd.DataFrame(np.hstack((np.array(df_wc), np.array(df_features))),
                      index=df_wc.index,
                      columns=np.append(df_wc.columns.values,
                                        df_features.columns.values))

    # Min-max scale every column to [0, 1].
    # NOTE: a constant column divides 0 by 0 here and becomes NaN.
    df_01 = (df - df.min()) / (df.max() - df.min())

    # Build and train the DeepAR model; hand the result back to the caller
    # instead of discarding it.
    return DeepAR_model(df_01, data_config, model_config)

# get_model_name

In [12]:
def get_model_name(data_config, model_config):
    """Compose the underscore-separated model name from the configuration.

    The name is made of, in order: the target variables, the covariates,
    ``freq``, ``context_length``, ``prediction_length`` and ``cell_type``.
    With no covariates the covariate slot is empty, which leaves two
    consecutive underscores in the name.

    Parameters
    ----------
    data_config : dict
        Reads ``'target_var'``, ``'covariate'``, ``'freq'``,
        ``'prediction_length'`` and ``'context_length'``.
    model_config : dict
        Reads ``'cell_type'``.

    Returns
    -------
    str
    """
    targets_part = '_'.join(data_config['target_var'])
    covariates_part = '_'.join(data_config['covariate'])
    freq_part = str(data_config['freq'])
    prediction_part = str(data_config['prediction_length'])
    context_part = str(data_config['context_length'])
    cell_part = model_config['cell_type']

    pieces = [
        targets_part,
        covariates_part,
        freq_part,
        context_part,
        prediction_part,
        cell_part,
    ]
    return '_'.join(pieces)

# Basic model

In [14]:
if __name__ == '__main__':
    # -------------------------------------------------------------------------
    # Basic model: a single training run, with no warm start from a saved model.
    # -------------------------------------------------------------------------

    # Data settings: two target stations, two covariates, and the date ranges
    # used for the training and development splits.
    data_config = {
        'target_var': ['HY', 'WJ'],
        'freq': 10,
        'prediction_length': 144,
        'context_length': 720,
        'covariate': ['HF_e1', 'MT'],
        'show_train_valid_dict_data': True,
        'train_period': {
            'start': ['2019-01-01 00:00'],
            'end': ['2020-08-07 00:00'],
        },
        'dev_period': {
            'start': ['2020-08-07 00:00'],
            'end': ['2020-10-19 00:00'],
        },
    }

    # Model / trainer settings; training starts from scratch.
    model_config = {
        'r_seed': 0,
        'num_layers': 2,
        'num_cells': 50,
        'cell_type': 'lstm',
        'epochs': 20,
        'ctx': 'gpu',
        'save_model': True,
        'continue_train': False,
        'continue_train_model': None,
    }

    # The save directory name is derived from the configuration itself.
    model_config['model_name'] = get_model_name(data_config, model_config)
    run_model(data_config, model_config)
    

  0%|                                                                                           | 0/50 [00:00<?, ?it/s]


Create Training Set
create 2 dict_data, The first sampe  dict_data:
{'start': '2019-01-01 00:00', 'target': 2019-01-01 00:00    0.404908
2019-01-01 00:10    0.420715
2019-01-01 00:20    0.289434
2019-01-01 00:30    0.336855
2019-01-01 00:40    0.309983
                      ...   
2020-08-06 23:20    0.320715
2020-08-06 23:30    0.377621
2020-08-06 23:40    0.354077
2020-08-06 23:50    0.343927
2020-08-07 00:00    0.329700
Name: HY, Length: 84097, dtype: float64, 'feat_static_cat': [0], 'feat_dynamic_real': array([[0.465252  , 0.465252  , 0.465252  , ..., 0.        , 0.        ,
        0.        ],
       [0.18918919, 0.18918919, 0.18918919, ..., 0.91891892, 0.91891892,
        0.94594595]])}
Create Development Set
create 2 dict_data, The first sampe  dict_data:
{'start': '2020-08-07 00:00', 'target': 2020-08-07 00:00    0.329700
2020-08-07 00:10    0.306240
2020-08-07 00:20    0.269384
2020-08-07 00:30    0.259817
2020-08-07 00:40    0.228369
                      ...   
2020-10-18 

100%|████████████████████████████████████████████████| 50/50 [00:51<00:00,  1.03s/it, epoch=1/20, avg_epoch_loss=-1.09]
1it [00:02,  2.46s/it, epoch=1/20, validation_avg_epoch_loss=-1.78]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.02it/s, epoch=2/20, avg_epoch_loss=-1.82]
1it [00:02,  2.46s/it, epoch=2/20, validation_avg_epoch_loss=-2]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.01it/s, epoch=3/20, avg_epoch_loss=-1.94]
1it [00:02,  2.50s/it, epoch=3/20, validation_avg_epoch_loss=-2.04]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.02it/s, epoch=4/20, avg_epoch_loss=-2.07]
1it [00:02,  2.57s/it, epoch=4/20, validation_avg_epoch_loss=-1.95]
100%|████████████████████████████████████████████████| 50/50 [00:48<00:00,  1.02it/s, epoch=5/20, avg_epoch_loss=-2.08]
1it [00:02,  2.58s/it, epoch=5/20, validation_avg_epoch_loss=-2.19]
100%|████████████████████████████████████████████████| 50/50 [0

Saving the model ...


# Real-time model

In [16]:
if __name__ == '__main__':
    # -------------------------------------------------------------------------
    # Real-time update: train a "raw" model on the early period, then continue
    # training it on the later period and save the result as "_update".
    # -------------------------------------------------------------------------

    data_config = {
        'target_var': ['HY', 'WJ'],
        'freq': 10,
        'prediction_length': 144,
        'context_length': 720,
        'covariate': ['HF_e1'],
        'show_train_valid_dict_data': True,
    }

    model_config = {
        'r_seed': 0,
        'num_layers': 2,
        'num_cells': 50,
        'cell_type': 'lstm',
        'epochs': 10,
        'ctx': 'gpu',
        'save_model': True,
    }

    # --- Stage 1: raw model, trained from scratch on the early period ---
    data_config.update({
        'train_period': {
            'start': ['2019-01-01 00:00'],
            'end': ['2019-10-07 00:00'],
        },
        'dev_period': {
            'start': ['2019-10-07 00:00'],
            'end': ['2019-11-19 00:00'],
        },
    })
    model_config.update({
        'continue_train': False,
        'continue_train_model': None,
    })

    # The base name depends only on fields that stay fixed across both stages.
    base_name = get_model_name(data_config, model_config)
    model_config['model_name'] = f'{base_name}_raw'
    run_model(data_config, model_config)

    # --- Stage 2: continue training the saved raw model on the later period ---
    data_config.update({
        'train_period': {
            'start': ['2019-10-07 00:00'],
            'end': ['2020-08-07 00:00'],
        },
        'dev_period': {
            'start': ['2020-08-07 00:00'],
            'end': ['2020-10-19 00:00'],
        },
    })
    model_config.update({
        'continue_train': True,
        'continue_train_model': f'DeepAR_Model/{base_name}_raw',
        'model_name': f'{base_name}_update',
    })
    run_model(data_config, model_config)

  0%|                                                                                           | 0/50 [00:00<?, ?it/s]


Create Training Set
create 2 dict_data, The first sampe  dict_data:
{'start': '2019-01-01 00:00', 'target': 2019-01-01 00:00    0.404908
2019-01-01 00:10    0.420715
2019-01-01 00:20    0.289434
2019-01-01 00:30    0.336855
2019-01-01 00:40    0.309983
                      ...   
2019-10-06 23:20    0.378785
2019-10-06 23:30    0.352246
2019-10-06 23:40    0.349917
2019-10-06 23:50    0.338103
2019-10-07 00:00    0.371464
Name: HY, Length: 40177, dtype: float64, 'feat_static_cat': [0], 'feat_dynamic_real': array([[0.465252  , 0.465252  , 0.465252  , ..., 0.90535007, 0.90535007,
        0.73534667]])}
Create Development Set
create 2 dict_data, The first sampe  dict_data:
{'start': '2019-10-07 00:00', 'target': 2019-10-07 00:00    0.371464
2019-10-07 00:10    0.342013
2019-10-07 00:20    0.325374
2019-10-07 00:30    0.323710
2019-10-07 00:40    0.316389
                      ...   
2019-11-18 23:20    0.343760
2019-11-18 23:30    0.360566
2019-11-18 23:40    0.338769
2019-11-18 23:50  

100%|████████████████████████████████████████████████| 50/50 [00:54<00:00,  1.09s/it, epoch=1/10, avg_epoch_loss=-1.04]
1it [00:03,  3.33s/it, epoch=1/10, validation_avg_epoch_loss=-1.71]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.01it/s, epoch=2/10, avg_epoch_loss=-1.71]
1it [00:03,  3.21s/it, epoch=2/10, validation_avg_epoch_loss=-1.79]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.01it/s, epoch=3/10, avg_epoch_loss=-1.84]
1it [00:03,  3.18s/it, epoch=3/10, validation_avg_epoch_loss=-1.58]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.01it/s, epoch=4/10, avg_epoch_loss=-1.92]
1it [00:03,  3.14s/it, epoch=4/10, validation_avg_epoch_loss=-2.07]
100%|████████████████████████████████████████████████| 50/50 [00:49<00:00,  1.01it/s, epoch=5/10, avg_epoch_loss=-1.97]
1it [00:03,  3.17s/it, epoch=5/10, validation_avg_epoch_loss=-2.02]
100%|████████████████████████████████████████████████| 50/50

Saving the model ...


  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  0%|                                                                                           | 0/50 [00:00<?, ?


Create Training Set
create 2 dict_data, The first sampe  dict_data:
{'start': '2019-10-07 00:00', 'target': 2019-10-07 00:00    0.371464
2019-10-07 00:10    0.342013
2019-10-07 00:20    0.325374
2019-10-07 00:30    0.323710
2019-10-07 00:40    0.316389
                      ...   
2020-08-06 23:20    0.320715
2020-08-06 23:30    0.377621
2020-08-06 23:40    0.354077
2020-08-06 23:50    0.343927
2020-08-07 00:00    0.329700
Name: HY, Length: 43921, dtype: float64, 'feat_static_cat': [0], 'feat_dynamic_real': array([[0.73534667, 0.73534667, 0.73534667, ..., 0.        , 0.        ,
        0.        ]])}
Create Development Set
create 2 dict_data, The first sampe  dict_data:
{'start': '2020-08-07 00:00', 'target': 2020-08-07 00:00    0.329700
2020-08-07 00:10    0.306240
2020-08-07 00:20    0.269384
2020-08-07 00:30    0.259817
2020-08-07 00:40    0.228369
                      ...   
2020-10-18 23:20    0.341930
2020-10-18 23:30    0.324210
2020-10-18 23:40    0.308569
2020-10-18 23:50  

100%|████████████████████████████████████████████████| 50/50 [00:53<00:00,  1.07s/it, epoch=1/10, avg_epoch_loss=-1.79]
1it [00:03,  3.43s/it, epoch=1/10, validation_avg_epoch_loss=-2.15]
100%|████████████████████████████████████████████████| 50/50 [00:50<00:00,  1.01s/it, epoch=2/10, avg_epoch_loss=-2.31]
1it [00:03,  3.37s/it, epoch=2/10, validation_avg_epoch_loss=-2.27]
100%|████████████████████████████████████████████████| 50/50 [00:50<00:00,  1.01s/it, epoch=3/10, avg_epoch_loss=-2.35]
1it [00:03,  3.50s/it, epoch=3/10, validation_avg_epoch_loss=-2.28]
100%|████████████████████████████████████████████████| 50/50 [00:50<00:00,  1.01s/it, epoch=4/10, avg_epoch_loss=-2.36]
1it [00:03,  3.36s/it, epoch=4/10, validation_avg_epoch_loss=-2.25]
100%|████████████████████████████████████████████████| 50/50 [00:50<00:00,  1.02s/it, epoch=5/10, avg_epoch_loss=-2.36]
1it [00:03,  3.31s/it, epoch=5/10, validation_avg_epoch_loss=-2.29]
100%|████████████████████████████████████████████████| 50/50

Saving the model ...
