# Custom Heirarchical Time Series Forecasting

###  Dynamically slicing the dataset basis the heirarchy variables the user passes upon which it requires the forecast for,then using the sliced dataset the model is trained and generate predictions for the same. There is an additional caching feature to avoid retraining of models on the same heirarchy variables.

### This is benificial in 2 ways:

- ### We can work with the standard compute  resource and don't need to acquire more expensive compute resources
- ### Traning time reduced significantly, as we train only on what's needed at this point in time.


In [6]:
# The usual Imports
import numpy as np
import pickle
import pandas as pd
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
import logging
from azureml.automl.core.forecasting_parameters import ForecastingParameters


class Runner:

    def __init__(
        self,
        train_df_path,
        date_var,
        hr_vars,
        freq,
        holiday_feature,
        target_var,
        ):
        # Storing all the configuration parameters
        self.df = pd.read_csv(train_df_path)
        self.freq = freq
        self.target_var = target_var
        self.date_time_var = date_var
        self.df[date_var] = pd.to_datetime(self.df[date_var])
        self.hr_vars = hr_vars
        self.holiday = holiday_feature
        self.suggestion = {}
        # generate a list of unique values in the heirarchy list passed and store it as a dictionary
        for x in self.hr_vars:
            self.suggestion[x] = list(self.df[x].unique())

        self.job_cache = {}

    def _get_suggestions(self):
        
        return self.suggestion

    # Creating the Training Job
    def _create_job(self, config_list, test_df_path):
        self.config_list = config_list
        #Generating a Unique key name based on the heirarchy combination passed. The key would look like this __state_WA_store_id_1_product_category_B_SKU_B2.csv
        key_val = '_'
        for x in config_list:
            key_val = key_val + '_' + x[0] + '_' + str(x[1])

        print ('Check if Key Exists in Job Cache')
        # Measure to avouid re-training the model on the same heirarchy combination
        if key_val in self.job_cache.keys():

            return key_val
        else:

            # Perform Slicing of Dataset based on the heirarchy combination passed

            final_df = self.df
            for x in config_list:
                final_df = final_df[final_df[x[0]] == x[1]]
            self.final_df = final_df
            path = key_val + '.csv'
            final_df.to_csv(path)

            train_data = pd.read_csv(path)
            # Train_data is the sliced dataframe we pass for training
            # Setting up Automl Config
            forecasting_parameters = ForecastingParameters(
                time_column_name=self.date_time_var,
                forecast_horizon=50,
                country_or_region_for_holidays='US',
                freq=self.freq,
                target_lags='auto',
                target_rolling_window_size=10,
                )

            automl_config = AutoMLConfig(
                task='forecasting',
                primary_metric='normalized_root_mean_squared_error',
                experiment_timeout_minutes=15,
                enable_early_stopping=True,
                training_data=train_data,
                label_column_name=self.target_var,
                n_cross_validations=5,
                enable_ensembling=False,
                verbosity=logging.INFO,
                forecasting_parameters=forecasting_parameters,
                )
            ws = Workspace.from_config()
            experiment = Experiment(ws, 'local-Delta')
            local_run = experiment.submit(automl_config,
                    show_output=True)
            print ('Training Job Complete')
            (best_run, fitted_model) = local_run.get_output()
            print ('Making Predictions')
            # saving the model in cache with key generated from it's heirarchy combination
            self.job_cache[key_val] = fitted_model
            print ('Finish')
            return key_val

    def _predict(self, test_df_path, key_val):
        
        test_df = pd.read_csv(test_df_path)
        fitted_model = self.job_cache[key_val]
        # slicing the target data basis the heirarchy combination to predict output fot that combination
        print ('Slicing Test Data')
        for x in self.config_list:
            test_df = test_df[test_df[x[0]] == x[1]]

        final_test_df = test_df
        test_path = key_val + 'test_df' + '.csv'
        final_test_df.to_csv(test_path)
        print ('Test Data Slicing Finish')
        test_data = pd.read_csv(test_path)
        
        
        
        print ('Creating Query')
        
        fitted_model.quantiles = [0.05, 0.5, 0.9, 0.75]
        result = fitted_model.forecast_quantiles(test_data,
                ignore_data_errors=True)
        print ('Finish')
        return result


### Intializing the class and passing in the prarameters
- ### train_df_path: path to the training dataset
- ### test_df_path: path to the test dataset
- ### heirarchy_vars: list of heirarchy variables
- ### date_var : date variable
- ### target_var : target variable (The variable for which you want your forecasts in this case it is the quantity)
- ### holiday_feature:Bool, if you want to include the holiday feature in the model
- ### freq: frequency of the time series data

In [7]:
r = Runner(train_df_path='hts-sample-train.csv',date_var='date',target_var='quantity',holiday_feature=True,hr_vars=["state",'store_id','product_category','SKU'],freq='D')

### Having a look at the unique values of the heirarchy columns

In [8]:
suggestion = r._get_suggestions()
print(suggestion)

{'state': ['CA', 'FL', 'WA'], 'store_id': [1, 2, 3], 'product_category': ['A', 'B'], 'SKU': ['A1', 'A2', 'A3', 'B1', 'B2']}


### Creating the training Job
- ### test_df_path:path to the test dataset
- ### Config List : List consisting of Tuple of (heirarchy_column_name,heirarchy_value)

### Consider we want predictions for state=WA,store_id=1,product_category=B,SKU=B2,hence the config list to be passed will appear something like this
``` 
[("state","WA"),('store_id',1),('product_category','B'),("SKU","B2")]
```

### The class method returns a key that uniquely identifies the model saved in Job cache,the key is then used for predictions


In [9]:
k = r._create_job(config_list=[("state","WA"),('store_id',1),('product_category','B'),("SKU","B2")],test_df_path='hts-sample-test.csv')

Check if Key Exists in Job Cache
No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
local-Delta,AutoML_bd470986-202e-4ca5-a11a-6d221626d946,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Heuristic parameters: Target_Lag = '[0]'.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Curr

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


### Fetching Results 
- ### key_val: key returned by the training job

### Basis the key passed,the model is fetched from the cache and the predictions are generated
### By default predictions are generated for the 5th percentile range,50th percentile range and 75th percentile range and 90th percentile range

In [10]:
final_res = r._predict(test_df_path='hts-sample-test.csv',key_val=k)
print(final_res)

Slicing Test Data
Test Data Slicing Finish
Creating Query
Finish
         date  0.05  0.5  0.9  0.75
0  2016-07-28  6.67 7.55 8.23  7.91
1  2016-07-29  6.95 7.82 8.50  8.17
2  2016-07-30  7.10 7.88 8.50  8.21
3  2016-07-31  7.34 8.03 8.56  8.31
4  2016-08-01  6.95 7.65 8.20  7.94
5  2016-08-02  7.08 7.72 8.22  7.98
6  2016-08-03  6.96 7.67 8.21  7.95
7  2016-08-04  6.83 7.69 8.35  8.04
8  2016-08-05  7.12 7.82 8.36  8.10
9  2016-08-06  7.12 7.84 8.40  8.13
10 2016-08-07  7.21 7.92 8.47  8.21
11 2016-08-08  6.88 7.56 8.10  7.84
12 2016-08-09  7.29 7.77 8.14  7.96
13 2016-08-10  6.86 7.57 8.13  7.87
14 2016-08-11  6.88 7.56 8.10  7.84
15 2016-08-12  6.99 7.68 8.21  7.96
16 2016-08-13  7.13 7.78 8.28  8.04
17 2016-08-14  7.04 7.69 8.19  7.96
18 2016-08-15  7.03 7.69 8.19  7.95
19 2016-08-16  6.86 7.48 7.95  7.73
20 2016-08-17  7.05 7.73 8.25  8.01
21 2016-08-18  6.97 7.61 8.11  7.87
22 2016-08-19  7.00 7.72 8.29  8.02
23 2016-08-20  6.96 7.75 8.36  8.07
24 2016-08-21  7.01 7.77 8.35  8.08