In [15]:
import numpy as np
import pickle
import pandas as pd
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
import logging
from azureml.automl.core.forecasting_parameters import ForecastingParameters

class Runner:
    """Train and cache one AutoML forecasting model per slice of a training table.

    A "slice" is selected by a ``config_list`` of ``(column, value)`` pairs;
    only rows where every listed column equals its value are kept.  Each
    distinct ``config_list`` maps to a string key, and the fitted model for
    that key is kept in ``job_cache`` so a repeated request does not retrain.

    Attributes:
        df: Training table read from ``train_df_path`` with the date column
            parsed to datetimes.
        freq: Frequency string forwarded to ``ForecastingParameters``.
        target_var: Name of the label column.
        date_time_var: Name of the time column.
        hr_vars: Columns that slices may be selected on.
        holiday: The ``holiday_feature`` flag as passed in.
        suggestion: Mapping of each ``hr_vars`` column to its distinct values.
        job_cache: Mapping of job key -> fitted model.
    """

    def __init__(self,train_df_path,date_var,hr_vars,freq,holiday_feature,target_var):
        """Load the training data and record the forecasting settings.

        Args:
            train_df_path: Path of the training CSV file.
            date_var: Name of the time column; it is parsed with
                ``pd.to_datetime``.
            hr_vars: Iterable of column names that slices can be taken on.
            freq: Frequency string passed through to AutoML.
            holiday_feature: Stored on the instance as ``holiday``.
            target_var: Name of the column to forecast.
        """
        # Honour the caller-supplied path (it used to be ignored in favour of
        # a hard-coded 'train.csv').
        self.df = pd.read_csv(train_df_path)
        self.freq = freq
        self.target_var = target_var
        self.date_time_var = date_var
        self.df[date_var] = pd.to_datetime(self.df[date_var])
        self.hr_vars = hr_vars
        # NOTE(review): this flag is stored but never consulted; US holidays
        # are always requested in _create_job -- confirm the intent.
        self.holiday = holiday_feature
        self.suggestion = {x: list(self.df[x].unique()) for x in self.hr_vars}

        self.job_cache = {}
        # Job key -> the config_list that job was trained on, so _predict can
        # slice the test data the same way regardless of which job ran last.
        self._job_configs = {}

    def _get_suggestions(self):
        """Return the mapping of each slicing column to its distinct values."""
        return self.suggestion

    @staticmethod
    def _job_key(config_list):
        """Build the cache key, e.g. ``'__Store_1_Dept_2'``, for a config list."""
        return "_" + "".join("_{}_{}".format(x[0], x[1]) for x in config_list)

    @staticmethod
    def _slice(df, config_list):
        """Return the rows of ``df`` where every ``(column, value)`` pair matches."""
        for x in config_list:
            df = df[df[x[0]] == x[1]]
        return df

    def _create_job(self,config_list,test_df_path):
        """Train a forecasting model for one slice, or reuse the cached one.

        Args:
            config_list: Sequence of ``(column, value)`` pairs selecting the
                training rows.  The order of the pairs is part of the key.
            test_df_path: Accepted for interface compatibility; not used by
                this method.

        Returns:
            The cache key under which the fitted model is stored in
            ``job_cache``.

        Raises:
            ValueError: If no training row matches ``config_list``.
        """
        self.config_list = config_list
        key_val = self._job_key(config_list)

        print("Check if Key Exists in Job Cache")
        if key_val in self.job_cache:
            return key_val

        # Perform slicing.
        final_df = self._slice(self.df, config_list)
        if final_df.empty:
            raise ValueError(
                "No training rows match config_list={!r}".format(config_list)
            )
        self.final_df = final_df

        # The slice is still written to disk and re-read, so the training
        # frame keeps the dtypes a CSV read produces.  index=False stops the
        # row index from coming back as an 'Unnamed: 0' feature column.
        path = key_val+".csv"
        final_df.to_csv(path, index=False)
        train_data = pd.read_csv(path)

        forecasting_parameters = ForecastingParameters(time_column_name=self.date_time_var,
                                                       forecast_horizon=50,
                                                       country_or_region_for_holidays='US',
                                                       freq=self.freq,
                                                       target_lags='auto',
                                                       target_rolling_window_size=10)

        automl_config = AutoMLConfig(task='forecasting',
                                     primary_metric='normalized_root_mean_squared_error',
                                     experiment_timeout_minutes=15,
                                     enable_early_stopping=True,
                                     training_data=train_data,
                                     label_column_name=self.target_var,
                                     n_cross_validations=5,
                                     enable_ensembling=False,
                                     verbosity=logging.INFO,
                                     forecasting_parameters=forecasting_parameters)
        ws = Workspace.from_config()
        experiment = Experiment(ws, "local-Delta")
        local_run = experiment.submit(automl_config, show_output=True)
        print("Training Job Complete")
        best_run, fitted_model = local_run.get_output()
        print("Making Predictions")

        self.job_cache[key_val] = fitted_model
        # Copy so later mutation of the caller's list cannot change the slice.
        self._job_configs[key_val] = list(config_list)
        print("Finish")
        return key_val

    def _predict(self,test_df_path,key_val):
        """Forecast quantiles for the test rows belonging to a cached job.

        Args:
            test_df_path: Path of the test CSV file.
            key_val: Key returned by ``_create_job``.

        Returns:
            Whatever the cached model's ``forecast_quantiles`` returns for the
            sliced test data and an all-NaN label query.

        Raises:
            KeyError: If ``key_val`` is not in ``job_cache``.
        """
        test_df = pd.read_csv(test_df_path)
        fitted_model = self.job_cache[key_val]

        # Use the config this job was trained on, not whichever job was
        # created most recently.  Fall back to the last config only for models
        # that were put into job_cache without going through _create_job.
        config_list = self._job_configs.get(key_val)
        if config_list is None:
            config_list = self.config_list

        print("Slicing Test Data")
        final_test_df = self._slice(test_df, config_list)
        test_path = key_val+"test_df"+".csv"
        # index=False for the same reason as in _create_job.
        final_test_df.to_csv(test_path, index=False)
        print("Test Data Slicing Finish")
        test_data = pd.read_csv(test_path)

        print("Creating Query")
        # One NaN per test row; built directly instead of via the removed
        # np.float alias.
        label_query = np.full(len(test_data), np.nan)
        fitted_model.quantiles = [0.05,0.5, 0.9,0.75]
        result=fitted_model.forecast_quantiles(test_data,label_query,ignore_data_errors=True)
        print("Finish")
        return result





            
            




In [16]:
# Build the runner on the training file; 'Store' and 'Dept' are the columns
# that jobs can be sliced on.
# NOTE(review): freq='D' is passed, yet the forecast dates displayed further
# down are 7 days apart -- confirm a weekly frequency was not intended.
r = Runner(
    train_df_path='train.csv',
    date_var='Date',
    hr_vars=['Store', 'Dept'],
    freq='D',
    holiday_feature=True,
    target_var='Weekly_Sales',
)

In [17]:
# Show the distinct values found in the training data for each slicing column.
r._get_suggestions()

{'Store': [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45],
 'Dept': [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  40,
  41,
  42,
  44,
  45,
  46,
  47,
  48,
  49,
  51,
  52,
  54,
  55,
  56,
  58,
  59,
  60,
  67,
  71,
  72,
  74,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  85,
  87,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  39,
  50,
  43,
  65]}

In [18]:
# Train (or fetch from the cache) the model for Store 1 / Dept 2 and keep its
# cache key.
k = r._create_job(
    config_list=[('Store', 1), ('Dept', 2)],
    test_df_path='test.csv',
)

Check if Key Exists in Job Cache
No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
local-Delta,AutoML_81be0f7a-d6b4-40fc-b725-2575828f76af,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Heuristic parameters: Target_Lag = '[0]'.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Curr

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


In [19]:
# Forecast quantiles for the test rows of the job identified by `k`.
final_res = r._predict(
    key_val=k,
    test_df_path='test.csv',
)

Slicing Test Data
Test Data Slicing Finish
Creating Query
Finish


In [20]:
# Display the quantile forecast returned by the _predict call above.
final_res

Unnamed: 0,Date,0.05,0.5,0.9,0.75
0,2012-11-02,44586.91,47054.11,48976.38,48065.81
1,2012-11-09,,45977.22,,
2,2012-11-16,,44166.71,,
3,2012-11-23,,45329.02,,
4,2012-11-30,,46826.61,,
5,2012-12-07,,47397.65,,
6,2012-12-14,,51093.88,,
7,2012-12-21,,56250.88,,
8,2012-12-28,33286.82,44498.7,53234.19,49096.25
9,2013-01-04,,48210.7,,


In [21]:
# Repeat the Store 1 / Dept 2 request; the output below shows only the cache
# check, i.e. no new training run is started.
k = r._create_job(
    config_list=[('Store', 1), ('Dept', 2)],
    test_df_path='test.csv',
)

Check if Key Exists in Job Cache


In [22]:
# Train (or fetch from the cache) the model for Store 3 / Dept 5; `k` now
# holds this job's key.
k = r._create_job(
    config_list=[('Store', 3), ('Dept', 5)],
    test_df_path='test.csv',
)

Check if Key Exists in Job Cache
No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
local-Delta,AutoML_346e57b7-8b92-47a1-9a79-92d6a3566ee6,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Heuristic parameters: Target_Lag = '[0]'.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Curr

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


In [23]:
# Forecast quantiles for the test rows of the job identified by `k`.
final_res_s3_d5 = r._predict(
    key_val=k,
    test_df_path='test.csv',
)

Slicing Test Data
Test Data Slicing Finish
Creating Query
Finish


In [24]:
# Display the quantile forecast returned by the _predict call above.
final_res_s3_d5

Unnamed: 0,Date,0.05,0.5,0.9,0.75
0,2012-11-02,6381.53,10053.93,12915.19,11559.83
1,2012-11-09,,10372.03,,
2,2012-11-16,,10446.52,,
3,2012-11-23,,40743.43,,
4,2012-11-30,,17359.45,,
5,2012-12-07,,17134.28,,
6,2012-12-14,,19585.18,,
7,2012-12-21,,20337.18,,
8,2012-12-28,12494.4,18129.85,22520.58,20440.73
9,2013-01-04,,12583.31,,
