In [None]:
import azureml.core
from azureml.core import Workspace, Datastore
import pandas as pd
from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder
from azureml.pipeline.core import Pipeline
# Set up the Azure ML workspace used for hierarchical time-series forecasting.
import warnings

ws = Workspace.from_config()
ws.get_details()  # the returned details are neither stored nor displayed here

# Default datastore of the workspace
dstore = ws.get_default_datastore()

# Summary of the environment this notebook is running against.
output = {
    "SDK version": azureml.core.VERSION,
    "Subscription ID": ws.subscription_id,
    "Workspace": ws.name,
    "Resource Group": ws.resource_group,
    "Location": ws.location,
    "Default datastore name": dstore.name,
}
pd.set_option("display.max_colwidth", None)
outputDf = pd.DataFrame(data=output, index=[""])

# Silence all Python warnings for the remainder of the notebook session.
warnings.filterwarnings("ignore")

# Keep the table as the LAST expression of the cell: Jupyter renders only a
# cell's final expression, so any statement placed after it hides the summary.
outputDf.T

In [None]:
from azureml.core import Experiment

# Experiment to which the training and inference pipelines below are submitted.
experiment = Experiment(workspace=ws, name="automl-hts-OnRent")

print(f"Experiment name: {experiment.name}")

In [None]:
# Presumably the datastore folder for this sample's data — it is not referenced
# anywhere else in the visible notebook; confirm it is still needed.
datastore_path = "hts-sample-OnRent"

In [None]:
# Fetch the workspace's default datastore and show it as the cell output.
datastore = ws.get_default_datastore()
datastore

In [None]:
date_column = "EffectiveDate"
file_location = "FleetForecasting_Top100ProductSubCategory_WithIHSData_Weather_BYDay_V2.csv"

# The file is pipe-delimited; the date column is parsed into datetimes on load.
input_data_raw = pd.read_csv(
    file_location,
    sep="|",
    parse_dates=[date_column],
)

# All later transformations work on a copy so the raw frame stays untouched.
input_data_copy = input_data_raw.copy()

In [None]:
# Distinct values found in the Region column.
input_data_copy["Region"].unique()

In [None]:
# Distinct values found in the RegionName column.
input_data_copy["RegionName"].unique()

In [None]:
# Bar chart of the number of observations per region.
input_data_copy["RegionName"].value_counts().plot.bar()

In [None]:
# Order the rows chronologically, oldest first.
input_data_copy = input_data_copy.sort_values(by=["EffectiveDate"], ascending=True)

In [None]:
# Character class of the symbols stripped from the label columns:
# double quote, single quote, comma, "<" and space.
label_noise_pattern = r"[\"\',< ]"

# regex=True must be explicit: since pandas 2.0 `Series.str.replace` treats the
# pattern as a literal string by default, in which case nothing would be removed.
for label_column in ["RegionName", "ProductCategory_Desc"]:
    input_data_copy[label_column] = input_data_copy[label_column].str.replace(
        label_noise_pattern, "", regex=True
    )

# Build the equipment key "<ProductCategory_Nbl>_<ProductCategory_Desc>" per row.
cols = ['ProductCategory_Nbl', 'ProductCategory_Desc']
input_data_copy['eq_nm'] = input_data_copy[cols].apply(
    lambda row: '_'.join(row.values.astype(str)), axis=1
)

In [None]:
# Preview the first rows of the working frame.
input_data_copy.head()

In [None]:
# Regions kept for modelling; rows from any other region are discarded.
use_region = [
    "REGION04",
    "REGION06",
    "REGION07",
    "REGION09",
    "REGION08",
    "REGION12",
    "REGION03",
    "REGION05",
    "REGION14",
    "REGION02",
    "REGION11",
]
input_data_copy = input_data_copy.loc[input_data_copy["RegionName"].isin(use_region)]

In [None]:
# Preview the first rows of the working frame again.
input_data_copy.head()

In [None]:
# Columns left out of the modelling frame; every other column is kept, in its
# original order.
excluded_columns = {
    "Region",
    "DRKey",
    "clean_time",
    "Rental",
    "QtyOwned",
    "ProductCategory_Nbl",
    "ProductCategory_Desc",
}
feature_importance = [
    name for name in input_data_copy.columns if name not in excluded_columns
]

In [None]:
sg_district_elect = input_data_copy.copy()

# Observation window: rows strictly before the start or strictly after the end
# are removed. (Original author's note: the lower bound is probably unnecessary
# because the data starts in 2016.)
window_start = '2015-05-01 00:00:00'
window_end = '2020-05-01 01:00:00'

observation_dates = sg_district_elect[date_column]
outside_window = (observation_dates < window_start) | (observation_dates > window_end)

# One boolean-mask selection plus an explicit copy replaces the two in-place,
# index-label drops: the result is an independent frame, and the filter no
# longer relies on the index labels being unique.
sg_district_elect_train = sg_district_elect.loc[~outside_window, feature_importance].copy()

In [None]:
# Preview the last rows of the date-filtered modelling frame.
sg_district_elect_train.tail()

In [None]:
sg_training = sg_district_elect_train

# Chronological split: rows dated on or before split_date go to training,
# later rows go to testing.
split_date = '2019-5-20'
train = sg_district_elect_train.loc[sg_district_elect_train[date_column] <= split_date]
test = sg_district_elect_train.loc[sg_district_elect_train[date_column] > split_date]

# len() counts rows, not days, so both figures are reported under their own label.
print(
    f"{len(train)} rows ({train[date_column].nunique()} distinct {date_column} values) of training data \n"
    f" {len(test)} rows ({test[date_column].nunique()} distinct {date_column} values) of testing data "
)

In [None]:
# Preview the first rows of the training split.
train.head()

In [None]:
# Earliest and latest date present in the training split.
train[date_column].min(), train[date_column].max()

In [None]:
# Earliest and latest date present in the testing split.
test[date_column].min(), test[date_column].max()

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

datastore = ws.get_default_datastore()

# Both splits are uploaded under the same datastore path and registered under
# their own dataset names.
dataset_target = (datastore, "dataset/")

train_dataset = TabularDatasetFactory.register_pandas_dataframe(
    train,
    target=dataset_target,
    name="OnRent_train",
)
test_dataset = TabularDatasetFactory.register_pandas_dataframe(
    test,
    target=dataset_target,
    name="OnRent_test",
)

In [None]:
from azureml.train.automl.runtime._hts.hts_parameters import HTSTrainParameters

# Hierarchy columns and the level at which models are trained.
# Adjust these to match your dataset.
hierarchy = ["RegionName", "Division", "eq_nm"]
training_level = "eq_nm"

# Forecasting task definition. Adjust these to match your dataset.
time_column_name = "EffectiveDate"
label_column_name = "OnRent"
forecast_horizon = 120

# Explanation switches: the first goes into the AutoML settings, the second is
# handed to HTSTrainParameters.
model_explainability = True
engineered_explanations = False

automl_settings = dict(
    task="forecasting",
    primary_metric="normalized_root_mean_squared_error",
    label_column_name=label_column_name,
    time_column_name=time_column_name,
    forecast_horizon=forecast_horizon,
    hierarchy_column_names=hierarchy,
    hierarchy_training_level=training_level,
    track_child_runs=False,
    pipeline_fetch_max_batch_size=15,
    model_explainability=model_explainability,
    # The settings below are specific to this sample; tune them to your needs.
    iteration_timeout_minutes=15,
    iterations=10,
    n_cross_validations=2,
)

hts_parameters = HTSTrainParameters(
    automl_settings=automl_settings,
    hierarchy_column_names=hierarchy,
    training_level=training_level,
    enable_engineered_explanations=engineered_explanations,
)

In [None]:
## Create or reuse the compute cluster

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Name your cluster
compute_name = "hts-compute"

# Read the workspace's compute targets once and reuse the result for both the
# membership test and the lookup.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    # Reuse the existing target instead of provisioning a new one.
    compute_target = existing_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print("Found compute target: " + compute_name)
    else:
        # Previously this case passed silently; surface it so that a later
        # failure of the pipeline steps can be traced back to the target type.
        print(
            "Warning! Compute target '{}' exists but is not an AmlCompute cluster "
            "(found {}).".format(compute_name, type(compute_target).__name__)
        )
else:
    print("Creating a new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D32_v3", max_nodes=300
    )
    # Create the compute target
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20
    )

    # For a more detailed view of current cluster status, use the 'status' property
    print(compute_target.status.serialize())

In [None]:
# Build the training steps from the HTS parameters, the registered training
# dataset and the compute target; 8 nodes with 10 processes each are requested.
training_pipeline_steps = AutoMLPipelineBuilder.get_many_models_train_steps(
    experiment=experiment,
    compute_target=compute_target,
    train_data=train_dataset,
    train_pipeline_parameters=hts_parameters,
    node_count=8,
    process_count_per_node=10,
)

In [None]:
from azureml.pipeline.core import Pipeline

# Wrap the training steps into a pipeline bound to the workspace.
training_pipeline = Pipeline(workspace=ws, steps=training_pipeline_steps)

In [None]:
# Submit the training pipeline to the experiment and wait until the run
# finishes, without streaming its output into the notebook.
training_run = experiment.submit(training_pipeline)
training_run.wait_for_completion(show_output=False)

In [None]:
# Download the "explanations" output of the training run into the local
# "training_explanations" folder; it is only requested when explainability is on.
if not model_explainability:
    print(
        "Model explanations are available only if model_explainability is set to True."
    )
else:
    expl_output = training_run.get_pipeline_output("explanations")
    expl_output.download("training_explanations")

In [None]:
import os

if not model_explainability:
    print(
        "Model explanations are available only if model_explainability is set to True."
    )
else:
    # Folder holding one sub-directory per downloaded explanation set.
    explanations_root = os.path.join("training_explanations", "azureml")
    explanations_dirrectory = os.listdir(explanations_root)
    if len(explanations_dirrectory) > 1:
        print(
            "Warning! The directory contains multiple explanations, only the first one will be displayed."
        )
    first_explanation = explanations_dirrectory[0]
    print(f"The explanations are located at {first_explanation}.")
    # Now we will list all the explanations.
    explanation_path = os.path.join(
        explanations_root, first_explanation, "training_explanations"
    )
    print("Available explanations")
    print("==============================")
    print("\n".join(os.listdir(explanation_path)))

In [None]:
from IPython.display import display

# Which explanation file to show: "<explanation_type>_explanations_<level>.csv".
explanation_type = "raw"
level = "Division"

if model_explainability:
    # Format only the file-name template and join afterwards: calling .format on
    # the already-joined path would also interpret any "{" or "}" that happens to
    # be part of the directory path.
    explanation_file = "{}_explanations_{}.csv".format(explanation_type, level)
    display(pd.read_csv(os.path.join(explanation_path, explanation_file)))

In [None]:
from azureml.pipeline.core import Pipeline
from azureml.train.automl.runtime._hts.hts_parameters import HTSInferenceParameters

# The forecast level is specific to this dataset and should be changed based on
# your dataset (author's note: set to division later to find demand onwards).
inference_parameters = HTSInferenceParameters(
    allocation_method="bottom_up",
    hierarchy_forecast_level="eq_nm",
)

# Batch-inference steps over the registered test dataset; 10 nodes with
# 5 processes each are requested.
steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(
    experiment=experiment,
    compute_target=compute_target,
    inference_data=test_dataset,
    inference_pipeline_parameters=inference_parameters,
    node_count=10,
    process_count_per_node=5,
)

# Assemble, submit and wait for the inference pipeline (output not streamed).
inference_pipeline = Pipeline(workspace=ws, steps=steps)
inference_run = experiment.submit(inference_pipeline)
inference_run.wait_for_completion(show_output=False)

In [None]:
# Fetch the "forecasts" output of the inference run and download it into the
# local "forecast_results" folder.
forecasts = inference_run.get_pipeline_output("forecasts")
forecasts.download("forecast_results")

In [None]:
# Re-submit the inference pipeline, this time passing the forecast level as a
# pipeline parameter, and wait for completion without streaming output.
# NOTE(review): "eq_nm" is the same level already set in HTSInferenceParameters,
# so this looks like a repeat of the previous inference run — confirm whether a
# different level (e.g. "Division") was intended.
inference_run = experiment.submit(
    inference_pipeline, pipeline_parameters={"hierarchy_forecast_level": "eq_nm"}
)
inference_run.wait_for_completion(show_output=False)

In [None]:
# Run this cell last: it downloads the "forecasts" output of the most recently
# submitted inference_run.
# NOTE(review): the target folder "forecast_results" is the same one used by the
# earlier forecast download — confirm results of the two runs do not collide.
forecasts = inference_run.get_pipeline_output("forecasts")
forecasts.download("forecast_results")