# Importing Libraries 

In [1]:
# Adding Azure Libraries 
from azureml.core import Workspace, Experiment, Dataset, Datastore
from azureml.core.compute import ComputeTarget
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.widgets import RunDetails
from azureml.opendatasets import OjSalesSimulated
from azureml.automl.core.forecasting_parameters import ForecastingParameters

In [2]:
# Adding non-Azure Libraries
import pandas as pd
import numpy as np
import os
from pathlib import Path

# Connecting to the Workspace:
1. you can use the config
2. you can point to another config object
3. you can use .get and provide workspace name, subscription id and resource group name

In [3]:
# Connect to the AMLS Workspace
ws = Workspace.from_config()

In [4]:
# Seting Compute Cluster for training
compute = 'training-cluster' # write your own cluster name here
compute_cluster = ComputeTarget(ws, compute)

In [5]:
# Set the Datastore
datastore = Datastore.get_default(ws)
datastore_name = 'workspaceblobstore'
blob_datastore = Datastore.get(ws, datastore_name)

# Downloading the Oranje Juice Sales Dataset

In [6]:
# Download the Orange Juice Sales Data Set from Azure ML Open datasets
oj_sales_files = OjSalesSimulated.get_file_dataset()
oj_sales = oj_sales_files.take(10) # we limit the dataset to include first 10 files only

In [7]:
# Create a folder called 'Orange Juice Sales Forecasting'; leave directory unaltered if already exists
folder = "Orange_Juice_Sales_Forecasting"
os.makedirs(folder, exist_ok=True)

In [8]:
# Download the Orange Juice Sales Files to 'Orange Juice Sales Forecasting' folder
oj_sales.download(folder, overwrite=True)

['/mnt/batch/tasks/shared/LS_root/mounts/clusters/forecasting-instance/code/Orange_Juice_Sales_Forecasting/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1000_dominicks.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/forecasting-instance/code/Orange_Juice_Sales_Forecasting/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1000_minute.maid.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/forecasting-instance/code/Orange_Juice_Sales_Forecasting/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1000_tropicana.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/forecasting-instance/code/Orange_Juice_Sales_Forecasting/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1001_dominicks.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/forecasting-instance/code/Orange_Juice_Sales_Forecasting/https%3A/%2Fazu

In [9]:
# Create a single Pandas dataframe out of the OJ Files
oj_file_path = Path('Orange_Juice_Sales_Forecasting').rglob('*.csv')
oj_files = [x for x in oj_file_path]
df = pd.concat((pd.read_csv(f) for f in oj_files))

In [10]:
# View the Pandas dataframe
df.head()

Unnamed: 0,WeekStarting,Store,Brand,Quantity,Advert,Price,Revenue
0,1990-06-14,1000,dominicks,12003,1,2.59,31087.77
1,1990-06-21,1000,dominicks,10239,1,2.39,24471.21
2,1990-06-28,1000,dominicks,17917,1,2.48,44434.16
3,1990-07-05,1000,dominicks,14218,1,2.33,33127.94
4,1990-07-12,1000,dominicks,15925,1,2.01,32009.25


In [11]:
df.WeekStarting.min()

'1990-06-14'

In [12]:
df.WeekStarting.max()

'1992-10-01'

# Register the Oranje Juice Dataset on Azure Machine Learning Workspace

In [13]:
# Register OJ Sales as a dataset
Dataset.Tabular.register_pandas_dataframe(df, datastore,"OJ Sales Training Dataset")

Method register_pandas_dataframe: This is an experimental method, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/ac4190c4-9a9a-414c-a584-850834dbfb28/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


{
  "source": [
    "('workspaceblobstore', 'managed-dataset/ac4190c4-9a9a-414c-a584-850834dbfb28/')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ReadParquetFile",
    "DropColumns"
  ],
  "registration": {
    "id": "eb0e3da6-0b7c-41a4-9331-0ca3c37c59f8",
    "name": "OJ Sales Training Dataset",
    "version": 3,
    "workspace": "Workspace.create(name='wsforecasting', subscription_id='497d1b53-aa2e-4f5e-a21d-20ab61cff740', resource_group='rgforecasting')"
  }
}

In [14]:
# Set the experiment that will be used to train the AutoML Model
experiment_name = 'Orange-Juice-Sales-Forecasting'
exp = Experiment(workspace=ws, name=experiment_name)

In [15]:
# Retrieve the OJ Sales Sample dataset
dataset_name = "OJ Sales Training Dataset"
dataset = Dataset.get_by_name(ws, dataset_name)

# Set Forecasting Parameters 

In [16]:
forecasting_parameters = {'time_column_name':'WeekStarting',
                          'forecast_horizon': 5,
                          'time_series_id_column_names':['Store','Brand'],
                          'target_lags': 'auto',
                          'feature_lags': 'auto',
                          'target_rolling_window_size': 'auto',
                          'country_or_region_for_holidays':'US',
                          'seasonality': 'auto',
                          'short_series_handling': True,
                          'short_series_handling_configuration': 'auto',                                                                                 
                          'use_stl': 'season_trend'                       
                          }

In [17]:
parameters=ForecastingParameters.from_parameters_dict(parameter_dict=forecasting_parameters,validate_params=True)

# Set AutoML Configuration

In [18]:
config = AutoMLConfig(task='forecasting',
                     primary_metric= 'normalized_root_mean_squared_error',
                     featurization= 'auto',
                     compute_target=compute_cluster,
                     training_data=dataset,
                     label_column_name= 'Quantity',
                     experiment_timeout_minutes= 60,
                     enable_early_stopping= False,
                     n_cross_validations=3,
                     model_explainability=False,
                     enable_stack_ensemble=False,
                     enable_voting_ensemble=True,
                     enable_dnn = False,
                     forecasting_parameters=parameters)

# Start a New Training Run

In [19]:
# Train the AutoML model to forecast Oranje Juice Sales
remote_run = exp.submit(config, show_output = True)
RunDetails(remote_run).show()

Submitting remote run.
No run_configuration provided, running on training-cluster with default configuration
Running on remote compute: training-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
Orange-Juice-Sales-Forecasting,AutoML_ae98dd4c-0a79-4486-ac74-44480a9bc507,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: ModelSelection. Beginning model selection.
Heuristic parameters: Target_Lag = '[0]', Target_Rolling_Window = '0'.


****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Time Series ID detection
STATUS:       PASSED
DESCRIPTION:  The data set was analyzed, and no duplicate time index were detected.
              Learn more about time-series forecasting configurations: https://aka.ms/AutomatedMLForecastingConfiguration

****************************************************************************************************

TYPE:         Frequency detection
STATUS:       PASSED
DESCRIPTION:  The time series was analyzed, all data points are aligned with detect

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

# Model Registration

In [23]:
description = 'First Time series forecasting Model by Azure AutoML' 
model_name = 'OJ-Sales-Sample-Forecasting-AutoML'

In [58]:
remote_run.deplpoy()

AttributeError: 'AutoMLRun' object has no attribute 'deplpoy'