In [1]:
from azureml.core import Workspace, Dataset, Datastore, Environment, Experiment
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import CondaDependencies, RunConfiguration, DEFAULT_CPU_IMAGE
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import StepSequence, Pipeline, PublishedPipeline
from azureml.widgets import RunDetails

In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
subscription_id = '' # Enter subscription ID
resource_group = 'rgforecasting'
workspace_name = 'wsforecasting'
workspace = Workspace(subscription_id, resource_group, workspace_name)

In [4]:
data = {"WeekStarting": ["1992-10-08","1992-10-08"], "Store": [1000,1002],"Brand":["dominicks","tropicana"],"Advert": [1,1],"Price":[2.5,2], "Revenue":[30000,40000]}  


In [5]:
testDF = pd.DataFrame(data)

In [6]:
testDF

Unnamed: 0,WeekStarting,Store,Brand,Advert,Price,Revenue
0,1992-10-08,1000,dominicks,1,2.5,30000
1,1992-10-08,1002,tropicana,1,2.0,40000


In [7]:
datastore = Datastore.get_default(workspace)
my_datastore_name = 'workspaceblobstore'
my_datastore = Datastore.get(workspace, my_datastore_name)

In [8]:
Dataset.Tabular.register_pandas_dataframe(testDF,datastore,'Forecasting Test Data')

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/f5245461-af68-4e68-975c-c43ca839d738/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


{
  "source": [
    "('workspaceblobstore', 'managed-dataset/f5245461-af68-4e68-975c-c43ca839d738/')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ReadParquetFile",
    "DropColumns"
  ],
  "registration": {
    "id": "128de481-389e-40fa-87df-0a1a9ee4a21d",
    "name": "Forecasting Test Data",
    "version": 3,
    "workspace": "Workspace.create(name='wsforecasting', subscription_id='497d1b53-aa2e-4f5e-a21d-20ab61cff740', resource_group='rgforecasting')"
  }
}

In [9]:
os.makedirs('Batch_Inferencing_Scripts', exist_ok=True)  

In [10]:
compute_name = 'training-cluster'
compute_target = ComputeTarget(workspace, compute_name)

In [11]:
%%writefile Batch_Inferencing_Scripts/Forecasting_Script.py
from azureml.core import Run, Workspace
from azureml.core import Dataset, Datastore, Model
import pandas as pd
import numpy as np
import joblib
import os

run = Run.get_context()

def main():
    workspace = run.experiment.workspace
    datastore = Datastore.get_default(workspace)
    model_path = Model.get_model_path('OJ-Sales-Sample-Forecasting-AutoML')
    model = joblib.load(model_path)
    dataset = Dataset.get_by_name(workspace,'Forecasting Test Data')
    scoringDF = dataset.to_pandas_dataframe()
    scoringDF_out = model.forecast_quantiles(scoringDF)
    output_datastore_path = 'Output_Folder'
    os.makedirs(output_datastore_path, exist_ok=True) 
    FileName = "OJ_Predictions.csv"
    OutputPath = os.path.join(output_datastore_path, FileName)
    scoringDF_out.to_csv(OutputPath, index = False, sep=',')
    datastore.upload_files(files=[OutputPath], target_path=output_datastore_path, overwrite=True)
    os.remove(OutputPath)
    os.rmdir(output_datastore_path)
    
if __name__ == '__main__':
    main()

Overwriting Batch_Inferencing_Scripts/Forecasting_Script.py


In [12]:
# Create a Custom Environment
Env = Environment(name='AutoML Environment')
conda_dep = CondaDependencies()

# Adding conda packages
conda_dep.add_conda_package("numpy>=1.16.0,<1.19.0")
conda_dep.add_conda_package("joblib==0.14.1")
conda_dep.add_conda_package("pandas==0.25.1")
conda_dep.add_conda_package("scikit-learn==0.22.1")
conda_dep.add_conda_package("py-xgboost<=0.90")
conda_dep.add_conda_package("fbprophet==0.5")
conda_dep.add_conda_package("holidays==0.9.11")
conda_dep.add_conda_package("psutil>=5.2.2,<6.0.0")
conda_dep.add_conda_package("pip")

# Adding pip packages
conda_dep.add_pip_package("azureml-train-automl-runtime==1.35.1")
conda_dep.add_pip_package("inference-schema")
conda_dep.add_pip_package("azureml-interpret==1.35.0")
conda_dep.add_pip_package("azureml-defaults==1.35.0")
conda_dep.add_pip_package("azureml-interpret==1.35.0")

Env.python.conda_dependencies=conda_dep

# Register the environment to your workspace
RegisteredEnvironment = Env.register(workspace=workspace)

In [13]:
run_config = RunConfiguration()
run_config.environment = Env
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [14]:
scoring_step = PythonScriptStep(name='OJ-scoring-step',script_name='Forecasting_Script.py',source_directory='Batch_Inferencing_Scripts', 
arguments=[],
inputs=[],
compute_target=compute_target,
runconfig=run_config,
allow_reuse=False)

In [15]:
step_sequence = StepSequence(steps=[scoring_step])
pipeline = Pipeline(workspace=workspace, steps=step_sequence)

In [16]:
pipeline_experiment = Experiment(workspace, 'OJ-Inferencing-Pipeline-Run')
pipeline_run = pipeline_experiment.submit(pipeline, show_output=True)
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

Created step OJ-scoring-step [dc6675f0][ea56065b-d4f2-4bc4-a82f-a1845a26f254], (This step will run and generate new outputs)
Submitted PipelineRun 6edb7345-e425-4db0-8d22-0bad2b7349b3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/6edb7345-e425-4db0-8d22-0bad2b7349b3?wsid=/subscriptions/497d1b53-aa2e-4f5e-a21d-20ab61cff740/resourcegroups/rgforecasting/workspaces/wsforecasting&tid=e7e6256a-7f45-4c36-b602-d90f8b7e3a92


In [17]:
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 6edb7345-e425-4db0-8d22-0bad2b7349b3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/6edb7345-e425-4db0-8d22-0bad2b7349b3?wsid=/subscriptions/497d1b53-aa2e-4f5e-a21d-20ab61cff740/resourcegroups/rgforecasting/workspaces/wsforecasting&tid=e7e6256a-7f45-4c36-b602-d90f8b7e3a92
PipelineRun Status: Running


StepRunId: adb3bf3e-7297-4932-83a6-2a2044efafa5
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/adb3bf3e-7297-4932-83a6-2a2044efafa5?wsid=/subscriptions/497d1b53-aa2e-4f5e-a21d-20ab61cff740/resourcegroups/rgforecasting/workspaces/wsforecasting&tid=e7e6256a-7f45-4c36-b602-d90f8b7e3a92
StepRun( OJ-scoring-step ) Status: Running

StepRun(OJ-scoring-step) Execution Summary
StepRun( OJ-scoring-step ) Status: Finished

This run might be using a new job runtime with improved performance and error reporting. The logs from your script are in user_logs/std_log.txt. Please let us know if you run into any issues, and if you would like to opt-out, plea

'Finished'

In [18]:
published_pipeline = pipeline_run.publish_pipeline(name='OJ-Inferencing-Pipeline',description='Pipeline that Forecasts OJ Sales', version= '1.0')
published_pipeline

Name,Id,Status,Endpoint
OJ-Inferencing-Pipeline,3195a6f6-0617-4711-a0bf-2620a5c6bd12,Active,REST Endpoint
