In [1]:
import datetime
import azureml.core

from azureml.core import Workspace, Experiment, Datastore, Dataset 
from azureml.pipeline.core import Pipeline, PipelineData, PipelineRun
from azureml.pipeline.steps import PythonScriptStep
 
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
 
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
 
from azureml.core.model import Model

 
# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.36.0


In [2]:
experiment_name = 'ContentModelTraining'
datastore_name = 'mltrainingdata'

input_dataset_name = 'content_config'
input_data_name = 'content_config_json'


# Static path for content cinfig JSON in datastore
input_data_path = 'configs/content'

In [3]:
ws = Workspace.from_config()
datastore = Datastore.get(ws, datastore_name=datastore_name)

In [4]:
print(f'Workspace name: {ws.name}')
print(f'Workspacev resource group: {ws.resource_group}')
print(f'Datastore name: {datastore.name}')
print(f'container name: {datastore.container_name}')
print(f'account name: {datastore.account_name}')

Workspace name: aiml-workspace-dev
Workspacev resource group: analytics_mart_dev_rg
Datastore name: mltrainingdata
container name: ml-training-data
account name: analyticsmartdevadls


In [5]:
aml_compute_target = "demo-cluster"
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")
    
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = 1, 
                                                                max_nodes = 4)    
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    
print("Azure Machine Learning Compute attached")

found existing compute target.
Azure Machine Learning Compute attached


In [6]:
aml_run_config = RunConfiguration()
 
aml_run_config.target = aml_compute
aml_run_config.environment.docker.enabled = True
aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:latest"
 
aml_run_config.environment.python.user_managed_dependencies = False
 
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas','scikit-learn','numpy'], 
    pip_packages=['joblib','azureml-sdk','fusepy', 'nltk'], 
    pin_sdk_version=False)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [7]:
dataset = Dataset.File.from_files(datastore.path(input_data_path))
dataset = dataset.register(ws, input_dataset_name, create_new_version=True)
content_config = dataset.as_named_input(input_data_name)

model_registration_flag = PipelineData("model_registration_flag", datastore=datastore).as_dataset()

# see a list of files referenced by dataset
dataset.to_path()

['/content_config.json']

In [8]:
source_directory="./content"

step1 = PythonScriptStep(name="train",
                         script_name="./content_model_training.py", 
                         arguments=["--model_registration_flag", model_registration_flag],
                         inputs=[content_config],
                         outputs=[model_registration_flag],
                         compute_target=aml_compute, 
                         runconfig=aml_run_config,
                         source_directory=source_directory,
                         allow_reuse=False)

In [9]:
source_directory="./scoring"
step2 = PythonScriptStep(name="score",
                         script_name="./model_deployment.py", 
                         arguments=["--model_registration_flag", model_registration_flag],
                         inputs=[model_registration_flag],
                         compute_target=aml_compute, 
                         runconfig=aml_run_config,
                         source_directory=source_directory,
                         allow_reuse=False)

In [10]:
steps = [step1, step2]
# steps = [step1]

In [11]:
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline_run = Experiment(ws, experiment_name).submit(pipeline, regenerate_outputs=True)

Created step train [318565d2][2f802c15-0bbd-48c3-b826-a586b50ca27a], (This step will run and generate new outputs)
Created step score [02170142][3bb5dc7e-3a17-410d-ac1e-c361e155b189], (This step will run and generate new outputs)
Submitted PipelineRun 00d49779-c7e9-4525-b623-df77bee2a9bc
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/00d49779-c7e9-4525-b623-df77bee2a9bc?wsid=/subscriptions/a00a8e0f-73c4-4480-8097-07bf63060185/resourcegroups/analytics_mart_dev_rg/workspaces/aiml-workspace-dev&tid=4deb802f-fdf2-4ebe-ae9b-8df74fb06435
