In [1]:
import json

from azureml.core import Workspace
from azureml.core import Experiment, Environment
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.runconfig import RunConfiguration
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline

from modules.ingestion.data_ingestion_step import data_ingestion_step
from modules.preprocess.data_preprocess_step import data_preprocess_step
from modules.train.train_step import train_step
from modules.evaluate.evaluate_step import evaluate_step
from modules.explainer.explainer_step import explainer_step
from modules.deploy.deployTest_step import deployTest_step
from modules.deploy.deployProd_step import deployProd_step

# Stable aliases for the step factories whose plain names are later rebound to
# the step objects they build; calling through the alias keeps cells re-runnable.
from modules.ingestion.data_ingestion_step import data_ingestion_step as build_data_ingestion_step
from modules.preprocess.data_preprocess_step import data_preprocess_step as build_data_preprocess_step
from modules.train.train_step import train_step as build_train_step
from modules.evaluate.evaluate_step import evaluate_step as build_evaluate_step
from modules.explainer.explainer_step import explainer_step as build_explainer_step

In [2]:
# Load the workspace, then wrap its default datastore in a DataReference that
# is made available to the steps in 'mount' mode.
ws = Workspace.from_config()
datastore = DataReference(ws.get_default_datastore(), mode='mount')

In [3]:
# Get or create the CPU compute target.
# The provisioning settings are only applied when the cluster has to be
# created; an existing cluster with this name is reused as-is, so re-running
# this cell does not issue another create request for it.
cpu_cluster_name = 'cpuclst'
cpu_compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS3_V2',
                                                           idle_seconds_before_scaledown=1200,
                                                           min_nodes=0,
                                                           max_nodes=2)
try:
    # Raises ComputeTargetException when no compute target with this name exists.
    cpu_compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing CPU compute target, reusing it ...')
except ComputeTargetException:
    print('Creating CPU compute target ...')
    cpu_compute_target = ComputeTarget.create(ws, cpu_cluster_name, cpu_compute_config)

# Block until the cluster reports that provisioning has finished.
cpu_compute_target.wait_for_completion(show_output=True)


# Run configuration handed to every pipeline step: it targets the CPU cluster
# and uses the "sklearn_20" environment fetched from the workspace.
# NOTE(review): the original author left
# "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu" as a commented-out alternative
# environment name - confirm which one is intended.
aml_run_config = RunConfiguration()
curated_environment = Environment.get(workspace=ws, name="sklearn_20")
aml_run_config.target = cpu_compute_target
aml_run_config.environment = curated_environment

Creating CPU compute target ...
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Build the pipeline steps. Most factories return the step together with a
# mapping of its named outputs, which is indexed below to wire each step to
# the ones that follow it.
#
# The first five factories are called through their `build_*` aliases because
# the plain names (data_ingestion_step, train_step, ...) are rebound here to
# the step objects they produce. Calling the plain names made this cell fail
# on a second run, since it then tried to call a step object instead of the
# factory. The result names are kept unchanged because the submission cell
# refers to them.

# Step 1: Data ingestion
data_ingestion_step, data_ingestion_outputs = build_data_ingestion_step(
    datastore, cpu_compute_target, aml_run_config)

# Step 2: Data preprocessing
data_preprocess_step, data_preprocess_outputs = build_data_preprocess_step(
    data_ingestion_outputs['raw_data_dir'], cpu_compute_target, aml_run_config)

# Step 3: Train model
train_step, train_outputs = build_train_step(
    data_preprocess_outputs['train_dir'], cpu_compute_target, aml_run_config)

# Step 4: Evaluate model
evaluate_step, evaluate_outputs = build_evaluate_step(
    train_outputs['model_dir'], data_preprocess_outputs['test_dir'],
    cpu_compute_target, aml_run_config)

# Step 5: Explain model (this factory returns only the step, no outputs mapping)
explainer_step = build_explainer_step(
    train_outputs['model_dir'], data_preprocess_outputs['test_dir'],
    data_preprocess_outputs['train_dir'], cpu_compute_target, aml_run_config)

# Step 6: Deploy model to test
deploy_test_step, deploytest_outputs = deployTest_step(
    train_outputs['model_dir'], evaluate_outputs['accuracy_file'],
    data_preprocess_outputs['test_dir'], cpu_compute_target, aml_run_config)

# Step 7: Deploy model to prod
deploy_prod_step, deployprod_outputs = deployProd_step(
    evaluate_outputs['accuracy_file'], data_preprocess_outputs['test_dir'],
    cpu_compute_target, aml_run_config)


In [5]:
# Parameter values passed along with the pipeline when it is submitted.
pipeline_parameters = {
    'max_depth': 5,
    'n_estimators': 500,
    'prod_deploy': 0,
    'endpoint_traffic_pct': 10,
    'set_endpoint_as_default': 0
}

# Submit pipeline (announced once; the original printed this message twice)
print('Submitting pipeline ...')

# Steps are listed in the order they were built: ingestion through deployment.
pipeline = Pipeline(
    workspace=ws,
    steps=[
        data_ingestion_step,
        data_preprocess_step,
        train_step,
        evaluate_step,
        explainer_step,
        deploy_test_step,
        deploy_prod_step,
    ],
)
pipeline_run = Experiment(ws, 'turbofan-pipeline').submit(pipeline, pipeline_parameters=pipeline_parameters)

Submitting pipeline ...
Submitting pipeline ...
Created step data_ingestion.py [cb3f1d5b][64b2bda7-b632-47c8-ae9d-eef91e2d5032], (This step is eligible to reuse a previous run's output)Created step data_preprocess.py [b2e6732e][48f29293-49e6-4141-b4ef-ca81a0ebb39b], (This step is eligible to reuse a previous run's output)

Created step train_data [509211e1][4a428d6f-fb97-45a9-b6b5-1ead4f7d3e91], (This step will run and generate new outputs)
Created step eval [1d66f396][4f677dd5-9943-4d5e-9152-989b4ca9ba5c], (This step will run and generate new outputs)
Created step explainer [1bf8e683][348585ab-da5d-4edf-9787-3dc1604be091], (This step will run and generate new outputs)
Created step deployTest.py [38be7238][591b8188-c0c0-4e66-931e-1428dc0eadd1], (This step will run and generate new outputs)Created step deployProd.py [d18eeaba][6390bfa6-3717-4dfe-b518-e21624293152], (This step will run and generate new outputs)

Using data reference workspaceblobstore for StepId [4bdaf31f][b870ed78-17e6-