## Setup

In [1]:
# Sign in to Azure with the Azure CLI (interactive, browser-based device-code flow).
!az login

To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code FH2LVFFLY to authenticate.
[
  {
    "cloudName": "AzureCloud",
    "id": "63fc0c29-abe6-4170-b3bf-1cf85656b247",
    "isDefault": true,
    "name": "Visual Studio Enterprise",
    "state": "Enabled",
    "tenantId": "4e9843a2-758e-4a3b-8628-2fc4141091af",
    "user": {
      "name": "jmangia@me.com",
      "type": "user"
    }
  }
]


In [2]:
from azureml.core import Workspace, Experiment, Run, Datastore
# Load the workspace described by the local config.json file.
ws = Workspace.from_config()

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Found the config file in: /home/nbuser/library/config.json


In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Name of the cluster to use; override it with the AML_COMPUTE_CLUSTER_NAME
# environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")

# Fetch the workspace's compute targets once instead of once per access.
registered_targets = ws.compute_targets

# Fail fast: later cells need `compute_target`, so a missing or wrongly typed
# target must stop the notebook here rather than surface as a NameError (or a
# stale value from an earlier run) further down.
if compute_name not in registered_targets:
    raise LookupError(
        'Error getting Compute target... "' + compute_name
        + '" is not registered in this workspace')

compute_target = registered_targets[compute_name]
if not isinstance(compute_target, AmlCompute):
    raise TypeError(
        'Error getting Compute target... "' + compute_name
        + '" is a ' + type(compute_target).__name__ + ', not an AmlCompute cluster')

print('found compute target. just use it. ' + compute_name)


found compute target. just use it. cpucluster


## Connect to Blob datastores

In [4]:
# Print the name and type of every datastore registered in the workspace.
datastores = ws.datastores
for datastore_name, datastore in datastores.items():
    print(datastore_name, datastore.datastore_type)

# Use the "challenge1" datastore rather than the workspace default
# (the alternative would be ws.get_default_datastore()).
def_data_store = Datastore(ws, "challenge1")
def_data_store

workspaceblobstore AzureBlob
workspacefilestore AzureFile
challenge1 AzureBlob


<azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x7f569b7842e8>

In [5]:
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import PipelineData

# Input: the test CSV stored on the selected datastore.
input_data_reference = DataReference(
    data_reference_name="test_data",
    path_on_datastore="microsoft-malware-prediction/test.csv",
    datastore=def_data_store,
)

# Output: pipeline data produced by the scoring step, kept on the same datastore.
output_data = PipelineData(
    name="output_data1",
    output_name="output_data1",
    datastore=def_data_store,
)

## Create the Model (fake)

In [7]:
from azureml.core import Workspace, Experiment, Run
import math, random, pickle

# Train a stand-in "model": a Monte Carlo estimate of pi, tracked as an
# interactive run of the "model_creation" experiment.
experiment = Experiment(workspace = ws, name = "model_creation")
run = experiment.start_logging()

pi_counter = 0
n_iter = 100000

# Log total number of iterations
run.log("Number of iterations",n_iter)

# Iterate i = 1..n_iter inclusive so that exactly n_iter samples are drawn
# (matching the logged count) and the last 10000-multiple checkpoint is logged.
for i in range(1, n_iter + 1):
    # Monte Carlo step: sample a point in the unit square and count it when it
    # falls inside the quarter circle of radius 1.
    x = random.random()
    y = random.random()
    if x*x + y*y < 1.0:
        pi_counter += 1
    pi_estimate = 4.0*pi_counter / i

    # Log convergence every 10000 iterations
    if i%10000==0:
        error = math.pi-pi_estimate
        run.log("Pi estimate",pi_estimate)
        run.log("Error",error)

# Log final results
run.log("Final estimate",pi_estimate)
run.log("Final error",math.pi-pi_estimate)

# Write the estimate into the run history. The value is pickled (despite the
# .txt name) because batch_score.py reads it back with pickle.load.
with open("pi_estimate.txt","wb") as f:
    pickle.dump(str(pi_estimate),f)
run.upload_file(name = 'outputs/pi_estimate.txt', path_or_stream = './pi_estimate.txt')

# Complete tracking and get link to details
run.complete()
print("Run completed")

Run completed


In [8]:
# Display the run's summary (experiment, id, status, links) as the cell output.
run

Experiment,Id,Type,Status,Details Page,Docs Page
model_creation,6b9dd51b-f763-431d-990b-6f78f9f1ada5,,Completed,Link to Azure Portal,Link to Documentation


In [11]:
# Register the file uploaded by the run as a model; the same name is passed to
# the scoring script later via --model_name.
model_name = "fake_model"
model = run.register_model(
    model_path="outputs/pi_estimate.txt",
    model_name=model_name,
)

## Batch Scoring

In [28]:
%%writefile batch_score.py

import argparse
import os
from azureml.core import Run
from azureml.core.model import Model
import pickle, json

def main():
    """Score with the registered model, log the results and write them to the output.

    Command-line arguments:
        --model_name  name of the registered model to load (required).
        --input       input data location (required; only echoed for now).
        --output      output directory; created if missing, receives scores.json (required).
        --param1      optional free-form parameter (only echoed).

    The model file is expected to hold a pickled string containing a pi
    estimate. The script computes the area of a circle of fixed radius from
    it, logs radius and area as run metrics, and writes both as JSON into the
    output directory.
    """
    run = Run.get_context()
    run.log("start batch score", 1)

    print("In batch_score.py")

    parser = argparse.ArgumentParser("scoring")

    parser.add_argument('--model_name', dest="model_name", required=True)
    parser.add_argument("--input", type=str, help="input data", required=True)
    parser.add_argument("--output", type=str, help="output data", required=True)
    parser.add_argument("--param1", type=str, help="param 1")

    args = parser.parse_args()

    print("Model: %s" % args.model_name)
    print("Input: %s" % args.input)
    print("Output: %s" % args.output)
    print("Param1: %s" % args.param1)

    run.log("start batch score", 2)

    # NOTE(review): pickle.load can execute arbitrary code; this is acceptable
    # only because the model file is produced by our own training run.
    model_path = Model.get_model_path(model_name = args.model_name)
    with open(model_path, "rb") as f:
        pi_estimate = float(pickle.load(f))

    radius = 3
    area = pi_estimate * radius**2

    # Log under stable metric names with the actual values, so the metrics can
    # be charted and compared across runs.
    run.log("Radius", radius)
    run.log("Area", area)

    print("Radius: %s" % radius)
    print("Area: %s" % area)

    # The pipeline step declares --output as its output, so materialize the
    # result there; the directory is not guaranteed to exist beforehand.
    os.makedirs(args.output, exist_ok=True)
    with open(os.path.join(args.output, "scores.json"), "w") as f:
        json.dump({"radius": radius, "area": area}, f)

    run.complete()


# Run the scoring entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

Overwriting batch_score.py


## Create Pipeline

In [29]:
from azureml.pipeline.steps.python_script_step import PythonScriptStep
from azureml.pipeline.core.graph import PipelineParameter

# Pipeline-level parameter "pipeline_arg" (default 10); it is forwarded to the
# scoring script as --param1.
pipeline_param = PipelineParameter(name="pipeline_arg", default_value=10)

# Command line for batch_score.py, written as flag/value pairs.
step_arguments = [
    "--model_name", model_name,
    "--input", input_data_reference,
    "--output", output_data,
    "--param1", pipeline_param,
]

# Single scoring step: runs batch_score.py from the current directory on the
# selected compute target.
firstStep = PythonScriptStep(
    script_name="batch_score.py",
    source_directory=".",
    compute_target=compute_target,
    arguments=step_arguments,
    inputs=[input_data_reference],
    outputs=[output_data],
)

In [30]:
from azureml.pipeline.core import Pipeline

# Assemble the one-step scoring pipeline in the current workspace.
scoring_pipeline = Pipeline(
    steps=[firstStep],
    workspace=ws,
)

## Test Pipeline

In [31]:
# Submit the pipeline under the "Score_Pipeline_Experiment" experiment and
# block until the run finishes; the final status is shown as the cell output.
score_experiment = Experiment(ws, "Score_Pipeline_Experiment")
pipeline_run1 = score_experiment.submit(scoring_pipeline, show_output=True)
pipeline_run1.wait_for_completion()

Created step batch_score.py [c3d9d1ea][0b7eb841-f85b-47ff-9e51-06dcde03904b], (This step will run and generate new outputs)
Using data reference test_data for StepId [60111503][cfe7239c-0f2e-4945-943e-a824ce376816], (Consumers of this data are eligible to reuse prior runs.)
Submitted pipeline run: c977862a-c22e-4227-802e-65282ce6b42c
RunId: c977862a-c22e-4227-802e-65282ce6b42c
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/63fc0c29-abe6-4170-b3bf-1cf85656b247/resourceGroups/MLHack/providers/Microsoft.MachineLearningServices/workspaces/challenge1/experiments/Score_Pipeline_Experiment/runs/c977862a-c22e-4227-802e-65282ce6b42c
Status: Running
.......
Status: Finished
{'runId': 'c977862a-c22e-4227-802e-65282ce6b42c', 'status': 'Completed', 'startTimeUtc': '2019-04-22T22:14:12.736251Z', 'endTimeUtc': '2019-04-22T22:14:59.878873Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': None, 'runType': 'HTTP', 'azureml.parameters': '{"pipeline_arg":"10"}'}

'Finished'

## Publish Pipeline

In [32]:
# Publish the pipeline built above. The notebook never defines `pipeline1`, so
# referring to it fails with NameError on a fresh kernel; the pipeline object
# is `scoring_pipeline`.
scoring_pipeline.publish(name="Scoring Pipeline")

Created step batch_score.py [a0ed9780][0b7eb841-f85b-47ff-9e51-06dcde03904b], (This step is eligible to reuse a previous run's output)
Using data reference test_data for StepId [3a5b31ce][cfe7239c-0f2e-4945-943e-a824ce376816], (Consumers of this data are eligible to reuse prior runs.)


Name,Id,Status,Endpoint
Scoring Pipeline,ab90bb13-18bd-44f3-ad96-3173ef594081,Active,REST Endpoint


## Schedule Pipeline