In [2]:
import azureml.core
from azureml.core import Workspace

# Print the version of the installed core SDK (azureml-core) so readers can
# tell which release the cells below were run with.
print("Azure ML SDK Version: ", azureml.core.VERSION)


Azure ML SDK Version:  1.3.0


In [None]:
!pip install --upgrade azureml-sdk

## Initialize workspace

In [3]:
# Load the workspace described by the config.json file in the current folder.
ws = Workspace.from_config()

# Tab-separated summary of the workspace: name, region, resource group.
# (The region used to be printed twice.)
print(ws.name, ws.location, ws.resource_group, sep='\t')


sandboxaml	westeurope	rg-sandbox	westeurope


## Create an experiment

In [4]:
from azureml.core import Experiment

# Name under which the runs submitted below are grouped in the workspace.
experiment_name = 'diabetes_exp'
exp = Experiment(workspace=ws, name=experiment_name)

# Bare last expression: the notebook renders the experiment summary.
exp


Name,Workspace,Report Page,Docs Page
diabetes_exp,sandboxaml,Link to Azure Machine Learning studio,Link to Documentation


## Load the registered dataset

In [None]:
from azureml.core import Dataset

# Look up the dataset registered in the workspace under the name 'diabetes'.
diabetes_dataset = Dataset.get_by_name(ws, name='diabetes')


In [None]:
# Materialise the dataset as a pandas DataFrame and drop its "Path" column.
diabetes = diabetes_dataset.to_pandas_dataframe().drop("Path", axis=1)

In [None]:
# Preview the first rows of the DataFrame.
diabetes.head()

In [None]:
# Print the DataFrame's concise summary (columns, dtypes, non-null counts).
diabetes.info()

In [None]:
# Descriptive statistics of the DataFrame's columns.
diabetes.describe()

In [None]:
# List of ten column names; not referenced by any other visible cell.
# NOTE(review): presumably the predictor columns of the 'diabetes' dataset — confirm.
features_names = ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
# Name of the target column; train.py separates this same "Y" column from the features.
target = "Y"

## Create a (remote) compute target

In [None]:
# Compute target creation

from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "myComputeCluster"

try:
    # Reuse the cluster when a compute target with this name can be looked up.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a cluster sized between 0 and 4 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=0,
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print(" Cluster already exists")

# Wait for the cluster operation to finish, giving up after 30 minutes.
cpu_cluster.wait_for_completion(show_output=True, min_node_count=0, timeout_in_minutes=30)

In [5]:
# Retrieve an existing compute target by name (nothing is provisioned here).

from azureml.core.compute import ComputeTarget


# Name of the compute target the training run below is submitted to.
compute_target_name = "computetarget"
compute_target = ComputeTarget(workspace=ws, name=compute_target_name)

# Show the provisioning state reported for that compute target.
print(compute_target.provisioning_state)

Succeeded


## Train the model

### Train the model on a remote compute target

In [6]:
import os

# Absolute path of the "remote_train" folder inside the current working
# directory; it holds the files that are submitted with the training run.
script_folder = os.path.abspath("remote_train")
# Create the folder on first use; an existing folder is left as it is.
os.makedirs(script_folder, exist_ok=True)

In [7]:
%%writefile $script_folder/train.py

import argparse
import os
import numpy as np
import glob

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.externals import joblib
import pickle

from azureml.core import Run
from azureml.core import Dataset
from utils import load_data

from azureml.core.authentication import ServicePrincipalAuthentication
sp = ServicePrincipalAuthentication(tenant_id=os.environ.get('AML_TENANT_ID'),
                               service_principal_id=os.environ.get('AML_PRINCIPAL_ID'),
                                    service_principal_password=os.environ.get('AML_PRINCIPAL_PASS'))

from azureml.core import Workspace
ws = Workspace.get(name="sandboxaml",
                   resource_group="rg-sandbox",
                   subscription_id=os.environ.get('SUBSCRIPTION_ID'),
                   auth=sp)
ws.get_details()
print("Found workspace {} at location {}".format(ws.name, ws.location))

parser = argparse.ArgumentParser()
parser.add_argument('--regularization', type=float, dest='reg', default=0.5, help='regularization strength')
args = parser.parse_args()

# load train and test set into numpy arrays
diabetes_dataset = Dataset.get_by_name(ws, name='diabetes')
diabetes = diabetes_dataset.to_pandas_dataframe().drop("Path", axis=1)
target = "Y"
X = diabetes.drop(target, axis=1)
y = diabetes["Y"].values.reshape(-1,1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# get hold of the current run
run = Run.get_context()

print('Train a Ridge regression model with regularization strength of', args.reg)
model = Ridge(alpha=args.reg, solver="auto", random_state=42)
model.fit(X_train, y_train)

print('Predict the test set')
y_hat = model.predict(X_test)

# calculate score on the prediction
score = model.score(X_test, y_test)
print('Score is ', score)

run.log('regularization strength', np.float(args.reg))
run.log('score', np.float(score))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=model, filename='outputs/diabetes_reg_remote_model.pkl')

Overwriting /mnt/batch/tasks/shared/LS_root/mounts/clusters/sandboxvm/code/users/paul.peton/Diabetes/remote_train/train.py


In [8]:
import shutil
# train.py imports load_data from utils, so utils.py has to sit next to it in
# the folder that is submitted as the run's source directory.
shutil.copy('utils.py', script_folder)

'/mnt/batch/tasks/shared/LS_root/mounts/clusters/sandboxvm/code/users/paul.peton/Diabetes/remote_train/utils.py'

In [9]:
# Set up the environment used on the compute target

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies


env = Environment("diabetes_remote_env")

# Enable Docker for this environment.
env.docker.enabled = True

# Conda packages first, then the one dependency that is installed with pip.
env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas', 'numpy', 'joblib', 'matplotlib']
)
env.python.conda_dependencies.add_pip_package("inference-schema[numpy-support]")

# Write the dependency list to diabetes_env.yml in the current folder;
# the returned file path is displayed as the cell output.
env.python.conda_dependencies.save_to_file(".", "diabetes_env.yml")


'diabetes_env.yml'

In [10]:
from azureml.train.estimator import Estimator

# Command-line arguments handed to train.py (parsed there with argparse).
script_params = {'--regularization': 0.5}

# Bundle the script folder, the entry script with its arguments, the
# environment and the compute target into one submittable configuration.
est = Estimator(
    source_directory=script_folder,
    entry_script='train.py',
    script_params=script_params,
    environment_definition=env,
    compute_target=compute_target,
)


In [11]:
# Submit the estimator to the experiment; `run` is the handle used by the
# following cells to monitor the run and register its model.
run = exp.submit(config=est)
# Bare last expression: the notebook renders the run summary.
run


Experiment,Id,Type,Status,Details Page,Docs Page
diabetes_exp,diabetes_exp_1587026998_95b594bf,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [14]:
from azureml.widgets import RunDetails
# Display the run-monitoring widget for the submitted run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [13]:
# Block until the run has finished; show_output=True streams the run's logs
# into the cell output while waiting.
run.wait_for_completion(show_output=True) 

RunId: diabetes_exp_1587026998_95b594bf
Web View: https://ml.azure.com/experiments/diabetes_exp/runs/diabetes_exp_1587026998_95b594bf?wsid=/subscriptions/f80606e5-788f-4dc3-a9ea-2eb9a7836082/resourcegroups/rg-sandbox/workspaces/sandboxaml

Streaming azureml-logs/55_azureml-execution-tvmps_37307db4050ca100decc06bd9d7baaa34aafcf4da72d05157312e3eecaf93a14_d.txt

2020-04-16T08:50:40Z Starting output-watcher...
2020-04-16T08:50:40Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_2f4cbc791d647ddd7e603887e2a699aa
Digest: sha256:2f02f5d6b21d39f5d8040115b4d4416ebb59e7ba57f2643d5f196a981e372950
Status: Image is up to date for sandboxamlfbab5158.azurecr.io/azureml/azureml_2f4cbc791d647ddd7e603887e2a699aa:latest
23595498e3b2da31236a2931f9eeccf21f1564e94e14639d8b52ccde642e1be4
2020/04/16 08:50:44 Version: 3.0.01172.0001 Branch: master Commit: d33e301a
2020/04/16 08:50:44 /dev/infiniband/uverbs0 found (imply

{'runId': 'diabetes_exp_1587026998_95b594bf',
 'target': 'computetarget',
 'status': 'Completed',
 'startTimeUtc': '2020-04-16T08:50:52.482411Z',
 'endTimeUtc': '2020-04-16T08:54:08.491261Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'afad5905-5f98-4789-856b-0dbef6ba6963',
  'AzureML.DerivedImageName': 'azureml/azureml_2f4cbc791d647ddd7e603887e2a699aa',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': '77d39572-da67-4122-ade6-78643deaa470'}, 'consumptionDetails': {'type': 'Reference'}}],
 'runDefinition': {'script': 'train.py',
  'useAbsolutePath': False,
  'arguments': ['--regularization', '0.5'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'computetarget',
  'dataReferences': {},
  'data': {},
  'jobName': None,
  'maxRunDurationSeconds': None,
  'nodeCount': 1,
  'environment': {'name':

## Register the model

In [None]:
# List the files attached to the run; the model registered in the next cell is
# expected under outputs/ because train.py saves it there.
print(run.get_file_names())

In [None]:
# Register the model file written by train.py (outputs/diabetes_reg_remote_model.pkl)
# under the name 'diabetes_ridge_regression'.
model = run.register_model(model_name='diabetes_ridge_regression', model_path='outputs/diabetes_reg_remote_model.pkl')

# Tab-separated name, id and version of the registered model.
print(model.name, model.id, model.version, sep='\t')

## Create a scoring script

In [None]:
%%writefile score.py

import joblib
import numpy as np
import os

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType


# The init() method is called once, when the web service starts up.
#
# Typically you would deserialize the model file, as shown here using joblib,
# and store it in a global variable so your run() method can access it later.
def init():
    global model

    # The AZUREML_MODEL_DIR environment variable indicates
    # a directory containing the model file you registered.
    model_filename = 'diabetes_regression_model.pkl'
    model_path = os.path.join(os.environ['AZUREML_MODEL_DIR'], model_filename)

    model = joblib.load(model_path)


# The run() method is called each time a request is made to the scoring API.
#
# Shown here are the optional input_schema and output_schema decorators
# from the inference-schema pip package. Using these decorators on your
# run() method parses and validates the incoming payload against
# the example input you provide here. This will also generate a Swagger
# API document for your web service.
@input_schema('data', NumpyParameterType(np.array([[59, 2, 32.1, 101.0, 157, 93.2, 38.0, 4.0, 4.8598, 87]])))
@output_schema(NumpyParameterType(np.array([151.000])))
def run(data):
    # Use the model object loaded by init().
    result = model.predict(data)

    # You can return any JSON-serializable object.
    return result.tolist()

## Define the (inference) environment

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies


# Environment for the scoring service; every dependency is installed with pip.
environment = Environment('my-sklearn-environment')
environment.python.conda_dependencies = CondaDependencies.create(
    pip_packages=['azureml-defaults', 'inference-schema[numpy-support]', 'joblib', 'numpy', 'scikit-learn']
)


## Define an inference configuration

In [None]:
from azureml.core.model import InferenceConfig


# Pair the scoring script (score.py) with the environment it runs in.
inference_config = InferenceConfig(entry_script='score.py', environment=environment)

## Deploy in a custom environment

In [None]:
from azureml.core import Webservice
# Model was never imported, so Model.deploy below raised a NameError.
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice
from azureml.exceptions import WebserviceException


service_name = 'diabetes-custom-service'

# Remove any existing service under the same name so the deployment below
# does not collide with it. A WebserviceException here is taken to mean there
# was nothing to delete, which is the expected case on a first run.
try:
    Webservice(ws, service_name).delete()
except WebserviceException:
    pass

# Container size for the service: 1 CPU core and 1 GB of memory.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the registered model with the scoring script and environment
# bundled in inference_config.
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)

# Block until the deployment has finished, printing progress along the way.
service.wait_for_deployment(show_output=True)


In [None]:
# Retrieve the deployed web service by name (e.g. after a kernel restart).

from azureml.core import Webservice


# Must be the name the service was deployed under. The previous value,
# 'diabetes-remoteTrain-service', is not deployed anywhere in this notebook.
service_name = 'diabetes-custom-service'
service = Webservice(ws, service_name)

# Tab-separated service name and scoring endpoint.
print(service.name, service.scoring_uri, sep='\t')


## Test the service

In [None]:
import json

# Two rows of ten feature values each, serialised to JSON under the 'data'
# key that score.py's run(data) takes as its argument.
# NOTE(review): run() declares only `data`, and the deployed model is a
# regressor — confirm the service accepts the extra 'method' key, or drop it.
input_payload = json.dumps({
    'data': [
        [59, 2, 32.1, 101.0, 157, 93.2, 38.0, 4.0, 4.8598, 87],
        [69, 2, 32.1, 101.0, 157, 93.2, 38.0, 4.0, 4.8598, 87]
    ],
    'method': 'predict'  # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.
})

# Send the payload to the deployed service and show what it returns.
output = service.run(input_payload)

print(output)


In [None]:
# Clean up: delete the web service once testing is done.
service.delete()