In [1]:
from sklearn.linear_model import LogisticRegression
import argparse
import os
import numpy as np
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from azureml.core.run import Run
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Workspace, Dataset
from azureml.core import Workspace,ScriptRunConfig,Experiment, Run

**Dataset**

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
ws = Workspace.from_config()
experiment_name = 'lv-hyperparameter'

exp=Experiment(ws, experiment_name)

In [3]:
run = exp.start_logging()

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.


   # Choose a name for your CPU cluster
cpu_cluster_name = "cpu-cluster"

   # Verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                              max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)

Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


**Hyperdrive Configuration**

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.

In [5]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.dnn import TensorFlow
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, normal, choice
import os,shutil


# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
early_termination_policy = BanditPolicy(slack_factor = 0.15, evaluation_interval=2)

#TODO: Create the different params that you will be using during training
param_sampling = RandomParameterSampling( {
        '--C': choice(0,0.25,0.5,1),
        '--max_iter': choice(500,1000,5000,10000)
    }
)

if "training" not in os.listdir():
    os.mkdir("./training")

script_folder = "./training"    
    
# Reference: lesson 6.3: copying the training file into the script folder
shutil.copy('./train.py', script_folder)
    
script_params={
    '--datastore-dir': ws.get_default_datastore().as_mount(),
}

#TODO: Create your estimator and hyperdrive config
estimator = SKLearn(source_directory='training', 
                     script_params=script_params,
                    compute_target=cpu_cluster,
                    entry_script='train.py',
                    pip_packages=['joblib']
                   )


hyperdrive_run_config = HyperDriveConfig(estimator = estimator, 
                                            hyperparameter_sampling = param_sampling, 
                                            policy = early_termination_policy,
                                            primary_metric_name = "Accuracy",
                                            primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,
                                            max_total_runs = 20,
                                            max_concurrent_runs = 4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
You have specified to install packages in your run. Note that you have overridden Azure ML's installation of the following packages: ['joblib']. We cannot guarantee image build will succeed.


**Run Details**

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the RunDetails widget to show the different experiments.

In [None]:
#TODO: Submit your experiment
hd_run = exp.submit(hyperdrive_run_config)
RunDetails(Run(exp, hd_run.id)).show()

**Best Model**

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
import joblib
# Get your best run and save the model from that run.

best_run = hd_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n learning rate:',parameter_values[3])
print('\n keep probability:',parameter_values[5])

**Connect to your workspace - only if reconnect is needed**

In [None]:
from azureml.core import Workspace
ws = Workspace.from_config(path=".file-path/ws_config.json")

**Save and register the model**

In [None]:
import joblib

joblib.dump(svm_model_linear, 'model.joblib')

In [None]:
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

model = run.register_model(model_name='sklearn-lv-hyper', 
                           model_path='outputs/model.joblib',
                           model_framework=Model.Framework.SCIKITLEARN,
                           model_framework_version='0.19.1',
                           resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5))

**Writing run()**
run() is executed every time your model receives a scoring request, and expects the body of the request to be a JSON document with the following structure:

In [None]:
{
    "data": <model-specific-data-structure>
}

Load a registered scikit-learn model and score it with numpy data

In [None]:
import json
import numpy as np
import os
from sklearn.externals import joblib


def init():
    global model
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'sklearn_mnist_model.pkl')
    model = joblib.load(model_path)

def run(data):
    try:
        data = np.array(json.loads(data))
        result = model.predict(data)
        # You can return any data type, as long as it is JSON serializable.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error

**Define an inference configuration**

In [None]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig


env = Environment.get(workspace, "AzureML-Minimal").clone(env_name)

for pip_package in ["scikit-learn"]:
    env.python.conda_dependencies.add_pip_package(pip_package)

inference_config = InferenceConfig(entry_script='path-to-score.py',
                                    environment=env)

**Define a deployment configuration**

In [None]:
from azureml.core.webservice import AciWebservice, AksWebservice, LocalWebservice

Azure Container Instances	deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

**Deployment**

In [None]:
from azureml.core.webservice import LocalWebservice, Webservice

deployment_config = AciWebservice.deploy_configuration(port=8890)
service = Model.deploy(ws, "myservice", [model], inference_config, deployment_config)
service.wait_for_deployment(show_output = True)
print(service.state)

TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
service = Model.deploy(ws, "myservice", [model], inference_config, deployment_config)
service.wait_for_deployment(show_output = True)
print(service.scoring_uri)
print(service.swagger_uri)

You can use Webservice.list to retrieve a list of deployed web services for models in your workspace. You can add filters to narrow the list of information returned. For more information about what can be filtered on, see the Webservice.list reference documentation.

In [None]:
services = Webservice.list(ws)
print(services[0].scoring_uri)
print(services[0].swagger_uri)

If you know the name of the deployed service, you can create a new instance of Webservice, and provide the workspace and service name as parameters. The new object contains information about the deployed service.

In [None]:
service = Webservice(workspace=ws, name='myservice')
print(service.scoring_uri)
print(service.swagger_uri)

**Authentication for services**

In [None]:
primary, secondary = service.get_keys()
print(primary)

**Request data - KLB: update from Swagger docker page**

In [None]:
{
    "data":
        [
            <model-specific-data-structure>
        ]
}

**Call the service (Python)**

In [None]:
import requests
import json

# URL for the web service
scoring_uri = '<your web service URI>'
# If the service is authenticated, set the key or token
key = '<your key or token>'

# Two sets of data to score, so we get two results back
data = {"data":
        [
            [
                0.0199132141783263,
                0.0506801187398187,
                0.104808689473925,
                0.0700725447072635,
                -0.0359677812752396,
                -0.0266789028311707,
                -0.0249926566315915,
                -0.00259226199818282,
                0.00371173823343597,
                0.0403433716478807
            ],
            [
                -0.0127796318808497,
                -0.044641636506989,
                0.0606183944448076,
                0.0528581912385822,
                0.0479653430750293,
                0.0293746718291555,
                -0.0176293810234174,
                0.0343088588777263,
                0.0702112981933102,
                0.00720651632920303]
        ]
        }
# Convert to JSON string
input_data = json.dumps(data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)

MissingSchema: ignored

**Web service schema (OpenAPI specification)**

In [None]:
{
    "swagger": "2.0",
    "info": {
        "title": "myservice",
        "description": "API specification for Azure Machine Learning myservice",
        "version": "1.0"
    },
    "schemes": [
        "https"
    ],
    "consumes": [
        "application/json"
    ],
    "produces": [
        "application/json"
    ],
    "securityDefinitions": {
        "Bearer": {
            "type": "apiKey",
            "name": "Authorization",
            "in": "header",
            "description": "For example: Bearer abc123"
        }
    },
    "paths": {
        "/": {
            "get": {
                "operationId": "ServiceHealthCheck",
                "description": "Simple health check endpoint to ensure the service is up at any given point.",
                "responses": {
                    "200": {
                        "description": "If service is up and running, this response will be returned with the content 'Healthy'",
                        "schema": {
                            "type": "string"
                        },
                        "examples": {
                            "application/json": "Healthy"
                        }
                    },
                    "default": {
                        "description": "The service failed to execute due to an error.",
                        "schema": {
                            "$ref": "#/definitions/ErrorResponse"
                        }
                    }
                }
            }
        },
        "/score": {
            "post": {
                "operationId": "RunMLService",
                "description": "Run web service's model and get the prediction output",
                "security": [
                    {
                        "Bearer": []
                    }
                ],
                "parameters": [
                    {
                        "name": "serviceInputPayload",
                        "in": "body",
                        "description": "The input payload for executing the real-time machine learning service.",
                        "schema": {
                            "$ref": "#/definitions/ServiceInput"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "The service processed the input correctly and provided a result prediction, if applicable.",
                        "schema": {
                            "$ref": "#/definitions/ServiceOutput"
                        }
                    },
                    "default": {
                        "description": "The service failed to execute due to an error.",
                        "schema": {
                            "$ref": "#/definitions/ErrorResponse"
                        }
                    }
                }
            }
        }
    },
    "definitions": {
        "ServiceInput": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {
                            "type": "integer",
                            "format": "int64"
                        }
                    }
                }
            },
            "example": {
                "data": [
                    [ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ]
                ]
            }
        },
        "ServiceOutput": {
            "type": "array",
            "items": {
                "type": "number",
                "format": "double"
            },
            "example": [
                3726.995
            ]
        },
        "ErrorResponse": {
            "type": "object",
            "properties": {
                "status_code": {
                    "type": "integer",
                    "format": "int32"
                },
                "message": {
                    "type": "string"
                }
            }
        }
    }
}

## Delete resources
TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
service.delete()
model.delete()