Install the Azure ML SDK on your Azure Databricks cluster, then confirm that it imports and check its version

In [2]:
import azureml.core
azureml.core.VERSION

Initialize Azure ML Workspace

In [4]:
import os

# Workspace settings used by the cells below.
# Each value can be overridden through an environment variable so the notebook can be
# pointed at another subscription/workspace without editing (and committing) these literals.

# Subscription ID of your existing Azure subscription
subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID", "fc489c93-72d7-4073-8b24-e2f4ea9336f0")

# Name of the Resource Group that will contain the Azure ML related services
resource_group = os.environ.get("AML_RESOURCE_GROUP", "book")

# A unique name (like "aml-bigdata-lab-SUFFIX") and the region for the Azure Machine Learning Workspace
workspace_name = os.environ.get("AML_WORKSPACE_NAME", "sampleMLWorkspace")
workspace_region = os.environ.get("AML_WORKSPACE_REGION", "southeastasia")

Create an Azure ML Workspace

In [6]:
import azureml.core

# Import the Workspace class used to create the Azure ML workspace.
# (This cell does not check the SDK version; that is done in the first cell of the notebook.)
from azureml.core import Workspace

# Create the workspace from the name, subscription, resource group and region set in the
# "Initialize Azure ML Workspace" cell.
# NOTE(review): exist_ok = True is expected to make the call succeed when the workspace
# already exists, so the cell can be re-run — confirm against the SDK documentation.
ws = Workspace.create(
    name = workspace_name,
    subscription_id = subscription_id,
    resource_group = resource_group, 
    location = workspace_region,
    exist_ok = True
)

print("Provisioning complete.")

Persist the Workspace configuration

In [8]:
import os
import shutil

# Look the workspace up again by name, subscription id and resource group.
ws = Workspace(
    workspace_name = workspace_name,
    subscription_id = subscription_id,
    resource_group = resource_group)

# Persist the subscription id, resource group name, and workspace name in a config.json file.
# The following cells read it from the .azureml/ folder under the driver's working directory
# (/databricks/driver/.azureml/config.json).
ws.write_config()

Take a look at the contents of the generated configuration file by running the following cell:

In [10]:
%sh
cat /databricks/driver/.azureml/config.json

Copy the config file to DBFS

In [12]:
# Persist the config folder to DBFS so that it can be used by the other notebooks.
aml_config_local = 'file:' + os.getcwd() + '/.azureml/'
aml_config_dbfs = '/dbfs/' + 'aml_config'

# Remove any previous copy before copying.
# The cleanup goes through dbutils.fs so that it resolves aml_config_dbfs in the same
# filesystem namespace as the dbutils.fs.cp call below; checking/deleting the same string
# with os.path / shutil would address a local driver path instead, and shutil.rmtree
# cannot remove a plain file.
dbutils.fs.rm(aml_config_dbfs, recurse=True)

dbutils.fs.cp(aml_config_local, aml_config_dbfs, recurse=True)

Deploy model to Azure Container Instance (ACI)

In [14]:
import os

from pyspark.ml import PipelineModel

Copy the model from DBFS

In [16]:
# NOTE: service deployment always gets the model from the current working dir,
# so the model folder is copied from DBFS into a folder named after the model there.
model_name = "flightDelayModel"
model_path_dbfs = "/flightDelayModel/"
model_path_local = "file:{}/{}/".format(os.getcwd(), model_name)

print("copy model from dbfs {} to local {}".format(model_path_dbfs, model_path_local))
dbutils.fs.cp(model_path_dbfs, model_path_local, recurse=True)

Register the model with Azure Machine Learning

In [18]:
import azureml.core
from azureml.core.workspace import Workspace

# Get the config folder back from DBFS into the local .azureml/ folder.
# aml_config_dbfs and aml_config_local are defined in the "Copy the config file to DBFS"
# cell, so that cell has to run before this one.
dbutils.fs.cp(aml_config_dbfs, aml_config_local, recurse=True)

# Build the workspace handle from the restored configuration file.
ws = Workspace.from_config()

In [19]:
# Register the model with the workspace
from azureml.core.model import Model

# model_path points to a local file or folder in the current working dir;
# model_name is the name the model is registered with.
mymodel = Model.register(
    workspace = ws,
    model_path = model_name,
    model_name = model_name,
    description = "Flight Delay Prediction Model",
)

print(mymodel.name, mymodel.description, mymodel.version)

Create the scoring web service

In [21]:
#%%writefile score_sparkml.py
# The scoring script is kept in a string so that it can be both exec'd in this notebook
# (which defines init() and run() for the local test cell) and written to score_sparkml.py,
# the entry script used for deployment.
score_sparkml = """

import json

def init():
    # One-time initialization of PySpark and the predictive model.
    # If anything fails, the exception is stored in trainedModel so that run() can report it.
    global trainedModel
    global spark

    try:
        import pyspark
        from pyspark.ml import PipelineModel
        from azureml.core.model import Model

        spark = pyspark.sql.SparkSession.builder.appName("Scoring").getOrCreate()

        model_name = "flightDelayModel"

        model_path = Model.get_model_path(model_name)

        trainedModel = PipelineModel.load(model_path)

    except Exception as e:
        print("Exception in init: " + str(e))
        trainedModel = e

def run(input_df):
    # Score a JSON string of records and return the predictions as a JSON string.
    # Returns a JSON object with an "Exception" key if init() failed, or the error text
    # if scoring itself fails.
    response = ''

    if isinstance(trainedModel, Exception):
        # str() is required here: an Exception object is not JSON serializable,
        # so dumping it directly would raise a TypeError instead of reporting the failure.
        return json.dumps({"Exception": str(trainedModel)})

    try:
        print("received: " + input_df)

        sc = spark.sparkContext

        # Set inferSchema=true to prevent the float values from being seen as strings
        # which can later cause the VectorAssembler to throw an error: 'Data type StringType is not supported.'
        df = spark.read.option("inferSchema", "true").json(sc.parallelize([input_df]))

        # Get prediction results for the dataframe
        score = trainedModel.transform(df)
        predictions = score.collect()

        # Get each scored result (prediction and confidence)
        preds = [{"prediction":str(result['prediction']), "confidence":str(result['probability'])} for result in predictions]

        response = json.dumps(preds)

        print("response: " + str(response))

    except Exception as e:
        print("Exception in run: " + str(e))
        return (str(e))

    # Return results
    return response

"""

# Define init() and run() in the notebook namespace so they can be tested locally.
exec(score_sparkml)

# Write the same source to disk as the entry script for the web service.
with open("score_sparkml.py", "w") as file:
    file.write(score_sparkml)

Test the scoring script locally and confirm that it works as desired.

In [23]:
import json

# Create two records for testing the prediction
test_input1 = {"OriginAirportCode":"SAT","Month":5,"DayofMonth":5,"CRSDepHour":13,"DayOfWeek":7,"Carrier":"MQ","DestAirportCode":"ORD","WindSpeed":9,"SeaLevelPressure":30.03,"HourlyPrecip":0}

test_input2 = {"OriginAirportCode":"ATL","Month":2,"DayofMonth":5,"CRSDepHour":8,"DayOfWeek":4,"Carrier":"MQ","DestAirportCode":"MCO","WindSpeed":3,"SeaLevelPressure":31.03,"HourlyPrecip":0}

# Test init() in the local notebook (init and run were defined by exec'ing the scoring script above)
init()

# Package the inputs into a JSON string and test run() in the local notebook.
# json_str_test_inputs is reused later to test the deployed service.
test_inputs = [test_input1, test_input2] 
json_str_test_inputs = json.dumps(test_inputs)
run(json_str_test_inputs)

Deployment

In [25]:
from azureml.core.webservice import AciWebservice, Webservice

# Deployment configuration for the Azure Container Instance that will host the service:
# 1 CPU core and 1 GB of memory, plus a tag and description attached to the service.
aci_config = AciWebservice.deploy_configuration(
    cpu_cores = 1, 
    memory_gb = 1, 
    tags = {'name':'Flight Delay Prediction'}, 
    description = 'Predicts if a flight will be delayed by 15 minutes or more.')

Next, we will build an environment that will host our service

In [27]:
from azureml.core import Environment

# Fetch the environment with this name from the workspace.
# NOTE(review): the "AzureML-" prefix suggests a curated environment — confirm it is still available.
environment = Environment.get(ws, name="AzureML-PySpark-MmlSpark-0.15")

In [28]:
from azureml.core.model import InferenceConfig

# Entry script written to disk by the "Create the scoring web service" cell
driver_file = "score_sparkml.py"

# Combine the entry script with the environment it runs in
inference_config = InferenceConfig(environment=environment, entry_script=driver_file)

In [29]:
service_name = "sparkmlservicedb001"

from azureml.core.webservice import AciWebservice, Webservice
from azureml.exceptions import WebserviceException

# The ACI name needs to be unique in the subscription, so delete any existing service
# with this name before deploying. This stays best-effort (a failure here does not stop
# the deployment), but the reason is now reported instead of being silently swallowed.
try:
    service = Webservice(ws, name=service_name)
    service.delete()
    print("Deleted existing service '{}'.".format(service_name))
except WebserviceException as e:
    # Typically raised because no service with this name exists yet.
    print("No existing service '{}' was deleted: {}".format(service_name, e))

# Deploy the registered model with the scoring entry script/environment to ACI.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[mymodel],
    inference_config=inference_config,
    deployment_config=aci_config,
)

# Block until the deployment finishes, streaming its output, then report the final state.
service.wait_for_deployment(show_output=True)
print(service.state)



Test the deployed service

In [31]:
# Send the same JSON test payload that was used for the local run() test to the deployed service.
service.run(input_data = json_str_test_inputs)

Retrieve the web service URL

In [33]:
# Show the URL of the deployed web service.
print(service.scoring_uri)