## Track Azure Databricks run using MLflow in Azure Machine Learning

This is an end-to-end example of how to track an Azure Databricks training run using MLflow in Azure ML and deploy the resulting model to an Azure ML endpoint for inference.

   <br />


### Connect to Azure ML workspace

In [2]:
import mlflow
import azureml.mlflow
import azureml.core

from azureml.core import Workspace

# ID of the Azure subscription (left blank in this example)
subscription_id = ""

# Azure Machine Learning resource group (left blank in this example)
resource_group = ""

# Azure Machine Learning workspace name (left blank in this example)
workspace_name = ""

# Obtain the Azure Machine Learning workspace object from the three
# identifiers above.
ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)
# Bare expression: in a notebook this displays the workspace as cell output.
ws

## Configure MLflow tracking URI

In [6]:
# Ask the workspace for its MLflow tracking URI ...
uri = ws.get_mlflow_tracking_uri()
# ... and point the MLflow client at it, so later mlflow.* calls use it.
mlflow.set_tracking_uri(uri)
# Print the URI so it is visible in the cell output.
print(uri)

## Train a regression model on the diabetes dataset

In [7]:
# Import the dataset from scikit-learn and create the training and test datasets. 
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

# Load the diabetes dataset bundled with scikit-learn and pull out the
# feature matrix and the target vector.
db = load_diabetes()
X, y = db.data, db.target

# Partition into training and test sets using train_test_split's defaults
# (no explicit test size or random seed is passed).
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [8]:
import joblib
import mlflow
import azureml.mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Experiment under which the run is recorded.
experiment_name = "diabetes_mlflow_exp"
mlflow.set_experiment(experiment_name)

# Artifact path (inside the run) under which the trained model is logged.
model_save_path = "model"

with mlflow.start_run():
    # Hyperparameters for the random forest
    n_estimators = 100
    max_depth = 6
    max_features = 3

    # Create and train the model
    rf = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
    )
    rf.fit(X_train, y_train)

    # Predict on the held-out split and score the predictions
    predictions = rf.predict(X_test)
    mse = mean_squared_error(y_test, predictions)

    # Log parameters
    mlflow.log_param("num_trees", n_estimators)
    mlflow.log_param("maxdepth", max_depth)
    mlflow.log_param("max_feat", max_features)

    # Log metrics
    mlflow.log_metric("mse", mse)

    # Log the model exactly once, as a run artifact under model_save_path.
    # Logging the same estimator a second time under another artifact path
    # only duplicates the serialization and upload; model_save_path is the
    # path the "runs:/<run id>/<model_save_path>" model URI is built from.
    mlflow.sklearn.log_model(rf, model_save_path)
 

## Deploy the model for inference

Here we deploy the model as a web service to Azure Container Instances (ACI). The same code can easily be modified to deploy to an AKS cluster instead.

For more details, see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=azcli#choose-a-compute-target

In [12]:
# Retrieve the Azure ML run ID of the run whose model will be deployed
exp = ws.experiments[experiment_name]
runs = list(exp.get_runs())
print(runs)

# Fail with an actionable message rather than a bare "list index out of
# range" when the experiment has no runs yet.
if not runs:
    raise IndexError(
        "No runs found in experiment {!r}; run the training cell "
        "before deploying.".format(experiment_name)
    )

# Use the first run returned.
# NOTE(review): get_runs() is expected to yield the most recent run first;
# confirm against the azureml-core version in use.
runid = runs[0].id

In [14]:
from azureml.core.webservice import Webservice, AciWebservice
import mlflow.azureml
# Service name and an ACI deployment configuration created with no arguments
prod_webservice_name = "model-prod"
prod_webservice_deployment_config = AciWebservice.deploy_configuration()

# URI of the model artifact inside the selected run
model_uri = f"runs:/{runid}/{model_save_path}"

# Deploy the logged model as a web service in the workspace. The call returns
# the web service together with the model object, in that order.
# synchronous=True presumably makes the call wait for the deployment to
# finish; confirm against the installed mlflow version.
# NOTE(review): mlflow.azureml may be unavailable in newer MLflow releases;
# verify the pinned version still provides it.
web_service, azure_model = mlflow.azureml.deploy(
    model_uri=model_uri,
    service_name=prod_webservice_name,
    deployment_config=prod_webservice_deployment_config,
    workspace=ws,
    synchronous=True,
)


## Invoke Webservice

In [16]:
# json is not referenced in this cell; the import is kept as in the original.
import json
import pandas as pd

# Three sample input rows, ten feature values each
test_rows = [
    [
        0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,
        -0.0266789, -0.02499266, -0.00259226, 0.00371174, 0.04034337,
    ],
    [
        -0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,
        0.02937467, -0.01762938, 0.03430886, 0.0702113, 0.00720652,
    ],
    [
        0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,
        -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652,
    ],
]

# Wrap the rows in a DataFrame and serialize it with the "split" orientation
# (separate "columns", "index" and "data" entries in the JSON document).
test_frame = pd.DataFrame(test_rows)
test_rows_as_json = test_frame.to_json(orient="split")

# Send the JSON payload to the deployed web service and print its response.
# Note: this rebinds `predictions`, which the training cell also assigns.
predictions = web_service.run(test_rows_as_json)
print(predictions)

In [17]:
# Delete the web service created by the deployment step (clean-up)
web_service.delete()