# Solution for 2nd Project for Azure ML (Udacity Course 2)
This notebook delivers the solution for the project "Operationalizing Machine Learning" from the Udacity course "Machine Learning Engineer with Microsoft Azure Nanodegree Program".

Author: Aléaume COUSSEAU / aliaume64@gmail.com

## Retrieving current Workspace

In [None]:
from azureml.core import Workspace, Experiment, Dataset

# Load the current (lab) workspace from its config.json file.
# Alternative lookup by name: Workspace.get(name="udacity-project")
# SOURCE: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project_2_Cousseau")

# Print a short workspace summary, one property per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Start an interactive logging run on the experiment.
# NOTE(review): `run` is not referenced again in the visible cells -- confirm
# whether this interactive run is needed and where it should be completed.
run = exp.start_logging()

## Creating a compute Cluster

If the compute cluster already exists, it is reused instead of being created again.

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Reuse the compute cluster when it already exists; otherwise provision it.
# Project requirements: vm_size = "Standard_DS12_V2" and min_nodes = 1.
# SOURCE / HELP: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.amlcompute.amlcompute?view=azure-ml-py

cluster_name = "myCluster"
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No compute target with this name yet: describe it, then create it.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_DS12_V2",
        min_nodes=1,
        max_nodes=6,
    )
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print("Cluster already created")

# Wait for the cluster operation to complete, printing its progress.
cluster.wait_for_completion(show_output=True)

## Create Dataset
Here we create a tabular dataset from the provided URL and specify, among other options, that the first line contains the headers.

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.data.dataset_type_definitions import PromoteHeadersBehavior

# Build a tabular dataset from the bank-marketing training CSV.
dataPath = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

# The header argument is given as a PromoteHeadersBehavior enum value; an
# earlier attempt with header=True was replaced by it. See:
#   https://github.com/Azure/MachineLearningNotebooks/issues/629
#   https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.dataset_type_definitions.promoteheadersbehavior?view=azure-ml-py
dataset = Dataset.Tabular.from_delimited_files(
    path=dataPath,
    validate=True,
    include_path=False,
    header=PromoteHeadersBehavior.ONLY_FIRST_FILE_HAS_HEADERS,
    support_multi_line=False,
)

# Register the dataset in the workspace.
datasetBankMarketing = dataset.register(
    workspace=ws,
    name='datasetBankMarketing',
    description='bank marketing training data',
)

# Display the first 10 rows as a sanity check (last expression of the cell).
dataset.take(10).to_pandas_dataframe()

## Create & Run New AutoML Experiment

In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML settings for this project:
#   - classification task scored on accuracy
#   - exit criterion: 60 minutes of experiment time (1 hour)
#   - model explanation enabled ("Explain best model")
#   - at most 5 concurrent iterations
#   - validation_size holds out a portion of the training data for validation
#     (here 33%, as in train.py); the alternative is n_cross_validations=5
# SOURCE / HELP:
#   https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig?view=azure-ml-py
#   https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-cross-validation-data-splits
automl_settings = {
    "experiment_timeout_minutes": 60,
    "task": 'classification',
    "primary_metric": 'accuracy',
    "training_data": dataset,
    "validation_size": 0.33,
    "label_column_name": 'y',
    "compute_target": cluster,
    "enable_early_stopping": True,
    "model_explainability": True,
    "max_concurrent_iterations": 5,
}

automl_config = AutoMLConfig(**automl_settings)

In [None]:
# Submit your automl run

from azureml.widgets import RunDetails

# Submit the AutoML configuration as a run of the experiment.
runAutoML = exp.submit(automl_config)

# Show the live progress widget; this call returns immediately.
RunDetails(runAutoML).show()

# Block until the AutoML run has finished, so that a "Restart & Run All"
# does not query the run for its best model before the experiment is done.
runAutoML.wait_for_completion(show_output=False)

In [None]:
# Retrieve the best run of the AutoML experiment together with its fitted
# model, then display both.
# (Alternative that was tried: runAutoML.get_best_child() followed by
#  RunDetails(bestRunAutoML).show().)
best_run, fitted_model = runAutoML.get_output()
print(best_run, fitted_model, sep='\n')

In [None]:
# Register the AutoML run's model in the workspace.
# SOURCE / HELP: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
#
# Known limitation: registering through the SDK has so far not produced a
# default generated swagger.json URI; Microsoft support recommends registering
# via AML studio instead. Registering from the best run
# (best_run.register_model(model_name=..., description=...)) was also tried
# without success.
model_name = "AleaumeModel"
description = "Best AutoML Model"

model = runAutoML.register_model(
    model_name=model_name,
    description=description,
)

# Step 3: Deploy the Best Model

- Deploy the model and enable "Authentication"
- Deploy the model using Azure Container Instance (ACI)

In [None]:
#Define inference configuration

#score.py needs to be located in the same directory as this notebook. Otherwise update the source_directory variable

from azureml.core import Environment
from azureml.core.model import InferenceConfig

# Environment identified only by its name; no packages are added to it here.
# NOTE(review): confirm this environment provides everything score.py imports
# at scoring time.
env = Environment(name="Project 2 Udacity")

# The entry script is looked up in source_directory (this notebook's folder).
my_inference_config = InferenceConfig(
    entry_script="./score.py",
    source_directory="./",
    environment=env,
)

In [None]:
from azureml.core.webservice import LocalWebservice

# Deploy the registered model as a local web service on port 9001.
# SOURCE / HELP: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
deployment_config = LocalWebservice.deploy_configuration(port=9001)

service = model.deploy(
    workspace=ws,
    name="local-service",
    models=[model],
    inference_config=my_inference_config,
    deployment_config=deployment_config,
)

# Reload the service, then print its logs for inspection.
service.reload()
print(service.get_logs())

In [None]:
#Call model to test 
import requests
import json

# Scoring endpoint of the deployed service.
uri = service.scoring_uri

# Plain GET against the local service root; the response is not used.
requests.get("http://localhost:9001")

# Send a JSON test payload to the scoring endpoint and show the parsed reply.
headers = {"Content-Type": "application/json"}
data = json.dumps({"query": "This is a test query"})
response = requests.post(uri, data=data, headers=headers)
print(response.json())

In [None]:
#Deploy to ACI

from azureml.core.webservice import AciWebservice

# Deploy to Azure Container Instance (ACI) with authentication enabled.
# SOURCE / HELP: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=0.5,
    memory_gb=1,
    auth_enabled=True,
)

# overwrite=True is passed so the deployment replaces an existing "myservice".
service = model.deploy(
    workspace=ws,
    name="myservice",
    models=[model],
    inference_config=my_inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

# Known limitation: the deployment works but does not provide a swagger URI
# (see the comments in the model registration step).
print(service.get_logs())

# Step 4: Enable Logging

- Ensure az is installed, as well as the Python SDK for Azure
- Create a new virtual environment with Python3
- Write and run code to enable Application Insights
- Run logs.py to visualize logs

In [None]:
# Enable Application Insights on the web service held in `service`
# (the most recent deployment assigned to that variable).

service.update(enable_app_insights=True)

# Alternative when starting from scratch (for example in a fresh kernel or a
# standalone script), kept here for reference:

#from azureml.core.webservice import Webservice
# Requires config.json to be downloaded first to the current working directory
#ws = Workspace.from_config()
# Set to the name of the deployed service
#name = "name_OF_Service"
# Load the existing web service
#service = Webservice(name=name, workspace=ws)
# Enable Application Insights
#service.update(enable_app_insights=True)