## Import models from Hugging Face hub
This sample shows how to import and register models from the [Hugging Face hub](https://huggingface.co/models).

### How does import work?
The import process runs as a job in your AzureML workspace using components from the `azureml` system registry. The models are downloaded and converted to the MLflow packaged format. You can then register the models to your AzureML Workspace or Registry, which makes them available for inference or fine tuning.

### What models are supported for import?
Any model from Hugging Face hub can be downloaded using the `download_model` component. 

In [None]:

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, ClientSecretCredential
from azure.ai.ml.entities import AmlCompute
import time

# Authenticate: prefer the default credential chain and fall back to an
# interactive browser login when no token can be obtained from it.
try:
    credential = DefaultAzureCredential()
    # Request a token up front so an unusable credential chain fails here
    # rather than on the first service call.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Best-effort fallback, but report the reason instead of hiding it.
    print(f"DefaultAzureCredential failed ({ex}); falling back to InteractiveBrowserCredential")
    credential = InteractiveBrowserCredential()

# Client for the workspace in which the import job runs and the model is registered.
# Replace the three values below with your own subscription, resource group and workspace.
workspace_ml_client = MLClient(
    credential,
    subscription_id="21d8f407-c4c4-452e-87a4-e609bfb86248",  # "<SUBSCRIPTION_ID>"
    resource_group_name="rg-contoso-819prod",  # "<RESOURCE_GROUP>"
    workspace_name="mlw-contoso-819prod",  # "<WORKSPACE_NAME>"
)

# Client for the registry that hosts the import components used by this sample.
registry_ml_client = MLClient(credential, registry_name="azureml-preview-test1")

experiment_name = "import-model"

# If you already have a GPU cluster, put its name here. Otherwise a new one
# named 'gpu-cluster-big' is created.
compute_cluster = "gpu-cluster-big"
try:
    workspace_ml_client.compute.get(compute_cluster)
except Exception as ex:
    # The lookup failed; say why before attempting to create the cluster so
    # that problems other than "cluster does not exist" stay visible.
    print(f"Could not get compute '{compute_cluster}' ({ex}); creating it")
    compute = AmlCompute(
        name=compute_cluster,
        size="Standard_ND40rs_v2",
        max_instances=2,  # For multi node training set this to an integer value more than 1
    )
    workspace_ml_client.compute.begin_create_or_update(compute).wait()

# This is the number of GPUs in a single node of the selected compute 'size'.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
# NOTE(review): Standard_ND40rs_v2 is documented with 8 GPUs per node - confirm that 2 is intended.
gpus_per_node = 2

# Generate a unique timestamp that can be used for names and versions that need to be unique.
timestamp = str(int(time.time()))


In [None]:


from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import CommandComponent, PipelineComponent, Job, Component
from azure.ai.ml import PyTorchDistribution, Input

# Look up the two components used by the import pipeline in the registry,
# each at a fixed version: first the downloader, then the MLflow converter.
download_model_func = registry_ml_client.components.get(
    name="download_model",
    version="0.0.2",
)
convert_model_to_mlflow_func = registry_ml_client.components.get(
    name="convert_model_to_mlflow",
    version="0.0.1",
)

# Define the pipeline job: download a model from Hugging Face, then convert
# the downloaded files into an MLflow model folder.
# NOTE(review): the azure.ai.ml `@pipeline` DSL is understood to use the local
# variable names below (and possibly a function docstring) when building the
# job - confirm before renaming the locals or adding a docstring.
@pipeline()
def create_pipeline():

    # Step 1: download the model identified by `model_id` from the given source.
    download_model_job = download_model_func(
        model_id = "unitary/toxic-bert",
        model_source = "Huggingface"
    )
    # Step 2: feed both outputs of the download step into the MLflow converter.
    convert_model_to_mlflow_job = convert_model_to_mlflow_func(
        model_download_details = download_model_job.outputs.model_info,
        model_path = download_model_job.outputs.model_output,
        #mlflow_flavor = "hftransformers",
        task_type = "text-classification"
    )
    # Expose the converted model as the pipeline output named `imported_model`.
    return {
        "imported_model": convert_model_to_mlflow_job.outputs.mlflow_model_folder
    }

# Build the pipeline job object from the pipeline definition.
pipeline_object = create_pipeline()

# Run every step on the `compute_cluster` defined earlier, and force a re-run
# so cached results from previous jobs are not used.
pipeline_object.settings.default_compute = compute_cluster
pipeline_object.settings.force_rerun = True

In [None]:
# Submit the pipeline to the workspace under the configured experiment.
pipeline_job = workspace_ml_client.jobs.create_or_update(
    pipeline_object,
    experiment_name=experiment_name,
)

# Stream the submitted job's logs, waiting for the pipeline job to complete.
submitted_job_name = pipeline_job.name
workspace_ml_client.jobs.stream(submitted_job_name)

In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
# Show the outputs reported for the pipeline job; `imported_model` is the
# output that gets registered below.
completed_job = workspace_ml_client.jobs.get(pipeline_job.name)
print("pipeline job outputs: ", completed_job.outputs)

imported_model_name = "unitary-toxic-bert"

# azureml:// URI pointing at the `imported_model` output of the pipeline job.
model_path_from_job = f"azureml://jobs/{pipeline_job.name}/outputs/imported_model"
print("path to register model: ", model_path_from_job)

# Describe the model to register; the timestamp is used as the version to
# avoid version conflicts between runs.
prepare_to_register_model = Model(
    name=imported_model_name,
    version=timestamp,
    type=AssetTypes.MLFLOW_MODEL,
    path=model_path_from_job,
    description=f"{imported_model_name} imported from Huggingface",
)
print("prepare to register model: \n", prepare_to_register_model)

# Register the model in the workspace from the pipeline job output.
registered_model = workspace_ml_client.models.create_or_update(prepare_to_register_model)
print("registered model: \n", registered_model)


In [None]:
import time, sys
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment

# Create the online endpoint. Endpoint names need to be unique in a region,
# hence the timestamp suffix. The prefix and description identify the model
# actually registered by this sample (an imported toxic-bert model).
online_endpoint_name = "toxic-bert-" + timestamp
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="Online endpoint for " + registered_model.name + ", imported from Huggingface",
    auth_mode="key",
)
# Wait for endpoint creation to finish before moving on.
workspace_ml_client.begin_create_or_update(endpoint).wait()

In [None]:
# Describe a single-instance deployment of the registered model on the endpoint.
deployment_name = "demo"
demo_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    instance_type="Standard_DS4_v2",
    instance_count=1,
)

# Start the deployment and wait for the operation to finish.
deployment_poller = workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment)
deployment_poller.wait()

# Send all endpoint traffic to the new deployment and apply the change.
endpoint.traffic = {deployment_name: 100}
workspace_ml_client.begin_create_or_update(endpoint).result()

In [None]:
# Route 100% of the endpoint traffic to the "demo" deployment and push the
# endpoint update, returning the operation's result.
endpoint.traffic = dict(demo=100)
traffic_update = workspace_ml_client.begin_create_or_update(endpoint)
traffic_update.result()