# Image Object Detection scenario with RAI Dashboard

The [object detection fridge dataset](https://github.com/microsoft/computervision-recipes/tree/master/scenarios/detection) provides images and bounding boxes with four types of items commonly found in the Microsoft New England R&D office refrigerator - carton, water bottle, can and milk bottle. This example notebook demonstrates how to use a Faster R-CNN ResNet 50 FPN computer vision model from torchvision on the dataset to evaluate the model in AzureML. Note that the cells below download a small MS COCO validation sample rather than the fridge dataset, and the model they register is the torchvision model pre-trained on MS COCO.

First, we need to specify the version of the RAI components which are available in the workspace. This was specified when the components were uploaded.

In [None]:
# Version of the RAI components to fetch (used for `rai_vision_insights`)
version_string = "0.0.10"
# Name of the AzureML compute cluster the pipelines run on
compute_name = "cpucluster"
# Version assigned to the data asset and the component created in this notebook
rai_example_version_string = "11"

The above cell specifies the version of the RAI components for the workspace as well as the compute cluster to use in AzureML. The `rai_example_version_string` specifies the version for the data and components created in this notebook.

In [None]:
import urllib
import os
import sys
from zipfile import ZipFile
import numpy as np
import pandas as pd
import json

try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve

%pip install torchvision


def download_mscoco_dataset(data_path, annotations_file):
    """Download one file of the small MS COCO sample into a local folder.

    The target folder is created when it does not exist yet, and the file
    keeps its original name inside it.

    Args:
        data_path: Local directory that receives the download.
        annotations_file: Name of the file inside the
            ``computervision/smallMSCOCO/`` folder of the public blob store.
    """
    os.makedirs(data_path, exist_ok=True)

    url_parts = (
        "https://publictestdatasets.blob.core.windows.net/",
        "computervision/smallMSCOCO/",
        annotations_file,
    )
    source_url = "".join(url_parts)
    destination = os.path.join(data_path, annotations_file)

    urlretrieve(source_url, filename=destination)


# JSON Lines annotations file to fetch from the small MS COCO sample
annotations = "msCOCOValExample7.jsonl"

# Local directory that receives the download
data_path = "./dataMSCOCO"


# Fetch the annotations file into `data_path`
download_mscoco_dataset(data_path, annotations)

The cell above downloads the JSON Lines annotations file from Azure Blob Storage. It also creates a directory called dataMSCOCO and places the MS COCO file in it. Note that this sample contains just 25 images.

In [None]:
def create_ml_table_file(filename):
    """Return the text of an MLTable definition that reads one JSON Lines file.

    Args:
        filename: Name of the JSON Lines file, relative to the folder that
            will contain the MLTable file.

    Returns:
        The MLTable YAML document as a single string ending with a newline.
    """
    yaml_lines = [
        "$schema: http://azureml/sdk-2-0/MLTable.json",
        "type: mltable",
        "paths:",
        f" - file: ./{filename}",
        "transformations:",
        "  - read_json_lines:",
        "        encoding: utf8",
        "        invalid_lines: error",
        "        include_path_column: false",
    ]
    # Every line, including the last one, is newline-terminated
    return "".join(line + "\n" for line in yaml_lines)


def save_ml_table_file(output_path, ml_table_data):
    """Write an ``MLTable`` definition file into *output_path*.

    Args:
        output_path: Directory that receives the ``MLTable`` file; it must
            already exist.
        ml_table_data: Name of the JSON Lines file the table should read,
            relative to *output_path*.
    """
    mltable_file_contents = create_ml_table_file(ml_table_data)
    # Write as UTF-8 explicitly so a non-ASCII file name is stored the same
    # way on every platform instead of using the locale's default encoding.
    with open(
        os.path.join(output_path, "MLTable"), "w", encoding="utf-8"
    ) as f:
        f.write(mltable_file_contents)


# Write the MLTable file next to the downloaded annotations file
save_ml_table_file(data_path, annotations)

The above creates the ML table using the json file that we downloaded.

In [None]:
import mltable

# Load the MLTable definition stored in `data_path`
tbl = mltable.load(data_path)

# Materialize the table as a pandas DataFrame
val_df: pd.DataFrame = tbl.to_pandas_dataframe()

Taking the mltable and putting it to a pandas dataframe

In [None]:
# Column of the test data that holds the ground-truth labels
target_column_name = "label"

The ground truth of the bounding boxes, this will be used for the dashboard and other components

In [None]:
# Enter details of your AML workspace
# (replace each placeholder below with your own value)
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace = "<AML_WORKSPACE_NAME>"

In [None]:
# Handle to the workspace
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# First try to connect using the workspace details entered above
try:
    credential = DefaultAzureCredential()
    ml_client = MLClient(
        credential=credential,
        subscription_id=subscription_id,
        resource_group_name=resource_group,
        workspace_name=workspace,
    )
except Exception:
    # If in compute instance we can get the config automatically
    from azureml.core import Workspace

    # NOTE(review): this rebinds `workspace` from the name string entered
    # above to a Workspace object - confirm nothing later needs the string
    workspace = Workspace.from_config()
    # Presumably written so that MLClient.from_config below can find it
    workspace.write_config()
    ml_client = MLClient.from_config(
        credential=DefaultAzureCredential(exclude_shared_token_cache_credential=True),
        logging_enable=True,
    )

# Show which client was created
print(ml_client)

To upload the dataset (MLTable), we first create an MLClient for the AzureML workspace.

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Name under which the MLTable is registered as a data asset
input_test_data = "MSCOCO_Test_MLTable_OD18"

# Reuse the data asset if this name/version already exists; otherwise
# create it from the local folder
try:
    test_data = ml_client.data.get(
        name=input_test_data,
        version=rai_example_version_string,
    )
except Exception:
    # Any failure of the lookup is treated as "not registered yet"
    test_data = Data(
        path=data_path,
        type=AssetTypes.MLTABLE,
        description="RAI MSCOCO data",
        name=input_test_data,
        version=rai_example_version_string,
    )
    ml_client.data.create_or_update(test_data)

The MLClient will upload the data to AzureML. One typical error involves the name in `input_test_data`: if that name has already been used, an error will occur saying so. To remedy this error, change the `input_test_data` string.

In [None]:
import os

# Folder that holds the source code of the model-registration component
os.makedirs("mscoco_component_src_od_1", exist_ok=True)

create directory to place the script file

In [None]:
%%writefile mscoco_component_src_od_1/model_script.py

import argparse
import logging
import json
import os
import time


import mlflow
import mlflow.pyfunc

from azureml.core import Run

import torchvision

from raiutils.common.retries import retry_function

try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve

# Logger for this script (named after the script file); INFO level enabled
_logger = logging.getLogger(__file__)
logging.basicConfig(level=logging.INFO)

# NOTE(review): not referenced in the visible part of this script
MSCOCO_MODEL_NAME = 'mscoco_model'


def parse_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace with the attributes ``model_output_path``,
        ``model_base_name``, ``model_name_suffix`` and ``device``. Options
        that are not supplied are left as ``None``.
    """
    # (flag, value type, help text) for each supported option
    option_specs = (
        ("--model_output_path", str, "Path to write model info JSON"),
        ("--model_base_name", str, "Name of the registered model"),
        ("--model_name_suffix", int, "Set negative to use epoch_secs"),
        (
            "--device",
            int,
            "Device for CPU/GPU supports. Setting this to -1 will leverage "
            "CPU, >=0 will run the model on the associated CUDA device id.",
        ),
    )

    cli = argparse.ArgumentParser()
    for flag, value_type, description in option_specs:
        cli.add_argument(flag, type=value_type, help=description)

    return cli.parse_args()


def main(args):
    """Register the pretrained Faster R-CNN model and write its id to disk.

    Points MLflow at the tracking URI and experiment of the current AzureML
    run, loads torchvision's pretrained ``fasterrcnn_resnet50_fpn``, logs and
    registers it with MLflow, and writes ``model_info.json`` containing the
    registered model id into ``args.model_output_path``.

    Args:
        args: Parsed command-line arguments providing ``device``,
            ``model_name_suffix``, ``model_base_name`` and
            ``model_output_path``.
    """
    current_experiment = Run.get_context().experiment
    tracking_uri = current_experiment.workspace.get_mlflow_tracking_uri()
    _logger.info("tracking_uri: {0}".format(tracking_uri))
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(current_experiment.name)

    _logger.info("Getting device")
    device = args.device

    # The model comes from torchvision's pretrained weights; no data is read
    _logger.info("Loading pretrained Faster R-CNN model")

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    if device >= 0:
        # Move the model to the requested CUDA device id, not just the
        # current default device
        model = model.cuda(device)

    # A negative suffix means "use the current epoch seconds"
    if args.model_name_suffix < 0:
        suffix = int(time.time())
    else:
        suffix = args.model_name_suffix
    registered_name = "{0}_{1}".format(args.model_base_name, suffix)
    _logger.info(f"Registering model as {registered_name}")

    # Saving model with mlflow
    _logger.info("Saving with mlflow")

    mlflow.pytorch.log_model(
        model,
        artifact_path=registered_name,
        registered_model_name=registered_name
    )

    _logger.info("Writing JSON")
    # NOTE(review): version 1 is assumed; this would presumably be wrong if a
    # model with this name had already been registered - confirm
    model_info = {"id": "{0}:1".format(registered_name)}
    output_path = os.path.join(args.model_output_path, "model_info.json")
    with open(output_path, "w") as of:
        json.dump(model_info, fp=of)


# run script (only when executed directly, not when imported)
if __name__ == "__main__":
    # add space in logs
    print("*" * 60)
    print("\n\n\n")

    # parse args
    args = parse_args()

    # run main function
    main(args)

    # add space in logs
    print("*" * 60)
    print("\n\n\n")

This will create the python file to load the pre-trained pytorch fasterrcnn model that was trained on MS COCO

In [None]:
import time

# Base name under which the model is registered
model_base_name = "mscoco_model"
# Suffix appended to the base name; the script substitutes the current epoch
# seconds when this is negative
model_name_suffix = "11008"
# -1 runs the model on CPU; >= 0 selects a CUDA device
device = -1

this is to specify what type of model and model name for logging

In [None]:
from azure.ai.ml import load_component

# Definition of the command component that runs model_script.py.
# This is an f-string: `{rai_example_version_string}` is substituted here,
# while each quadruple brace collapses to a double brace, so the written YAML
# contains `${{inputs.<name>}}` / `${{outputs.<name>}}` placeholders.
yaml_contents = f"""
$schema: http://azureml/sdk-2-0/CommandComponent.json
name: mscoco_component_pytorch_13
display_name: MSCOCO component for RAI OD ex.
version: {rai_example_version_string}
type: command
inputs:
  model_base_name:
    type: string
  model_name_suffix: # Set negative to use epoch_secs
    type: integer
    default: -1
  device: # set to >= 0 to use GPU
    type: integer
    default: 0
outputs:
  model_output_path:
    type: path
code: ./mscoco_component_src_od_1/
environment: azureml://registries/azureml/environments/responsibleai-vision-ubuntu20.04-py38-cpu/versions/33
command: >-
  python model_script.py
  --model_base_name ${{{{inputs.model_base_name}}}}
  --model_name_suffix ${{{{inputs.model_name_suffix}}}}
  --device ${{{{inputs.device}}}}
  --model_output_path ${{{{outputs.model_output_path}}}}
"""

# File the component definition is written to before being loaded
yaml_filename = "test_mscoco1.yaml"


with open(yaml_filename, "w") as f:
    f.write(yaml_contents)

# Load the definition from the YAML file
test_component_definition = load_component(source=yaml_filename)

# Create (or update) the component in the workspace
ml_client.components.create_or_update(test_component_definition)

yaml file to send it as an ML component

In [None]:
from azure.ai.ml.entities import AmlCompute

# Names of every compute target the client can list
all_compute_names = [x.name for x in ml_client.compute.list()]

# Reuse the cluster when it already exists; otherwise start creating it
if compute_name in all_compute_names:
    print(f"Found existing compute: {compute_name}")
else:
    my_compute = AmlCompute(
        name=compute_name,
        size="STANDARD_DS3_V2",
        min_instances=0,
        max_instances=4,
        idle_time_before_scale_down=3600,
    )
    # Creation is only initiated here; this cell does not wait for it
    ml_client.compute.begin_create_or_update(my_compute)
    print("Initiated compute creation")

find compute target to run the job

In [None]:
from azure.ai.ml import dsl, Input

# Retrieve the model-registration component by name and version
test_model_component = ml_client.components.get(
    name="mscoco_component_pytorch_13", version=rai_example_version_string
)


# Pipeline with a single step that runs the model-registration component.
# Parameters: model_base_name, model_name_suffix and device are forwarded
# unchanged to the component's inputs of the same names.
@dsl.pipeline(
    compute=compute_name,
    description="Register Model for RAI MSCOCO example",
    experiment_name=f"RAI_MSCOCO_Example_Model_{model_name_suffix}",
)
def my_pipeline(model_base_name, model_name_suffix, device):
    # Call the component fetched from the workspace (`test_model_component`)
    # rather than the local YAML definition, so the fetched component is
    # actually the one that runs
    test_model = test_model_component(
        model_base_name=model_base_name,
        model_name_suffix=model_name_suffix,
        device=device,
    )
    # Cap the step's run time
    test_model.set_limits(timeout=3600)

    # No pipeline-level outputs are exposed
    return {}


# Build the pipeline job with the model settings chosen earlier
model_registration_pipeline_job = my_pipeline(
    model_base_name, model_name_suffix, device
)

This pipeline loads the model and registers it; a registered model is needed to create the RAI vision insights.

In [None]:
from azure.ai.ml.entities import PipelineJob


def submit_and_wait(ml_client, pipeline_job) -> PipelineJob:
    """Submit a pipeline job and block until it reaches a final status.

    The job is polled every 30 seconds and the latest status is printed after
    each poll.

    Args:
        ml_client: Client used to submit the job and query its status.
        pipeline_job: The pipeline job to submit.

    Returns:
        The job object as last retrieved, with status "Completed".

    Raises:
        AssertionError: If submission returns ``None`` or the job ends in any
            status other than "Completed".
    """
    finished_states = ("Completed", "Failed", "Canceled", "NotResponding")

    job = ml_client.jobs.create_or_update(pipeline_job)
    assert job is not None

    while True:
        if job.status in finished_states:
            break
        time.sleep(30)
        job = ml_client.jobs.get(job.name)
        print("Latest status : {0}".format(job.status))

    assert job.status == "Completed"
    return job


# This is the actual submission; the call returns once the model
# registration job has finished
testing_job = submit_and_wait(ml_client, model_registration_pipeline_job)

Once submitted we can monitor the progress of it

In [None]:
# Model id in "<name>_<suffix>:<version>" form.
# NOTE(review): version 1 is hard-coded here - confirm it matches the version
# actually registered
expected_model_id = f"{model_base_name}_{model_name_suffix}:1"
# Same id with the "azureml:" prefix
azureml_model_id = f"azureml:{expected_model_id}"
# need these ids

Collecting the name of the id, we will need for rai vision insights

In [None]:
# Reference the test data asset by "<name>:<version>" as a pipeline input
mscoco_test_mltable = Input(
    type="mltable",
    path=f"{input_test_data}:{rai_example_version_string}",
    mode="download",
)

# Registry the RAI component is fetched from
registry_name = "azureml"
credential = DefaultAzureCredential()

# Client scoped to the registry instead of a workspace
ml_client_registry = MLClient(
    credential=credential,
    subscription_id=ml_client.subscription_id,
    resource_group_name=ml_client.resource_group_name,
    registry_name=registry_name,
)

# RAI vision insights component at the version set in `version_string`
rai_vision_insights_component = ml_client_registry.components.get(
    name="rai_vision_insights", version=version_string
)

In [None]:
import json
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes


# Pipeline with a single RAI vision insights step for object detection.
# Parameters: target_column_name, test_data and classes are forwarded to the
# component inputs `target_column_name`, `test_dataset` and `classes`.
# Returns the step's `dashboard` and `ux_json` outputs as pipeline outputs.
@dsl.pipeline(
    compute=compute_name,
    description="Example RAI computation on MSCOCO data",
    experiment_name=f"RAI_MSCOCO_Example_RAIInsights_Computation_{model_name_suffix}",
)
def rai_mscoco_object_detection_pipeline(target_column_name, test_data, classes):
    # Initiate the RAIInsights
    rai_image_job = rai_vision_insights_component(
        task_type="object_detection",
        model_info=expected_model_id,
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        test_dataset=test_data,
        target_column_name=target_column_name,
        classes=classes,
        model_type="pytorch",
        enable_error_analysis=False,
        num_masks=300,
        mask_res=4,
    )
    # Cap the step's run time
    rai_image_job.set_limits(timeout=7200)

    # Both outputs are produced in "upload" mode
    rai_image_job.outputs.dashboard.mode = "upload"
    rai_image_job.outputs.ux_json.mode = "upload"

    return {
        "dashboard": rai_image_job.outputs.dashboard,
        "ux_json": rai_image_job.outputs.ux_json,
    }

Specifying our pipeline

In [None]:
import uuid
from azure.ai.ml import Output

# Build the insights pipeline job; `classes` is passed as a JSON-style list
# encoded in a single string
insights_pipeline_job = rai_mscoco_object_detection_pipeline(
    target_column_name=target_column_name,
    test_data=mscoco_test_mltable,
    classes='["person", "bicycle", "car", "motorcycle","airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant","street sign", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse","sheep", "cow", "elephant", "bear", "zebra", "giraffe", "hat", "backpack","umbrella", "shoe", "eye glasses", "handbag", "tie", "suitcase", "frisbee", "skis","snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard","surfboard", "tennis racket", "bottle", "plate", "wine glass", "cup", "fork", "knife","spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog","pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "mirror", "dining table","window", "desk", "toilet", "door", "tv", "laptop", "mouse", "remote", "keyboard","cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "blender","book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]',
)

# Random folder name so each run writes its outputs to a fresh location
rand_path = str(uuid.uuid4())
# Store both outputs under that folder in the workspaceblobstore datastore
insights_pipeline_job.outputs.dashboard = Output(
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/dashboard/",
    mode="upload",
    type="uri_folder",
)
insights_pipeline_job.outputs.ux_json = Output(
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/ux_json/",
    mode="upload",
    type="uri_folder",
)

We are doing a pipeline of RAI object detection

In [None]:
# Submit the RAI insights pipeline and wait for it to finish
insights_job = submit_and_wait(ml_client, insights_pipeline_job)

The cell above submits the job and monitors its progress. Since it runs on 25 MS COCO images and computes D-RISE, it may take around 30-60 minutes.

In [None]:
# Workspace coordinates, read through MLClient's public properties (the same
# ones used when creating the registry client) rather than the private
# `_operation_scope` attribute
sub_id = ml_client.subscription_id
rg_name = ml_client.resource_group_name
ws_name = ml_client.workspace_name

# Link to the model analysis page of the registered model
expected_uri = f"https://ml.azure.com/model/{expected_model_id}/model_analysis?wsid=/subscriptions/{sub_id}/resourcegroups/{rg_name}/workspaces/{ws_name}"

print(f"Please visit {expected_uri} to see your analysis")

To monitor the progress on azureml 