### <font color="green">Model Evaluation - Metrics Package - Compute Metrics</font>

### <font color="parrot-green">Classification</font>

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# Integer class ids: ground truth and predictions for five samples.
y_true = np.array([0, 1, 2, 2, 2])
y_pred = np.array([0, 0, 2, 2, 1])

# Only the "metrics" entry of the returned mapping is kept.
metrics = compute_metrics(
    task_type=constants.Tasks.CLASSIFICATION,
    y_test=y_true,
    y_pred=y_pred,
)["metrics"]

pprint(metrics)

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# Ground truth and predictions for five samples.
y_true = np.array([0, 1, 2, 2, 2])
y_pred = np.array([0, 0, 2, 2, 1])

# Extra keyword arguments that are unpacked into the compute_metrics call below.
metrics_config = dict(
    metrics=["accuracy", "confusion_matrix"],
    sample_weight=[0, 1, 2, 3, 4],
    enable_metric_confidence=True,
)

metrics = compute_metrics(
    task_type=constants.Tasks.CLASSIFICATION,
    y_test=y_true,
    y_pred=y_pred,
    **metrics_config
)

pprint(metrics)

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# Each sample carries a variable-length list of labels, hence dtype=object.
y_test = np.array([["a", "b"], ["c", "d"], ["a"]], dtype=object)
y_pred = np.array([["a", "b"], ["c", "d"], ["a"]], dtype=object)

# Extra keyword arguments that are unpacked into the compute_metrics call below.
metrics_config = dict(
    multilabel=True,
    metrics=["accuracy", "confusion_matrix"],
)

metrics = compute_metrics(
    task_type=constants.Tasks.CLASSIFICATION,
    y_test=y_test,
    y_pred=y_pred,
    **metrics_config
)

pprint(metrics)

### <font color="parrot-green">Regression</font>

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# Four target values and the corresponding predictions.
y_test = [20, 22, 23, 21]
y_pred = [18.4, 23, 22, 20]

metrics = compute_metrics(
    task_type=constants.Tasks.REGRESSION,
    y_test=y_test,
    y_pred=y_pred,
)

# Print only the "metrics" entry of the result.
pprint(metrics["metrics"])

### <font color="parrot-green">Text Classification</font>

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# One string label per sample.
y_test = ["sports", "politics", "sports", "movies"]
y_pred = ["sports", "sports", "politics", "movies"]

metrics = compute_metrics(
    task_type=constants.Tasks.TEXT_CLASSIFICATION,
    y_test=y_test,
    y_pred=y_pred,
)

pprint(metrics)

### <font color="parrot-green">Text Classification - Multilabel</font>

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# Every sample has its own list of string labels; the lists differ in length,
# so the arrays are created with dtype="object".
y_test = np.array(
    [
        ["politics", "sports"],
        ["education"],
        ["movies", "politics"],
        ["sports", "education"],
    ],
    dtype="object",
)
y_pred = np.array(
    [
        ["politics", "movies"],
        ["sports", "education"],
        ["movies", "politics"],
        ["sports", "politics"],
    ],
    dtype="object",
)

metrics = compute_metrics(
    task_type=constants.Tasks.TEXT_CLASSIFICATION_MULTILABEL,
    y_test=y_test,
    y_pred=y_pred,
)

pprint(metrics)

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint


# Three class labels (0, 1, 2) and four examples.
# Each row of y_test is an indicator vector over the three labels.
y_test = np.array([
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 1],
    [1, 0, 1],
])

# One probability per label for every example.
y_pred_proba = np.array([
    [0.9, 0.6, 0.4],
    [0.3, 0.8, 0.6],
    [0.1, 0.9, 0.8],
    [0.7, 0.1, 0.6],
])

# Class labels should be in the same order as the probability values.
class_labels = np.array([0, 1, 2])

metrics_config = dict(class_labels=class_labels, multilabel=True)

result = compute_metrics(
    task_type=constants.Tasks.CLASSIFICATION,
    y_test=y_test,
    y_pred_proba=y_pred_proba,
    **metrics_config
)

pprint(result["metrics"])

### <font color="parrot-green">Text-Named Entity Recognition</font>

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# Two tagged sequences; every token carries one tag string.
y_test = [
    ["O", "O", "O", "B-MISC", "I-MISC", "I-MISC", "O"],
    ["B-PER", "I-PER", "O"],
]
y_pred = [
    ["O", "O", "B-MISC", "I-MISC", "I-MISC", "I-MISC", "O"],
    ["B-PER", "I-PER", "O"],
]

metrics = compute_metrics(
    task_type=constants.Tasks.TEXT_NER,
    y_test=y_test,
    y_pred=y_pred,
)

pprint(metrics)

### <font color="green">Sequence to Sequence Metrics</font>

<hr>

Supported NLG tasks:

* Translation
* Summarization
* Question Answering

### <font color="parrot-green">Translation Task - Bleu metric</font>

In [None]:
from azureml.metrics import compute_metrics, constants
from pprint import pprint

# One predicted string per sample; each y_test entry is a list of reference strings.
y_pred = [
    "hello there general kenobi",
    "foo bar foobar",
]
y_test = [
    ["hello there general kenobi san"],
    ["foo bar foobar"],
]

result = compute_metrics(
    task_type=constants.Tasks.TRANSLATION,
    y_test=y_test,
    y_pred=y_pred,
)
pprint(result["metrics"])

### <font color="parrot-green">Summarization Task - Rouge metric</font>

In [None]:
from azureml.metrics import compute_metrics, constants
from pprint import pprint

# One predicted string per sample; each y_test entry is a list of reference strings.
y_pred = [
    "hello there general kenobi",
    "foo bar foobar",
]
y_test = [
    ["hello there general kenobi san"],
    ["foo bar foobar"],
]

result = compute_metrics(
    task_type=constants.Tasks.SUMMARIZATION,
    y_test=y_test,
    y_pred=y_pred,
)
pprint(result["metrics"])

### <font color="parrot-green">Question Answering - Exact Match, F1 score, BERTScore</font>

In [None]:
from azureml.metrics import compute_metrics, constants
from pprint import pprint

# Predicted answers and reference answers, paired by position.
y_pred = [
    "hello there general kenobi 123",
    "foo bar foobar",
    "ram 234",
    "sid",
]
y_test = [
    "hello there general kenobi san",
    "foo bar foobar",
    "ram 23",
    "sid$",
]

result = compute_metrics(
    task_type=constants.Tasks.QUESTION_ANSWERING,
    y_test=y_test,
    y_pred=y_pred,
)
pprint(result["metrics"])

### <font color="parrot-green">Question Answering - GPT-Star metrics</font>

In [None]:
from azureml.metrics import compute_metrics, constants
from pprint import pprint

# A single context/question pair with one on-topic answer and one off-topic answer.
context = "In 2018, a group of scientists discovered a new type of deep-sea fish that has a transparent head. The fish, named Barreleye, has tubular eyes that can rotate to look either upward or forward, allowing it to see potential prey and predators in the dark depths of the ocean."
question = "What is the name of the deep-sea fish discovered by scientists in 2018, and what is unique about its head?"
coherent_answer = "The deep-sea fish discovered by scientists in 2018 is called Barreleye, and it has a transparent head. The fish has tubular eyes that can rotate to look either upward or forward, allowing it to see potential prey and predators in the dark depths of the ocean."
incoherent_answer = "The scientists who made the discovery in 2018 were actually studying coral reefs, not deep-sea fish. However, they did come across an unusual creature that they couldn't identify. It turned out to be a type of sea cucumber that has a strange, tube-like shape."

# This dictionary is propagated to the OpenAI completion or chat completion API,
# so only add keys that the OpenAI API accepts directly.
# Note: replace every <placeholder> with an actual value before running.
openai_params = dict(
    api_version="<placeholder>",
    api_base="<placeholder>",
    api_type="<placeholder>",
    api_key="<placeholder>",
    deployment_id="<placeholder>",
)

# Note: the y_test, y_pred, questions and contexts lists should all have the same length.
metrics_config = dict(
    y_test=[coherent_answer, incoherent_answer],
    y_pred=[coherent_answer, incoherent_answer],
    questions=[question, question],
    contexts=[context, context],
    openai_params=openai_params,
)

result = compute_metrics(
    task_type=constants.Tasks.QUESTION_ANSWERING,
    **metrics_config
)
pprint(result)

### <font color="parrot-green">Fill Mask - Perplexity</font>

In [None]:
from azureml.metrics import compute_metrics, constants
from pprint import pprint

# Only predictions are supplied for this task; no y_test is passed.
y_pred = [
    "hi",
    "green and blue",
    "he dances",
]

result = compute_metrics(
    task_type=constants.Tasks.FILL_MASK,
    y_pred=y_pred,
    model_id="gpt2",
)
pprint(result)

### <font color="parrot-green">Text generation - Bleu and Rouge metrics</font>

In [None]:
from azureml.metrics import compute_metrics, constants
from pprint import pprint

# One predicted string per sample; each y_test entry is a list of reference strings.
y_pred = [
    "hello there general kenobi",
    "foo bar foobar",
    "blue & red",
]
y_test = [
    ["hello there general kenobi san"],
    ["foo bar foobar"],
    ["blue & green"],
]

result = compute_metrics(
    task_type=constants.Tasks.TEXT_GENERATION,
    y_test=y_test,
    y_pred=y_pred,
)
pprint(result)

### <font color="parrot-green">Image Object Detection</font>


In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint

# One entry per image: meta information, ground-truth objects and predicted objects.
meta_info_per_image = [
    dict(
        areas=[60000],
        iscrowd=[0],
        filename="image_1.jpg",
        height=640,
        width=480,
        original_width=640,
        original_height=480,
    ),
]

# A single ground-truth box of class 1.
gt_objects_per_image = [
    dict(
        boxes=np.array([[160, 120, 320, 240]], dtype=np.float32),
        classes=np.array([1]),
    ),
]

# A single predicted box with the same coordinates and class, score 0.75, no masks.
predicted_objects_per_image = [
    dict(
        boxes=np.array([[160, 120, 320, 240]], dtype=np.float32),
        masks=None,
        classes=np.array([1]),
        scores=np.array([0.75]),
    ),
]

num_classes = 3
iou_threshold = 0.5

result = compute_metrics(
    task_type=constants.Tasks.IMAGE_OBJECT_DETECTION,
    y_test=gt_objects_per_image,
    image_meta_info=meta_info_per_image,
    y_pred=predicted_objects_per_image,
    num_classes=num_classes,
    iou_threshold=iou_threshold,
)
pprint(result["metrics"])


### <font color="parrot-green">Image Instance Segmentation</font>


In [None]:



from azureml.metrics import compute_metrics, constants
import numpy as np
from pycocotools import mask as pycoco_mask
from pprint import pprint
from typing import List, Dict, Any

def _get_mask_from_bbox(bbox: List, height: int, width: int) -> Dict[str, Any]:
    """Build a pycocotools mask object for the rectangle given by ``bbox``.

    ``bbox`` is unpacked into four values ``(x1, y1, x2, y2)``. They are laid out as a
    closed five-point polygon that is handed to ``pycoco_mask.frPyObjects`` together
    with ``height`` and ``width``; the first element of its result is returned.
    """
    x_start, y_start, x_end, y_end = bbox
    # Corner sequence repeats the first corner at the end to close the outline.
    outline = [x_start, y_start, x_end, y_start, x_end, y_end, x_start, y_end, x_start, y_start]
    return pycoco_mask.frPyObjects([outline], height, width)[0]


# One entry per image: meta information, ground-truth objects and predicted objects.
meta_info_per_image = [
    dict(
        areas=[60000],
        iscrowd=[0],
        filename="image_1.jpg",
        height=640,
        width=480,
        original_width=640,
        original_height=480,
    ),
]

# NOTE(review): the masks below are built with height=640 and width=640, while the
# meta info above lists height 640 and width 480 -- confirm which size is intended.
gt_objects_per_image = [
    dict(
        boxes=np.array([[160, 120, 320, 240]], dtype=np.float32),
        masks=[_get_mask_from_bbox([160, 120, 320, 240], 640, 640)],
        classes=np.array([1]),
    ),
]

# The prediction reuses the ground-truth box and mask, with a score of 0.75.
predicted_objects_per_image = [
    dict(
        boxes=np.array([[160, 120, 320, 240]], dtype=np.float32),
        masks=[_get_mask_from_bbox([160, 120, 320, 240], 640, 640)],
        classes=np.array([1]),
        scores=np.array([0.75]),
    ),
]

num_classes = 3
iou_threshold = 0.5

result = compute_metrics(
    task_type=constants.Tasks.IMAGE_INSTANCE_SEGMENTATION,
    y_test=gt_objects_per_image,
    image_meta_info=meta_info_per_image,
    y_pred=predicted_objects_per_image,
    num_classes=num_classes,
    iou_threshold=iou_threshold,
)
pprint(result["metrics"])

### <font color="parrot-green">Video Multi-Object Tracking </font>

In [None]:
from azureml.metrics import compute_metrics, constants
import numpy as np
from pprint import pprint


# Meta information for the single frame used in this example.
meta_info_per_image = [
    dict(
        filename="image_1.jpg",
        frame_id=0,
        height=640,
        width=480,
        original_width=640,
        original_height=480,
    ),
]

# Ground truth is passed as one dictionary (not a list): one box with label 0 and
# instance id 1, plus an empty (0, 4) array under "bboxes_ignore".
gt_objects_per_image = dict(
    bboxes=np.array([[160, 120, 320, 240]], dtype=np.float32),
    labels=np.array([0]),
    instance_ids=np.array([1]),
    bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
)

# Predictions are a list holding one float32 array with six values per row.
predicted_objects_per_image = [
    np.array([[0, 160, 120, 320, 240, 0.75]], dtype=np.float32),
]

num_classes = 1
iou_threshold = 0.5

result = compute_metrics(
    task_type=constants.Tasks.VIDEO_MULTI_OBJECT_TRACKING,
    y_test=gt_objects_per_image,
    image_meta_info=meta_info_per_image,
    y_pred=predicted_objects_per_image,
    num_classes=num_classes,
    iou_threshold=iou_threshold,
)
pprint(result["metrics"])

## <font color="parrot-green">Batchwise Data Ingestion</font>


### <font color="parrot-green">Image Object Detection</font>


In [None]:

from azureml.metrics import compute_metrics, constants
from azureml.metrics.azureml_od_is_metrics import AzureMLODMetrics
import numpy as np
from pprint import pprint

# Meta information for three images; each has three non-crowd objects.
meta_info_per_image = [
    dict(width=640, height=640, iscrowd=np.zeros(3, dtype=bool)),
    dict(width=6400, height=6400, iscrowd=np.zeros(3, dtype=bool)),
    dict(width=64000, height=64000, iscrowd=np.zeros(3, dtype=bool)),
]

# Ground truth: three boxes per image, one for each of the classes 0, 1 and 2.
gt_objects_per_image = [
    # first image
    dict(
        boxes=np.array([[1, 0, 2, 100], [2, 0, 3, 100], [3, 0, 4, 100]]),
        masks=None,
        classes=np.array([0, 1, 2]),
        scores=None,
    ),
    # second image
    dict(
        boxes=np.array([[10, 0, 20, 100], [20, 0, 30, 100], [30, 0, 40, 100]]),
        masks=None,
        classes=np.array([0, 1, 2]),
        scores=None,
    ),
    # third image
    dict(
        boxes=np.array([[100, 0, 200, 100], [200, 0, 300, 100], [300, 0, 400, 100]]),
        masks=None,
        classes=np.array([0, 1, 2]),
        scores=None,
    ),
]

# Predictions: one box per image, each of a different class, all with score 0.5.
predicted_objects_per_image = [
    # first image
    dict(
        boxes=np.array([[1, 0, 2, 100]]),
        masks=None,
        classes=np.array([0]),
        scores=np.array([0.5]),
    ),
    # second image
    dict(
        boxes=np.array([[20, 0, 30, 100]]),
        masks=None,
        classes=np.array([1]),
        scores=np.array([0.5]),
    ),
    # third image
    dict(
        boxes=np.array([[300, 0, 400, 100]]),
        masks=None,
        classes=np.array([2]),
        scores=np.array([0.5]),
    ),
]

# Define the evaluation settings in this cell instead of relying on values left
# behind by an earlier cell. The ground truth above uses the classes 0, 1 and 2,
# so three classes are evaluated.
num_classes = 3
iou_threshold = 0.5

metric_computer = AzureMLODMetrics(num_classes=num_classes, iou_threshold=iou_threshold)

# Feed the images one at a time; each update receives single-element lists.
for gt_objects, meta_info, predicted_objects in zip(
    gt_objects_per_image, meta_info_per_image, predicted_objects_per_image
):
    metric_computer.update_states(y_test=[gt_objects], image_meta_info=[meta_info], y_pred=[predicted_objects])

# Combine the accumulated per-batch state into the final result.
results = metric_computer.aggregate_compute()

pprint(results["metrics"])


### <font color="parrot-green">Image Instance Segmentation </font>


In [None]:

from azureml.metrics import compute_metrics, constants
from azureml.metrics.azureml_od_is_metrics import AzureMLISMetrics
import numpy as np
from pycocotools import mask as pycoco_mask
from pprint import pprint
from typing import List, Dict, Any

def _get_mask_from_bbox(bbox: List, height: int, width: int) -> Dict[str, Any]:
    """Build a pycocotools mask object for the rectangle given by ``bbox``.

    ``bbox`` is unpacked into four values ``(x1, y1, x2, y2)``. They are laid out as a
    closed five-point polygon that is handed to ``pycoco_mask.frPyObjects`` together
    with ``height`` and ``width``; the first element of its result is returned.
    """
    x_start, y_start, x_end, y_end = bbox
    # Corner sequence repeats the first corner at the end to close the outline.
    outline = [x_start, y_start, x_end, y_start, x_end, y_end, x_start, y_end, x_start, y_start]
    return pycoco_mask.frPyObjects([outline], height, width)[0]

# Meta information for three square images of increasing size.
meta_info_per_image = [
    dict(
        width=640,
        height=640,
        original_width=640,
        original_height=640,
        filename="image_1.jpg",
        areas=[60000],
        iscrowd=np.array([False, False, False]),
    ),
    dict(
        width=6400,
        height=6400,
        original_width=6400,
        original_height=6400,
        filename="image_2.jpg",
        areas=[60000],
        iscrowd=np.array([False, False, False]),
    ),
    dict(
        width=64000,
        height=64000,
        original_width=64000,
        original_height=64000,
        filename="image_3.jpg",
        areas=[60000],
        iscrowd=np.array([False, False, False]),
    ),
]

# Ground truth: three boxes per image (classes 0, 1, 2), each with a mask built
# from the same box coordinates and the image's height and width.
gt_objects_per_image = [
    # first image
    dict(
        boxes=np.array([[1, 0, 2, 100], [2, 0, 3, 100], [3, 0, 4, 100]], dtype=np.float32),
        masks=[
            _get_mask_from_bbox([1, 0, 2, 100], 640, 640),
            _get_mask_from_bbox([2, 0, 3, 100], 640, 640),
            _get_mask_from_bbox([3, 0, 4, 100], 640, 640),
        ],
        classes=np.array([0, 1, 2]),
        scores=None,
    ),
    # second image
    dict(
        boxes=np.array([[10, 0, 20, 100], [20, 0, 30, 100], [30, 0, 40, 100]], dtype=np.float32),
        masks=[
            _get_mask_from_bbox([10, 0, 20, 100], 6400, 6400),
            _get_mask_from_bbox([20, 0, 30, 100], 6400, 6400),
            _get_mask_from_bbox([30, 0, 40, 100], 6400, 6400),
        ],
        classes=np.array([0, 1, 2]),
        scores=None,
    ),
    # third image
    dict(
        boxes=np.array([[100, 0, 200, 100], [200, 0, 300, 100], [300, 0, 400, 100]], dtype=np.float32),
        masks=[
            _get_mask_from_bbox([100, 0, 200, 100], 64000, 64000),
            _get_mask_from_bbox([200, 0, 300, 100], 64000, 64000),
            _get_mask_from_bbox([300, 0, 400, 100], 64000, 64000),
        ],
        classes=np.array([0, 1, 2]),
        scores=None,
    ),
]

# Predictions: one box and mask per image, each of a different class, all with score 0.5.
predicted_objects_per_image = [
    # first image
    dict(
        boxes=np.array([[1, 0, 2, 100]], dtype=np.float32),
        masks=[_get_mask_from_bbox([1, 0, 2, 100], 640, 640)],
        classes=np.array([0]),
        scores=np.array([0.5]),
        filename="image_1.jpg",
    ),
    # second image
    dict(
        boxes=np.array([[20, 0, 30, 100]], dtype=np.float32),
        masks=[_get_mask_from_bbox([20, 0, 30, 100], 6400, 6400)],
        classes=np.array([1]),
        scores=np.array([0.5]),
        filename="image_2.jpg",
    ),
    # third image
    dict(
        boxes=np.array([[300, 0, 400, 100]], dtype=np.float32),
        masks=[_get_mask_from_bbox([300, 0, 400, 100], 64000, 64000)],
        classes=np.array([2]),
        scores=np.array([0.5]),
        filename="image_3.jpg",
    ),
]

# Define the evaluation settings in this cell instead of relying on values left
# behind by an earlier cell. The ground truth above uses the classes 0, 1 and 2,
# so three classes are evaluated.
num_classes = 3
iou_threshold = 0.5

metric_computer = AzureMLISMetrics(num_classes=num_classes, iou_threshold=iou_threshold)

# Feed the images one at a time; each update receives single-element lists.
for gt_objects, meta_info, predicted_objects in zip(
    gt_objects_per_image, meta_info_per_image, predicted_objects_per_image
):
    metric_computer.update_states(y_test=[gt_objects], image_meta_info=[meta_info], y_pred=[predicted_objects])

# Combine the accumulated per-batch state into the final result.
results = metric_computer.aggregate_compute()

# Print the aggregated result computed here (`results`), not the `result`
# variable left over from an earlier cell.
pprint(results["metrics"])

### <font color="parrot-green">Forecasting </font>

In [None]:
from azureml.metrics.azureml_forecasting_metrics import AzureMLForecastingMetrics
import numpy as np
import pandas as pd
from pprint import pprint

# Generate a data set that contains two time series, identified by 'a' and 'b'.
time_column_name = "date"
time_series_id_column_names = ['ts_id']

# Training part: 24 month-start dates per series, up to the end of 2000.
train_dates = pd.date_range(end='2000-12-31', freq='MS', periods=24)
X_train = pd.DataFrame({
    time_column_name: list(train_dates) * 2,
    'ts_id': np.repeat(['a', 'b'], 24),
})
y_train = np.concatenate([np.arange(24)] * 2)

# Test part: 12 month-start dates per series, beginning in January 2001.
test_dates = pd.date_range('2001-01-01', freq='MS', periods=12)
X_test = pd.DataFrame({
    time_column_name: list(test_dates) * 2,
    'ts_id': np.repeat(['a', 'b'], 12),
})

y_actuals = np.concatenate([np.arange(12)] * 2)

# Predictions are the actuals plus seeded uniform noise from np.random.rand.
np.random.seed(42)
y_pred = y_actuals + np.random.rand(y_actuals.shape[0])

# Compute metrics: the computer is configured with the training data and column
# names, and uses np.median as its aggregation method.
forecasting_settings = dict(
    X_train=X_train,
    y_train=y_train,
    time_column_name=time_column_name,
    time_series_id_column_names=time_series_id_column_names,
    aggregation_method=np.median,
)
metric_computer = AzureMLForecastingMetrics(**forecasting_settings)

result = metric_computer.compute(y_test=y_actuals, y_pred=y_pred, X_test=X_test)
pprint(result["metrics"])