In [None]:
INFER_NUMBER = 1000


def benchmark_model(model: Any, input_data: np.ndarray, benchmark_name: str, device_name: str = "CPU") -> float:
    """
    Time repeated calls of ``model(input_data)`` and print a short report.

    One call is timed and reported on its own, then ``INFER_NUMBER`` further
    calls are timed together. ``benchmark_name`` and ``device_name`` are only
    used to label the printed lines.

    Returns the mean time in seconds of one call within the timed loop.
    """
    # The very first call gets its own measurement: it may be slower because it can include initialization.
    warmup_begin = time.perf_counter()
    model(input_data)
    first_infer_time = time.perf_counter() - warmup_begin
    print(f"{benchmark_name} on {device_name}. First inference time: {first_infer_time :.4f} seconds")

    # Timed loop over INFER_NUMBER identical calls.
    loop_begin = time.perf_counter()
    for _ in range(INFER_NUMBER):
        model(input_data)
    total_time = time.perf_counter() - loop_begin

    # Report seconds per call and calls per second (FPS).
    mean_infer_time = total_time / INFER_NUMBER
    mean_fps = INFER_NUMBER / total_time
    print(f"{benchmark_name} on {device_name}: {mean_infer_time :.4f} seconds per image ({mean_fps :.2f} FPS)")

    return mean_infer_time

In [None]:
def postprocess(detections: np.ndarray) -> List[Tuple]:
    """
    Turn raw model output into a list of (label, score, box) tuples.

    Rows whose value at index 4 is not above 0.25 are dropped. For each
    remaining row, values 0-3 are read as (x, y, width, height) with x/y
    treated as the box centre, value 4 is the score, and the label is the
    argmax over the values from index 5 onwards. Overlapping boxes are then
    filtered with OpenCV non-maximum suppression.

    Returns an empty list when nothing survives; otherwise each box is an
    integer (left, top, width, height) tuple.
    """
    # Keep only candidates with a score above 0.25.
    confident = detections[detections[..., 4] > 0.25]

    boxes, labels, scores = [], [], []
    for row in confident:
        cx, cy, width, height = row[:4]
        # Shift the centre by half the size to get the top-left corner,
        # then truncate everything to plain ints for OpenCV.
        corner_box = (cx - width // 2, cy - height // 2, width, height)
        boxes.append(tuple(int(value) for value in corner_box))
        labels.append(int(np.argmax(row[5:])))
        scores.append(float(row[4]))

    # Non-maximum suppression removes heavily overlapping boxes.
    # See https://paperswithcode.com/method/non-maximum-suppression
    # The call returns the indices of the boxes to keep.
    keep = cv2.dnn.NMSBoxes(
        bboxes=boxes, scores=scores, score_threshold=0.25, nms_threshold=0.5
    )

    # Nothing left after suppression (or nothing to begin with).
    if len(keep) == 0:
        return []

    # Collect the surviving detections.
    return [(labels[i], scores[i], boxes[i]) for i in keep.flatten()]


def draw_boxes(img: np.ndarray, boxes):
    """
    Draw every detection onto ``img`` in place.

    Each item of ``boxes`` is a (label, score, box) triple where ``box`` is
    indexed as (x, y, width, height). The colour and the class name are
    looked up by label in the module-level ``colors`` and ``classes``.
    """
    for label, score, box in boxes:
        # Colour assigned to this label, converted to a tuple of plain ints.
        color = tuple(int(channel) for channel in colors[label])

        # Rectangle from the top-left corner to (x + width, y + height).
        bottom_right = (box[0] + box[2], box[1] + box[3])
        cv2.rectangle(img=img, pt1=box[:2], pt2=bottom_right, color=color, thickness=2)

        # Caption "<class name> <score>" placed slightly inside the box;
        # the font scale grows with the image width.
        caption = f"{classes[label]} {score:.2f}"
        caption_origin = (box[0] + 10, box[1] + 20)
        cv2.putText(
            img=img,
            text=caption,
            org=caption_origin,
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=img.shape[1] / 1200,
            color=color,
            thickness=1,
            lineType=cv2.LINE_AA,
        )


def show_result(results: np.ndarray):
    """
    Display the module-level ``image`` with the detections from ``results`` drawn on it.

    The raw results go through ``postprocess``, the boxes are drawn on a copy
    of ``image`` (the original is left untouched), and the copy is shown with
    ``utils.show_array``.
    """
    # Work on a copy so repeated calls always start from a clean image.
    canvas = image.copy()
    draw_boxes(canvas, postprocess(results))
    utils.show_array(canvas)

In [None]:
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    """
    Draw detection boxes, and optionally their confidences, on an RGB version of ``bgr_image``.

    Each entry of ``boxes`` is read as [x_min, y_min, x_max, y_max, conf].
    Coordinates are scaled by the size ratio between ``bgr_image`` and
    ``resized_image`` before drawing. Only boxes with ``conf`` above
    ``threshold`` are drawn; the confidence text is added when
    ``conf_labels`` is true.

    Returns the annotated image in RGB channel order (the order matplotlib expects).
    """
    # Colors for boxes and descriptions.
    colors = {"red": (255, 0, 0), "green": (0, 255, 0)}

    # Scale factors between the original image and the resized one.
    real_y, real_x = bgr_image.shape[:2]
    resized_y, resized_x = resized_image.shape[:2]
    ratio_x = real_x / resized_x
    ratio_y = real_y / resized_y

    # Convert from BGR to RGB channel order.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    for box in boxes:
        # The confidence is the last value of each entry.
        conf = box[-1]
        if not conf > threshold:
            continue

        raw_x_min, raw_y_min, raw_x_max, raw_y_max = box[:-1]
        # Scale to the original image size and truncate to ints.
        # y values are kept at 10 or more so that a box at the very top of
        # the image, and the text above it, stay visible.
        x_min = int(raw_x_min * ratio_x)
        y_min = int(max(raw_y_min * ratio_y, 10))
        x_max = int(raw_x_max * ratio_x)
        y_max = int(max(raw_y_max * ratio_y, 10))

        # Green box outline, 3 px thick.
        rgb_image = cv2.rectangle(
            img=rgb_image,
            pt1=(x_min, y_min),
            pt2=(x_max, y_max),
            color=colors["green"],
            thickness=3,
        )

        # Red confidence text just above the top-left corner of the box.
        if conf_labels:
            rgb_image = cv2.putText(
                img=rgb_image,
                text=f"{conf:.2f}",
                org=(x_min, y_min - 10),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.8,
                color=colors["red"],
                thickness=1,
                lineType=cv2.LINE_AA,
            )

    return rgb_image

In [None]:
# Install the nncf package with pip; the leading "!" runs the line as a shell command from the notebook.
!pip install nncf
# Quiet variant with a minimum version requirement, left disabled:
#pip install -q "nncf>=2.5.0"

# Semantic segmentation

In [None]:
from torchvision.utils import draw_segmentation_masks

# Take the argmax of normalized_mask along dimension 1 (presumably the class
# dimension - confirm against where normalized_mask is built) and compare it
# with the index of 'cat' to get a boolean mask.
class_dim = 1
boolean_cat_mask = (normalized_mask.argmax(class_dim) == sem_class_to_idx['cat'])

# Overlay the mask on the image in semi-transparent yellow and display it.
show(
    draw_segmentation_masks(
        image,
        masks=boolean_cat_mask,
        alpha=0.7,
        colors='yellow',
    )
)