# Inference YOLOv8m

Load the model from [Ultralytics](https://github.com/ultralytics/ultralytics) and process some test images.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the
# last one (this is why later cells assign unwanted results to `_`).
InteractiveShell.ast_node_interactivity = 'all'

import warnings
# Silence all Python warnings for the rest of the notebook.
warnings.simplefilter('ignore')

import os

In [None]:
import numpy as np
import torch
import onnxruntime as ort
import typing as tp

import matplotlib.pyplot as plt
import cv2

In [None]:
# Root folder of the demo images: the inference cell reads files from its
# 'preprocess' subfolder and writes the rendered results to 'final'.
IMAGE_DIR = '../test_images'
# One threshold shared by three steps of the post-processing: the minimum
# class score of a detection, the IoU limit used during NMS and the
# probability cut-off that binarises the predicted masks.
NN_THRESHOLD = 0.7
# Number of copies of the preprocessed image stacked into one model input.
BATCH_SIZE = 1
# ONNX model loaded by onnxruntime.
# NOTE(review): the file name says YOLOv11m-seg while the notebook title and
# the output file prefix say YOLOv8m - confirm which one is correct.
MODEL_ONNX_FILE = '../models/YOLOv11m-seg.onnx'

In [None]:
def nms_pytorch(P: torch.Tensor, thresh_iou: float) -> tp.List[torch.Tensor]:
    """
    Apply non-maximum suppression to avoid detecting too many
    overlapping bounding boxes for a given object.

    Args:
        P: (tensor) Predictions, one row per box laid out as
            [x1, y1, x2, y2, score], Shape: [num_boxes, 5].
        thresh_iou: (float) A box is suppressed when its IoU with an
            already selected box is not below this value.

    Returns:
        A list with the rows of P that survived, ordered from the highest
        score to the lowest. The list is empty when P has no rows.
    """
    # corner coordinates and confidence of every prediction
    x1, y1, x2, y2 = P[:, 0], P[:, 1], P[:, 2], P[:, 3]
    scores = P[:, 4]

    # area of every box in P
    areas = (x2 - x1) * (y2 - y1)

    # indices sorted by ascending score: the best candidate is the last one
    order = scores.argsort()

    keep = []
    while order.numel() > 0:
        # take the remaining prediction with the highest score ...
        idx = order[-1]
        keep.append(P[idx])

        # ... and remove it from the candidates
        order = order[:-1]
        if order.numel() == 0:
            break

        # intersection rectangle of every remaining box with the selected one
        inter_x1 = torch.max(x1[order], x1[idx])
        inter_y1 = torch.max(y1[order], y1[idx])
        inter_x2 = torch.min(x2[order], x2[idx])
        inter_y2 = torch.min(y2[order], y2[idx])

        # non-overlapping boxes give negative extents, clamp them to zero
        inter_w = torch.clamp(inter_x2 - inter_x1, min=0.0)
        inter_h = torch.clamp(inter_y2 - inter_y1, min=0.0)
        inter = inter_w * inter_h

        # union = area(T) + area(S) - intersection
        union = (areas[order] - inter) + areas[idx]
        IoU = inter / union

        # keep only the candidates that do not overlap the selection too much
        order = order[IoU < thresh_iou]

    return keep


def onnx_preprocessing(
    image: np.ndarray,
    image_size: tp.Tuple[int, int] = (224, 224),
) -> np.ndarray:
    """Resize and normalise an image and return it as a 1xCxHxW batch.

    The image is resized to ``image_size`` with bicubic interpolation,
    converted to float32, normalised per channel with the mean/std values
    below (scaled by 255) and moved from HWC to CHW layout with a leading
    batch axis of size one.
    """
    resized = cv2.resize(
        image.copy(), image_size, interpolation=cv2.INTER_CUBIC
    )

    # per-channel statistics expressed in the 0..255 pixel range
    channel_mean = 255.0 * np.array((0.485, 0.456, 0.406), dtype=np.float32)
    channel_std = 255.0 * np.array((0.229, 0.224, 0.225), dtype=np.float32)
    inv_std = np.reciprocal(channel_std, dtype=np.float32)

    normalized = resized.astype(np.float32)
    normalized -= channel_mean
    normalized *= inv_std

    # HWC -> CHW, then prepend the batch axis
    return np.expand_dims(normalized.transpose((2, 0, 1)), axis=0)


def intersection(box1, box2):
    """Return the overlap area of two axis-aligned boxes.

    Both boxes are sequences whose first four items are
    ``x1, y1, x2, y2``; the boxes are expected to satisfy ``x1 <= x2``
    and ``y1 <= y2``. Boxes that do not overlap have an intersection of
    zero.
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    # Clamp each extent separately: without it two boxes that are apart on
    # both axes multiply two negative numbers into a positive "overlap",
    # and boxes apart on one axis yield a negative area.
    return max(0.0, x2 - x1) * max(0.0, y2 - y1)


def union(box1, box2):
    """Return the area covered by either of two ``x1, y1, x2, y2`` boxes.

    Computed as the sum of both box areas minus their intersection.
    """
    ax1, ay1, ax2, ay2 = box1[:4]
    bx1, by1, bx2, by2 = box2[:4]
    summed_area = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1)
    return summed_area - intersection(box1, box2)


def iou(box1, box2):
    """Return intersection over union of two ``x1, y1, x2, y2`` boxes."""
    overlap = intersection(box1, box2)
    covered = union(box1, box2)
    return overlap / covered


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    Works element-wise on numpy arrays as well as on scalars.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator


def binary_mask_iou(
        mask1: np.ndarray,
        mask2: np.ndarray,
        label_value: int = 1,
) -> float:
    """Return the IoU of the pixels equal to ``label_value`` in two masks.

    Args:
        mask1: first label mask.
        mask2: second label mask, same shape as ``mask1``.
        label_value: pixel value that marks the object in both masks.

    Returns:
        Intersection over union of the two pixel sets. When neither mask
        contains ``label_value`` the union is empty and 0.0 is returned
        instead of dividing by zero.
    """
    selected1 = mask1 == label_value
    selected2 = mask2 == label_value
    overlap = np.count_nonzero(np.logical_and(selected1, selected2))
    covered = (
        np.count_nonzero(selected1) + np.count_nonzero(selected2) - overlap
    )
    if covered == 0:
        return 0.0
    return overlap / covered

In [None]:
import json
from tqdm import tqdm

# Build the ground truth from the annotation file: for every image store
# its id, its path, a semantic mask (pixel value = category id) and the
# number of annotated polygons.
d_path_annot = '../IoU_test/annotations/balanced_test.json'
d_path_images = '../IoU_test/images'

processed_data = {
    'Id': [],
    'image_path': [],
    'semantic_masks': [],
    'granules_number': []
}

# The file is parsed once and reused for both passes below.
with open(d_path_annot, 'r', encoding="utf-8") as json_file:
    json_data_dir = json.load(json_file)

# image id -> position of that image in the processed_data lists
# (first occurrence wins, like list.index would).
image_index_by_id = {}

# Process image data
for image_inf in tqdm(json_data_dir['images'], desc="Process images: "):
    real_img_id = image_inf['id']
    image_index_by_id.setdefault(real_img_id, len(processed_data['Id']))
    img_name = image_inf['file_name'].split('/')[-1]
    processed_data['Id'].append(real_img_id)
    processed_data['image_path'].append(
        str(os.path.join(d_path_images, img_name))
    )
    processed_data['semantic_masks'].append(
        np.zeros((image_inf['height'], image_inf['width']), dtype=np.uint8)
    )
    processed_data['granules_number'].append(0)

# Process annotation data
for annotation_data in tqdm(
    json_data_dir['annotations'], desc="Process annotations: "
):
    image_data_indx = image_index_by_id[annotation_data['image_id']]
    label = annotation_data['category_id']
    semantic_mask = processed_data['semantic_masks'][image_data_indx]

    # Process each granule; only polygons given as flat
    # [x0, y0, x1, y1, ...] lists are handled, anything else is skipped.
    for point in annotation_data['segmentation']:
        if not isinstance(point, list):
            continue

        cnt = np.array(point).reshape((-1, 1, 2)).astype(np.int32)
        if len(cnt) < 3:  # bad contour
            print('Cnt is bad')
            continue

        # Rasterise the polygon on a scratch mask of the image's own size
        # (a fixed 480x480 canvas fails for any other resolution).
        single_mask = np.zeros_like(semantic_mask)
        _ = cv2.drawContours(
            single_mask,
            [cnt], -1, label, cv2.FILLED
        )
        semantic_mask[single_mask != 0] = label
        processed_data['granules_number'][image_data_indx] += 1

In [None]:
# Execution providers in the order they are handed to onnxruntime.
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

# Inference session for the exported segmentation model.
ort_session = ort.InferenceSession(MODEL_ONNX_FILE, providers=providers)

In [None]:
# Run the segmentation model on every image of IMAGE_DIR/preprocess, draw
# the detected boxes and masks and save the result to IMAGE_DIR/final.
preprocess_dir = os.path.join(IMAGE_DIR, 'preprocess')
os.makedirs(os.path.join(IMAGE_DIR, 'final'), exist_ok=True)
image_names = [
    name for name in os.listdir(preprocess_dir)
    if os.path.isfile(os.path.join(preprocess_dir, name))
]

for image_name in image_names:
    test_image = cv2.imread(os.path.join(preprocess_dir, image_name))
    if test_image is None:
        # cv2.imread returns None for files it cannot decode
        print(f'Skip unreadable image: {image_name}')
        continue
    test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
    image_h, image_w = test_image.shape[:2]

    onnx_input = onnx_preprocessing(
        test_image,
        image_size=(480, 480)
    )
    # Boxes are scaled below from the network input size, not the size of
    # the original image.
    input_h, input_w = onnx_input.shape[2:]
    onnx_input = np.concatenate([onnx_input] * BATCH_SIZE)
    ort_inputs = {ort_session.get_inputs()[0].name: onnx_input}
    ort_outputs = ort_session.run(None, ort_inputs)

    # After the transpose every row is one candidate:
    # [xc, yc, w, h, class score, mask coefficients...]
    number_of_classes = 1
    mask_index = 4 + number_of_classes
    prediction = ort_outputs[0][0].transpose()
    candidates = prediction[prediction[:, 4] >= NN_THRESHOLD]

    # centre/size -> corner format, best score first
    centers, sizes = candidates[:, 0:2], candidates[:, 2:4]
    detections = np.hstack(
        [centers - sizes / 2, centers + sizes / 2, candidates[:, 4:]]
    )
    detections = detections[np.argsort(-detections[:, 4], kind='stable')]

    # Greedy NMS. The selected box is removed explicitly so the loop always
    # terminates, whatever iou() returns for a box compared with itself.
    pass_data = list(detections)
    pass_data_result = []
    while pass_data:
        best, *rest = pass_data
        pass_data_result.append(best)
        pass_data = [
            box for box in rest
            if iou(box[:4], best[:4]) < NN_THRESHOLD
        ]

    green_masks = np.zeros(test_image.shape, dtype=np.uint8)
    if pass_data_result:
        pass_results = np.array(pass_data_result)
        mask_coeffs = pass_results[:, mask_index:]

        mask_len, mask_h, mask_w = ort_outputs[1].shape[1:]
        prototypes = ort_outputs[1][0].reshape(mask_len, mask_h * mask_w)
        # (n, mask_len) @ (mask_len, mask_h * mask_w) -> one map per box
        masks = sigmoid(mask_coeffs @ prototypes).reshape(-1, mask_h, mask_w)

        for bbox, mask in zip(pass_data_result, masks):
            # draw the box in the coordinates of the original image
            _ = cv2.rectangle(
                test_image,
                (int(bbox[0] / input_w * image_w),
                 int(bbox[1] / input_h * image_h)),
                (int(bbox[2] / input_w * image_w),
                 int(bbox[3] / input_h * image_h)),
                (255, 0, 0),
                2
            )

            binary_mask = (mask > NN_THRESHOLD).astype('uint8') * 255

            # box in mask coordinates, clamped so that a negative value
            # cannot wrap around when used as a slice bound
            mask_x1 = int(np.clip(round(float(bbox[0]) / input_w * mask_w), 0, mask_w))
            mask_y1 = int(np.clip(round(float(bbox[1]) / input_h * mask_h), 0, mask_h))
            mask_x2 = int(np.clip(round(float(bbox[2]) / input_w * mask_w), 0, mask_w))
            mask_y2 = int(np.clip(round(float(bbox[3]) / input_h * mask_h), 0, mask_h))

            # keep only the part of the mask that lies inside the box
            masked_mask = np.zeros((mask_h, mask_w), np.uint8)
            masked_mask[mask_y1:mask_y2, mask_x1:mask_x2] = \
                binary_mask[mask_y1:mask_y2, mask_x1:mask_x2]

            # upscale to the original image; cv2 expects dsize=(width, height)
            mask_resized = cv2.resize(
                masked_mask,
                (image_w, image_h),
                interpolation=cv2.INTER_LINEAR
            )
            green_masks[mask_resized > 200] = (61, 142, 48)

    test_image = cv2.addWeighted(test_image, 0.6, green_masks, 0.4, 0)
    _ = cv2.imwrite(IMAGE_DIR + f'/final/yolov8m_{int(NN_THRESHOLD*10)}_' + image_name, cv2.cvtColor(test_image, cv2.COLOR_RGB2BGR))

    fig, ax = plt.subplots(figsize=(10, 10))
    _ = ax.imshow(test_image, cmap='gray')


In [None]:
# Evaluate the model on the annotated set: per image, the number of
# detected granules and the IoU between predicted and ground-truth masks.
total_granules_det = []
total_iou = []

for image_i, image_path in enumerate(tqdm(processed_data['image_path'])):
    test_image = cv2.imread(image_path)
    if test_image is None:
        # cv2.imread returns None instead of raising; fail with the path
        raise FileNotFoundError(f'Cannot read image: {image_path}')
    test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
    image_h, image_w = test_image.shape[:2]

    gt_mask = processed_data['semantic_masks'][image_i]

    onnx_input = onnx_preprocessing(
        test_image,
        image_size=(480, 480)
    )
    # Boxes are scaled below from the network input size, not the size of
    # the original image.
    input_h, input_w = onnx_input.shape[2:]
    onnx_input = np.concatenate([onnx_input] * BATCH_SIZE)
    ort_inputs = {ort_session.get_inputs()[0].name: onnx_input}
    ort_outputs = ort_session.run(None, ort_inputs)

    # After the transpose every row is one candidate:
    # [xc, yc, w, h, class score, mask coefficients...]
    number_of_classes = 1
    mask_index = 4 + number_of_classes
    prediction = ort_outputs[0][0].transpose()
    candidates = prediction[prediction[:, 4] >= NN_THRESHOLD]

    # centre/size -> corner format, best score first
    centers, sizes = candidates[:, 0:2], candidates[:, 2:4]
    detections = np.hstack(
        [centers - sizes / 2, centers + sizes / 2, candidates[:, 4:]]
    )
    detections = detections[np.argsort(-detections[:, 4], kind='stable')]

    # Greedy NMS. The selected box is removed explicitly so the loop always
    # terminates, whatever iou() returns for a box compared with itself.
    pass_data = list(detections)
    pass_data_result = []
    while pass_data:
        best, *rest = pass_data
        pass_data_result.append(best)
        pass_data = [
            box for box in rest
            if iou(box[:4], best[:4]) < NN_THRESHOLD
        ]

    if not pass_data_result:
        total_granules_det.append(0)
        total_iou.append(0)
        continue

    pass_results = np.array(pass_data_result)
    mask_coeffs = pass_results[:, mask_index:]

    mask_len, mask_h, mask_w = ort_outputs[1].shape[1:]
    prototypes = ort_outputs[1][0].reshape(mask_len, mask_h * mask_w)
    # (n, mask_len) @ (mask_len, mask_h * mask_w) -> one map per box
    masks = sigmoid(mask_coeffs @ prototypes).reshape(-1, mask_h, mask_w)

    pr_mask = np.zeros((image_h, image_w), dtype=np.uint8)
    for bbox, mask in zip(pass_data_result, masks):
        binary_mask = (mask > NN_THRESHOLD).astype('uint8') * 255

        # box in mask coordinates, clamped so that a negative value cannot
        # wrap around when used as a slice bound
        mask_x1 = int(np.clip(round(float(bbox[0]) / input_w * mask_w), 0, mask_w))
        mask_y1 = int(np.clip(round(float(bbox[1]) / input_h * mask_h), 0, mask_h))
        mask_x2 = int(np.clip(round(float(bbox[2]) / input_w * mask_w), 0, mask_w))
        mask_y2 = int(np.clip(round(float(bbox[3]) / input_h * mask_h), 0, mask_h))

        # keep only the part of the mask that lies inside the box
        masked_mask = np.zeros((mask_h, mask_w), np.uint8)
        masked_mask[mask_y1:mask_y2, mask_x1:mask_x2] = \
            binary_mask[mask_y1:mask_y2, mask_x1:mask_x2]

        # upscale to the original image; cv2 expects dsize=(width, height)
        mask_resized = cv2.resize(
            masked_mask,
            (image_w, image_h),
            interpolation=cv2.INTER_LINEAR
        )
        pr_mask[mask_resized > 200] = 1

    total_granules_det.append(len(pass_data_result))
    total_iou.append(binary_mask_iou(gt_mask, pr_mask))


# Relative count metrics are undefined for images without annotated
# granules (division by zero), so those images are left out of them.
detected_counts = np.array(total_granules_det)
annotated_counts = np.array(processed_data['granules_number'])
has_granules = annotated_counts > 0
granules_deviation = np.abs(
    detected_counts[has_granules] - annotated_counts[has_granules]
) / annotated_counts[has_granules] * 100
granules_detected = (
    detected_counts[has_granules] / annotated_counts[has_granules]
) * 100


print(f"Median and Average IoU for all dataset = {np.median(total_iou)} VS {np.average(total_iou)}, disp = {np.std(total_iou)}")
print(f"Median and Average granules devianion for all dataset = {np.median(granules_deviation)} VS {np.average(granules_deviation)}, disp = {np.std(granules_deviation)}")
print(f"Median and Average granules detected for all dataset = {np.median(granules_detected)} VS {np.average(granules_detected)}, disp = {np.std(granules_detected)}")