In [4]:
DATA_PREFIX = "../Datasets/Dataset_640x360"
ELEMENTS_MODEL = "../Models/trained/Yolov8n-seg - Elements/best.pt"
TEXT_MODEL = "../Models/trained/Yolov8s - Text/best.pt"
CONTAINER_MODEL = "../Models/trained/CustomSAM - Container/best.pt"
APPLEVEL_MODEL = "../Models/trained/Yolov8s-seg - AppLevel/best.pt"
TOP_MODEL = "../Models/trained/Yolov8s-seg - Top/best.pt"

In [1]:
import copy
import json
import pprint

import matplotlib.pyplot as plt
import numpy as np
import torch
from hierarchy_constructor import labels_to_soms
from mapping import get_all_mapping_data, show_mappings
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
from sklearn.metrics import ConfusionMatrixDisplay
from ultralytics import YOLO
from utils import *

View settings with 'yolo settings' or at 'C:\Users\IWT2-PORT29\AppData\Roaming\Ultralytics\settings.yaml'
Update settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'.


In [12]:
img_path = f"{DATA_PREFIX}/Captura de pantalla (42)_2.png"
img_name = img_path.split("/")[-1]

In [6]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path=ELEMENTS_MODEL,
    confidence_threshold=0.4,
)

img = cv2.imread(img_path)
result = get_sliced_prediction(
    img,
    detection_model,
    slice_height=240,
    slice_width=240,
    overlap_height_ratio=0.3,
    overlap_width_ratio=0.3,
    perform_standard_pred=True,
)
anns = result.to_coco_annotations()
shapes = coco_to_labelme(anns, type="bbox", id_start=0)
# shapes = coco_to_labelme(anns, type="seg")
detections = dict()
detections["shapes"] = shapes
detections["imageWidth"] = img.shape[1]
detections["imageHeight"] = img.shape[0]

# Unload model from memory
del detection_model
torch.cuda.empty_cache()

Performing prediction on 8 number of slices.


In [7]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path=TEXT_MODEL,
    confidence_threshold=0.4,
)

img = cv2.imread(img_path)
result = get_sliced_prediction(
    img,
    detection_model,
    slice_height=240,
    slice_width=240,
    overlap_height_ratio=0.3,
    overlap_width_ratio=0.3,
    perform_standard_pred=True,
)
anns = result.to_coco_annotations()
shapes = coco_to_labelme(
    anns, type="bbox", id_start=len(detections["shapes"])
)
# shapes = coco_to_labelme(anns, type="seg")
detections["shapes"].extend(shapes)

# Unload model from memory
del detection_model
torch.cuda.empty_cache()

Performing prediction on 8 number of slices.


In [8]:
model = YOLO(CONTAINER_MODEL)

img = cv2.imread(img_path)
result = json.loads(model(img, conf=0.4)[0].tojson())
shapes = json_inference_to_labelme(
    result, type="bbox", id_start=len(detections["shapes"])
)
detections["shapes"].extend(shapes)

# Unload model from memory
del model
torch.cuda.empty_cache()


0: 384x640 1 TabActive, 2 TabInactives, 1 Sidebar, 1 Navbar, 4 Containers, 100.7ms
Speed: 4.0ms preprocess, 100.7ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)


In [9]:
model = YOLO(APPLEVEL_MODEL)

img = cv2.imread(img_path)
result = json.loads(model(img, conf=0.4)[0].tojson())
shapes = json_inference_to_labelme(
    result, type="seg", id_start=len(detections["shapes"])
)
# shapes = json_inference_to_labelme(result, type="seg")
detections["shapes"].extend(shapes)

# Unload model from memory
del model
torch.cuda.empty_cache()


0: 384x640 1 Header, 41.4ms
Speed: 2.0ms preprocess, 41.4ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)


In [10]:
model = YOLO(TOP_MODEL)

img = cv2.imread(img_path)
result = json.loads(model(img, conf=0.4)[0].tojson())
shapes = json_inference_to_labelme(
    result, type="seg", id_start=len(detections["shapes"])
)
# shapes = json_inference_to_labelme(result, type="seg")
detections["shapes"].extend(shapes)

# Unload model from memory
del model
torch.cuda.empty_cache()


0: 384x640 1 Application, 2 Taskbars, 72.3ms
Speed: 3.0ms preprocess, 72.3ms inference, 6.1ms postprocess per image at shape (1, 3, 384, 640)


In [14]:
detections = {img_name: detections}
predicted_soms = labels_to_soms(copy.deepcopy(detections))

100%|██████████| 1/1 [00:00<00:00,  2.36it/s]


In [23]:

def show_mappings(img_path, detected_shapes):
    tint_colors = {}
    for i in range(len(detected_shapes)):
        if detected_shapes[i]["label"] not in tint_colors:
            tint_colors[detected_shapes[i]["label"]] = (
                np.random.randint(0, 255),
                np.random.randint(0, 255),
                np.random.randint(0, 255),
            )

    img = cv2.imread(img_path)
    for i in range(len(detected_shapes)):
        # Show both polygons (labeled and detected)
        cv2.polylines(
            img,
            np.int32([detected_shapes[i]["points"]]),
            True,
            tint_colors[detected_shapes[i]["label"]],
            2,
        )

    cv2.imshow("mappings", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [24]:
show_mappings(
    img_path,
    detections[img_name]["shapes"],
)

In [21]:
pp = pprint.PrettyPrinter(indent=4).pprint

pp(predicted_soms)

{   'Captura de pantalla (42)_2.png': {   'children': [   {   'children': [   {   'children': [   ],
                                                                                  'depth': 2,
                                                                                  'id': 99,
                                                                                  'label': 'Text',
                                                                                  'points': [   [   34.153900146484375,
                                                                                                    0.0],
                                                                                                [   108.00135040283203,
                                                                                                    0.0],
                                                                                                [   108.00135040283203,
                                          

In [20]:
json.dump(
    predicted_soms,
    open(f"predicted_soms.json", "w"),
    indent=4,
    sort_keys=True,
)