In [59]:
from ultralytics import YOLO
import geopandas as gpd
from shapely.geometry import Polygon
import numpy as np
from pathlib import Path
import os
import json
import colorsys
from skimage.measure import find_contours

In [60]:
def collect_files(directory: str, suffixes=('.png', '.tiff', '.tif')):
    """Collect the files located directly inside ``directory`` by suffix.

    Only depth-1 entries are considered; sub-directories are not traversed.

    Args:
        directory: Path of the directory to scan.
        suffixes: A single suffix string or an iterable of suffix strings.
            Matching is case-insensitive and a missing leading dot is added,
            so ``'TIF'``, ``'.TIF'`` and ``'.tif'`` are all equivalent.

    Returns:
        A sorted list of ``pathlib.Path`` objects for the matching files.
        Sorting keeps the result reproducible, because ``iterdir()`` yields
        entries in arbitrary order.

    Raises:
        NotADirectoryError: If ``directory`` is not an existing directory.
    """
    directory_path = Path(directory)
    if not directory_path.is_dir():
        raise NotADirectoryError(f"{directory} is not a valid directory")

    # A bare string is one suffix, not an iterable of single characters.
    if isinstance(suffixes, str):
        suffixes = (suffixes,)

    # File suffixes are lower-cased before comparison, so the requested
    # suffixes must be lower-cased too; also tolerate a missing leading dot.
    wanted = {
        suffix.lower() if (not suffix or suffix.startswith('.')) else f".{suffix.lower()}"
        for suffix in suffixes
    }

    # collect only depth 1 files
    return sorted(
        entry for entry in directory_path.iterdir()
        if entry.is_file() and entry.suffix.lower() in wanted
    )

In [67]:
# Load the YOLO weights used by the prediction cells below.
# The commented-out lines are alternative checkpoints; enable exactly one.
# NOTE(review): the active path is absolute and machine-specific.
# model = YOLO("../../runs/detect/train/weights/best.pt")
model = YOLO("/Users/simon/Downloads/best-loop3-640-deduped-merged.pt")
# model = YOLO("/Users/simon/Downloads/best-onepass-1024.pt")

In [68]:
# Images to run prediction on; commented-out entries are alternative inputs.
# NOTE(review): these are absolute, machine-specific paths.
IMAGES = [
    # '/Users/simon/Downloads/test nuclei/image_02.tif.tif',
    # '/Users/simon/Documents/000_fiit/09_semester/DP/notebooks/pleomorphy-analysis/dp-pleomorphy-analysis/data/processed/yolo-initial-640/yolo_dataset/images/val/tile_0004_2560_0.tif'
    '/Users/simon/Documents/000_fiit/09_semester/DP/notebooks/pleomorphy-analysis/dp-pleomorphy-analysis/data/processed/yolo-larger-1024/yolo_dataset/images/val/tile_0002_2048_0.tif'
]
# Alternative: predict on every .tif located directly inside a validation directory.
# IMAGES = collect_files(
#     directory='/Users/simon/Documents/000_fiit/09_semester/DP/notebooks/pleomorphy-analysis/dp-pleomorphy-analysis/data/processed/yolo-initial-640/yolo_dataset/images/val',
#     suffixes=['.tif']
# )

# Run the loaded model on IMAGES with imgsz=1024 and conf=0.05.
# NOTE(review): conf is presumably the minimum detection confidence, so 0.05
# keeps very weak detections — confirm this is intended.
results = model(
    IMAGES,
    imgsz=1024,
    conf=0.05
)




0: 1024x1024 115 podozrivé jadros, 1 referenčná bunky - lymfocyt, 9 hyperchrómne jadros, 276.4ms
Speed: 5.7ms preprocess, 276.4ms inference, 6.3ms postprocess per image at shape (1, 3, 1024, 1024)


In [69]:
# Display every prediction result and save an annotated image to disk.
# The five attributes below are only assigned here; nothing in this loop reads them.
# NOTE(review): every iteration passes the same filename to save(), so with
# several images later results presumably overwrite earlier ones — confirm.
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    obb = result.obb  # Oriented boxes object for OBB outputs
    result.show()  # display to screen
    result.save(filename="result-deduped.jpg")  # save to disk

In [67]:
# Class name of the first detected box in the first result.
# .item() extracts the scalar directly; int() on a 1-element array is a
# deprecated conversion in NumPy >= 1.25 and emits a DeprecationWarning.
results[0].names[int(results[0].boxes[0].cls.item())]

  results[0].names[int(results[0].boxes[0].cls.cpu().numpy())]


'veľké jadro'

# Careful: by default the GeoJSON files are saved next to the input files

- We should probably allow specifying an output directory (`out_dir`).
- The GeoJSON is not great for QuPath:
  - the classification is missing
  - all colors are the same
  - there are no class names

In [70]:
def class_to_color(class_id, num_classes):
    """Return a distinct RGB color for one class.

    Hues are spread evenly over the HSV color wheel at full saturation and
    value, so class ``i`` gets the hue ``i / num_classes``.

    Args:
        class_id: Index of the class. Negative values index from the end,
            like list indexing.
        num_classes: Total number of classes sharing the palette.

    Returns:
        A tuple ``(r, g, b)`` of integers in ``[0, 255]``.

    Raises:
        IndexError: If ``class_id`` is outside ``[-num_classes, num_classes)``.
    """
    if not -num_classes <= class_id < num_classes:
        raise IndexError(
            f"class_id {class_id} is out of range for {num_classes} classes"
        )

    # Compute only the requested color instead of building the whole palette
    # on every call; the modulo maps negative indices onto the same palette.
    hue = (class_id % num_classes) / num_classes
    rgb = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
    # scale to [0, 255] and convert to integers
    return tuple(int(255 * channel) for channel in rgb)


def _box_properties(box, names, image_path, annotation_type):
    """Build the GeoJSON property dict shared by bbox and mask features."""
    # .item() extracts the scalar directly; int()/float() on a 1-element
    # array is a deprecated conversion in NumPy >= 1.25.
    class_id = int(box.cls.item()) if box.cls is not None else None
    conf = float(box.conf.item()) if box.conf is not None else None

    return {
        'class_id': class_id,
        'confidence': conf,
        'type': annotation_type,
        'image': str(image_path),
        # Stored as a JSON string holding the class name and its RGB color.
        'classification': json.dumps({
            'name': names[class_id],
            'color': class_to_color(class_id=class_id, num_classes=len(names))
        }),
    }


def yolo_results_to_geojson(results, image_paths, save_dir=None):
    """Write one GeoJSON file per image from YOLO prediction results.

    Every bounding box becomes a rectangular polygon feature of type
    ``'bbox'``; when masks are present, every valid mask contour becomes a
    polygon feature of type ``'mask'``. Each feature carries the class id,
    confidence, source image path and a JSON-encoded ``classification``
    (class name and color).

    Args:
        results: Iterable of prediction results, paired with ``image_paths``
            by position.
        image_paths: Paths of the images the results belong to.
        save_dir: Directory for the ``.geojson`` files. It is created when
            missing. When ``None``, each file is written next to its image.

    Returns:
        A list with the path of every GeoJSON file written. Images without
        any annotation produce no file and no entry.
    """
    written_paths = []

    for result, image_path in zip(results, image_paths):
        features = []

        boxes = result.boxes
        masks = result.masks
        names = result.names

        if boxes is not None:
            for box in boxes:
                xyxy = box.xyxy.cpu().numpy()  # [x_min, y_min, x_max, y_max]
                if len(xyxy) == 0:
                    continue

                x_min, y_min, x_max, y_max = xyxy[0]
                polygon = Polygon([
                    (x_min, y_min),
                    (x_max, y_min),
                    (x_max, y_max),
                    (x_min, y_max),
                    (x_min, y_min)
                ])
                features.append({
                    'geometry': polygon,
                    'properties': _box_properties(box, names, image_path, 'bbox'),
                })

        # Pair each mask with its detection from result.boxes. The previous
        # code read masks.boxes; NOTE(review): the Masks object does not
        # appear to expose that attribute — confirm against the installed
        # ultralytics version.
        if masks is not None and boxes is not None:
            for mask, box in zip(masks.data, boxes):
                mask_np = mask.cpu().numpy()
                # class label and confidence come from the corresponding box
                properties = _box_properties(box, names, image_path, 'mask')

                # NOTE(review): masks.data is presumably at the network's
                # inference resolution rather than the original image size,
                # so these contours may need rescaling — confirm.
                contours = find_contours(mask_np, 0.5)  # find contours at threshold 0.5
                for contour in contours:
                    # contour is in (row, col) format; convert to (x, y)
                    contour = np.flip(contour, axis=1)
                    if len(contour) < 3:
                        continue  # skip invalid polygons
                    polygon = Polygon(contour)
                    if not polygon.is_valid:
                        continue  # skip invalid geometries

                    features.append({
                        'geometry': polygon,
                        # copy so features do not share one mutable dict
                        'properties': dict(properties),
                    })

        if features:
            gdf = gpd.GeoDataFrame(
                [f['properties'] for f in features],
                geometry=[f['geometry'] for f in features]
            )

            image_path = Path(image_path)
            output_path = image_path.with_suffix('.geojson')
            if save_dir is not None:
                os.makedirs(save_dir, exist_ok=True)
                output_path = os.path.join(save_dir, os.path.basename(output_path))

            gdf.to_file(output_path, driver='GeoJSON')
            written_paths.append(output_path)
            print(f"GeoJSON for {image_path} saved to {output_path}")
        else:
            print(f"No annotations found for {image_path}")

    return written_paths

In [71]:
# Export the predictions for IMAGES as GeoJSON files into the given save_dir
# (relative to the notebook's working directory).
geojson_polygons = yolo_results_to_geojson(results, IMAGES, save_dir='raw-predict-results-640-deduped')

GeoJSON for /Users/simon/Documents/000_fiit/09_semester/DP/notebooks/pleomorphy-analysis/dp-pleomorphy-analysis/data/processed/yolo-larger-1024/yolo_dataset/images/val/tile_0002_2048_0.tif saved to raw-predict-results-640-deduped/tile_0002_2048_0.geojson


  class_id = int(box.cls.cpu().numpy()) if box.cls is not None else None
  conf = float(box.conf.cpu().numpy()) if box.conf is not None else None
  write(
