<a href="https://colab.research.google.com/github/Ace-Chrono/Coral_Lesion_Measurer/blob/main/Lesion_Measurer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Packages

In [1]:
# Install the Ultralytics package that provides the YOLO class imported in the Setup cell.
!pip install ultralytics
# Clone the SAM 2 repository into ./sam2_repo and install it in editable mode from there.
!git clone https://github.com/facebookresearch/sam2.git sam2_repo
%cd sam2_repo
!pip install -e . --no-build-isolation
# ipympl is the backend behind the `%matplotlib widget` magic used in the Setup cell.
!pip install ipympl
# Restart the kernel (presumably so the freshly installed packages are picked up).
# NOTE: the working directory stays inside sam2_repo after `%cd` above.
get_ipython().kernel.do_shutdown(restart=True)

Collecting ultralytics
  Downloading ultralytics-8.3.170-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

{'status': 'ok', 'restart': True}

## Setup

In [1]:
from google.colab import drive, files
import torch
from ultralytics import YOLO
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from google.colab import output
output.enable_custom_widget_manager()
%matplotlib widget
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
from PIL import Image
import datetime
import json
import pandas as pd
import ipywidgets as widgets
from ipywidgets import Button, Output, VBox, HBox, Label
from IPython.display import display, clear_output
import os
import gc
import re
# Mount Google Drive so the model weights and the Input/Output folders are reachable.
drive.mount('/content/gdrive/')
# Prefer the first CUDA device when available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(torch.__version__)
print(DEVICE)
# Three YOLO detectors: PVC reference object, lesions, and ruler (used for scale calibration).
pvc_bbox_model = YOLO("/content/gdrive/MyDrive/Coral Lesion Measurer Folder/ML Models/YOLOV11_PVC.pt")
lesion_bbox_model = YOLO("/content/gdrive/MyDrive/Coral Lesion Measurer Folder/ML Models/YOLOV11_Lesion.pt")
ruler_bbox_model = YOLO("/content/gdrive/MyDrive/Coral Lesion Measurer Folder/ML Models/YOLOV11_Ruler.pt")
# SAM 2.1 checkpoint and its config; the config path is relative
# (presumably resolved inside the installed sam2 package — TODO confirm).
sam_location = "/content/gdrive/MyDrive/Coral Lesion Measurer Folder/ML Models/sam2.1_hiera_large.pt"
sam_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml"
# Image predictor that run_sam() prompts with the lesion bounding boxes.
lesion_sam_model = SAM2ImagePredictor(build_sam2(sam_cfg, sam_location))

def open_image(image_path):
    """Open an image file and report its pixel dimensions.

    Args:
        image_path: Path of the image, passed straight to ``Image.open``.

    Returns:
        tuple: ``(image, height, width)`` where ``image`` is the opened image
        object and ``height``/``width`` are the first two axes of its NumPy
        representation.
    """
    image = Image.open(image_path)
    # Read only the first two axes: a single-channel image converts to a 2-D
    # array, so a three-way unpack of ``shape`` would raise ValueError.
    height, width = np.array(image).shape[:2]
    return image, height, width

def image_info(image_name):
    """Parse coral id, capture date and repetition number out of an image name.

    Args:
        image_name: File name without extension, e.g. ``"LC_007_2023_06_05 (2)"``.

    Returns:
        tuple: ``(coral_id, date, repetition)``.
        ``repetition`` is the integer inside a trailing ``(n)`` or ``None``.
        ``date`` is ``"YYYY-MM-DD"`` built from the first ``YYYY?MM?DD`` group whose
        separators are ``_``, ``.`` or whitespace, or ``None``.
        ``coral_id`` is the first ``LETTERS_digits`` / ``LETTERS-digits`` token found
        in the text before the date, or ``None`` (always ``None`` when no date is found).
    """
    # A trailing "(n)" is the repetition counter; strip it before further parsing.
    rep_hit = re.search(r'\((\d+)\)\s*$', image_name)
    if rep_hit:
        repetition = int(rep_hit.group(1))
        stem = re.sub(r'\s*\(\d+\)\s*$', '', image_name)
    else:
        repetition = None
        stem = image_name

    date_hit = re.search(r'(\d{4})[_.\s](\d{2})[_.\s](\d{2})', stem)
    if date_hit is None:
        # Without a date there is no "part before the date" to search for an id.
        return None, None, repetition

    date = f"{date_hit.group(1)}-{date_hit.group(2)}-{date_hit.group(3)}"

    # Only the text preceding the date is searched for the coral id.
    prefix = stem[:date_hit.start()].rstrip(' _.')
    id_hit = re.search(r'([A-Z]+[_-]\d+)', prefix)
    coral_id = id_hit.group(1) if id_hit else None

    return coral_id, date, repetition

def get_conversion_ratio(image, image_name, ruler_length_cm=30.5):
    """Estimate the pixels-per-cm scale from the detected ruler.

    The longest side of the first ruler bounding box is assumed to span the full
    ruler, so ``ratio = longest_side_px / ruler_length_cm``.

    Args:
        image: Image handed to ``ruler_bbox_model.predict``.
        image_name: Name used only in the error message.
        ruler_length_cm: Real-world ruler length in centimetres. Defaults to the
            30.5 cm that was previously hard-coded.

    Returns:
        tuple: ``(pixels_per_cm, [x_min, y_min, x_max, y_max])`` on success, or
        ``(None, None)`` when nothing usable was detected or prediction failed.
    """
    try:
        results = ruler_bbox_model.predict(image, verbose=False)
        if len(results) > 0 and len(results[0].boxes) > 0:
            first_box = results[0].boxes.xyxy[0].tolist()
            x_min, y_min, x_max, y_max = first_box
            longest_side_px = max(x_max - x_min, y_max - y_min)
            # A degenerate box would give a ratio of 0, which later divides by zero
            # in the area/perimeter conversions; treat it as "no detection".
            if longest_side_px > 0:
                return longest_side_px / ruler_length_cm, first_box
    except Exception as e:
        # Best effort: report and fall through so the caller can try another method.
        print(f"Error in ruler detection for {image_name}: {str(e)}")

    # No detection
    return None, None

def get_pvc_ratio(image, image_name, pvc_length_cm=7.15):
    """Estimate the pixels-per-cm scale from the detected PVC reference object.

    The longest side of the first PVC bounding box is divided by the object's
    real-world size.

    Args:
        image: Image handed to ``pvc_bbox_model.predict``.
        image_name: Name used only in the error message.
        pvc_length_cm: Real-world size of the PVC reference in centimetres.
            Defaults to the 7.15 cm that was previously hard-coded.

    Returns:
        tuple: ``(pixels_per_cm, [x_min, y_min, x_max, y_max])`` on success, or
        ``(None, None)`` when nothing usable was detected or prediction failed.
    """
    try:
        results = pvc_bbox_model.predict(image, verbose=False)
        if len(results) > 0 and len(results[0].boxes) > 0:
            first_box = results[0].boxes.xyxy[0].tolist()
            x_min, y_min, x_max, y_max = first_box
            longest_side_px = max(x_max - x_min, y_max - y_min)
            # A degenerate box would give a ratio of 0 and break later divisions;
            # report it as "no detection" so the caller falls back to another method.
            if longest_side_px > 0:
                return longest_side_px / pvc_length_cm, first_box
    except Exception as e:
        # Best effort: report and fall through to the "no detection" result.
        print(f"Error in PVC detection for {image_name}: {str(e)}")
    return None, None

def run_yolo_lesion(image):
    """Detect lesions and return their bounding boxes.

    Args:
        image: Image handed to ``lesion_bbox_model.predict``.

    Returns:
        list: ``[x_min, y_min, x_max, y_max]`` lists taken from the last result
        returned by the model (same result the original loop ended on), or an
        empty list when the model returns no results at all.
    """
    results = lesion_bbox_model.predict(image, verbose=False)
    # Previously an empty result list left the return variable unbound.
    if len(results) == 0:
        return []
    return results[-1].boxes.xyxy.tolist()

def run_sam(image, bboxes):
    """Segment the image with SAM, prompting it with the given bounding boxes.

    Args:
        image: Image converted with ``np.array`` before being given to the predictor.
        bboxes: Iterable of ``[x_min, y_min, x_max, y_max]`` boxes.

    Returns:
        The first element (masks) of what ``lesion_sam_model.predict`` returns,
        one single-mask prediction per box (``multimask_output=False``).
    """
    # Stack the boxes into one float32 array, shape (N, 4), as the box prompt.
    input_boxes = np.array([np.array(box) for box in bboxes]).astype(np.float32)

    with torch.inference_mode():
        with torch.autocast("cuda", dtype=torch.bfloat16):
            lesion_sam_model.set_image(np.array(image))
            prediction = lesion_sam_model.predict(
                point_coords=None,
                point_labels=None,
                box=input_boxes,
                multimask_output=False,
            )
            masks, _, _ = prediction
    return masks

def overlap_filtering(bboxes, masks, iou_thresh=0.8):
    """Drop masks that lie mostly inside a larger mask.

    Mask ``j`` is discarded when ``|i ∩ j| / |j| > iou_thresh`` for some strictly
    larger mask ``i``. Despite the parameter name this is a containment ratio of
    the smaller mask, not a symmetric IoU.

    Args:
        bboxes: Sequence of bounding boxes, parallel to ``masks``.
        masks: Sequence of masks, each ``(1, H, W)`` or ``(H, W)``.
        iou_thresh: Containment ratio above which the smaller mask is removed.

    Returns:
        list: ``(bbox, mask)`` pairs that survived, in input order; the masks are
        the original (unmodified) objects.
    """
    def _as_plane(mask):
        # Accept both (1, H, W) and (H, W); indexing [0] on a 2-D mask would
        # silently compare only its first row.
        arr = np.asarray(mask)
        return (arr[0] if arr.ndim == 3 else arr).astype(np.uint8)

    # Hoisted out of the pairwise loop: casts and pixel counts are loop-invariant.
    planes = [_as_plane(mask) for mask in masks]
    pixel_counts = [int(plane.sum()) for plane in planes]
    keep = [True] * len(planes)

    for i, plane_i in enumerate(planes):
        for j, plane_j in enumerate(planes):
            if i == j or not keep[j] or pixel_counts[j] == 0:
                continue
            # Only a strictly larger mask may absorb a smaller one.
            if pixel_counts[i] <= pixel_counts[j]:
                continue

            intersection = np.logical_and(plane_i, plane_j).sum()
            if intersection / pixel_counts[j] > iou_thresh:
                keep[j] = False

    return [(bbox, mask) for k, (bbox, mask) in enumerate(zip(bboxes, masks)) if keep[k]]

def masks_to_polygons(masks):  # Creates polygons from a list of SAM masks
    """Convert binary masks into simplified contour polygons.

    Args:
        masks: Iterable of masks; each must squeeze to a non-empty 2-D array.

    Returns:
        list: One entry per non-empty mask. Each entry is a list of polygons, and
        each polygon is a flat ``[x0, y0, x1, y1, ...]`` list of contour points.
        Masks without any set pixel are skipped, so the result can be shorter than
        ``masks`` (callers pairing it with per-mask data should be aware of this).

    Raises:
        ValueError: If a mask is ``None``, is not 2-D after squeezing, or has a
            zero-length axis.
    """
    all_polygons = []

    # enumerate() supplies the index used in the error messages below.
    for i, mask in enumerate(masks):
        # Must be checked before np.squeeze, which would wrap None in a 0-d array.
        if mask is None:
            raise ValueError(f"Mask {i} is None.")

        mask = np.squeeze(mask)  # Ensures (H, W)

        if mask.ndim != 2:
            raise ValueError(f"Mask {i} must be 2D after squeeze, got shape {mask.shape}")
        if mask.shape[0] == 0 or mask.shape[1] == 0:
            raise ValueError(f"Mask {i} has invalid shape {mask.shape}")
        if not np.any(mask):
            continue  # Skip empty masks

        # Convert mask to binary if it's not already
        if mask.max() > 1:
            _, binary_mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
        else:
            binary_mask = (mask * 255).astype(np.uint8)

        # Outer contours only (RETR_EXTERNAL): holes inside a mask are ignored.
        contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        polygons = []
        for contour in contours:
            # Simplify the contour; tolerance is 0.1% of its closed arc length.
            epsilon = 0.001 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)

            # Flatten the (K, 1, 2) point array into [x0, y0, x1, y1, ...].
            polygons.append(approx.reshape(-1).tolist())

        all_polygons.append(polygons)

    return all_polygons

def get_perimeter(all_polygons, conversion_ratio):
    """Compute the total outline length of every object, in micrometres.

    Args:
        all_polygons: One entry per object; each entry is a list of flat
            ``[x0, y0, x1, y1, ...]`` polygons.
        conversion_ratio: Scale in pixels per centimetre.

    Returns:
        list: One perimeter per object: the sum of its closed contour lengths
        converted from pixels to cm (``/ conversion_ratio``) and then to µm
        (``* 10_000``). Objects without a usable contour yield ``0.0``.
    """
    perimeters = []

    for polygon in all_polygons:  # each object's list of polygons
        total_perimeter_cm = 0.0

        for points in polygon:
            if len(points) < 6:
                continue  # must have at least 3 points

            try:
                # Force correct shape and dtype (OpenCV requires float32 or int32)
                contour = np.array(points, dtype=np.float32).reshape(-1, 1, 2)
                total_perimeter_cm += cv2.arcLength(contour, True) / conversion_ratio
            except Exception as e:
                # Best effort: one malformed contour should not abort the image.
                print(f"Skipping contour while measuring perimeter: {e}")
                continue

        # Use the accumulated total; the last contour's value alone dropped every
        # other contour and leaked a stale value into contour-less objects.
        perimeters.append(total_perimeter_cm * 10_000)

    return perimeters

def get_perimeter_list(all_polygons):
    """Regroup flat polygon coordinates into point pairs, per object.

    Args:
        all_polygons: One entry per object; each entry is a list of flat
            ``[x0, y0, x1, y1, ...]`` polygons.

    Returns:
        list: For every object, a single list of ``[x, y]`` slices gathered from all
        of its polygons that have at least 6 values (3 points); shorter polygons
        are ignored.
    """
    return [
        [
            flat_points[start:start + 2]
            for flat_points in object_polygons
            if len(flat_points) >= 6
            for start in range(0, len(flat_points), 2)
        ]
        for object_polygons in all_polygons
    ]

def get_areas_and_centers(masks, bboxes, conversion_ratio):
    """Measure mask areas (pixel counting) and bounding-box centres.

    Args:
        masks: Iterable of masks; the non-zero entries of each are counted.
        bboxes: Iterable of ``(x_min, y_min, x_max, y_max)`` boxes.
        conversion_ratio: Scale in pixels per centimetre.

    Returns:
        tuple: ``(areas, centers)``. Each area is the pixel count scaled by
        ``((1 / conversion_ratio) * 10000) ** 2`` (µm² per pixel); each centre is
        the ``(x, y)`` midpoint of the corresponding box.
    """
    areas = [
        np.count_nonzero(mask) * ((1 / conversion_ratio) * 10000) ** 2
        for mask in masks
    ]
    centers = [
        ((x_min + x_max) / 2, (y_min + y_max) / 2)  # (x, y) format
        for x_min, y_min, x_max, y_max in bboxes
    ]
    return areas, centers

def get_areas_and_centers_from_polygons(polygons_per_obj, bboxes, conversion_ratio):
    """Measure polygon areas (shoelace formula) and bounding-box centres.

    Args:
        polygons_per_obj: One entry per object; each entry is a list of flat
            ``[x0, y0, x1, y1, ...]`` polygons.
        bboxes: Iterable of ``(x_min, y_min, x_max, y_max)`` boxes.
        conversion_ratio: Scale in pixels per centimetre.

    Returns:
        tuple: ``(areas, centers)``. Each area is the sum of the object's polygon
        areas in pixels scaled by ``((1 / conversion_ratio) * 10000) ** 2``; each
        centre is the ``(x, y)`` midpoint of the corresponding box.
    """
    def _shoelace_px(flat_points):
        # Shoelace formula on the (K, 2) vertex array.
        vertices = np.array(flat_points).reshape(-1, 2)
        xs = vertices[:, 0]
        ys = vertices[:, 1]
        return 0.5 * np.abs(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))

    areas = []
    for object_polygons in polygons_per_obj:
        object_area_px = 0
        for flat_points in object_polygons:
            if flat_points:  # empty polygons contribute nothing
                object_area_px += _shoelace_px(flat_points)
        areas.append(object_area_px * ((1 / conversion_ratio) * 10000) ** 2)

    centers = [
        ((x_min + x_max) / 2, (y_min + y_max) / 2)
        for x_min, y_min, x_max, y_max in bboxes
    ]

    return areas, centers

def output_image_cv(image, conversion_ratio, conversion_bbox, bboxes, masks, segmentations, areas, centers, image_output_path):
    """Render an annotated copy of the image and write it to disk.

    Drawn, in order: the conversion-ratio label (plus the reference box when
    given), lesion boxes in green, a 20% tint on every mask, polygon outlines,
    and one "<area> um^2" label per ``(area, center)`` pair.

    Args:
        image: NumPy array or any object ``np.array`` can convert.
        conversion_ratio: Pixels per cm; falsy values suppress the ratio label.
        conversion_bbox: ``[x1, y1, x2, y2]`` of the scale reference, or falsy.
        bboxes: Iterable of lesion boxes ``(x1, y1, x2, y2)``.
        masks: Sequence of masks (may be empty).
        segmentations: Per-object lists of flat ``[x0, y0, ...]`` polygons.
        areas: Area values, paired positionally with ``centers``.
        centers: ``(x, y)`` label anchors.
        image_output_path: Destination passed to ``cv2.imwrite``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    def _boxed_text(canvas, text, x, y, font_scale, thickness):
        # White text on a filled black rectangle sized to the text.
        (text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, thickness)
        cv2.rectangle(canvas, (x, y - text_h), (x + text_w, y), (0, 0, 0), -1)
        cv2.putText(canvas, text, (x, y - 2), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)

    # Work on a private copy, converting PIL-style inputs to an array first.
    source = image if isinstance(image, np.ndarray) else np.array(image)
    img = source.copy()

    # The input is treated as RGB; OpenCV drawing/saving expects BGR.
    if img.shape[-1] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    if conversion_ratio:
        ratio_text = f"Conversion Ratio: {conversion_ratio:.0f} pixels/cm"
        label_x, label_y = 10, 20  # top-left fallback when there is no reference box
        if conversion_bbox:
            rx1, ry1, rx2, ry2 = map(int, conversion_bbox)
            cv2.rectangle(img, (rx1, ry1), (rx2, ry2), (0, 255, 0), 2)
            label_x = (rx1 + rx2) // 2
            label_y = (ry1 + ry2) // 2
        _boxed_text(img, ratio_text, label_x, label_y, 1, 1)

    for box in bboxes:
        bx1, by1, bx2, by2 = map(int, box)
        cv2.rectangle(img, (bx1, by1), (bx2, by2), (0, 255, 0), 2)

    if masks:
        alpha = 0.2
        for mask in masks:
            if mask.dtype != np.uint8:
                mask = (mask * 255).astype(np.uint8)
            if len(mask.shape) == 3:
                mask = mask.squeeze()

            # Solid blue layer (channel 0 in BGR) blended in where the mask is set.
            tint = np.zeros_like(img, dtype=np.uint8)
            tint[:, :, 0] = 255
            mask_3ch = np.stack([mask] * 3, axis=-1)
            blended = (1 - alpha) * img + alpha * tint
            img = np.where(mask_3ch, blended, img).astype(np.uint8)

    # Red polygon outlines.
    for polygons in segmentations:
        outlines = [np.array(polygon, dtype=np.int32).reshape(-1, 2) for polygon in polygons]
        for outline in outlines:
            cv2.polylines(img, [outline], isClosed=True, color=(0, 0, 255), thickness=2)

    # Area labels anchored at the box centres.
    for area, center in zip(areas, centers):
        cx, cy = map(int, center)
        _boxed_text(img, f"{area:.2f} um^2", cx, cy, 0.6, 1)

    cv2.imwrite(image_output_path, img)

def output_csv(folder_name, image_name, lesion_count, conversion_ratio, areas, perimeters, csv_output_path):
    """Append one summary row for an image to the CSV report.

    Args:
        folder_name: Value for the "Folder" column.
        image_name: Value for the "Image Name" column.
        lesion_count: Value for the "# Lesions" column.
        conversion_ratio: Scale value, written with six decimals.
        areas: Area values, written as a "; "-joined list with two decimals.
        perimeters: Perimeter values, formatted like ``areas``.
        csv_output_path: CSV file to append to; the header row is written only
            when the file does not exist yet.
    """
    def _two_decimals(values):
        return "; ".join(f"{float(value):.2f}" for value in values)

    areas_text = _two_decimals(areas)
    perimeters_text = _two_decimals(perimeters)
    ratio_text = f"{float(conversion_ratio):.6f}"

    record = {
        "Folder": folder_name,
        "Image Name": image_name,
        "# Lesions": lesion_count,
        "Pixels Per um": ratio_text,
        "um^2": areas_text,
        "Perimeters": perimeters_text,
    }
    # Column order comes from the module-level csv_columns list.
    row_frame = pd.DataFrame([record], columns=csv_columns)
    needs_header = not os.path.exists(csv_output_path)
    row_frame.to_csv(csv_output_path, mode='a', header=needs_header, index=False)

def output_ara(ara_location, file_directory, image_name, lesion_count, conversion_ratio, areas, perimeters, perimeters_list, height, width):
    """Write the measurements of one image as a CRLF-terminated ``.ara`` file.

    Args:
        ara_location: Folder in which the ``.ara`` file is created.
        file_directory: Directory string recorded in the file header, joined to
            ``image_name`` with a backslash.
        image_name: Image file name; its extension is replaced by ``.ara``.
        lesion_count: Unused; kept for interface compatibility.
        conversion_ratio: Scale in pixels per centimetre.
        areas: Area values; each is divided by 100000000 before being written.
            The list is NOT modified.
        perimeters: Unused by the file contents; kept for interface compatibility
            and left unmodified.
        perimeters_list: Per-area list of outline points; each point is written as
            its comma-joined coordinates.
        height: Image height in pixels.
        width: Image width in pixels.
    """
    # Convert into a private list: the caller keeps using `areas` afterwards, so
    # rescaling it in place would corrupt every later report.
    scaled_areas = [float(area / 100000000) for area in areas]

    folder_path = file_directory + "\\" + image_name

    # splitext covers .jpg/.JPG as before and also .jpeg or mixed-case extensions,
    # which previously kept their image extension.
    ara_file = os.path.join(ara_location, os.path.splitext(image_name)[0] + ".ara")

    # newline="" writes the explicit "\r\n" terminators verbatim on every platform.
    with open(ara_file, "w", newline="") as file:
        def emit(text):
            file.write(text + "\r\n")

        emit(f'"{folder_path}"')
        emit(f'{conversion_ratio:.4f},{((height * width) / (conversion_ratio ** 2)):.4f},"cm"')
        emit(f"{len(scaled_areas)}")

        # One header line per area followed by its outline points.
        for i, area in enumerate(scaled_areas):
            outline = perimeters_list[i]
            emit(f"{area:.4f},14671839,{len(outline)}")
            for point in outline:
                emit(",".join(map(str, point)))

        emit(f'5,5,"Pixels/cm: {conversion_ratio:.4f}"')
        emit(f"{len(scaled_areas)},8")
        for heading in ("AREA #", "SPECIES", "SPECIES CODE", "MASTER AREA", "AREA",
                        "INT. AREA", "EXT. AREA", "NET AREA", "COMMENTS"):
            emit(f'"{heading}"')

        # Table rows: area number, two blanks, area twice, two blanks, area, blank.
        for number, area in enumerate(scaled_areas, start=1):
            area_text = f"{area:.4f}"
            for cell in (str(number), "", "", area_text, area_text, "", "", area_text, ""):
                emit(f'"{cell}"')

        # Fixed numeric trailer copied verbatim from the original writer.
        for value in (855, 2040, 1515, 1500, 1275, 1275, 1275, 1275, 14475, 0, 0):
            emit(str(value))

        for _ in scaled_areas:
            emit("0")

        for _ in scaled_areas:
            emit('0,0,0,0,0,""')

        for _ in range(7):
            emit('""')

def append_row_to_excel(file_path, new_row_dict):
    """Add one dated row to an Excel sheet, keeping the rows sorted by date.

    Args:
        file_path: Workbook path; created with just this row when it is missing.
        new_row_dict: Row values; must contain a ``'Date'`` key. When that date is
            already present in the sheet's ``'Date'`` column the row is dropped and
            the file is left untouched.
    """
    if not os.path.exists(file_path):
        # First row for this workbook.
        pd.DataFrame([new_row_dict]).to_excel(file_path, index=False)
        return

    sheet = pd.read_excel(file_path)
    # Check if date already exists
    if new_row_dict['Date'] in sheet['Date'].values:
        return

    combined = pd.concat([sheet, pd.DataFrame([new_row_dict])], ignore_index=True)
    combined = combined.sort_values(by='Date')
    combined.to_excel(file_path, index=False)

def output_conversion_ratios(path, conversion_ratios):
    """Save the conversion ratios as indented JSON, overwriting ``path``.

    Args:
        path: Destination file.
        conversion_ratios: JSON-serialisable object (here a name → ratio mapping).
    """
    with open(path, 'w') as handle:
        json.dump(conversion_ratios, handle, indent=4)

def get_metadata(index, image_name, height, width, bboxes, segmentations):
    """Build the COCO image record and annotation records for one image.

    Args:
        index: Image id; also seeds the annotation ids (``index * 1000 + n``).
        image_name: Stored as ``file_name``.
        height: Image height in pixels.
        width: Image width in pixels.
        bboxes: Iterable of ``(x_min, y_min, x_max, y_max)`` boxes.
        segmentations: Iterable paired positionally with ``bboxes``; stored as is.

    Returns:
        tuple: ``(image_info, annotations)``. Each annotation carries the box in
        COCO ``[x, y, width, height]`` form and uses the box area as ``area``.
    """
    image_record = dict(
        id=index,
        license=1,
        file_name=image_name,
        height=height,
        width=width,
        date_captured=datetime.datetime.now().isoformat(),
    )

    annotations = []
    for offset, (box, polygons) in enumerate(zip(bboxes, segmentations)):
        left, top, right, bottom = box
        box_w = right - left
        box_h = bottom - top
        annotations.append({
            "id": index * 1000 + offset,  # ensures uniqueness
            "image_id": index,
            "category_id": 1,
            "bbox": [left, top, box_w, box_h],
            "area": box_w * box_h,
            "segmentation": polygons,
            "iscrowd": 0,
        })

    return image_record, annotations

def output_coco_json(image_info_list, annotations_list, output_path):
    """Write a COCO-format dataset file.

    Args:
        image_info_list: Records stored under ``"images"``.
        annotations_list: Records stored under ``"annotations"``.
        output_path: Destination JSON file (overwritten).
    """
    dataset_info = {
        "description": "Coral Dataset",
        "version": "1.0",
        "year": 2025,
        "contributor": "Richard Zhao",
        "date_created": datetime.datetime.now().isoformat(),
    }
    license_entry = {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
    }
    lesion_category = {
        "id": 1,
        "name": "coral_lesion",
        "supercategory": "marine_life",
    }

    coco_dict = {
        "info": dataset_info,
        "licenses": [license_entry],
        "images": image_info_list,
        "annotations": annotations_list,
        "categories": [lesion_category],
    }

    # Save to JSON file
    with open(output_path, "w") as handle:
        json.dump(coco_dict, handle, indent=4)

class ClickCollector:
    """Interactive widget that collects a pixels-per-cm ratio for one image.

    The user either clicks two points on the image and enters the real distance
    between them, or types the ratio directly. The resulting ratio is handed to
    the ``on_done`` callback.
    """

    def __init__(self, image, image_name="Image", on_done=None):
        """Build the figure, the controls and display them.

        Args:
            image: Image shown with ``imshow``.
            image_name: Title of the plot.
            on_done: Optional callable invoked with the computed/entered ratio.
        """
        self.image = image
        self.image_name = image_name
        self.on_done = on_done
        self.coords = []        # clicked (x, y) points in image coordinates
        self.line = None        # line artist joining the two points, if drawn
        self.dots = []          # marker artists, one per click
        self.click_mode = False

        self.out = Output()

        # Setup plot
        self._build_figure()

        # Buttons
        self.click_button = Button(description="Enable Click Mode", button_style='primary')
        self.click_button.on_click(self.toggle_click_mode)

        self.clear_button = Button(description="Clear", button_style='danger')
        self.clear_button.on_click(self.clear)

        self.finish_button = Button(description="Finish", button_style='success')
        self.finish_button.on_click(self.finish)

        self.reload_button = Button(description="Reload Image", button_style='info')
        self.reload_button.on_click(self.reload_image)

        # Distance input & submit button, hidden initially
        self.dist_input = widgets.FloatText(description="Real distance (cm):")
        self.dist_submit = widgets.Button(description="Submit distance")
        self.dist_submit.on_click(self.submit_distance)
        self.dist_input.layout.display = 'none'
        self.dist_submit.layout.display = 'none'

        # Manual ratio input & submit button
        self.ratio_input = widgets.FloatText(description="Pixels/cm:")
        self.ratio_submit = widgets.Button(description="Use Ratio Directly")
        self.ratio_submit.on_click(self.submit_ratio_directly)

        self.ui = VBox([ #Make ui more user friendly
            self.out,
            HBox([self.click_button, self.clear_button, self.finish_button, self.reload_button]),
            VBox([
                Label("Option 1: Calibrate by clicking two points on the image"),
                self.dist_input, self.dist_submit,
                Label("Option 2: Or enter pixels per cm directly"),
                self.ratio_input, self.ratio_submit
            ])
        ])
        display(self.ui)

    def _build_figure(self, clear_first=False):
        """Create the figure inside the output widget and hook up the click handler."""
        with self.out:
            if clear_first:
                clear_output(wait=True)  # Clear previous output safely
            self.fig, self.ax = plt.subplots(figsize=(8,6))
            self.ax.imshow(self.image)
            self.ax.set_title(f"{self.image_name}")
            self.cid = self.fig.canvas.mpl_connect('button_press_event', self.onclick)
            plt.show()

    def toggle_click_mode(self, b):
        """Flip click mode and update the button to reflect the new state."""
        self.click_mode = not self.click_mode
        if self.click_mode:
            self.click_button.description = "Click Mode: ON (Click Image)"
            self.click_button.button_style = 'warning'
        else:
            self.click_button.description = "Click Mode: OFF"
            self.click_button.button_style = 'primary'

    def onclick(self, event):
        """Record a click inside the axes; after the second point draw the line."""
        if not self.click_mode or event.inaxes != self.ax:
            return
        x, y = event.xdata, event.ydata
        self.coords.append((x, y))
        self.draw_dot(x, y)
        if len(self.coords) == 2:
            self.draw_line()
            # Two points are enough: leave click mode automatically.
            self.toggle_click_mode(None)

    def draw_dot(self, x, y):
        """Draw a red marker at ``(x, y)`` and remember its artist."""
        dot = self.ax.plot(x, y, 'ro', markersize=6)[0]
        self.dots.append(dot)
        self.fig.canvas.draw()

    def draw_line(self):
        """Draw (or redraw) the red line between the first two clicked points."""
        x_vals = [self.coords[0][0], self.coords[1][0]]
        y_vals = [self.coords[0][1], self.coords[1][1]]
        if self.line:
            self.line.remove()
        self.line, = self.ax.plot(x_vals, y_vals, 'r-', linewidth=2)
        self.fig.canvas.draw()

    def clear(self, b):
        """Remove all markers, the line and the stored points."""
        # Clear all dots
        for dot in self.dots:
            dot.remove()
        self.dots = []

        # Clear line
        if self.line:
            self.line.remove()
            self.line = None

        self.coords = []
        self.fig.canvas.draw()

    def finish(self, b):
        """Confirm the two clicked points and reveal the distance input."""
        if len(self.coords) != 2:
            with self.out:
                print("Please click exactly 2 points before finishing.")
            return
        with self.out:
            print(f"✅ Line set from {self.coords[0]} to {self.coords[1]}")
            print("Please enter the real-world distance (cm) below:")
        # Show distance input widgets
        self.dist_input.layout.display = None
        self.dist_submit.layout.display = None
        self.ratio_input.layout.display = None
        self.ratio_submit.layout.display = None

    def submit_distance(self, b):
        """Turn the clicked pixel distance and the entered cm distance into a ratio."""
        dist = self.dist_input.value
        if dist <= 0:
            with self.out:
                print("Distance must be positive.")
            return
        # The distance widgets stay visible after "Reload Image"/"Clear" reset the
        # points, so the two points must be re-validated here instead of assumed.
        if len(self.coords) != 2:
            with self.out:
                print("Please click exactly 2 points before finishing.")
            return
        pixel_dist = np.linalg.norm(np.array(self.coords[0]) - np.array(self.coords[1]))
        if pixel_dist == 0:
            # Identical points would give a ratio of 0 and break later divisions.
            with self.out:
                print("Ratio must be positive.")
            return
        ratio = pixel_dist / dist
        with self.out:
            print(f"➡️ Pixel distance: {pixel_dist:.2f}")
            print(f"➡️ Real distance: {dist:.2f} cm")
            print(f"➡️ Conversion ratio: {ratio:.2f} pixels/cm")
        if self.on_done:
            self.on_done(ratio)
            plt.close(self.fig) #May cause the image to be blank, need to check, maybe add a 4th button to reload the plot

    def submit_ratio_directly(self, b):
        """Use the manually entered pixels-per-cm ratio."""
        ratio = self.ratio_input.value
        if ratio <= 0:
            with self.out:
                print("Ratio must be positive.")
            return
        with self.out:
            print(f"➡️ Using manually entered ratio: {ratio:.2f} pixels/cm")
        if self.on_done:
            self.on_done(ratio)
            plt.close(self.fig)

    def reload_image(self, b):
        """Discard the current figure and selection and draw a fresh plot."""
        # Clear previous plot
        plt.close(self.fig)
        self.coords = []
        self.line = None
        self.dots = []

        self._build_figure(clear_first=True)

        # Reset button states
        self.click_button.description = "Enable Click Mode"
        self.click_button.button_style = 'primary'
        self.click_mode = False

def extract_coco_info(coco_data):
    """Group COCO annotations by the original file name of their image.

    Args:
        coco_data: Parsed COCO dict with ``"images"`` and ``"annotations"`` lists;
            the original name is read from each image's ``extra.name``.

    Returns:
        dict: original name → list of annotation dicts. Images without a truthy
        ``extra.name`` (and their annotations) are left out.
    """
    # Image id -> original file name, for images that carry one.
    name_by_image_id = {}
    for image_entry in coco_data["images"]:
        extra = image_entry.get("extra", {})
        source_name = extra.get("name")
        if not source_name:
            continue
        name_by_image_id[image_entry["id"]] = source_name

    # Original file name -> its annotations, in input order.
    grouped = {}
    for annotation in coco_data["annotations"]:
        source_name = name_by_image_id.get(annotation["image_id"])
        if not source_name:
            continue
        if source_name not in grouped:
            grouped[source_name] = []
        grouped[source_name].append(annotation)

    return grouped

def get_original_filename(coco_data, roboflow_filename):
    """Look up the original name of an image by its exported file name.

    Args:
        coco_data: Parsed COCO dict with an ``"images"`` list.
        roboflow_filename: Value to match against each image's ``file_name``.

    Returns:
        The ``extra.name`` of the first matching image (``None`` when that image has
        no such field), or ``None`` when no image matches.
    """
    matches = (entry for entry in coco_data["images"] if entry["file_name"] == roboflow_filename)
    first_match = next(matches, None)
    if first_match is None:
        return None
    return first_match.get("extra", {}).get("name")

def format_original_filename(name):
    """Rewrite every ``-<digits>-`` group in ``name`` as `` (<digits>)``.

    Args:
        name: File name to convert.

    Returns:
        str: The converted name, e.g. ``"LC_007-2-"`` becomes ``"LC_007 (2)"``.
    """
    dashed_number = re.compile(r'-(\d+)-')
    return dashed_number.sub(r' (\1)', name)

def extract_conversion_ratios_json(path):
    """Load and return the JSON content stored at ``path``.

    Args:
        path: JSON file to read.

    Returns:
        The parsed JSON value.
    """
    with open(path, 'r') as handle:
        return json.load(handle)

def convert_coco_to_xyxy(bboxes):
    """Convert COCO ``[x, y, width, height]`` boxes to corner form.

    Args:
        bboxes: Iterable of four-element boxes.

    Returns:
        list: ``[x_min, y_min, x_max, y_max]`` lists, in input order.
    """
    return [
        [left, top, left + box_w, top + box_h]
        for left, top, box_w, box_h in bboxes
    ]

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Mounted at /content/gdrive/
2.6.0+cu124
cuda:0


## Run Measurer

In [2]:
image_input_root = "/content/gdrive/MyDrive/Coral Lesion Measurer Folder/Input/"

image_input_path = [
    os.path.join(root, d)
    for root, dirs, _ in os.walk(image_input_root)
    for d in dirs
]
image_input_path.append(image_input_root)

image_output_path = "/content/gdrive/MyDrive/Coral Lesion Measurer Folder/Output/"
csv_output_path = image_output_path + "/coral_lesion_data.csv"
csv_columns = ["Folder", "Image Name", "# Lesions", "Pixels Per um", "um^2", "Perimeters"]

areas_folder = os.path.join(image_output_path, "areas")
ara_folder = os.path.join(image_output_path, "ara")
os.makedirs(ara_folder, exist_ok=True)
error_folder = os.path.join(image_output_path, "error")
os.makedirs(error_folder, exist_ok=True)
file_directory = input("Enter directory where unnanotated coral lesion files are located (i.e. C:\\Users\\myself\\Downloads\\folder): ") #Make a big print to explain the weird aspect
perimeters_folder = os.path.join(image_output_path, "perimeters")
os.makedirs(areas_folder, exist_ok=True)
os.makedirs(perimeters_folder, exist_ok=True)

json_output_path = os.path.join(image_output_path, "_annotations.coco.json")
conversion_ratios_output_path = os.path.join(image_output_path, "conversion_ratios.json")

pending_conversion = {}
conversion_ratios = {}
image_metadata_list = []
annotation_metadata_list = []
image_index = 0

print("Starting Processing...")

for index, folder in enumerate(image_input_path):
    folder_name = os.path.basename(folder.rstrip('/'))
    for file in os.listdir(folder):
        if file.lower().endswith(('.jpg', '.jpeg')):
            old_image_path = os.path.join(folder, file)
            image_name, _ = os.path.splitext(file)
            new_image_path = os.path.join(image_output_path, file)
            image, height, width = open_image(old_image_path)

            conversion_ratio, conversion_bbox = get_pvc_ratio(image, file)
            if conversion_ratio is None:
                conversion_ratio, conversion_bbox = get_conversion_ratio(image, file)
                if conversion_ratio is None:
                    pending_conversion[file] = (image, height, width, old_image_path)
                    continue
            conversion_ratios[file] = conversion_ratio

            bboxes_lesion = run_yolo_lesion(image)
            if not bboxes_lesion:
                !cp "{old_image_path}" "{error_folder}"
                print("No BBoxes in " + file + "\n")
                continue
            masks_lesion = run_sam(image, bboxes_lesion)
            filtered = overlap_filtering(bboxes_lesion, masks_lesion, iou_thresh=0.85)
            filtered_bboxes, filtered_masks = zip(*filtered) if filtered else ([], [])
            segmentations = masks_to_polygons(filtered_masks)
            perimeters = get_perimeter(segmentations, conversion_ratio)
            perimeter_points = get_perimeter_list(segmentations)
            areas, centers = get_areas_and_centers_from_polygons(segmentations, filtered_bboxes, conversion_ratio)
            id, date, repetition = image_info(image_name)

            area_file = os.path.join(areas_folder, f"{id}_areas.xlsx")
            perim_file = os.path.join(perimeters_folder, f"{id}_perimeters.xlsx")
            output_ara(ara_folder, file_directory, file, len(masks_lesion), conversion_ratio, areas, perimeters, perimeter_points, height, width)

            area_row = {'Date': date}
            for i, a in enumerate(areas):
                area_row[f'Area {i+1}'] = float(f"{a:.2f}")

            perim_row = {'Date': date}
            for i, p in enumerate(perimeters):
                perim_row[f'Perimeter {i+1}'] = float(f"{p:.2f}")

            append_row_to_excel(area_file, area_row)
            append_row_to_excel(perim_file, perim_row)

            output_image_cv(image, conversion_ratio, conversion_bbox, filtered_bboxes, filtered_masks, segmentations, areas, centers, new_image_path)
            output_csv(folder_name, file, len(masks_lesion), conversion_ratio, areas, perimeters, csv_output_path)

            image_metadata, annotation_metadata = get_metadata(image_index, file, height, width, bboxes_lesion, segmentations)
            image_metadata_list.append(image_metadata)
            annotation_metadata_list.extend(annotation_metadata)

            print(f"Processed '{file}': Conversion ratio = {conversion_ratio:.2f} pixels/cm, "
                f"Lesions detected = {len(bboxes_lesion)}, "
                f"Areas calculated = {len(areas)}, Perimeters calculated = {len(perimeters)}")

            image_index += 1
            gc.collect()
            torch.cuda.empty_cache()

if image_metadata_list and annotation_metadata_list and conversion_ratio and not pending_conversion:
    output_coco_json(image_metadata_list, annotation_metadata_list, json_output_path)
    output_conversion_ratios(conversion_ratios_output_path, conversion_ratios)

print("Successfully Finished Processing Automatic Images")

resolved_ratios = {}
pending_items = list(pending_conversion.items())
current_idx = 0
main_output = widgets.Output()
display(main_output)

def process_next():
    """Advance the manual conversion-ratio workflow by one image.

    While entries of ``pending_items`` remain, the next one is handed to
    ``ClickCollector`` inside ``main_output``; the ratio passed to its
    ``on_done`` callback is stored in ``resolved_ratios`` under the image name
    and this function is called again. Once every entry has been consumed,
    a status message is shown and ``run_analysis()`` is started.
    """
    global current_idx

    if current_idx < len(pending_items):
        # Unpack the next pending entry and move the cursor past it.
        image_name, (image, height, width, old_image_path) = pending_items[current_idx]
        current_idx += 1

        def handle_ratio(ratio):
            # Record the ratio for this image, then continue with the next one.
            resolved_ratios[image_name] = ratio
            process_next()

        with main_output:
            clear_output(wait=True)
            ClickCollector(image, image_name, on_done=handle_ratio)
        return

    # Nothing left to ask for: report it and process the collected ratios.
    with main_output:
        clear_output()
        print("All manual ratios collected. Restarting Processing...")
    run_analysis()

def run_analysis():
    global image_index
    for image_name, conversion_ratio in resolved_ratios.items():
        if conversion_ratio is None:
            continue
        conversion_ratios[image_name] = conversion_ratio

        image, height, width, old_image_path = pending_conversion[image_name]
        new_image_path = os.path.join(image_output_path, image_name)

        bboxes_lesion = run_yolo_lesion(image)
        if not bboxes_lesion:
              !cp "{old_image_path}" "{error_folder}"
              print("No BBoxes in " + file + "\n")
              continue
        masks_lesion = run_sam(image, bboxes_lesion)
        filtered = overlap_filtering(bboxes_lesion, masks_lesion, iou_thresh=0.85)
        filtered_bboxes, filtered_masks = zip(*filtered) if filtered else ([], [])
        segmentations = masks_to_polygons(filtered_masks)
        perimeters = get_perimeter(segmentations, conversion_ratio)
        areas, centers = get_areas_and_centers(filtered_masks, filtered_bboxes, conversion_ratio)
        id, date, repetition = image_info(image_name)

        # Excel
        area_file = os.path.join(areas_folder, f"{id}_areas.xlsx")
        perim_file = os.path.join(perimeters_folder, f"{id}_perimeters.xlsx")

        area_row = {'Date': date}
        for i, a in enumerate(areas):
            area_row[f'Area {i+1}'] = float(f"{a:.2f}")

        perim_row = {'Date': date}
        for i, p in enumerate(perimeters):
            perim_row[f'Perimeter {i+1}'] = float(f"{p:.2f}")

        append_row_to_excel(area_file, area_row)
        append_row_to_excel(perim_file, perim_row)

        output_image_cv(image, conversion_ratio, None, filtered_bboxes, filtered_masks, segmentations, areas, centers, new_image_path)
        output_csv("manual_conversion", image_name, len(masks_lesion), conversion_ratio, areas, perimeters, csv_output_path)

        image_metadata, annotation_metadata = get_metadata(image_index, image_name, height, width, bboxes_lesion, segmentations)
        image_metadata_list.append(image_metadata)
        annotation_metadata_list.extend(annotation_metadata)

        with main_output:
            print(f"Processed '{image_name}': Conversion ratio = {conversion_ratio:.2f} pixels/cm, "
                f"Lesions detected = {len(bboxes_lesion)}, "
                f"Areas calculated = {len(areas)}, Perimeters calculated = {len(perimeters)}")

        image_index += 1
        gc.collect()
        torch.cuda.empty_cache()

    if image_metadata_list and annotation_metadata_list and conversion_ratios:
        output_coco_json(image_metadata_list, annotation_metadata_list, json_output_path)
        output_conversion_ratios(conversion_ratios_output_path, conversion_ratios)

    with main_output:
        print("Successfully Finished Processing Manual Images")

# Start the manual conversion-ratio workflow only when at least one image is
# pending; process_next() then drives the rest of it.
if pending_conversion:
    process_next()

Enter directory where unnanotated coral lesion files are located (i.e. C:\Users\myself\Downloads\folder): C:\Users\myself\Downloads\folder
Starting Processing...
Processed 'LC-003_2022_03_02 (2).JPG': Conversion ratio = 67.72 pixels/cm, Lesions detected = 11, Areas calculated = 11, Perimeters calculated = 11
Processed 'RRC_S2_ECA_LC-003_ab_2021_11_29 (2).JPG': Conversion ratio = 68.03 pixels/cm, Lesions detected = 5, Areas calculated = 5, Perimeters calculated = 5
Processed 'LC-013 2023.05.03.JPG': Conversion ratio = 69.27 pixels/cm, Lesions detected = 2, Areas calculated = 2, Perimeters calculated = 2
Processed 'LC-016 2023.05.03.JPG': Conversion ratio = 69.61 pixels/cm, Lesions detected = 6, Areas calculated = 5, Perimeters calculated = 5
Successfully Finished Processing Automatic Images


Output()

## Run Reprocessing

In [None]:
# Root folder that is scanned for folders to reprocess.
adjusted_input_root = "/content/gdrive/MyDrive/Coral Lesion Measurer Folder/Reprocessing/"

# Every sub-directory below the root (at any depth), followed by the root itself.
adjusted_input_path = [
    os.path.join(root, d)
    for root, dirs, _ in os.walk(adjusted_input_root)
    for d in dirs
]
adjusted_input_path.append(adjusted_input_root)

conversion_ratios_input_path = "/content/gdrive/MyDrive/Coral Lesion Measurer Folder/Reprocessing/conversion_ratios.json"

image_output_path = "/content/gdrive/MyDrive/Coral Lesion Measurer Folder/Output/"
csv_output_path = os.path.join(image_output_path, "coral_lesion_data.csv")
csv_columns = ["Folder", "Image Name", "# Lesions", "Pixels Per um", "um^2", "Perimeters"]

areas_folder = os.path.join(image_output_path, "areas")
perimeters_folder = os.path.join(image_output_path, "perimeters")
os.makedirs(areas_folder, exist_ok=True)
os.makedirs(perimeters_folder, exist_ok=True)

json_output_path = os.path.join(image_output_path, "_annotations.coco.json")
conversion_ratios_output_path = os.path.join(image_output_path, "conversion_ratios.json")

# Metadata accumulated over all reprocessed images for the combined COCO file.
image_metadata_list = []
annotation_metadata_list = []
image_index = 0

print("Starting Reprocessing...")

for folder in adjusted_input_path:
    folder_name = os.path.basename(folder.rstrip('/'))
    coco_path = os.path.join(folder, "_annotations.coco.json")
    # Only folders that contain a COCO annotation file are reprocessed.
    if not os.path.isfile(coco_path):
        continue
    with open(coco_path, 'r') as f:
        coco_data = json.load(f)
    annotations_by_filename = extract_coco_info(coco_data)
    conversion_ratios = extract_conversion_ratios_json(conversion_ratios_input_path)

    for file in os.listdir(folder):
        if not file.lower().endswith(('.jpg', '.jpeg')):
            continue

        old_image_path = os.path.join(folder, file)
        image_name = get_original_filename(coco_data, file)
        formatted_image_name = format_original_filename(image_name)
        new_image_path = os.path.join(image_output_path, formatted_image_name)
        image, height, width = open_image(old_image_path)

        conversion_ratio = conversion_ratios[formatted_image_name]

        # An image without any entry in the COCO data is treated as having no
        # lesions instead of failing on a None lookup result.
        annotations = annotations_by_filename.get(image_name) or []
        # Keep only annotations that carry a non-empty segmentation.
        annotations = [a for a in annotations if a.get("segmentation")]
        bboxes_lesion = convert_coco_to_xyxy([a["bbox"] for a in annotations])
        segmentations = [a["segmentation"] for a in annotations]
        perimeters = get_perimeter(segmentations, conversion_ratio)
        # Areas are recomputed from the polygons; the "area" values stored in
        # the COCO annotations are not used.
        areas, centers = get_areas_and_centers_from_polygons(segmentations, bboxes_lesion, conversion_ratio)
        # `coral_id` rather than `id`, so the builtin is not overwritten for
        # the rest of the kernel session.
        coral_id, date, repetition = image_info(image_name)

        area_file = os.path.join(areas_folder, f"{coral_id}_areas.xlsx")
        perim_file = os.path.join(perimeters_folder, f"{coral_id}_perimeters.xlsx")

        # Values are stored rounded to two decimals.
        area_row = {'Date': date}
        for number, area in enumerate(areas, start=1):
            area_row[f'Area {number}'] = float(f"{area:.2f}")

        perim_row = {'Date': date}
        for number, perimeter in enumerate(perimeters, start=1):
            perim_row[f'Perimeter {number}'] = float(f"{perimeter:.2f}")

        append_row_to_excel(area_file, area_row)
        append_row_to_excel(perim_file, perim_row)

        output_image_cv(image, conversion_ratio, None, bboxes_lesion, None, segmentations, areas, centers, new_image_path)
        output_csv(folder_name, formatted_image_name, len(areas), conversion_ratio, areas, perimeters, csv_output_path)

        image_metadata, annotation_metadata = get_metadata(image_index, formatted_image_name, height, width, bboxes_lesion, segmentations)
        image_metadata_list.append(image_metadata)
        annotation_metadata_list.extend(annotation_metadata)

        print(f"Processed '{formatted_image_name}': Conversion ratio = {conversion_ratio:.2f} pixels/cm, "
            f"Lesions detected = {len(bboxes_lesion)}, "
            f"Areas calculated = {len(areas)}, Perimeters calculated = {len(perimeters)}")

        image_index += 1
        gc.collect()
        torch.cuda.empty_cache()

# Write the combined outputs only when at least one image produced annotations.
if image_metadata_list and annotation_metadata_list and conversion_ratios:
    output_coco_json(image_metadata_list, annotation_metadata_list, json_output_path)
    output_conversion_ratios(conversion_ratios_output_path, conversion_ratios)

print("Successfully Finished Reprocessing Images")

Starting Reprocessing...
Processed 'LC-077_2022_04_08 (1).JPG': Conversion ratio = 65.72 pixels/cm, Lesions detected = 10, Areas calculated = 10, Perimeters calculated = 10
Processed 'MC-011_2022_04_08 (1).JPG': Conversion ratio = 65.96 pixels/cm, Lesions detected = 11, Areas calculated = 11, Perimeters calculated = 11
Processed 'LC-003_2025_01_07 (3).JPG': Conversion ratio = 132.44 pixels/cm, Lesions detected = 1, Areas calculated = 1, Perimeters calculated = 1
Processed 'RRC_S2_ECA_LC-003_ab_2021_10_06 (2).JPG': Conversion ratio = 77.62 pixels/cm, Lesions detected = 6, Areas calculated = 6, Perimeters calculated = 6
Successfully Finished Reprocessing Images
