In [80]:
from segment_anything import sam_model_registry, SamPredictor
from ultralytics import YOLO, settings
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import torchvision
import requests
import shutil
import torch
import json
import cv2
import sys
import os

# Record whether a CUDA device can be used; later cells derive DEVICE from this flag
CUDA = torch.cuda.is_available()
# Ask PyTorch to release cached GPU memory that may be left over from earlier runs
torch.cuda.empty_cache()
print("CUDA is available:", CUDA)

CUDA is available: True


In [81]:
# !{sys.executable} -m pip install git+https://github.com/facebookresearch/segment-anything.git
    
# url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
# r = requests.get(url)

# with open("sam_vit_h_4b8939.pth", "wb") as f:
#     f.write(r.content)

In [82]:
def measure_scale_fixed_via_colorboard(image_path, colorboard_width_cm=5.79, min_contour_area=100):
    """
    Estimate the scale of an image (pixels per cm) from a fixed colorboard.

    Brightly coloured regions are segmented in HSV space, small blobs are
    discarded, and the bounding-box widths of all remaining regions whose left
    edge lies in the right half of the image are summed. That pixel width is
    divided by the physical width of the colorboard.

    Parameters:
    - image_path: path to the image that contains the colorboard
    - colorboard_width_cm: physical width in cm covered by the summed boxes.
      NOTE(review): the previous documentation said 4.5 cm while the code
      divided by 5.79; the value used by the code is kept as the default --
      confirm the real width of the board.
    - min_contour_area: contours with an area (in px) not larger than this
      value are ignored

    Returns:
    - pixels per cm as a float (0.0 when no box is found in the right half)

    Raises:
    - FileNotFoundError: if the image cannot be read by OpenCV
    """
    # cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Convert to HSV (Hue, Saturation, Value) color space for easier color segmentation
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Any hue, but only well saturated and bright pixels
    lower_color = np.array([0, 100, 100])
    upper_color = np.array([179, 255, 255])
    mask = cv2.inRange(hsv, lower_color, upper_color)

    # NOTE(review): RETR_TREE also returns nested contours, and the number of
    # boxes is not limited to seven -- every qualifying contour contributes.
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Only boxes that start to the right of the image centre are counted
    midpoint = image.shape[1] / 2

    total_width_in_pixels = 0
    for cnt in contours:
        # Skip small contours that are not colorboard boxes
        if cv2.contourArea(cnt) <= min_contour_area:
            continue
        x, _, w, _ = cv2.boundingRect(cnt)
        if x > midpoint:
            total_width_in_pixels += w

    return total_width_in_pixels / colorboard_width_cm

def generate_output(images, model, predictor):
    """
    Run the detector on the images and segment every detected box with SAM.

    Parameters:
    - images: image source(s) passed to the detection model (e.g. a list of paths)
    - model: detection model; called as model(images, verbose=False)
    - predictor: SamPredictor used to turn the detected boxes into masks

    Returns a list of dictionaries (one per detection result) with the keys:
    - image_path: path to the image
    - image: the image as an RGB array
    - boxes: the detected boxes (xyxy)
    - masks: one mask per box; an empty (0, 1, H, W) boolean tensor when the
      image has no detections

    Side effect: the directory configured as settings['runs_dir'] is deleted
    before the detector runs.
    """
    shutil.rmtree(settings['runs_dir'], ignore_errors=True)

    # Nothing to do for an empty list (e.g. get_images found no files)
    if isinstance(images, (list, tuple)) and len(images) == 0:
        return []

    results = model(images, verbose=False)

    output = []

    for result in results:
        image_path = result.path
        image = cv2.imread(image_path)
        # OpenCV loads BGR; the predictor is given the image in RGB order
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        boxes = result.boxes.xyxy

        if boxes.shape[0] == 0:
            # No detections: skip SAM entirely and return an empty mask batch
            # so that callers can still iterate over 'masks'
            masks = torch.zeros((0, 1, *image.shape[:2]), dtype=torch.bool, device=boxes.device)
        else:
            # Boxes must be mapped into the resized frame used by the predictor
            transformed_boxes = predictor.transform.apply_boxes_torch(boxes, image.shape[:2])

            predictor.set_image(image)

            masks, _, _ = predictor.predict_torch(
                point_coords=None,
                point_labels=None,
                boxes=transformed_boxes,
                multimask_output=False,
            )

        output.append({'image_path': image_path, 'image': image, 'boxes': boxes, 'masks': masks})

    return output

def transform_px_to_cm(box, px_per_cm):
    """
    Convert the size of an xyxy box from pixels to centimetres.

    box is indexed as (x0, y0, x1, y1) and its elements must provide .cpu();
    px_per_cm is the image scale. Returns the tuple (width_cm, height_cm).
    """
    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
    # Absolute differences, so the corner order does not matter
    width_px = np.abs((x_max - x_min).cpu())
    height_px = np.abs((y_max - y_min).cpu())
    return width_px / px_per_cm, height_px / px_per_cm

def get_masked_image(image, mask):
    """
    Attach a mask to an image as an alpha channel.

    Size-1 axes are squeezed out of the mask; truthy mask entries become
    opaque (255) and falsy entries transparent (0). The resulting uint8 layer
    is appended to the image along the last axis and the combined array is
    returned.
    """
    # Drop singleton axes so the mask lines up with the image's first two axes
    flat_mask = np.squeeze(mask)
    # Build the alpha layer and give it a trailing channel axis: [H, W] -> [H, W, 1]
    alpha_layer = np.where(flat_mask, 255, 0).astype(np.uint8)[..., np.newaxis]
    return np.concatenate((image, alpha_layer), axis=-1)

def get_cropped_image(image, box):
    """
    Cut the region described by an xyxy box out of an image.

    The top-left corner and the box width/height are each truncated to
    integers before slicing.
    """
    left, top = int(box[0]), int(box[1])
    width = int(box[2] - box[0])
    height = int(box[3] - box[1])
    return image[top:top + height, left:left + width]

def apply_crop_mask(image, mask, box):
    """
    Mask an image once per mask and crop each result to the matching box.

    mask and box are indexed in parallel; their elements must provide
    .cpu().numpy(). Returns a list of (masked_image, cropped_image) tuples,
    one per mask.
    """
    pairs = []
    for idx, single_mask in enumerate(mask):
        masked = get_masked_image(image, single_mask.cpu().numpy())
        box_np = box[idx].cpu().numpy()
        pairs.append((masked, get_cropped_image(masked, box_np)))
    return pairs

def find_dominant_color(image, k=3):
    """
    Find the dominant colour of an image and paint similar pixels white.

    The pixels are clustered with k-means (k clusters, fitted on every 50th
    pixel); the centre of the most populated cluster is the dominant colour.
    Every pixel whose Euclidean distance to that colour is below the standard
    deviation of all distances is set to white.

    The image data is reshaped to rows of 3 values, so a 3-channel image is
    expected. Returns (dominant_color, result_img) where result_img is a PIL
    image.
    """
    # np.array makes a copy, so the caller's image is left untouched
    pixel_grid = np.array(image)
    # One row per pixel
    pixels = pixel_grid.reshape((-1, 3))

    # Fit only on a subsample to speed up clustering
    model = KMeans(n_clusters=k, random_state=0).fit(pixels[::50])
    label_counts = np.bincount(model.labels_)
    dominant_color = model.cluster_centers_[np.argmax(label_counts)]

    # Distance of every pixel to the dominant colour
    distances = np.sqrt(np.sum((pixels - dominant_color) ** 2, axis=1))
    close_to_dominant = distances < np.std(distances)

    # Turn the dominant colour range white
    pixels[close_to_dominant] = [255, 255, 255]
    whitened = pixels.reshape(pixel_grid.shape)

    # Back to a PIL image
    result_img = Image.fromarray(whitened.astype(np.uint8))
    return dominant_color, result_img

def calculate_mask_area(masked_pixels, pixels_per_cm):
    """
    Convert a pixel count into an area in square centimetres.

    masked_pixels is the number of pixels and pixels_per_cm the linear image
    scale, so one square centimetre contains pixels_per_cm ** 2 pixels.
    """
    pixels_per_square_cm = pixels_per_cm ** 2
    return masked_pixels / pixels_per_square_cm

def get_images(path, range_left=0, range_right=None):
    """
    List the .jpg images of a directory, optionally restricted to a slice.

    Files whose name contains 'only' or 'grid' are skipped. The listing is
    sorted by file name so the slice selects the same files on every platform
    (os.listdir returns entries in arbitrary order).

    Parameters:
    - path: directory to scan
    - range_left: start index of the slice (inclusive)
    - range_right: end index of the slice (exclusive). The default None means
      "up to and including the last image"; the previous default of -1
      silently dropped the last image. An explicit negative value still
      follows normal Python slice semantics.

    Returns a list of image paths; an empty list (after printing a message)
    when the path does not exist or the directory is empty.
    """
    if not os.path.exists(path):
        print(f"Path {path} does not exist")
        return []

    # List the directory once and reuse the result
    entries = sorted(os.listdir(path))
    if len(entries) == 0:
        print(f"Path {path} is empty")
        return []

    images = [os.path.join(path, f) for f in entries if f.endswith('.jpg') and 'only' not in f and 'grid' not in f]
    return images[range_left:range_right]

def main(output):
    """
    Turn the raw detections of generate_output into per-image measurements.

    For every entry the dominant colour of the image is whitened, the scale is
    measured on the matching '<name>_scale_only.jpg' image, every mask gets
    its area in square cm, every box gets its width and height in cm, and the
    masked / cropped images are generated.

    Returns a list of dictionaries with the keys 'image', 'image_path',
    'image_name', 'boxes', 'masks_and_sqcm', 'px_per_cm' and
    'masked_and_cropped_images'.
    """
    results = []

    for res in output:
        path, mask, boxes = res['image_path'], res['masks'], res['boxes']
        name = os.path.basename(path)

        # Keep only the whitened image returned by find_dominant_color
        image = find_dominant_color(res['image'])[1]

        # The cropped scale image lives next to the original image
        scale_path = path.replace('.jpg', '_scale_only.jpg')
        px_per_cm = measure_scale_fixed_via_colorboard(scale_path)

        # Pair every mask with its area: pixel count of the first channel -> square cm
        all_masks_with_sq_cm = [
            (m, calculate_mask_area(m[0].sum().tolist(), px_per_cm))
            for m in mask
        ]

        # Pair every box with its size in cm
        all_boxes = []
        for box in boxes:
            w_cm, h_cm = transform_px_to_cm(box, px_per_cm)
            all_boxes.append((box, {'width_cm': w_cm, 'height_cm': h_cm}))

        # list of tuples (masked_image, cropped_image) per box / mask
        masked_and_cropped_images = apply_crop_mask(image, mask, boxes)

        results.append({
            'image': image,
            'image_path': path,
            'image_name': name,
            'boxes': all_boxes,
            'masks_and_sqcm': all_masks_with_sq_cm,
            'px_per_cm': px_per_cm,
            'masked_and_cropped_images': masked_and_cropped_images,
        })

    return results

In [83]:
def show_mask(mask, ax, random_color=False):
    """
    Draw a mask on the given axes as a semi-transparent colour overlay.

    The overlay uses a fixed blue RGBA colour, or a random RGB colour with the
    same 0.6 alpha when random_color is True. The last two axes of the mask
    are taken as height and width.
    """
    if not random_color:
        rgba = np.array([30/255, 144/255, 255/255, 0.6])
    else:
        rgba = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to colour every mask pixel
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)
    
def show_points(coords, labels, ax, marker_size=375):
    """
    Scatter labelled points on the given axes as star markers.

    Points whose label is 1 are drawn in green first, then points whose label
    is 0 in red. coords is indexed as [:, 0] for x and [:, 1] for y.
    """
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    for label_value, colour in ((1, 'green'), (0, 'red')):
        selected = coords[labels == label_value]
        ax.scatter(selected[:, 0], selected[:, 1], color=colour, **star_style)
    
def show_box(box, ax):
    """
    Draw an xyxy box on the given axes as an unfilled green rectangle.
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    # Fully transparent face so only the outline is visible
    outline = plt.Rectangle((left, top), right - left, bottom - top, edgecolor='green', facecolor=(0,0,0,0), lw=2)
    ax.add_patch(outline)

def show_image(image, ax='Off', figsize=(10, 10)):
    """
    Show an image in a new matplotlib figure.

    Parameters:
    - image: the image handed to plt.imshow
    - ax: value forwarded to plt.axis (the default 'Off' is meant to hide the axes)
    - figsize: size of the new figure
    """
    plt.figure(figsize=figsize)
    plt.imshow(image)
    plt.axis(ax)
    plt.show()

def show_image_with_box(image, box):
    """
    Show an image in a new 10x10 figure with a single box drawn on top.
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    # Draw on the axes that imshow has just used
    current_axes = plt.gca()
    show_box(box, current_axes)
    plt.axis('off')
    plt.show()

def show_image_with_boxes_and_masks(image_rgb, boxes, masks):
    """
    Show an image in a new 10x10 figure with all masks and boxes drawn on top.

    Masks are drawn first, then boxes; the elements of both sequences must
    provide .cpu().numpy().
    """
    plt.figure(figsize=(10, 10))
    plt.imshow(image_rgb)
    for single_mask in masks:
        mask_np = single_mask.cpu().numpy()
        show_mask(mask_np, plt.gca())
    for single_box in boxes:
        box_np = single_box.cpu().numpy()
        show_box(box_np, plt.gca())
    plt.axis('off')
    plt.show()

def show_img_mask_crop(image, mask, box):
    """
    Show the masked and cropped images of every mask in a grid.

    Each row of the grid belongs to one mask and shows the images returned by
    apply_crop_mask for it (masked image, cropped image).

    Returns the list produced by apply_crop_mask. When there are no masks the
    empty list is returned without creating a figure (previously this case
    raised a ValueError from max() on an empty sequence).
    """
    img = apply_crop_mask(image, mask, box)
    if not img:
        return img

    num_rows = len(img)
    num_cols = max(len(row) for row in img)

    # squeeze=False always yields a 2-D axes array, so one indexing form works
    # for any number of rows and columns
    fig, ax = plt.subplots(num_rows, num_cols, figsize=(num_cols*5, num_rows*5), squeeze=False)

    for i, row in enumerate(img):
        for j, img_ij in enumerate(row):
            ax[i, j].imshow(img_ij)

    plt.tight_layout()
    plt.show()

    return img

In [84]:
# Model weights
MODEL_PATH_PLANT = '../models/plant_detection_v2.pt'
MODEL_PATH_TAPE = '../models/tape_detector_v4.pt'
# Input images and output folder
IMG_PATH_FIXED = '../images/cropped_scales/fixed'
IMG_PATH_RANDOM = '../images/cropped_scales/random'
DATA_PATH = '../data/processed'
# Segment Anything checkpoint and architecture
SAM_CHECKPOINT = "../models/sam_vit_h_4b8939.pth"
MODEL_TYPE = "vit_h"
DEVICE = "cuda" if CUDA else "cpu"

# Keep the detector's runs directory next to the models instead of hard-coding
# a user-specific absolute path. The path is made absolute so it does not
# depend on the working directory of later sessions.
# NOTE(review): assumes the notebook is run from a sibling folder of 'models',
# as the relative paths above already do.
RUNS_DIR = os.path.abspath(os.path.join('..', 'models', 'runs'))
settings.update({'runs_dir': RUNS_DIR})

In [85]:
# Detection models: one loaded from the plant weights, one from the tape weights
model_plant, model_tape = YOLO(MODEL_PATH_PLANT), YOLO(MODEL_PATH_TAPE)

# Build the SAM model from its checkpoint, move it to the selected device and
# wrap it in a predictor
sam = sam_model_registry[MODEL_TYPE](checkpoint=SAM_CHECKPOINT)
sam.to(device=DEVICE)
predictor = SamPredictor(model=sam) if False else SamPredictor(sam)

In [86]:
# Take the slice [2:5] of the image listing of the fixed-scale folder
images = get_images(IMG_PATH_FIXED, 2, 5)
# Detect boxes with the plant model and segment every box with the SAM predictor
output = generate_output(images, model_plant, predictor)
# Convert the raw detections into measurements (cm, square cm) and crops
results = main(output)

In [108]:
# Create the output directory once; exist_ok makes a prior existence check unnecessary
os.makedirs(DATA_PATH, exist_ok=True)

# Pixels whose R, G and B values are all strictly above this value count as white
threshold = 250

for res in results:
    # main() already stored the (masked_image, cropped_image) pairs computed
    # from the same image, masks and boxes, so they are reused here instead of
    # running apply_crop_mask a second time
    for idx, (masked, cropped) in enumerate(res['masked_and_cropped_images']):
        # Suffix the crops of one image with a, b, c, ...
        img_name = res['image_name'].replace('.jpg', f'_plant_mask_crop_{chr(idx + 97)}.png')
        img_path = os.path.join(DATA_PATH, img_name)

        # Work on a copy so the arrays kept in `results` stay unchanged
        rgba = np.array(cropped)

        # Turn the white pixels transparent in one vectorised step
        is_white = np.all(rgba[..., :3] > threshold, axis=-1)
        rgba[is_white] = (255, 255, 255, 0)

        Image.fromarray(rgba).save(img_path)

In [149]:
# Load one exported crop and count the pixels that are not fully transparent
img_path = '../data/processed/1698019523_plant_mask_crop_b.png'
img = Image.open(img_path)

img_array = np.array(img)
# Channel 3 is the alpha channel; nonzero alpha means the pixel is still visible
non_transparent_pixels = np.nonzero(img_array[:, :, 3])
non_transparent_pixels_count = len(non_transparent_pixels[0])

After removing all white pixels from the cropped picture, the measured area is 87.43 cm².

In [147]:
# Area in square cm of the non-transparent pixels counted in the previous cell.
# NOTE(review): assumes results[0] belongs to the image that PNG was exported from -- confirm.
calculate_mask_area(non_transparent_pixels_count, results[0]['px_per_cm'])

87.43493050888728

Without removing the white pixels from the picture, the measured area is 92.13 cm².

In [148]:
# Square-cm value stored by main() for the mask at index 1 of the first result
# (each entry of 'masks_and_sqcm' is a (mask, square_cm) tuple).
# NOTE(review): presumably the mask behind the '_b' crop loaded above -- confirm.
results[0]['masks_and_sqcm'][1][1]

92.13058401103808