In [1]:
import os
import numpy as np
from sklearn.cluster import KMeans
from PIL import Image, ImageDraw, ImageFilter

Downloading dataset

In [2]:
from roboflow import Roboflow
# The API key is read from the ROBOFLOW_API_KEY environment variable so that the
# secret never lives in the notebook (raises KeyError if the variable is unset).
# NOTE(review): a key that was previously committed in this cell should be treated
# as leaked and revoked.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("ntnuscaledetection").project("ntnu_herbarium_segmentation_v2")
version = project.version(7)
# Fetch version 7 of the project in the "yolov8" export format.
dataset = version.download("yolov8")

loading Roboflow workspace...
loading Roboflow project...
Dependency ultralytics==8.0.196 is required but found version=8.2.15, to fix: `pip install ultralytics==8.0.196`


Downloading Dataset Version Zip in NTNU_Herbarium_Segmentation_V2-7 to yolov8:: 100%|██████████| 145360/145360 [00:14<00:00, 10100.63it/s]





Extracting Dataset Version Zip to NTNU_Herbarium_Segmentation_V2-7 in yolov8:: 100%|██████████| 224/224 [00:00<00:00, 1274.48it/s]


Renaming

In [6]:
dataset_version = version.version

# Root of the training split inside the downloaded dataset folder.
train_dir = f'./NTNU_Herbarium_Segmentation_V2-{dataset_version}/train'

dataset_img = sorted(os.listdir(f'{train_dir}/images'))
dataset_mask = sorted(os.listdir(f'{train_dir}/labels'))

# Images and labels are paired purely by their position in the two sorted listings.
for img, mask in zip(dataset_img, dataset_mask):
    # Keep the text before the first '.', then drop the '_jpg' marker from it.
    new_name = img.partition('.')[0].replace('_jpg', '')
    os.rename(f'{train_dir}/images/{img}', f'{train_dir}/images/{new_name}.png')
    # The label file is also given a '.png' suffix here; only the file name changes,
    # the contents are not converted.
    os.rename(f'{train_dir}/labels/{mask}', f'{train_dir}/labels/{new_name}.png')

Selecting only a few manually chosen images to augment

In [7]:
# Hand-picked image names (without extension) that will be augmented.
selected = ['1701328720_a', '1701332829', '1701257719', '1701235632', '1701246629', '1701268081', '1701233161']

for subdir in ('images', 'labels'):
    os.makedirs(f'./to_augment/{subdir}', exist_ok=True)

source_dir = f'./NTNU_Herbarium_Segmentation_V2-{dataset_version}/train'

# Move (not copy) each selected image and its label file into ./to_augment.
for img in selected:
    os.rename(f'{source_dir}/images/{img}.png', f'./to_augment/images/{img}.png')
    # The label file is looked up with a '.png' suffix and stored with a '.txt' suffix.
    os.rename(f'{source_dir}/labels/{img}.png', f'./to_augment/labels/{img}.txt')

Helpers for cropping out the masked objects and saving them as PNG files with a transparent background

In [11]:
def get_mask_points(label_path):
    """Read polygon coordinates from a label text file.

    Each non-blank line is expected to hold a class label followed by an even
    number of numeric values, read as alternating x and y coordinates. Fields
    may be separated by any run of whitespace; blank lines are ignored.

    Args:
        label_path: Path of the label file to read.

    Returns:
        A list with one entry per non-blank line; each entry is a list of
        ``[x, y]`` float pairs in the order they appear on the line.

    Raises:
        ValueError: If a line has an odd number of coordinate values or a
            value that cannot be parsed as a float.
    """
    polygons = []
    with open(label_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            fields = line.split()  # tolerant of tabs and repeated spaces
            if not fields:
                continue  # blank line: no polygon to record
            coords = fields[1:]  # drop the first element, which is the class label
            if len(coords) % 2:
                raise ValueError(
                    f'{label_path}: line {line_number} has an odd number of coordinate values'
                )
            polygons.append(
                [[float(coords[i]), float(coords[i + 1])] for i in range(0, len(coords), 2)]
            )
    return polygons

def find_dominant_color(image: "Image.Image", k: int = 5) -> "tuple[np.ndarray, np.ndarray]":
    """Find the dominant colour of an image and whiten the pixels close to it.

    The pixels are clustered with k-means; the centre of the most populated
    cluster is taken as the dominant colour. Every pixel whose Euclidean RGB
    distance to that colour is smaller than the standard deviation of all such
    distances is set to pure white.

    Args:
        image: PIL image to analyse. It is converted to RGB first, so images
            in other modes (RGBA, L, P, ...) are handled as well.
        k: Number of k-means clusters.

    Returns:
        A ``(dominant_color, result_img_array)`` pair: the cluster centre of
        the dominant colour, and an array of shape ``(height, width, 3)``
        holding the image with the near-dominant pixels turned white.
    """
    # Force exactly three channels so the reshape below is always valid.
    # np.array() copies the pixel data, so the caller's image is left untouched.
    img_array = np.array(image.convert('RGB'))

    # Flatten to a list of RGB values (a view onto img_array).
    img_vector = img_array.reshape((-1, 3))

    # Run k-means on the pixel colours; the fixed random_state keeps runs repeatable.
    kmeans = KMeans(n_clusters=k, random_state=0).fit(img_vector)

    # The dominant colour is the centre of the cluster with the most pixels.
    dominant_color = kmeans.cluster_centers_[np.argmax(np.bincount(kmeans.labels_))]

    # Select the pixels whose distance to the dominant colour is below one
    # standard deviation of all distances.
    distances = np.sqrt(np.sum((img_vector - dominant_color) ** 2, axis=1))
    mask = distances < np.std(distances)

    # Turn the dominant colour range to white.
    img_vector[mask] = [255, 255, 255]
    result_img_array = img_vector.reshape(img_array.shape)

    return dominant_color, result_img_array

def crop_out_image(image_path, points):
    """Cut every polygon out of an image and save each cut-out as its own PNG.

    The image is first passed through ``find_dominant_color`` so that the
    dominant colour range becomes white. For each polygon, everything outside
    the polygon is made fully transparent, and pure-white pixels inside it are
    made transparent as well. Each result keeps the full image size and is
    written to the current working directory as ``<name>_object_<idx>.png``,
    where ``<name>`` is the file name up to its first '.'.

    Args:
        image_path: Path of the image to cut the objects from.
        points: List of polygons; each polygon is a list of ``[x, y]`` pairs
            that are scaled by the image width and height before drawing.
    """
    # Close the file as soon as the pixel data has been read.
    with Image.open(image_path) as source:
        _, white_bg = find_dominant_color(source)
    image = Image.fromarray(white_bg).convert('RGBA')

    # Loop-invariant values: output name stem and the fully transparent backdrop.
    name = os.path.basename(image_path).split('.')[0]
    transparent_bg = Image.new('RGBA', image.size, (0, 0, 0, 0))

    for idx, point in enumerate(points):
        # Single-channel mask: 255 inside the polygon, 0 elsewhere.
        mask = Image.new('L', image.size, 0)
        unnormalized_points = [(int(x * image.width), int(y * image.height)) for [x, y] in point]
        ImageDraw.Draw(mask).polygon(unnormalized_points, fill=255)

        # Keep the image inside the polygon, transparency outside it.
        result = Image.composite(image, transparent_bg, mask)

        # Turn all the pure-white pixels transparent by zeroing their alpha.
        data = np.array(result)
        white = np.all(data[:, :, :3] == 255, axis=-1)
        data[white, -1] = 0

        Image.fromarray(data).save(f'{name}_object_{idx}.png', 'PNG')

def remove_objects_from_image(image_path, points):
    """Paint every polygon white and save the resulting background image.

    The image is first passed through ``find_dominant_color`` so that the
    dominant colour range becomes white; each polygon is then filled with
    white. The result is written to the current working directory as
    ``<name>_augmented_0.png``, where ``<name>`` is the file name up to its
    first '.'.

    Args:
        image_path: Path of the image to remove the objects from.
        points: List of polygons; each polygon is a list of ``[x, y]`` pairs
            that are scaled by the image width and height before drawing.
    """
    # Close the file as soon as the pixel data has been read.
    with Image.open(image_path) as source:
        _, result_img_array = find_dominant_color(source)
    image = Image.fromarray(result_img_array)

    # One drawing context is enough for all polygons.
    draw = ImageDraw.Draw(image)
    for point in points:
        unnormalized_points = [(int(x * image.width), int(y * image.height)) for [x, y] in point]
        draw.polygon(unnormalized_points, fill='white')

    name = os.path.basename(image_path).split('.')[0]
    image.save(f'{name}_augmented_0.png', 'PNG')

def rotate_point(cx, cy, angle, px, py):
    """Rotate the point (px, py) around the centre (cx, cy).

    Args:
        cx, cy: Coordinates of the rotation centre.
        angle: Rotation angle in degrees.
        px, py: Coordinates of the point to rotate.

    Returns:
        The rotated point as an ``(x, y)`` tuple.
    """
    theta = np.radians(angle)
    cos_t, sin_t = np.cos(theta), np.sin(theta)

    # Work relative to the rotation centre, rotate, then shift back.
    dx = px - cx
    dy = py - cy
    rotated_x = dx * cos_t - dy * sin_t
    rotated_y = dx * sin_t + dy * cos_t
    return rotated_x + cx, rotated_y + cy

def update_points(points, perc_offset_x, perc_offset_y, rotation_angle, image_width, image_height):
    """Rotate points around the image centre, then translate them.

    Args:
        points: Iterable of ``(x, y)`` pairs.
        perc_offset_x: Horizontal shift as a percentage of ``image_width``.
        perc_offset_y: Vertical shift as a percentage of ``image_height``.
        rotation_angle: Rotation angle in degrees, passed to ``rotate_point``.
        image_width: Image width, used for the centre and the x shift.
        image_height: Image height, used for the centre and the y shift.

    Returns:
        A list of ``(x, y)`` tuples, one per input point.
    """
    # Percentages -> absolute shifts.
    shift_x = perc_offset_x * image_width / 100
    shift_y = perc_offset_y * image_height / 100

    # The image centre is the pivot of the rotation.
    center_x, center_y = image_width / 2, image_height / 2

    # Rotate first, translate second.
    rotated = (rotate_point(center_x, center_y, rotation_angle, x, y) for x, y in points)
    return [(rx + shift_x, ry + shift_y) for rx, ry in rotated]

In [34]:
image_name = '1701233161'
image_extension = 'png'

# Build the input paths once and reuse them below.
image_path = f'./to_augment/images/{image_name}.{image_extension}'
label_path = f'./to_augment/labels/{image_name}.txt'

original_points = get_mask_points(label_path)

# Only the size is read here; it is reused when the labels are updated.
original_image = Image.open(image_path)
image_width, image_height = original_image.size

# Write the object-free background and one cut-out per polygon.
remove_objects_from_image(image_path, original_points)
crop_out_image(image_path, original_points)

len(original_points)

3

Manually update the settings dictionary with the changes you made in the photo-editing software

In [35]:
# Placement applied to each cut-out object, keyed by the object's index as a string.
# Offsets are percentages of the image width/height; the angle is in degrees.
settings = {
    '0': {'x_perc_offset': -27.47, 'y_perc_offset': -5.69, 'rotation_angle': -37.77},
    '1': {'x_perc_offset': 8.70, 'y_perc_offset': -14.06, 'rotation_angle': -173.29},
    '2': {'x_perc_offset': -4.62, 'y_perc_offset': -54.21, 'rotation_angle': -14.47},
    # Disabled entries, kept as a template for images with more objects:
    # '3': {'x_perc_offset': 21.45, 'y_perc_offset': 33.06, 'rotation_angle': -3.74},
    # '4': {'x_perc_offset': 5.29, 'y_perc_offset': 60.88, 'rotation_angle': 0},
}

In [36]:
os.makedirs('./augmented/images', exist_ok=True)
os.makedirs('./augmented/labels', exist_ok=True)

# Build all label lines first and write the file once in 'w' mode, so that
# re-running this cell replaces the labels instead of appending duplicates.
label_lines = []
for idx, point in enumerate(original_points):

    # Objects without an entry in `settings` are left out of the label file.
    object_settings = settings.get(str(idx))
    if object_settings is None:
        continue

    # Same integer pixel coordinates that were used when drawing the polygons.
    unnormalized_points = [(int(x * image_width), int(y * image_height)) for [x, y] in point]

    x_perc_offset = object_settings['x_perc_offset']
    y_perc_offset = object_settings['y_perc_offset']
    rotation_angle = object_settings['rotation_angle']

    new_points = update_points(unnormalized_points, x_perc_offset, y_perc_offset, rotation_angle, image_width, image_height)

    # Class label 0 followed by the coordinates divided by the image size.
    # NOTE(review): moved points are not clipped and may fall outside [0, 1].
    coords = ' '.join(f'{x/image_width} {y/image_height}' for x, y in new_points)
    label_lines.append(f'0 {coords}\n')

with open(f'./augmented/labels/{image_name}_augmented_0.txt', 'w') as f:
    f.writelines(label_lines)