In [5]:
import torch

# Report the CUDA setup. The device name is only queried when a GPU is present,
# because torch.cuda.get_device_name(0) raises on a CPU-only machine.
print(torch.cuda.is_available())
print(torch.cuda.device_count())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

True
1
NVIDIA GeForce RTX 3060 Laptop GPU


Augment image

In [231]:
import os
import numpy as np
import cv2
from PIL import Image
from sklearn.cluster import KMeans

def process_image(image_path, label_path, output_directory, color=(255, 255, 255), threshold=220):
    """Cut every labelled polygon out of an image and save each as a transparent PNG.

    The image is loaded, its dominant colour range is whitened by
    ``find_dominant_color`` and that intermediate result is saved as
    ``image_without_dominant_color.png``. Each line of the label file is read
    as ``<class> x1 y1 x2 y2 ...`` with coordinates given as fractions of the
    image width/height. For every polygon with more than one point, the pixels
    inside it are copied onto a transparent RGBA canvas, near-white pixels are
    made transparent by ``white_to_transparent`` and the result is saved as
    ``extracted_polyline_<idx>.png``.

    Args:
        image_path: Path of the image to read with OpenCV.
        label_path: Path of the text file holding one polygon per line.
        output_directory: Directory that receives the PNG files; created if missing.
        color: Retained for backward compatibility only. The polygon mask is a
            single-channel array filled with 255, so this value does not
            affect the output (previously any non-white colour silently
            produced empty cut-outs).
        threshold: Passed on to ``white_to_transparent``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    # cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

    # Whiten the dominant colour range; the colour itself is not needed here
    _, image = find_dominant_color(image)

    os.makedirs(output_directory, exist_ok=True)
    Image.fromarray(image).save(os.path.join(output_directory, 'image_without_dominant_color.png'))

    # Split each label line on whitespace: [class, x1, y1, x2, y2, ...]
    with open(label_path, 'r') as f:
        rows = [line.split() for line in f]

    height, width = image.shape[:2]

    for idx, row in enumerate(rows):
        # Skip the class label (index 0); pair up the remaining values as (x, y).
        # zip drops a trailing unpaired value instead of raising IndexError.
        coords = row[1:]
        points = [
            [int(float(x) * width), int(float(y) * height)]
            for x, y in zip(coords[0::2], coords[1::2])
        ]

        if len(points) <= 1:
            print(f"Not enough points to form a polyline for line {idx}.")
            continue

        # Rasterise the polygon into a single-channel mask
        polygon = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
        mask = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(mask, [polygon], 255)
        inside = mask == 255

        # RGBA canvas: image colours and full opacity inside the polygon,
        # fully transparent black everywhere else
        extracted = np.zeros((height, width, 4), dtype=np.uint8)
        extracted[inside, :3] = image[inside]
        extracted[inside, 3] = 255

        # Make near-white pixels transparent and save the cut-out
        final_image = white_to_transparent(extracted, threshold)
        final_image.save(os.path.join(output_directory, f'extracted_polyline_{idx}.png'))

def white_to_transparent(image, threshold=220):
    """Return an RGBA PIL image in which near-white pixels are fully transparent.

    A pixel counts as near-white when its R, G and B values are all strictly
    greater than ``threshold``; such pixels become ``(255, 255, 255, 0)``.
    All other pixels, including their alpha value, are left unchanged.

    Args:
        image: Array that ``Image.fromarray`` turns into an RGBA image.
        threshold: Per-channel value above which a channel counts as white.

    Returns:
        A PIL image in mode ``'RGBA'``.

    Raises:
        ValueError: If ``image`` does not convert to an RGBA image.
    """
    pil_img = Image.fromarray(image)
    if pil_img.mode != 'RGBA':
        raise ValueError(f"Expected an RGBA image, got mode {pil_img.mode!r}")

    # Vectorised replacement for the per-pixel Python loop
    rgba = np.array(pil_img)
    near_white = (rgba[..., :3] > threshold).all(axis=-1)
    rgba[near_white] = (255, 255, 255, 0)  # Full transparency
    return Image.fromarray(rgba)

def find_dominant_color(image, k=5):
    """Whiten the most common colour cluster of an image.

    The pixels are clustered into ``k`` groups with k-means, fitted on every
    pixel with ``random_state=0``. The centre of the most populated cluster
    is taken as the dominant colour. Every pixel whose Euclidean distance to
    that centre is below the standard deviation of all such distances is set
    to white.

    Returns:
        ``(dominant_color, whitened_image)`` where ``whitened_image`` has the
        same shape as the input. The work is done on a copy made by
        ``np.array``.
    """
    pixels = np.array(image)
    flat_pixels = pixels.reshape((-1, 3))  # one row per pixel

    clustering = KMeans(n_clusters=k, random_state=0).fit(flat_pixels)

    # The cluster with the most members provides the dominant colour
    cluster_sizes = np.bincount(clustering.labels_)
    dominant_color = clustering.cluster_centers_[np.argmax(cluster_sizes)]

    # Distance of every pixel to the dominant colour
    offsets = flat_pixels - dominant_color
    distances = np.sqrt(np.sum(offsets ** 2, axis=1))
    close_to_dominant = distances < np.std(distances)

    # Paint the dominant colour range white
    flat_pixels[close_to_dominant] = [255, 255, 255]

    return dominant_color, flat_pixels.reshape(pixels.shape)

# Try the polygon extraction on a single training image and write the
# results to ./test.
image_name = '2452362457_jpg.rf.5b826e8d82db1a98cf9422f2dc09ee75'
image_path = f'../data/Ntnu_segmentation-24/train/images/{image_name}.jpg'
label_path = f'../data/Ntnu_segmentation-24/train/labels/{image_name}.txt'

output_path = './test'
os.makedirs(output_path, exist_ok=True)

# process_image has no return statement, so process_success ends up as None
process_success = process_image(
    image_path=image_path,
    label_path=label_path,
    output_directory=output_path,
)


Augment dataset

In [54]:
from torchvision.transforms.functional import adjust_hue
from torchvision import transforms
from roboflow import Roboflow
from PIL import Image
import shutil
import torch
import os

def hue_degree_to_hue_value():
    """Draw a random hue shift between -30 and +30 degrees.

    Returns the shift as a fraction of the full 360 degree hue circle,
    i.e. a float between -30/360 and +30/360.
    """
    unit_sample = torch.rand(1).item()      # single draw from [0, 1)
    shift_degrees = unit_sample * 60 - 30   # stretch to [-30, 30)
    return shift_degrees / 360.0

def augment_image(pil_img, name):
    """Create colour-augmented variants of one image together with their file names.

    Args:
        pil_img: Source PIL image. Every variant is built from a copy of it.
        name: File name of the source image, e.g. ``'leaf.jpg'``.

    Returns:
        ``(images, names)``: two parallel lists with five entries each — the
        original followed by the hue, saturation, brightness and contrast
        variants. Variant names insert ``_hue``, ``_sat``, ``_bright`` or
        ``_cont`` in front of the file extension.
    """
    images = [pil_img.copy()]  # Start with the original image

    # Random hue shift between -30 and +30 degrees
    images.append(adjust_hue(pil_img.copy(), hue_degree_to_hue_value()))

    # Saturation, brightness and contrast jitter, each with strength 0.5
    images.append(transforms.ColorJitter(saturation=0.5)(pil_img.copy()))
    images.append(transforms.ColorJitter(brightness=0.5)(pil_img.copy()))
    images.append(transforms.ColorJitter(contrast=0.5)(pil_img.copy()))

    # Build the names from the real extension. str.replace('.jpg', ...) left
    # '.png' / '.jpeg' names unchanged, so all five names collided and the
    # variants overwrote the original file when saved.
    stem, ext = os.path.splitext(name)
    names = [name] + [f'{stem}{suffix}{ext}' for suffix in ('_hue', '_sat', '_bright', '_cont')]
    return images, names

def augment_images(image_paths):
    """Augment every image in ``image_paths``.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A list with one ``(images, names)`` tuple per input path, in input
        order, as returned by ``augment_image``.
    """
    augmented_batch = []
    for img_path in image_paths:
        # Close the file handle as soon as the variants exist. augment_image
        # builds every variant from pil_img.copy(), so the results do not
        # depend on the file staying open.
        with Image.open(img_path) as pil_img:
            augmented_batch.append(augment_image(pil_img, os.path.basename(img_path)))
    return augmented_batch

# download dataset
# The API key is read from the ROBOFLOW_API_KEY environment variable so the
# secret is not stored in the notebook. A key that was committed earlier
# should be revoked.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("ntnuscaledetection").project("ntnu_segmentation")
version = project.version(10)
dataset = version.download("yolov8")

# get paths
path = os.path.join(dataset.location, 'train', 'images')
label_path = os.path.join(dataset.location, 'train', 'labels')

image_paths = [os.path.join(path, i) for i in os.listdir(path) if i.endswith(('.png', '.jpg', '.jpeg'))]

# augment images
augmented_images = augment_images(image_paths)

# save images and labels
for idx, item in enumerate(augmented_images):
    images, names = item

    # Label files share the image's stem. splitext handles every extension
    # accepted above, whereas replace('.jpg', '.txt') only worked for '.jpg'.
    original_name = os.path.splitext(names[0])[0] + '.txt'
    original_label = os.path.join(label_path, original_name)

    # copy original label with new names (hue, sat, bright, cont)
    for name in names[1:]:
        new_label = os.path.join(label_path, os.path.splitext(name)[0] + '.txt')
        if new_label != original_label:  # shutil.copy refuses to copy a file onto itself
            shutil.copy(original_label, new_label)

    # save images
    for name, image in zip(names, images):
        image.save(os.path.join(path, name))

loading Roboflow workspace...
loading Roboflow project...
Dependency ultralytics==8.0.196 is required but found version=8.1.34, to fix: `pip install ultralytics==8.0.196`


Downloading Dataset Version Zip in Ntnu_segmentation-10 to yolov8:: 100%|██████████| 2731/2731 [00:00<00:00, 2856.91it/s]





Extracting Dataset Version Zip to Ntnu_segmentation-10 in yolov8:: 100%|██████████| 87/87 [00:00<00:00, 5856.84it/s]


Deploy model

In [9]:
from roboflow import Roboflow
import os

# Upload trained weights to Roboflow. The API key is read from the
# ROBOFLOW_API_KEY environment variable so the secret is not stored in the
# notebook. A key that was committed earlier should be revoked.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("ntnuscaledetection").project("ntnu_segmentation")
version = project.version(5)
# NOTE(review): dataset version 5 is deployed with a weights file named
# "plant_segmentation_v6.pt" — confirm this pairing is intended.
version.deploy("yolov8-seg", "../models", "plant_segmentation_v6.pt")

loading Roboflow workspace...
loading Roboflow project...
Dependency ultralytics==8.0.196 is required but found version=8.1.34, to fix: `pip install ultralytics==8.0.196`
View the status of your deployment at: https://app.roboflow.com/ntnuscaledetection/ntnu_segmentation/5
Share your model with the world at: https://universe.roboflow.com/ntnuscaledetection/ntnu_segmentation/model/5


Test model

In [None]:
from ultralytics import YOLO
import os

# Every file in the GBIF image folder
images = [os.path.join('../images/gbif_images', image) for image in os.listdir('../images/gbif_images')]

# Load the weights once; reloading them for every batch of 10 repeated the
# same work on each iteration.
# NOTE(review): with a single model object the batches may all be saved into
# one run directory instead of one per batch — confirm nothing downstream
# relies on per-batch folders.
model = YOLO('../models/plant_segmentation_v15.pt')

# Predict in batches of 10 images and save the rendered results
for batch in range(0, len(images), 10):
    res = model(images[batch:batch+10], save=True, iou=0.4, conf=0.6, verbose=False)

In [5]:
from ultralytics import YOLO
import os

# Run the v15 segmentation weights on one GBIF image and save the result
model = YOLO('../models/plant_segmentation_v15.pt')
res = model(
    '../images/gbif_images/1701288394.jpg',
    conf=0.2,
    iou=0.4,
    save=True,
    verbose=False,
)

Results saved to [1m/home/floris/Projects/NTNU/models/runs/segment/predict3[0m


In [4]:
import os
import shutil

# Collect the files of every prediction run into one evaluation folder and
# then delete the run directories.
path = '../models/runs/segment'
# Relative path instead of a machine-specific absolute one.
# NOTE(review): assumes '../images' is the same folder the absolute path
# /home/floris/Projects/NTNU/images referred to — confirm.
save_path = '../images/segment_eval_v15'

os.makedirs(save_path, exist_ok=True)

preds = os.listdir(path)
# Named pred_dir rather than dir: a global called `dir` would shadow the
# builtin for the rest of the kernel session.
for pred_dir in preds:
    for img in os.listdir(os.path.join(path, pred_dir)):
        shutil.move(os.path.join(path, pred_dir, img), save_path)

# Everything has been moved out, so remove the run directories
shutil.rmtree(path)

Resize images

In [31]:
from PIL import Image
import numpy as np
import cv2


def resize_image_with_aspect_ratio(img_path, target_width, target_height, background=(0, 0, 0)):
    """Resize an image to fit a target canvas while keeping its aspect ratio.

    The image is scaled so that it fits entirely inside
    ``target_width`` x ``target_height`` and is then centred on a canvas of
    exactly that size; the uncovered area is filled with ``background``.

    Args:
        img_path: Path of the image file to open.
        target_width: Width of the returned image in pixels (must be positive).
        target_height: Height of the returned image in pixels (must be positive).
        background: RGB fill colour for the padding; black by default.

    Returns:
        A new RGB PIL image of size ``(target_width, target_height)``.

    Raises:
        ValueError: If ``target_width`` or ``target_height`` is not positive.
    """
    if target_width <= 0 or target_height <= 0:
        raise ValueError("target_width and target_height must be positive")

    # The context manager closes the file once the resized copy exists
    with Image.open(img_path) as img:
        target_ratio = target_width / target_height
        img_ratio = img.width / img.height

        if img_ratio > target_ratio:
            # Width is the limiting dimension
            new_width = target_width
            new_height = max(1, round(target_width / img_ratio))
        else:
            # Height is the limiting dimension
            new_height = target_height
            new_width = max(1, round(target_height * img_ratio))
        # max(1, ...) keeps very elongated images from rounding to a 0-pixel side

        resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Canvas of the target size, filled with the background colour
    new_img = Image.new("RGB", (target_width, target_height), background)

    # Centre the resized image on the canvas
    x = (target_width - new_width) // 2
    y = (target_height - new_height) // 2
    new_img.paste(resized_img, (x, y))

    return new_img


image_file = '../images/upload/1802796785.jpg'

# cv2.imread returns None (instead of raising) when the file cannot be read
image_bgr = cv2.imread(image_file)
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {image_file}")
# RGB copy of the image, kept under the original name
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

from ultralytics import YOLO

model = YOLO('../models/plant_segmentation_v2.pt')
# Ultralytics treats numpy inputs as OpenCV-style BGR arrays, so the model
# gets the array exactly as cv2.imread returned it, not the RGB conversion.
results = model(image_bgr)

import matplotlib.pyplot as plt

# Letterbox the same image onto a 640x640 canvas and show it
new_image = resize_image_with_aspect_ratio(image_file, 640, 640)

plt.imshow(new_image)