In [8]:
# Input files, output folders and crop settings used by the cells of this notebook.
image_path = "images/20220329_4_1_P10t001.jpg"
label_path = "labels/20220329_4_1_P10t001.txt"
output_img_folder = 'cropoutput/images'
output_label_folder = 'cropoutput/labels'
split_width = 640
split_height = 640
additional_crops = 3

# Stable split


In [3]:
from PIL import Image
import os

# Original image and its label file
original_image_path, label_file_path = (
    "images/20220329_4_1_P10t001.jpg",
    "labels/20220329_4_1_P10t001.txt",
)

# Folders that receive the cropped images and their label files
output_images_folder, output_labels_folder = 'cropoutput/images', 'cropoutput/labels'



In [36]:
import os
from PIL import Image

def process_image_and_labels(image_path, label_path, patch_width, patch_height, img_output_folder, label_output_folder):
    """
    Split an image into overlapping patches and write a matching YOLO label file per patch.

    The image is covered by a grid of ``patch_width`` x ``patch_height`` patches. When the
    image size is not a multiple of the patch size, neighbouring patches overlap evenly so
    that the last row/column ends exactly at the image border. Every bounding box that
    intersects a patch is clipped to that patch and re-normalised to the patch size, so all
    written coordinates lie in [0, 1].

    :param image_path: Path to the original image file.
    :param label_path: Path to the YOLOv5 label file
        (``class x_center y_center width height`` per line, normalised to the image).
    :param patch_width: The width of each image patch in pixels.
    :param patch_height: The height of each image patch in pixels.
    :param img_output_folder: Folder that receives ``<image name>_patch_<row>_<col>.png``.
    :param label_output_folder: Folder that receives ``<label name>_patch_<row>_<col>.txt``.
    :raises ValueError: If a non-blank label line does not have exactly five fields
        or contains a non-numeric coordinate.
    """
    os.makedirs(img_output_folder, exist_ok=True)
    os.makedirs(label_output_folder, exist_ok=True)

    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    # Load the labels; blank lines (e.g. a trailing newline) are ignored instead of
    # crashing the five-field unpack below.
    with open(label_path, 'r') as file:
        labels = [line.split() for line in file if line.strip()]

    # Function to recalculate labels for a patch
    def recalculate_labels_for_patch(old_labels, patch_coords, img_width, img_height):
        new_labels = []
        patch_x, patch_y, patch_right, patch_lower = patch_coords
        # Normalise by the real patch extent: it is smaller than the nominal patch
        # size when the image itself is smaller than one patch.
        actual_width = patch_right - patch_x
        actual_height = patch_lower - patch_y

        for label in old_labels:
            category, x_center, y_center, width, height = label
            x_center, y_center, width, height = map(float, [x_center, y_center, width, height])

            # Absolute box corners in the original image
            box_xmin = (x_center - width / 2) * img_width
            box_ymin = (y_center - height / 2) * img_height
            box_xmax = (x_center + width / 2) * img_width
            box_ymax = (y_center + height / 2) * img_height

            # Keep only the part of the box that is visible inside the patch
            vis_xmin = max(box_xmin, patch_x)
            vis_ymin = max(box_ymin, patch_y)
            vis_xmax = min(box_xmax, patch_right)
            vis_ymax = min(box_ymax, patch_lower)
            if vis_xmax <= vis_xmin or vis_ymax <= vis_ymin:
                continue  # box does not overlap this patch

            new_x_center = ((vis_xmin + vis_xmax) / 2 - patch_x) / actual_width
            new_y_center = ((vis_ymin + vis_ymax) / 2 - patch_y) / actual_height
            new_width = (vis_xmax - vis_xmin) / actual_width
            new_height = (vis_ymax - vis_ymin) / actual_height

            new_labels.append([category, str(new_x_center), str(new_y_center), str(new_width), str(new_height)])

        return new_labels

    with Image.open(image_path) as img:
        img_width, img_height = img.size

        # Determine the number of patches and the overlap required
        cols = (img_width // patch_width) + (1 if img_width % patch_width > 0 else 0)
        rows = (img_height // patch_height) + (1 if img_height % patch_height > 0 else 0)
        overlap_x = (cols * patch_width - img_width) / (cols - 1) if cols > 1 else 0
        overlap_y = (rows * patch_height - img_height) / (rows - 1) if rows > 1 else 0

        for i in range(rows):
            for j in range(cols):
                # Round the patch origin to whole pixels once, so that the pixels cut
                # out by crop() and the coordinates used for the labels agree.
                patch_x = max(0, int(round(j * patch_width - j * overlap_x)))
                patch_y = max(0, int(round(i * patch_height - i * overlap_y)))
                patch_right = min(img_width, patch_x + patch_width)
                patch_lower = min(img_height, patch_y + patch_height)

                patch = img.crop((patch_x, patch_y, patch_right, patch_lower))
                patch.save(os.path.join(img_output_folder, f"{image_stem}_patch_{i}_{j}.png"))

                new_labels = recalculate_labels_for_patch(
                    labels, (patch_x, patch_y, patch_right, patch_lower), img_width, img_height)
                label_filename = f"{label_stem}_patch_{i}_{j}.txt"
                with open(os.path.join(label_output_folder, label_filename), 'w') as file:
                    for new_label in new_labels:
                        file.write(' '.join(new_label) + '\n')

# Usage
# process_image_and_labels(
#     "path/to/your/image.jpg", 
#     "path/to/your/labels.txt", 
#     640,  # Patch width
#     640,  # Patch height
#     "path/to/your/output/images",  # Folder for the image patches
#     "path/to/your/output/labels"   # Folder for the label files
# )
# process_image_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder)

In [None]:
import os

def process_directory(image_folder, label_folder, output_img_folder, output_label_folder, split_width, split_height, additional_crops=0):
    """
    Process all image files in the specified directory with their corresponding label files.

    Images are handled in sorted filename order so that repeated runs process files
    (and print warnings) in the same order; ``os.listdir`` alone returns entries in
    arbitrary order. Each image ``<name>.<ext>`` is paired with ``<name>.txt`` in
    ``label_folder``; images without a label file are reported and skipped.

    :param image_folder: Path to the folder containing image files (.png, .jpg, .jpeg).
    :param label_folder: Path to the folder containing label files.
    :param output_img_folder: Path to the folder where segmented images will be saved.
    :param output_label_folder: Path to the folder where new label files will be saved.
    :param split_width: Width of the split images.
    :param split_height: Height of the split images.
    :param additional_crops: Number of additional random crops around each object (default is 0).
    """

    # Create output folders if they don't exist
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    # Retrieve all image files in the specified directory, in a deterministic order
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_files = sorted(
        name for name in os.listdir(image_folder)
        if name.lower().endswith(image_extensions) and os.path.isfile(os.path.join(image_folder, name))
    )

    for image_filename in image_files:
        base_filename = os.path.splitext(image_filename)[0]
        image_path = os.path.join(image_folder, image_filename)
        label_path = os.path.join(label_folder, f"{base_filename}.txt")

        # Skip images that have no label file
        if not os.path.isfile(label_path):
            print(f"Warning: Corresponding label file for {image_filename} not found.")
            continue

        # split_image_and_labels must already be defined in the kernel with the
        # (image, label, width, height, img_out, label_out, additional_crops) signature.
        split_image_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder, additional_crops)

    print("Processing completed.")

# Usage: replace the placeholder paths below with real folders before running.
input_images_folder = 'path/to/input/images'
input_labels_folder = 'path/to/input/labels'
output_images_folder = 'path/to/output/images'
output_labels_folder = 'path/to/output/labels'
split_width = 640
split_height = 640
additional_crops = 3  # Optional

# Only start the batch when the input folders exist: with the placeholder paths,
# os.listdir inside process_directory would raise FileNotFoundError and abort the run.
if os.path.isdir(input_images_folder) and os.path.isdir(input_labels_folder):
    process_directory(input_images_folder, input_labels_folder, output_images_folder, output_labels_folder, split_width, split_height, additional_crops)
else:
    print(f"Skipping process_directory: input folders not found ({input_images_folder!r}, {input_labels_folder!r}).")


# Random cropping

In [5]:
#  not working

import os
import random
from PIL import Image

def split_image_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder, additional_crops=0):
    """
    Split an image into a fixed 2 x 3 grid of segments and optionally add random crops per object.

    Grid segments: six ``split_width`` x ``split_height`` windows (2 rows, 3 columns) are
    spread evenly over the image; neighbouring windows overlap when the image is smaller
    than the grid. An object is assigned to every segment that contains its centre, and
    its centre/size are re-normalised to that segment and clamped to [0, 1].

    Random crops: for every labelled object, ``additional_crops`` windows are cut near the
    object (top-left corner shifted by up to half the object size). The label written for
    each crop is the part of the object visible in the crop, normalised to the crop size.

    :param image_path: Path to the source image.
    :param label_path: Path to the YOLO label file
        (``class x_center y_center width height`` per line, normalised to the image).
    :param split_width: Width of each segment / crop in pixels.
    :param split_height: Height of each segment / crop in pixels.
    :param output_img_folder: Folder for ``<image>_segment_<n>.jpg`` and ``<image>_additional_<n>.jpg``.
    :param output_label_folder: Folder for the matching ``.txt`` label files.
    :param additional_crops: Number of random crops per object (default 0).
    """
    # Create the folders if they don't exist
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    # Read the original labels, ignoring blank lines, and convert them to absolute pixels.
    with open(label_path, 'r') as file:
        label_lines = [line for line in file if line.strip()]

    with Image.open(image_path) as img:
        original_width, original_height = img.size

        objects = []  # (class_id, abs_x_center, abs_y_center, abs_width, abs_height)
        for line in label_lines:
            class_id, x_center, y_center, width, height = line.split()
            objects.append((
                class_id,
                float(x_center) * original_width,
                float(y_center) * original_height,
                float(width) * original_width,
                float(height) * original_height,
            ))

        # Fixed segment grid
        rows = 2
        cols = 3

        # Overlap (in pixels) between neighbouring segments so the grid spans the image.
        # A negative value means the image is larger than the grid and gaps remain.
        overlap_x = (cols * split_width - original_width) / (cols - 1)
        overlap_y = (rows * split_height - original_height) / (rows - 1)

        # Segment windows in row-major order; the list index is the segment number.
        segment_boxes = []
        for i in range(rows):
            for j in range(cols):
                left = max(0, j * split_width - j * overlap_x)
                upper = max(0, i * split_height - i * overlap_y)
                right = min(original_width, left + split_width)
                lower = min(original_height, upper + split_height)
                segment_boxes.append((left, upper, right, lower))

        # Save the segments
        for count, box in enumerate(segment_boxes):
            img.crop(box).save(os.path.join(output_img_folder, f'{image_stem}_segment_{count}.jpg'))

        # Assign each object to the segment(s) containing its centre
        segment_labels = [[] for _ in segment_boxes]
        for class_id, abs_x_center, abs_y_center, abs_width, abs_height in objects:
            for segment_index, (left, upper, right, lower) in enumerate(segment_boxes):
                if not (left <= abs_x_center <= right and upper <= abs_y_center <= lower):
                    continue
                segment_width = right - left
                segment_height = lower - upper

                # Coordinates relative to the segment, kept within [0, 1]
                new_x_center = max(0, min(1, (abs_x_center - left) / segment_width))
                new_y_center = max(0, min(1, (abs_y_center - upper) / segment_height))
                new_width = max(0, min(1, abs_width / segment_width))
                new_height = max(0, min(1, abs_height / segment_height))

                segment_labels[segment_index].append(
                    f"{class_id} {new_x_center} {new_y_center} {new_width} {new_height}\n")

        # Write one label file per segment (empty file when the segment has no objects).
        # A separate name is used here so the parsed objects above are not overwritten.
        for segment_index, segment_lines in enumerate(segment_labels):
            label_filename = os.path.join(output_label_folder, f'{label_stem}_segment_{segment_index}.txt')
            with open(label_filename, 'w') as file:
                file.writelines(segment_lines)

        # Additional random crops around each object
        crop_count = 0
        for class_id, abs_x_center, abs_y_center, abs_width, abs_height in objects:
            obj_left = abs_x_center - abs_width / 2
            obj_top = abs_y_center - abs_height / 2
            obj_right = obj_left + abs_width
            obj_bottom = obj_top + abs_height

            for _ in range(additional_crops):
                # Shift the crop origin by up to half the object size in either direction
                variation = random.uniform(-0.5, 0.5)
                crop_x = obj_left + abs_width * variation
                crop_y = obj_top + abs_height * variation

                # Keep the crop inside the image and on whole pixels, so the cut-out
                # pixels and the label coordinates refer to the same window.
                crop_x = int(round(max(0, min(original_width - split_width, crop_x))))
                crop_y = int(round(max(0, min(original_height - split_height, crop_y))))

                # Part of the object that is visible inside the crop
                vis_left = max(obj_left, crop_x)
                vis_top = max(obj_top, crop_y)
                vis_right = min(obj_right, crop_x + split_width)
                vis_bottom = min(obj_bottom, crop_y + split_height)
                if vis_right <= vis_left or vis_bottom <= vis_top:
                    continue  # nothing of the object is visible; skip this crop

                new_label = (
                    f"{class_id} "
                    f"{((vis_left + vis_right) / 2 - crop_x) / split_width} "
                    f"{((vis_top + vis_bottom) / 2 - crop_y) / split_height} "
                    f"{(vis_right - vis_left) / split_width} "
                    f"{(vis_bottom - vis_top) / split_height}\n"
                )

                crop = img.crop((crop_x, crop_y, crop_x + split_width, crop_y + split_height))
                crop.save(os.path.join(output_img_folder, f'{image_stem}_additional_{crop_count}.jpg'))
                label_filename = os.path.join(output_label_folder, f'{label_stem}_additional_{crop_count}.txt')
                with open(label_filename, 'w') as file:
                    file.write(new_label)
                crop_count += 1

# Generic usage example:
# output_images_folder = 'path/to/output/images'
# output_labels_folder = 'path/to/output/labels'
# split_image_and_labels('example.jpg', 'example.txt', 640, 640, output_images_folder, output_labels_folder, additional_crops=num_additional_crops)

# Value passed as additional_crops in the call below
num_additional_crops = 3
output_images_folder, output_labels_folder = 'cropoutput/images', 'cropoutput/labels'
split_image_and_labels(
    original_image_path,
    label_file_path,
    640,
    640,
    output_images_folder,
    output_labels_folder,
    additional_crops=num_additional_crops,
)

In [37]:
import os
import random
from PIL import Image

def split_image_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder, additional_crops=0):
    """
    Cut ``additional_crops`` random windows around every labelled object.

    For each object a ``split_width`` x ``split_height`` window is placed at a random
    position that contains the object whenever the object fits into the window. The
    label written for a crop is the part of the object visible in that crop, normalised
    to the crop size, so all values lie in [0, 1].

    :param image_path: Path to the source image.
    :param label_path: Path to the YOLO label file
        (``class x_center y_center width height`` per line, normalised to the image).
    :param split_width: Crop width in pixels.
    :param split_height: Crop height in pixels.
    :param output_img_folder: Folder for ``<image>_<object index>_crop_<n>.jpg``.
    :param output_label_folder: Folder for ``<label>_<object index>_crop_<n>.txt``.
    :param additional_crops: Number of random crops per object (default 0).
    """
    # Ensure output directories exist
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    # Read original labels, ignoring blank lines
    with open(label_path, 'r') as file:
        labels = [line for line in file if line.strip()]

    def pick_origin(obj_start, obj_size, window, image_size):
        """Random window start along one axis such that the window overlaps the object."""
        # Largest start that keeps the window inside the image (0 if the image is smaller).
        last_start = max(0, image_size - window)
        low = min(max(0, int(obj_start + obj_size - window)), last_start)
        high = min(max(0, int(obj_start)), last_start)
        if low > high:
            # Object larger than the window: no start contains it completely, but any
            # start between the two bounds still lies on the object.
            low, high = high, low
        return random.randint(low, high)

    with Image.open(image_path) as original_img:
        original_width, original_height = original_img.size

        # Process each label for additional crops
        for idx, label in enumerate(labels):
            class_id, x_center, y_center, width, height = label.split()

            abs_width = float(width) * original_width
            abs_height = float(height) * original_height
            orig_left = float(x_center) * original_width - abs_width / 2
            orig_top = float(y_center) * original_height - abs_height / 2
            orig_right = orig_left + abs_width
            orig_bottom = orig_top + abs_height

            for count in range(additional_crops):
                crop_left = pick_origin(orig_left, abs_width, split_width, original_width)
                crop_top = pick_origin(orig_top, abs_height, split_height, original_height)
                crop_right = crop_left + split_width
                crop_bottom = crop_top + split_height

                # Part of the object that is visible inside the crop
                vis_left = max(orig_left, crop_left)
                vis_top = max(orig_top, crop_top)
                vis_right = min(orig_right, crop_right)
                vis_bottom = min(orig_bottom, crop_bottom)
                if vis_right <= vis_left or vis_bottom <= vis_top:
                    continue  # object lies outside the image area; nothing to label

                crop = original_img.crop((crop_left, crop_top, crop_right, crop_bottom))
                crop.save(os.path.join(output_img_folder, f"{image_stem}_{idx}_crop_{count}.jpg"))

                new_x_center = ((vis_left + vis_right) / 2 - crop_left) / split_width
                new_y_center = ((vis_top + vis_bottom) / 2 - crop_top) / split_height
                new_width = (vis_right - vis_left) / split_width
                new_height = (vis_bottom - vis_top) / split_height

                new_label = f"{class_id} {new_x_center} {new_y_center} {new_width} {new_height}\n"
                crop_label_filename = f"{label_stem}_{idx}_crop_{count}.txt"
                with open(os.path.join(output_label_folder, crop_label_filename), 'w') as f:
                    f.write(new_label)


# Value passed as additional_crops in the call below
num_additional_crops = 3
# Generic form:
# split_image_and_labels('example.jpg', 'example.txt', 640, 640, output_images_folder, output_labels_folder, additional_crops=num_additional_crops)
output_images_folder, output_labels_folder = 'cropoutput/images', 'cropoutput/labels'
split_image_and_labels(
    original_image_path,
    label_file_path,
    640,
    640,
    output_images_folder,
    output_labels_folder,
    additional_crops=num_additional_crops,
)

# Crops and tiling

In [24]:
import os
import random
from PIL import Image

def split_image_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder, additional_crops=0):
    """
    Tile an image into non-overlapping crops and optionally add random crops per object.

    Tiling: the image is cut into ``original_width // split_width`` columns and
    ``original_height // split_height`` rows of full-size tiles; a remainder strip at the
    right/bottom edge that is narrower than one tile is not tiled. Each tile's label file
    lists every object that overlaps the tile, clipped to the tile.

    Random crops: when ``additional_crops > 0``, that many ``split_width`` x
    ``split_height`` windows are cut around every object, each with a one-line label file
    describing that object relative to the crop's own origin.

    All written coordinates are normalised to the crop size and lie in [0, 1].

    :param image_path: Path to the source image.
    :param label_path: Path to the YOLO label file
        (``class x_center y_center width height`` per line, normalised to the image).
    :param split_width: Tile / crop width in pixels.
    :param split_height: Tile / crop height in pixels.
    :param output_img_folder: Folder for ``<image>_<row>_<col>.jpg`` and ``<image>_<object index>_crop_<n>.jpg``.
    :param output_label_folder: Folder for the matching ``.txt`` label files.
    :param additional_crops: Number of random crops per object (default 0).
    """
    # Ensure output directories exist
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    # Read original labels, ignoring blank lines
    with open(label_path, 'r') as file:
        labels = [line for line in file if line.strip()]

    def relative_label(class_id, box, origin_x, origin_y):
        """Label line for ``box`` clipped to the window at (origin_x, origin_y), or None if not visible."""
        box_left, box_top, box_right, box_bottom = box
        vis_left = max(box_left, origin_x)
        vis_top = max(box_top, origin_y)
        vis_right = min(box_right, origin_x + split_width)
        vis_bottom = min(box_bottom, origin_y + split_height)
        if vis_right <= vis_left or vis_bottom <= vis_top:
            return None
        new_x_center = ((vis_left + vis_right) / 2 - origin_x) / split_width
        new_y_center = ((vis_top + vis_bottom) / 2 - origin_y) / split_height
        new_width = (vis_right - vis_left) / split_width
        new_height = (vis_bottom - vis_top) / split_height
        return f"{class_id} {new_x_center} {new_y_center} {new_width} {new_height}\n"

    def pick_origin(obj_start, obj_size, window, image_size):
        """Random window start along one axis such that the window overlaps the object."""
        last_start = max(0, image_size - window)
        low = min(max(0, int(obj_start + obj_size - window)), last_start)
        high = min(max(0, int(obj_start)), last_start)
        if low > high:
            # Object larger than the window: swap so randint gets a valid range;
            # every start in it still lies on the object.
            low, high = high, low
        return random.randint(low, high)

    with Image.open(image_path) as original_img:
        original_width, original_height = original_img.size

        # Absolute pixel boxes: (class_id, (left, top, right, bottom))
        objects = []
        for label in labels:
            class_id, x_center, y_center, width, height = label.split()
            abs_width = float(width) * original_width
            abs_height = float(height) * original_height
            orig_left = float(x_center) * original_width - abs_width / 2
            orig_top = float(y_center) * original_height - abs_height / 2
            objects.append((class_id, (orig_left, orig_top, orig_left + abs_width, orig_top + abs_height)))

        # Number of full tiles that fit into the image
        cols = original_width // split_width
        rows = original_height // split_height

        # Tile cropping
        for i in range(rows):
            for j in range(cols):
                crop_left = j * split_width
                crop_top = i * split_height

                cropped_img = original_img.crop(
                    (crop_left, crop_top, crop_left + split_width, crop_top + split_height))
                cropped_img.save(os.path.join(output_img_folder, f"{image_stem}_{i}_{j}.jpg"))

                tile_lines = (relative_label(class_id, box, crop_left, crop_top) for class_id, box in objects)
                with open(os.path.join(output_label_folder, f"{label_stem}_{i}_{j}.txt"), 'w') as f:
                    f.writelines(line for line in tile_lines if line is not None)

        # Additional random crops per object
        if additional_crops > 0:
            for idx, (class_id, box) in enumerate(objects):
                box_left, box_top, box_right, box_bottom = box
                for count in range(additional_crops):
                    crop_left = pick_origin(box_left, box_right - box_left, split_width, original_width)
                    crop_top = pick_origin(box_top, box_bottom - box_top, split_height, original_height)

                    # The label must be relative to THIS crop's origin, not to the
                    # coordinates of the last tile processed above.
                    new_label = relative_label(class_id, box, crop_left, crop_top)
                    if new_label is None:
                        continue  # object lies outside the image area; nothing to label

                    crop = original_img.crop(
                        (crop_left, crop_top, crop_left + split_width, crop_top + split_height))
                    crop.save(os.path.join(output_img_folder, f"{image_stem}_{idx}_crop_{count}.jpg"))

                    crop_label_filename = f"{label_stem}_{idx}_crop_{count}.txt"
                    with open(os.path.join(output_label_folder, crop_label_filename), 'w') as f:
                        f.write(new_label)

# Run the tiling + random-crop split with the notebook-level settings.
split_image_and_labels(
    image_path,
    label_path,
    split_width,
    split_height,
    output_img_folder,
    output_label_folder,
    additional_crops,
)
 

In [35]:
import os
import random
from PIL import Image

def convert_labels_to_pixel_coordinates(labels, img_width, img_height):
    """
    Turn YOLO label lines into integer pixel boxes.

    Each line is expected as ``class x_center y_center width height`` with the four
    numbers given as fractions of the image size. Lines with fewer than five fields
    are dropped; fields beyond the fifth are ignored.

    :param labels: Iterable of label lines (strings).
    :param img_width: Image width in pixels.
    :param img_height: Image height in pixels.
    :return: List of ``[class, x_min, y_min, x_max, y_max]`` with integer pixel values
        (the class stays a string).
    """
    def to_pixel_box(fields):
        cls = fields[0]
        rel_cx, rel_cy, rel_w, rel_h = (float(value) for value in fields[1:5])

        # Sizes and centre are truncated to whole pixels before the corners are derived
        span_x = int(rel_w * img_width)
        span_y = int(rel_h * img_height)
        left = int(rel_cx * img_width) - span_x // 2
        top = int(rel_cy * img_height) - span_y // 2
        return [cls, left, top, left + span_x, top + span_y]

    tokenised = (line.strip().split() for line in labels)
    return [to_pixel_box(fields) for fields in tokenised if len(fields) >= 5]

def convert_to_yolo_labels(labels, img_width, img_height):
    """
    Turn pixel boxes back into YOLO label lines.

    :param labels: Iterable of ``[class, x_min, y_min, x_max, y_max]`` entries.
    :param img_width: Width the x values are normalised by.
    :param img_height: Height the y values are normalised by.
    :return: List of ``"class x_center y_center width height\\n"`` strings; the centre is
        computed with integer halving of the box size before normalisation.
    """
    yolo_lines = []
    for cls, left, top, right, bottom in labels:
        span_x = right - left
        span_y = bottom - top

        # Centre and size as fractions of the image dimensions
        centre_x = (left + span_x // 2) / img_width
        centre_y = (top + span_y // 2) / img_height
        rel_w = span_x / img_width
        rel_h = span_y / img_height

        yolo_lines.append(f"{cls} {centre_x} {centre_y} {rel_w} {rel_h}\n")
    return yolo_lines

def calculate_systematic_crops(original_width, original_height, crop_size):
    """
    List the square crop windows of a regular grid covering the whole image.

    The grid step equals ``crop_size``, so windows sit edge to edge. When the image size
    is not a multiple of ``crop_size`` the last column/row of windows extends past the
    image border.

    :param original_width: Image width in pixels.
    :param original_height: Image height in pixels.
    :param crop_size: Side length of each square window.
    :return: List of ``(start_x, start_y, end_x, end_y)`` tuples, ordered column by
        column (x outer, y inner).
    """
    step = crop_size
    columns = (original_width - 1) // step + 1
    lines = (original_height - 1) // step + 1

    return [
        (col * step, line * step, col * step + crop_size, line * step + crop_size)
        for col in range(columns)
        for line in range(lines)
    ]

def split_image_and_labels(image_path, label_path, output_image_folder, output_label_folder, crop_size, num_random_crops=0):
    """
    Split the image and labels into systematic crops and optional random crops.

    Systematic crops: the windows returned by ``calculate_systematic_crops`` are cut out
    and numbered from 1; each gets a label file with every box that overlaps the window,
    clipped to it.

    Random crops: for every labelled box, ``num_random_crops`` square windows of
    ``crop_size`` are placed at a random offset around the box and saved together with a
    one-line label file for that box, clipped to the window.

    :param image_path: Path to the source image.
    :param label_path: Path to the YOLO label file.
    :param output_image_folder: Folder for ``<image>_crop<n><ext>`` and
        ``<image>_random<object>_<k><ext>``.
    :param output_label_folder: Folder for the matching ``.txt`` label files.
    :param crop_size: Side length of the square crops in pixels.
    :param num_random_crops: Number of random crops per labelled box (default 0).
    """
    # Use the folders passed in as parameters rather than notebook-level globals.
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    image_name, image_ext = os.path.splitext(os.path.basename(image_path))
    label_name, _ = os.path.splitext(os.path.basename(label_path))

    with open(label_path, 'r') as file:
        labels = file.readlines()

    def save_crop(cropped_image, pixel_labels, suffix):
        """Write one cropped image and its YOLO label file, both named with ``suffix``."""
        cropped_image.save(os.path.join(output_image_folder, f"{image_name}_{suffix}{image_ext}"))
        with open(os.path.join(output_label_folder, f"{label_name}_{suffix}.txt"), 'w') as file:
            file.writelines(convert_to_yolo_labels(pixel_labels, crop_size, crop_size))

    def clip_to_crop(box, start_x, start_y):
        """Box shifted into crop coordinates and clipped to the crop, or None if not visible."""
        cls, x_min, y_min, x_max, y_max = box
        new_x_min = max(x_min - start_x, 0)
        new_y_min = max(y_min - start_y, 0)
        new_x_max = min(x_max - start_x, crop_size)
        new_y_max = min(y_max - start_y, crop_size)
        if new_x_max <= new_x_min or new_y_max <= new_y_min:
            return None
        return [cls, new_x_min, new_y_min, new_x_max, new_y_max]

    with Image.open(image_path) as image:
        original_width, original_height = image.size

        pixel_coordinates_labels = convert_labels_to_pixel_coordinates(labels, original_width, original_height)
        systematic_crops = calculate_systematic_crops(original_width, original_height, crop_size)

        # Process systematic crops
        for index, (start_x, start_y, end_x, end_y) in enumerate(systematic_crops, 1):
            cropped_image = image.crop((start_x, start_y, end_x, end_y))
            clipped = (clip_to_crop(box, start_x, start_y) for box in pixel_coordinates_labels)
            save_crop(cropped_image, [box for box in clipped if box is not None], f"crop{index}")

        # Process random crops; files are numbered per labelled box so that crops of
        # different boxes do not overwrite each other.
        for label_index, box in enumerate(pixel_coordinates_labels, 1):
            cls, x_min, y_min, x_max, y_max = box
            # Free space around the box inside the crop; 0 when the box is larger than
            # the crop, which keeps random.randint from receiving an empty range.
            slack_x = max(crop_size - (x_max - x_min), 0)
            slack_y = max(crop_size - (y_max - y_min), 0)

            for i in range(num_random_crops):
                # Random crop origin, kept within the original image dimensions
                rand_x = max(min(x_min - random.randint(0, slack_x), original_width - crop_size), 0)
                rand_y = max(min(y_min - random.randint(0, slack_y), original_height - crop_size), 0)

                clipped_box = clip_to_crop(box, rand_x, rand_y)
                if clipped_box is None:
                    continue  # box lies outside the image area; nothing to label

                cropped_image = image.crop((rand_x, rand_y, rand_x + crop_size, rand_y + crop_size))
                save_crop(cropped_image, [clipped_box], f"random{label_index}_{i+1}")

In [23]:
# Run the crop_size-based split: folders come before the crop size in this signature.
split_image_and_labels(
    image_path,
    label_path,
    output_img_folder,
    output_label_folder,
    split_width,
    additional_crops,
)
 

# Full code

In [1]:
import os
from PIL import Image

def process_image_and_labels(image_path, label_path, patch_width, patch_height, img_output_folder, label_output_folder):
    """
    Split an image into a grid of overlapping patches and write matching YOLO labels.

    The grid uses the fewest patches of ``patch_width`` x ``patch_height`` that
    cover the image; the surplus coverage is spread evenly as overlap between
    neighbouring patches. Each patch is saved as
    ``<image stem>_patch_<row>_<col>.png``. When ``label_path`` is given, a
    ``<label stem>_patch_<row>_<col>.txt`` file is written for every patch
    (it is empty when no box overlaps that patch).

    A box that overlaps a patch is clipped to the patch and normalised by the
    patch's actual pixel size, so every written value lies in [0, 1] and the
    box never extends past the patch border.

    :param image_path: Path to the original image file.
    :param label_path: Path to the YOLO label file (``class x_center y_center
        width height`` per line, normalised), or None/empty to write no labels.
    :param patch_width: The width of each image patch in pixels.
    :param patch_height: The height of each image patch in pixels.
    :param img_output_folder: Folder that receives the patch images (created if missing).
    :param label_output_folder: Folder that receives the patch label files (created if missing).
    :raises ValueError: If a non-blank label line does not have exactly five
        fields or its coordinates are not numeric.
    """
    os.makedirs(img_output_folder, exist_ok=True)
    os.makedirs(label_output_folder, exist_ok=True)

    image_stem = os.path.splitext(os.path.basename(image_path))[0]

    # The context manager releases the image file once every patch is written.
    with Image.open(image_path) as img:
        img_width, img_height = img.size

        # Ceiling division: the fewest patches that cover each axis.
        cols = -(-img_width // patch_width)
        rows = -(-img_height // patch_height)
        # Spread the surplus coverage evenly over the gaps between neighbours.
        overlap_x = (cols * patch_width - img_width) / (cols - 1) if cols > 1 else 0
        overlap_y = (rows * patch_height - img_height) / (rows - 1) if rows > 1 else 0

        labels = []
        label_stem = None
        if label_path:
            label_stem = os.path.splitext(os.path.basename(label_path))[0]
            with open(label_path, 'r') as file:
                # Blank lines (e.g. a trailing newline) are skipped so they
                # cannot break the five-field unpacking below.
                labels = [line.split() for line in file if line.strip()]

        def recalculate_labels_for_patch(old_labels, patch_coords):
            """Return the labels visible in a patch, clipped to it and normalised by its size."""
            patch_x, patch_y, patch_right, patch_lower = patch_coords
            # The crop may be smaller than the nominal patch when the image is
            # smaller than one patch, so normalise by what was actually cut.
            actual_width = patch_right - patch_x
            actual_height = patch_lower - patch_y

            new_labels = []
            for category, x_center, y_center, width, height in old_labels:
                x_center, y_center, width, height = map(float, [x_center, y_center, width, height])

                # Absolute pixel corners of the box in the original image
                box_x_center = x_center * img_width
                box_y_center = y_center * img_height
                box_width = width * img_width
                box_height = height * img_height
                box_xmin = box_x_center - (box_width / 2)
                box_ymin = box_y_center - (box_height / 2)
                box_xmax = box_x_center + (box_width / 2)
                box_ymax = box_y_center + (box_height / 2)

                # Keep only the part of the box that lies inside the patch
                clipped_xmin = max(box_xmin, patch_x)
                clipped_ymin = max(box_ymin, patch_y)
                clipped_xmax = min(box_xmax, patch_right)
                clipped_ymax = min(box_ymax, patch_lower)
                if clipped_xmax <= clipped_xmin or clipped_ymax <= clipped_ymin:
                    continue  # no visible area in this patch

                new_x_center = ((clipped_xmin + clipped_xmax) / 2 - patch_x) / actual_width
                new_y_center = ((clipped_ymin + clipped_ymax) / 2 - patch_y) / actual_height
                new_width = (clipped_xmax - clipped_xmin) / actual_width
                new_height = (clipped_ymax - clipped_ymin) / actual_height

                new_labels.append([category, str(new_x_center), str(new_y_center), str(new_width), str(new_height)])

            return new_labels

        for i in range(rows):
            for j in range(cols):
                # Round the patch origin to whole pixels up front so the saved
                # crop and the label arithmetic use exactly the same rectangle.
                patch_x = max(0, int(round(j * (patch_width - overlap_x))))
                patch_y = max(0, int(round(i * (patch_height - overlap_y))))
                patch_right = min(img_width, patch_x + patch_width)
                patch_lower = min(img_height, patch_y + patch_height)
                patch_box = (patch_x, patch_y, patch_right, patch_lower)

                patch_filename = f"{image_stem}_patch_{i}_{j}.png"
                img.crop(patch_box).save(os.path.join(img_output_folder, patch_filename))

                if label_path:
                    new_labels = recalculate_labels_for_patch(labels, patch_box)
                    label_filename = f"{label_stem}_patch_{i}_{j}.txt"
                    with open(os.path.join(label_output_folder, label_filename), 'w') as file:
                        for new_label in new_labels:
                            file.write(' '.join(new_label) + '\n')

# Usage
# process_image_and_labels(
#     "path/to/your/image.jpg",
#     "path/to/your/labels.txt",
#     640,  # Patch width
#     640,  # Patch height
#     "path/to/your/output/images",  # Folder for the patch images
#     "path/to/your/output/labels"   # Folder for the patch label files
# )


In [2]:
import os
import random
from PIL import Image

def random_crops_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder, additional_crops=0, min_visible_fraction=0.8):
    """
    Save random fixed-size crops around every labelled object, with YOLO labels.

    For each box in the label file, ``additional_crops`` crops of
    ``split_width`` x ``split_height`` pixels are placed at random positions
    that contain the box. Every box of the image that is sufficiently visible
    in a crop is written to that crop's label file, clipped to the crop and
    normalised by the crop size. A crop is saved as
    ``<image stem>_<box index>_crop_<n>.png`` with a matching ``.txt`` file
    only when at least one box qualifies.

    :param image_path: Path to the original image file.
    :param label_path: Path to the YOLO label file; when None/empty nothing is cropped.
    :param split_width: Crop width in pixels.
    :param split_height: Crop height in pixels.
    :param output_img_folder: Folder that receives the crops (created if missing).
    :param output_label_folder: Folder that receives the label files (created if missing).
    :param additional_crops: Number of random crops to draw per labelled box.
    :param min_visible_fraction: Minimum share of a box's area that must fall
        inside a crop for the box to be labelled in it.
    :raises ValueError: If a non-blank label line does not have exactly five
        fields or its coordinates are not numeric.
    """
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    # Without a label file there are no objects to crop around.
    if not label_path:
        return

    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    def pick_crop_origin(box_start, box_end, crop_length, image_length):
        """Pick a random crop origin along one axis that keeps the box inside the crop."""
        lowest = max(0, int(box_end - crop_length))
        highest = int(box_start)
        if lowest > highest:
            # The box is longer than the crop, so no origin can contain it;
            # pick an origin inside the box instead of letting randint raise
            # on an empty range.
            lowest, highest = highest, lowest
        # Keep the crop from running past the far edge of the image.
        # NOTE(review): if the image is smaller than the crop this bound is
        # negative and the crop starts outside the image — confirm that the
        # resulting padded crop is acceptable.
        return min(random.randint(lowest, highest), image_length - crop_length)

    def visible_region(bbox, crop_box):
        """Return the part of ``bbox`` inside ``crop_box``, or None if too little of it is visible."""
        _, left, top, right, bottom = bbox
        crop_left, crop_top, crop_right, crop_bottom = crop_box

        inter_left = max(left, crop_left)
        inter_top = max(top, crop_top)
        inter_right = min(right, crop_right)
        inter_bottom = min(bottom, crop_bottom)

        # max(0, ...) keeps disjoint rectangles from producing a positive area
        inter_area = max(0, inter_right - inter_left) * max(0, inter_bottom - inter_top)
        box_area = (right - left) * (bottom - top)

        if inter_area > 0 and inter_area >= min_visible_fraction * box_area:
            return inter_left, inter_top, inter_right, inter_bottom
        return None

    # The context manager releases the image file once all crops are written.
    with Image.open(image_path) as original_img:
        original_width, original_height = original_img.size

        with open(label_path, 'r') as file:
            # Blank lines are skipped so they cannot break the unpacking below.
            label_rows = [line.split() for line in file if line.strip()]

        # Absolute pixel boxes (class_id, left, top, right, bottom), limited to the image
        bboxes = []
        for class_id, x_center, y_center, width, height in label_rows:
            abs_x_center = float(x_center) * original_width
            abs_y_center = float(y_center) * original_height
            abs_width = float(width) * original_width
            abs_height = float(height) * original_height

            left = max(0, abs_x_center - abs_width / 2)
            top = max(0, abs_y_center - abs_height / 2)
            right = min(original_width, abs_x_center + abs_width / 2)
            bottom = min(original_height, abs_y_center + abs_height / 2)
            bboxes.append((class_id, left, top, right, bottom))

        for idx, (_, left, top, right, bottom) in enumerate(bboxes):
            for count in range(additional_crops):
                crop_left = pick_crop_origin(left, right, split_width, original_width)
                crop_top = pick_crop_origin(top, bottom, split_height, original_height)
                crop_box = (crop_left, crop_top, crop_left + split_width, crop_top + split_height)

                intersecting_labels = []
                for other_bbox in bboxes:
                    region = visible_region(other_bbox, crop_box)
                    if region is None:
                        continue
                    vis_left, vis_top, vis_right, vis_bottom = region

                    # Centre and size come from the clipped box, so the label
                    # stays inside the crop and every value lies in [0, 1].
                    new_x_center = ((vis_left + vis_right) / 2 - crop_left) / split_width
                    new_y_center = ((vis_top + vis_bottom) / 2 - crop_top) / split_height
                    new_width = (vis_right - vis_left) / split_width
                    new_height = (vis_bottom - vis_top) / split_height

                    intersecting_labels.append(
                        f"{other_bbox[0]} {new_x_center} {new_y_center} {new_width} {new_height}\n"
                    )

                if not intersecting_labels:
                    continue

                # Cut the pixels only for crops that are actually kept
                crop_filename = f"{image_stem}_{idx}_crop_{count}.png"
                original_img.crop(crop_box).save(os.path.join(output_img_folder, crop_filename))

                crop_label_filename = f"{label_stem}_{idx}_crop_{count}.txt"
                with open(os.path.join(output_label_folder, crop_label_filename), 'w') as f:
                    f.writelines(intersecting_labels)

# After defining the function, you can call it with the appropriate parameters as per your use case.


In [3]:
# output_images_folder = 'cropped/images'
# output_labels_folder = 'cropped/labels'
# image_path = "images/20220329_C2_P01t001.png"
# label_path = "labels/20220329_C2_P01t001.txt"
# split_width = split_height = 640 
# additional_crops = 1

# # process_image_and_labels(image_path, label_path, split_width, split_width, output_images_folder, output_labels_folder)

# num_additional_crops = 1  # for example
# split_image_and_labels('example.jpg', 'example.txt', 640, 640, output_images_folder, output_labels_folder, additional_crops=num_additional_crops)

# random_crops_and_labels(image_path, label_path, split_width, split_width, output_images_folder, output_labels_folder, additional_crops=num_additional_crops)

In [3]:
import os

def process_directory(image_folder, label_folder, output_img_folder, output_label_folder, split_width, split_height, additional_crops=0):
    """
    Split every image in a folder into patches, together with its label file.

    Each ``.png`` / ``.jpg`` / ``.jpeg`` file in ``image_folder`` is passed to
    ``process_image_and_labels`` along with ``<image stem>.txt`` from
    ``label_folder``. When that label file does not exist a warning is printed
    and ``None`` is passed as the label path.

    :param image_folder: Path to the folder containing image files.
    :param label_folder: Path to the folder containing label files.
    :param output_img_folder: Path to the folder where segmented images will be saved.
    :param output_label_folder: Path to the folder where new label files will be saved.
    :param split_width: Width of the split images.
    :param split_height: Height of the split images.
    :param additional_crops: Accepted for compatibility; currently unused
        because the random-crop step is disabled.
    """
    os.makedirs(output_img_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    # Sorted so the processing order does not depend on the file system
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if os.path.isfile(os.path.join(image_folder, f)) and f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    for image_filename in image_files:
        base_filename = os.path.splitext(image_filename)[0]
        image_path = os.path.join(image_folder, image_filename)
        label_path = os.path.join(label_folder, f"{base_filename}.txt")

        if not os.path.isfile(label_path):
            print(f"Warning: Corresponding label file for {image_filename} not found.")
            label_path = None

        print(image_path)
        # Write to the folders this function was given, not to whatever
        # notebook-level variables happen to be defined.
        process_image_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder)

        # Random crops are disabled ("not sure useful"); to re-enable:
        # random_crops_and_labels(image_path, label_path, split_width, split_height, output_img_folder, output_label_folder, additional_crops=additional_crops)

    print("Processing completed.")

# Usage:
# input_images_folder = 'path/to/input/images'
# input_labels_folder = 'path/to/input/labels'
# output_images_folder = 'path/to/output/images'
# output_labels_folder = 'path/to/output/labels'
# split_width = 640
# split_height = 640
# additional_crops = 3  # Optional

# Defaults for a single dataset under E:/Scott/Data. The loop at the bottom of
# this cell reassigns the four folder variables for every dataset it processes.
dataset = "20231110"
# dataset = "20240417"

user = "Scott"

output_images_folder = "E:/Scott/Data/"+dataset+"/cropped/images"
output_labels_folder = "E:/Scott/Data/"+dataset+"/cropped/labels"

input_images_folder = "E:/Scott/Data/"+dataset+"/images"
input_labels_folder = "E:/Scott/Data/"+dataset+"/"+user+"_labels"
split_width = split_height = 640
additional_crops = 1

from glob import glob
# Every entry matched by this pattern is treated as one dataset folder.
machine = '/mnt/Raid_partition_1/internal_tmp/brooks/dockerdir/Github/TrainingData/*'
# Sorted so datasets are processed in the same order on every run
imgsets = sorted(glob(machine))
# imgset = '20231110'
# imgset = '20240417'
for imgpath in imgsets:
    input_images_folder = imgpath+'/images'
    input_labels_folder = imgpath+'/'+user+'_labels'
    output_images_folder = imgpath+"/"+user+"/cropped/images"
    output_labels_folder = imgpath+"/"+user+"/cropped/labels"
    # Build this user's label files from their selections, then cut the
    # images and labels into patches.
    # NOTE(review): convert_selections_multiphase is defined outside this
    # cell — confirm the argument order against its definition.
    convert_selections_multiphase(imgpath+'/user_selections/'+user+'.xml', imgpath+'/images/cell_reigons.xml', input_labels_folder, user, imgpath)
    process_directory(input_images_folder, input_labels_folder, output_images_folder, output_labels_folder, split_width, split_height, additional_crops)




E:/Scott/Data/20231110/images\20231110_P01_t001.png
E:/Scott/Data/20231110/images\20231110_P01_t002.png
E:/Scott/Data/20231110/images\20231110_P01_t003.png
E:/Scott/Data/20231110/images\20231110_P01_t004.png
E:/Scott/Data/20231110/images\20231110_P01_t005.png
E:/Scott/Data/20231110/images\20231110_P01_t006.png
E:/Scott/Data/20231110/images\20231110_P01_t007.png
E:/Scott/Data/20231110/images\20231110_P01_t008.png
E:/Scott/Data/20231110/images\20231110_P01_t009.png
E:/Scott/Data/20231110/images\20231110_P01_t010.png
E:/Scott/Data/20231110/images\20231110_P01_t011.png
E:/Scott/Data/20231110/images\20231110_P01_t012.png
E:/Scott/Data/20231110/images\20231110_P01_t013.png
E:/Scott/Data/20231110/images\20231110_P01_t014.png
E:/Scott/Data/20231110/images\20231110_P01_t015.png
E:/Scott/Data/20231110/images\20231110_P01_t016.png
E:/Scott/Data/20231110/images\20231110_P01_t017.png
E:/Scott/Data/20231110/images\20231110_P01_t018.png
E:/Scott/Data/20231110/images\20231110_P01_t019.png
E:/Scott/Dat

In [75]:
import os
import shutil
import random
import glob

# Specify the paths to your source "images" and "labels" folders
# dataset = "20231110"
dataset = "20240417"

source_images_folder = "E:/Scott/Data/"+dataset+"/cropped/images"
source_labels_folder = "E:/Scott/Data/"+dataset+"/cropped/labels"

# Specify the paths to your target "training" and "validation" folders
target_training_folder = "E:/Scott/Data/"+dataset+"/cropped/training"
target_validation_folder = "E:/Scott/Data/"+dataset+"/cropped/validation"

# Define the split ratio (e.g., 80% training, 20% validation)
split_ratio = 0.8

# Fixed seed so the same files land in the same subset on every run
split_seed = 42

# Create the target "training" and "validation" folders if they don't exist
os.makedirs(target_training_folder, exist_ok=True)
os.makedirs(target_validation_folder, exist_ok=True)

# List all image files in the source "images" folder; sorted because glob
# returns them in file-system order, which would defeat the fixed seed
image_files = sorted(glob.glob(os.path.join(source_images_folder, "*.png")))

# Shuffle with a private, seeded generator to get a reproducible random split
# NOTE(review): if these are overlapping patches of the same source images,
# a per-patch random split puts shared pixels in both subsets — confirm that
# is intended, or split by source image instead.
random.Random(split_seed).shuffle(image_files)

# Calculate the number of files for training based on the split ratio
num_training = int(len(image_files) * split_ratio)

# Split the image files into training and validation sets
training_images = image_files[:num_training]
validation_images = image_files[num_training:]

def copy_image_and_label_files(source_paths, target_image_folder, target_label_folder):
    """
    Copy each image, and its label file when one exists, into the target folders.

    The label for an image is looked up in the notebook-level
    ``source_labels_folder`` at the image's path relative to the
    notebook-level ``source_images_folder``, with the extension changed to
    ``.txt``. An image without a label file is still copied and a warning is
    printed.

    :param source_paths: Paths of the image files to copy.
    :param target_image_folder: Folder that receives the images.
    :param target_label_folder: Folder that receives the label files.
    """
    for source_path in source_paths:
        # Path of the image relative to the source images folder
        relative_path = os.path.relpath(source_path, source_images_folder)
        # Swap only the final extension; str.replace would also rewrite a
        # ".png" that appears earlier in the file or folder name.
        relative_label_path = os.path.splitext(relative_path)[0] + ".txt"

        target_image_path = os.path.join(target_image_folder, relative_path)
        target_label_path = os.path.join(target_label_folder, relative_label_path)

        # Create any necessary subdirectories in the target folders
        os.makedirs(os.path.dirname(target_image_path), exist_ok=True)
        os.makedirs(os.path.dirname(target_label_path), exist_ok=True)

        shutil.copy(source_path, target_image_path)

        label_file = os.path.join(source_labels_folder, relative_label_path)
        if os.path.isfile(label_file):
            shutil.copy(label_file, target_label_path)
        else:
            # Images can exist without labels; do not abort the whole copy.
            print(f"Warning: no label file found for {relative_path}; copied the image only.")

# Fill the training folder first, then the validation folder; each gets its
# own "images" and "labels" sub-folders.
for subset_images, subset_root in (
    (training_images, target_training_folder),
    (validation_images, target_validation_folder),
):
    copy_image_and_label_files(
        subset_images,
        os.path.join(subset_root, "images"),
        os.path.join(subset_root, "labels"),
    )

print("Data split and copied into training and validation sets.")


Data split and copied into training and validation sets.


In [5]:
import os
import shutil
import glob

# Where the cropped images and their label files are read from
source_images_folder = "cropped/images"
source_labels_folder = "cropped/labels"

# Destination roots for the two subsets
target_training_folder = "cropped/training"
target_validation_folder = "cropped/validation"

# Fragments that decide which subset an image belongs to. They are matched
# against the whole path string returned by glob, not just the file name.
training_substrings = ["20220329_C3_", "20220329_C2_", "20220727_C1_"]
validation_substrings = ["20220727_C2_"]

# Make sure both destination roots exist
os.makedirs(target_training_folder, exist_ok=True)
os.makedirs(target_validation_folder, exist_ok=True)

# Every PNG directly inside the source images folder
image_files = glob.glob(os.path.join(source_images_folder, "*.png"))

# Training takes every path containing a training fragment
training_images = [
    path for path in image_files
    if any(fragment in path for fragment in training_substrings)
]

# Validation takes the paths that are not training and contain a validation
# fragment; paths matching neither list are left out of both subsets
validation_images = [
    path for path in image_files
    if not any(fragment in path for fragment in training_substrings)
    and any(fragment in path for fragment in validation_substrings)
]

def copy_image_and_label_files(source_paths, target_image_folder, target_label_folder):
    """
    Copy each image, and its label file when one exists, into the target folders.

    The label for an image is looked up in the notebook-level
    ``source_labels_folder`` at the image's path relative to the
    notebook-level ``source_images_folder``, with the extension changed to
    ``.txt``. An image without a label file is still copied and a warning is
    printed.

    :param source_paths: Paths of the image files to copy.
    :param target_image_folder: Folder that receives the images.
    :param target_label_folder: Folder that receives the label files.
    """
    for source_path in source_paths:
        # Path of the image relative to the source images folder
        relative_path = os.path.relpath(source_path, source_images_folder)
        # Swap only the final extension; str.replace would also rewrite a
        # ".png" that appears earlier in the file or folder name.
        relative_label_path = os.path.splitext(relative_path)[0] + ".txt"

        target_image_path = os.path.join(target_image_folder, relative_path)
        target_label_path = os.path.join(target_label_folder, relative_label_path)

        # Create any necessary subdirectories in the target folders
        os.makedirs(os.path.dirname(target_image_path), exist_ok=True)
        os.makedirs(os.path.dirname(target_label_path), exist_ok=True)

        shutil.copy(source_path, target_image_path)

        label_file = os.path.join(source_labels_folder, relative_label_path)
        if os.path.isfile(label_file):
            shutil.copy(label_file, target_label_path)
        else:
            # Images can exist without labels; do not abort the whole copy.
            print(f"Warning: no label file found for {relative_path}; copied the image only.")

# Training subset: images and labels go into sibling sub-folders
training_image_dir = os.path.join(target_training_folder, "images")
training_label_dir = os.path.join(target_training_folder, "labels")
copy_image_and_label_files(training_images, training_image_dir, training_label_dir)

# Validation subset: same layout under the validation root
validation_image_dir = os.path.join(target_validation_folder, "images")
validation_label_dir = os.path.join(target_validation_folder, "labels")
copy_image_and_label_files(validation_images, validation_image_dir, validation_label_dir)

print("Data split and copied into training and validation sets.")


Data split and copied into training and validation sets.


In [6]:
import os

def verify_labels(directory, check_extent=False, tolerance=1e-6):
    """
    Check YOLOv5 label files for out-of-bounds or incomplete coordinates.

    Every ``.txt`` file directly inside ``directory`` is read line by line;
    blank lines are ignored. A line is reported when it does not have exactly
    five fields, when a coordinate is not a number, or when x, y, width or
    height is outside [0, 1]. Files are visited in sorted name order so the
    report is the same on every run.

    :param directory: Directory containing label files.
    :param check_extent: When True, also report boxes whose edges
        (centre +/- half the size) fall outside the image. Off by default, so
        existing callers get the same checks as before.
    :param tolerance: Slack allowed on the edge check to absorb rounding in
        the written coordinates.
    :return: List of strings of the form ``"<file>, line <n>: <problem>"``.
    """
    issues_report = []

    for filename in sorted(os.listdir(directory)):
        # Only text files are treated as label files
        if not filename.endswith('.txt'):
            continue

        filepath = os.path.join(directory, filename)
        with open(filepath, 'r') as file:
            for line_number, line in enumerate(file, start=1):
                line_content = line.strip()
                if not line_content:
                    continue

                try:
                    parts = line_content.split()
                    if len(parts) != 5:
                        raise ValueError("Incomplete coordinates")

                    # float() raises ValueError itself for non-numeric fields;
                    # the class id in parts[0] is not validated.
                    x, y, width, height = map(float, parts[1:])

                    if not (0 <= x <= 1 and 0 <= y <= 1 and 0 <= width <= 1 and 0 <= height <= 1):
                        raise ValueError("Out-of-bounds coordinates")

                    if check_extent and (
                        x - width / 2 < -tolerance
                        or x + width / 2 > 1 + tolerance
                        or y - height / 2 < -tolerance
                        or y + height / 2 > 1 + tolerance
                    ):
                        raise ValueError("Box extends outside the image")

                except ValueError as e:
                    issues_report.append(f"{filename}, line {line_number}: {str(e)}")

    return issues_report


# Usage: check the cropped label files and print one line per problem found
directory_path = 'cropped/labels'
issues = verify_labels(directory_path)

if not issues:
    print("No issues found in label files.")
else:
    print("Issues found in label files:")
    for issue in issues:
        print(issue)


No issues found in label files.
