In [None]:
# @title Image Segmentation

def generate_training_segments(image_name, coordinates, train_size):
    """
    Generates image segments centered around given coordinates, with random
    shifts applied, for training purposes. Returns the cropped segments along
    with shifted and normalized coordinates.

    Each segment is a square window of roughly ``train_size`` pixels whose
    centre is the input coordinate displaced by a random offset. Windows that
    reach past the image border are clamped to the image, so such segments
    can be smaller than ``train_size``.

    Parameters:
    -----------
    image_name : str
        The name of the image file to be segmented, located in the
        "Processed images" directory.

    coordinates : list of tuples
        A list of (x, y) coordinates around which the segments will be
        generated.

    train_size : int
        The size (width and height) of each square segment to be generated.

    Returns:
    --------
    tuple
        A tuple containing three lists:
        - segments: A list of cropped image segments (PIL Image objects).
        - shifted_coordinates: A list of tuples giving the position of the
          original coordinate inside each segment, normalized to the actual
          width and height of that segment.
        - norm_coords: A list of tuples representing the normalized width and
          height of the segment (``train_size``) relative to the original
          image dimensions.
    """

    segments = []
    shifted_coordinates = []
    norm_coords = []

    half_size = train_size // 2
    max_shift = int(0.35 * train_size)

    # The context manager releases the file handle once all crops are taken.
    with Image.open(f"Processed images/{image_name}") as image:
        width, height = image.size

        # Identical for every coordinate, so computed once per image.
        normalized_size = (train_size / width, train_size / height)

        for x, y in coordinates:
            # Randomly shift the window centre by up to ±35% of the segment
            # size (x first, then y, to keep the random sequence stable).
            x_shift = randint(-max_shift, max_shift)
            y_shift = randint(-max_shift, max_shift)

            shifted_x = x + x_shift
            shifted_y = y + y_shift

            # Define the bounding box, clamped to the image borders.
            left = max(0, shifted_x - half_size)
            upper = max(0, shifted_y - half_size)
            right = min(width, shifted_x + half_size)
            lower = min(height, shifted_y + half_size)
            segment = image.crop((left, upper, right, lower))
            # Older Pillow releases documented crop() as lazy; load() makes
            # sure the pixels are materialised before the file is closed.
            segment.load()

            # Locate the point relative to the *actual* crop. Using
            # ``half_size - shift`` is only valid when the window was not
            # clamped; near a border it mislabels the point.
            segment_width = max(right - left, 1)
            segment_height = max(lower - upper, 1)
            normalized_x = (x - left) / segment_width
            normalized_y = (y - upper) / segment_height

            segments.append(segment)
            shifted_coordinates.append((normalized_x, normalized_y))
            norm_coords.append(normalized_size)

    return segments, shifted_coordinates, norm_coords

In [None]:
# @title Image Augmentation

def random_augmentation(images, coords):
    """
    Applies random augmentations to a list of images and adjusts the associated
    coordinates accordingly. The augmentations include noise addition, brightness
    change, blurring, and rotation.

    Every image is first converted to 8-bit grayscale. Salt-and-pepper noise,
    a brightness change and a light Gaussian blur are then each applied with
    probability 1/2, followed by a rotation picked uniformly from 0, 90, 180
    and 270 degrees. The result is converted to RGB.

    Parameters:
    -----------
    images : list of PIL.Image
        A list of images to be augmented.

    coords : list of tuples
        A list of (x, y) coordinates associated with each image, normalized to
        the image size.

    Returns:
    --------
    tuple
        A tuple containing two lists:
        - augmented_images: A list of augmented images (PIL Image objects,
          mode "RGB").
        - augmented_coords: A list of tuples representing the coordinates
          adjusted according to the applied rotation.
    """

    augmented_images = []
    augmented_coords = []

    for img, (x, y) in zip(images, coords):
        img_np = np.array(img.convert("L"))

        # Randomly apply salt/pepper noise
        if choice([True, False]):
            noise_prob = uniform(0.01, 0.05)

            # -1 marks "leave the pixel alone". An integer sentinel keeps the
            # mask a plain integer array; a None entry would force an
            # object-dtype array and slow element-wise comparisons.
            noise = np.random.choice([0, 255, -1],
                                     size=img_np.shape,
                                     p=[noise_prob/2,
                                        noise_prob/2,
                                        1-noise_prob])

            img_np[noise == 0] = 0      # pepper
            img_np[noise == 255] = 255  # salt

        img_augmented = Image.fromarray(img_np)

        # Randomly apply brightness change
        if choice([True, False]):
            enhancer = ImageEnhance.Brightness(img_augmented)
            factor = uniform(0.3, 3.0)
            img_augmented = enhancer.enhance(factor)

        # Randomly apply light blurring
        if choice([True, False]):
            img_augmented = img_augmented.filter(
                ImageFilter.GaussianBlur(radius=uniform(0.2, 0.7)))

        # Randomly apply rotation (90, 180, or 270 degrees). PIL rotates
        # counter-clockwise, so the normalized point (origin top-left, y
        # pointing down) is remapped to match.
        rotation_angle = choice([0, 90, 180, 270])
        if rotation_angle != 0:
            img_augmented = img_augmented.rotate(rotation_angle, expand=True)

            if rotation_angle == 90:
                x, y = y, 1 - x
            elif rotation_angle == 180:
                x, y = 1 - x, 1 - y
            else:  # 270
                x, y = 1 - y, x

        augmented_coords.append((x, y))
        augmented_images.append(img_augmented.convert("RGB"))

    return augmented_images, augmented_coords

In [None]:
# @title Train/Validation data generating

def _save_labelled_images(images, class_ids, centers, sizes, split, start_index):
    """
    Saves images and their one-line label files for one dataset split.

    Files are written to "dataset/images/<split>/image<k>.jpg" and
    "dataset/labels/<split>/image<k>.txt", with k counting up from
    ``start_index + 1``. Returns the last index used so the caller can
    continue the numbering on the next call.
    """
    index = start_index
    for image, class_id, (x, y), (width, height) in zip(images, class_ids,
                                                        centers, sizes):
        index += 1
        image.save(f"dataset/images/{split}/image{index}.jpg")
        with open(f"dataset/labels/{split}/image{index}.txt", "w") as f:
            f.write(
                f"{class_id} {x:.6f} {y:.6f}"
                f" {width:.6f} {height:.6f} \n")
    return index


def generate_train_data(data,
                        classes,
                        encode_type="Height",
                        augmentation_variations=6,
                        train_size=128):
    """
    Generates training and validation datasets by creating image segments,
    applying augmentations, and saving the resulting images and labels.

    For every file in ``data`` the segments are generated
    ``augmentation_variations`` times (each pass uses new random shifts),
    augmented, and written to "dataset/images/train" and
    "dataset/labels/train". The un-augmented segments of the last pass are
    written to "dataset/images/val" and "dataset/labels/val". Output files
    are numbered continuously across all input files so that later files do
    not overwrite the output of earlier ones. The output directories must
    already exist.

    Parameters:
    -----------
    data : dict
        A dictionary containing file names as keys, with corresponding
        point cloud data, including 'ids' and 'coordinates'.

    classes : dict
        A dictionary mapping class IDs to class names.

    encode_type : str, optional
        The type of encoding used for the images (e.g., "Height", "Height
        Difference", "Point Count"). Default is "Height".

    augmentation_variations : int, optional
        The number of augmentation variations to apply to each image segment.
        Default is 6.

    train_size : int, optional
        The size (width and height) of the square segments to be generated
        from the images. Default is 128.

    Returns:
    --------
    tuple
        A tuple containing two lists:
        - all_train_images: A list of all training images generated (PIL
          Image objects).
        - all_titles: A list of strings representing the class and coordinates
          for each image.

    Raises:
    -------
    ValueError
        If a file's 'ids' and 'coordinates' differ in length, since labels
        could then no longer be matched to segments.
    """

    all_train_images, all_titles = [], []

    # Running counters shared by all input files; restarting them per file
    # would overwrite image1.jpg, image2.jpg, ... on every iteration.
    train_count = 0
    val_count = 0

    for file_name, entry in data.items():

        image_name = f"{encode_type} Encoded/{file_name}.png"
        ids = entry["ids"]
        coordinates = entry["coordinates"]

        if len(ids) != len(coordinates):
            raise ValueError(
                f"'{file_name}': {len(ids)} ids but "
                f"{len(coordinates)} coordinates")

        all_norm_val = []
        augment_images = []
        augment_coords = []
        train_ids = []

        # Defined up front so the validation step below is well-defined even
        # when augmentation_variations is 0.
        seg, shifted, norm_val = [], [], []

        for _ in range(augmentation_variations):
            seg, shifted, norm_val = generate_training_segments(image_name,
                                                                coordinates,
                                                                train_size)
            augment_images.extend(seg)
            augment_coords.extend(shifted)
            train_ids.extend(ids)
            all_norm_val.extend(norm_val)

        train_images, train_coords = random_augmentation(augment_images,
                                                         augment_coords)

        # NOTE(review): the validation set is the un-augmented output of the
        # last pass, whose augmented copies are also part of the training
        # set - confirm this overlap is intended.
        train_count = _save_labelled_images(train_images, train_ids,
                                            train_coords, all_norm_val,
                                            "train", train_count)
        val_count = _save_labelled_images(seg, ids, shifted, norm_val,
                                          "val", val_count)

        all_train_images.extend(train_images)
        all_titles.extend(
            f"{classes[class_id]} {center}"
            for class_id, center in zip(train_ids, train_coords))

    return all_train_images, all_titles