# Convert Binary Mask Dataset to YOLO Segmentation Format

For Recod.ai/LUC - Scientific Image Forgery Detection Competition

Format reference: https://docs.ultralytics.com/datasets/segment/#ultralytics-yolo-format

### Dataset Structure Expected:
- Images folder: Contains original images
- Masks folder: Contains one binary mask per forged image, stored as a `.npy` array with the same base name as the image (non-zero=forgery, zero=background)

### YOLO Segmentation Format:
- Each forged image has a corresponding .txt label file with one line per forged region; authentic images get no label file
- Format: <class_id> <x1> <y1> <x2> <y2> ... <xn> <yn>
- Coordinates are normalized (0-1) relative to image dimensions

In [None]:
import os
import cv2
import numpy as np

INPUT_DIR = "/kaggle/input/recodai-luc-scientific-image-forgery-detection"

# Forged training images and their masks live in separate folders under INPUT_DIR.
train_forged_images_dir = os.path.join(INPUT_DIR, 'train_images', 'forged')
train_masks_dir = os.path.join(INPUT_DIR, 'train_masks')

# Pick one forged image to sanity-check the conversion on.
# os.listdir() returns entries in arbitrary order and may include non-image
# files, so keep only .png files and sort to make the sample deterministic.
sample_image_name = sorted(
    f for f in os.listdir(train_forged_images_dir) if f.endswith('.png')
)[0]
sample_image_path = os.path.join(train_forged_images_dir, sample_image_name)

# A mask shares its image's base name but is stored as a .npy array.
# splitext swaps only the extension, whereas str.replace would also rewrite
# a '.png' that happens to occur earlier in the file name.
sample_mask_name = os.path.splitext(sample_image_name)[0] + '.npy'
sample_mask_path = os.path.join(train_masks_dir, sample_mask_name)

def load_mask(mask_path: str) -> np.ndarray:
    """Load a mask from a .npy file and binarize it.

    Args:
        mask_path: Path to the .npy file holding the mask array.

    Returns:
        A boolean array that is True wherever the mask is positive.
        A 2D array is binarized element-wise as-is. An array with any other
        number of dimensions is first summed over its leading axis
        (presumably one layer per forged region, i.e. (n_regions, H, W) --
        confirm against the dataset) and then binarized.
    """
    mask_raw = np.load(mask_path)
    # A mask that is already 2D must not be summed over axis 0: that would
    # collapse its rows and return a 1D array instead of an (H, W) mask.
    if mask_raw.ndim == 2:
        return mask_raw > 0
    # Merge the stacked layers: a pixel is foreground if any layer marks it.
    mask = np.sum(mask_raw, axis=0)
    return mask > 0


# Load the sample image and mask
sample_image = cv2.imread(sample_image_path)
# cv2.imread reports failure by returning None instead of raising; stop here
# with a clear message rather than failing later inside cv2.cvtColor.
if sample_image is None:
    raise FileNotFoundError(f"Could not read image: {sample_image_path}")
sample_mask = load_mask(sample_mask_path)

print(f"Loaded sample image: {sample_image_name}")
print(f"Loaded sample mask: {sample_mask_name}")

In [None]:
import matplotlib.pyplot as plt

# Show the sample image next to its binarized mask.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# OpenCV loads images as BGR, while matplotlib expects RGB.
sample_image_rgb = cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB)
panels = (
    (sample_image_rgb, "Sample Image"),
    (sample_mask, "Sample Mask"),
)

# Each panel gets its picture, a title, and no axis ticks.
for ax, (pixels, title) in zip(axes, panels):
    ax.imshow(pixels)
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
def mask_to_yolo_segmentation(mask: np.ndarray, tolerance: float = 0.005) -> list:
    """
    Converts a binary mask to normalized YOLO polygons with contour approximation.

    Args:
        mask: A 2D numpy array representing the binary mask (1 for foreground, 0 for background).
        tolerance: The approximation accuracy, as a fraction of each contour's
                   perimeter. A smaller value gives a more precise approximation
                   with more vertices.

    Returns:
        A list with one entry per external contour. Each entry is a flat list
        of floats [x1, y1, x2, y2, ...] where x is divided by the mask width
        and y by the mask height. Contours that cannot form a polygon (fewer
        than three vertices) are dropped. Returns an empty list if no usable
        contour is found.
    """
    # A YOLO segmentation label needs at least three xy points per polygon.
    min_points = 3

    # Convert the mask to uint8, the single-channel type passed to findContours
    mask_uint8 = mask.astype(np.uint8)
    # Assuming mask is the same size as the image
    height, width = mask.shape

    # Only outer boundaries are extracted (RETR_EXTERNAL); holes are ignored.
    contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    yolo_contours = []

    for contour in contours:
        # Simplify the contour; epsilon scales with the contour's perimeter.
        epsilon = tolerance * cv2.arcLength(contour, True)
        polygon = cv2.approxPolyDP(contour, epsilon, True)

        # If simplification left too few vertices, fall back to the raw
        # contour; if even that is degenerate, skip it.
        if len(polygon) < min_points:
            polygon = contour
        if len(polygon) < min_points:
            continue

        # (n, 1, 2) pixel coordinates -> (n, 2) floats normalized to image size
        points = polygon.reshape(-1, 2).astype(np.float64)
        points[:, 0] /= width
        points[:, 1] /= height
        yolo_contours.append(points.flatten().tolist())

    return yolo_contours


def format_yolo_segmentation(yolo_contours: list, class_id: int = 0) -> str:
    """
    Formats YOLO segmentation contours into label-file text.

    Args:
        yolo_contours: A list of contours, each a flat list of normalized
                       coordinates [x1, y1, x2, y2, ...].
        class_id: The class ID written at the start of every line.

    Returns:
        One line per contour in the form "<class_id> x1 y1 x2 y2 ...", with
        coordinates printed to six decimals and lines joined by newlines
        (no trailing newline). Contours with fewer than three points are
        skipped. Returns an empty string if nothing is left to write.
    """
    # Three xy points -> six numbers is the minimum for a valid polygon line.
    min_coords = 6

    lines = []
    for contour in yolo_contours:
        if len(contour) < min_coords:
            continue
        segmentation_string = " ".join(f"{coord:.6f}" for coord in contour)
        lines.append(f"{class_id} {segmentation_string}")
    return "\n".join(lines)


# Run the sample mask through the converter and show the resulting label text.
sample_yolo_annotation = mask_to_yolo_segmentation(sample_mask)
sample_yolo_text = format_yolo_segmentation(sample_yolo_annotation)

print("Sample YOLO Annotation:", sample_yolo_text, sep="\n")

In [None]:
def draw_yolo_segmentation(image: np.ndarray, yolo_annotation: list, color=(0, 255, 0), thickness=2) -> np.ndarray:
    """
    Draws YOLO segmentation contours on a copy of an image.

    Args:
        image: The input image (NumPy array), either color (H, W, C) or
               grayscale (H, W). It is not modified.
        yolo_annotation: A list of contours, each a flat list of normalized
                         coordinates [x1, y1, x2, y2, ...].
        color: The color of the contour (B, G, R).
        thickness: The thickness of the contour lines.

    Returns:
        A copy of the image with the contours drawn as closed polylines.
        If the annotation is empty, an unmodified copy is returned.
    """
    # shape[:2] works for both color and grayscale images.
    h, w = image.shape[:2]
    annotated_image = image.copy()
    if not yolo_annotation:
        return annotated_image

    for points in yolo_annotation:
        # Flat [x1, y1, x2, y2, ...] -> (n_points, 2)
        contour = np.asarray(points, dtype=np.float64).reshape(-1, 2)
        # Denormalize. Round instead of truncating: x / w * w can land a hair
        # below the original integer pixel, and int() would then shift the
        # vertex by one. Pin the dtype to the int32 points cv2.polylines takes.
        denormalized_contour = np.rint(contour * [w, h]).astype(np.int32)
        # Draw the contour
        cv2.polylines(annotated_image, [denormalized_contour], isClosed=True, color=color, thickness=thickness)

    return annotated_image

# Overlay the converted polygons on the sample image as a visual check.
annotated_sample_image = draw_yolo_segmentation(sample_image, sample_yolo_annotation)

# OpenCV images are BGR; convert before handing them to matplotlib.
annotated_sample_rgb = cv2.cvtColor(annotated_sample_image, cv2.COLOR_BGR2RGB)
plt.imshow(annotated_sample_rgb)
plt.axis('off')
plt.title("Annotated Sample Image")
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import shutil

# Root of the generated dataset. Point this at e.g. './yolo_dataset' to keep
# the output separate from the working directory.
OUTPUT_DIR = '.'
# Create images/{train,val} and labels/{train,val}.
for subset_dir in ('images', 'labels'):
    for split_dir in ('train', 'val'):
        os.makedirs(os.path.join(OUTPUT_DIR, subset_dir, split_dir), exist_ok=True)

# Paths to original data
train_forged_images_dir = os.path.join(INPUT_DIR, 'train_images', 'forged')
train_authentic_images_dir = os.path.join(INPUT_DIR, 'train_images', 'authentic')
train_masks_dir = os.path.join(INPUT_DIR, 'train_masks')

# Get list of forged and authentic image names
forged_image_names = [f for f in os.listdir(train_forged_images_dir) if f.endswith('.png')]
authentic_image_names = [f for f in os.listdir(train_authentic_images_dir) if f.endswith('.png')]

# Split each group 80/20 separately so both splits contain forged and
# authentic images; the fixed seed keeps the split reproducible.
train_forged_image_names, val_forged_image_names = train_test_split(forged_image_names, test_size=0.2, random_state=42)
train_authentic_image_names, val_authentic_image_names = train_test_split(authentic_image_names, test_size=0.2, random_state=42)

# One (image name, split, image type) record per image, processed in a single loop.
dataset_info = (
    [(name, 'train', 'forged') for name in train_forged_image_names]
    + [(name, 'val', 'forged') for name in val_forged_image_names]
    + [(name, 'train', 'authentic') for name in train_authentic_image_names]
    + [(name, 'val', 'authentic') for name in val_authentic_image_names]
)

# Source folder for each image type.
source_dirs = {
    'forged': train_forged_images_dir,
    'authentic': train_authentic_images_dir,
}

print("Processing dataset...")
for image_name, dataset_type, image_type in tqdm(dataset_info):
    image_path = os.path.join(source_dirs[image_type], image_name)
    # splitext swaps only the extension; str.replace would also rewrite a
    # '.png' occurring earlier in the file name.
    image_stem = os.path.splitext(image_name)[0]

    if image_type == 'forged':
        mask_name = image_stem + '.npy'
        mask_path = os.path.join(train_masks_dir, mask_name)

        # A forged image without a mask cannot be labelled; leave it out entirely.
        if not os.path.exists(mask_path):
            print(f"Missing mask for {mask_path}")
            continue

        mask = load_mask(mask_path)
        yolo_annotation = mask_to_yolo_segmentation(mask, tolerance=0.005)

        # Save annotation; the image_type prefix matches the output image name below.
        label_filename = f'{image_type}_{image_stem}.txt'
        label_path = os.path.join(OUTPUT_DIR, 'labels', dataset_type, label_filename)
        with open(label_path, 'w') as fl:
            fl.write(format_yolo_segmentation(yolo_annotation))
    # Authentic images have no mask, so no label file is written for them.

    # Copy the image file byte-for-byte. Decoding and re-encoding it with
    # cv2.imread/cv2.imwrite is slower, can alter the stored file, and fails
    # when imread returns None for an unreadable file.
    output_image_name = f'{image_type}_{image_name}'  # Add image_type prefix
    output_image_path = os.path.join(OUTPUT_DIR, 'images', dataset_type, output_image_name)
    shutil.copyfile(image_path, output_image_path)


print("Dataset conversion to YOLO format complete.")

In [None]:
import yaml

# Describe the dataset layout: one class ('forged'), with the image folders
# given relative to the dataset root.
dataset_yaml_content = dict(
    # Absolute path when the generated dataset is added to your notebook
    path="/kaggle/input/forgerydetection-yolo-segmentation",
    train='images/train',  # training images folder, relative to 'path'
    val='images/val',      # validation images folder, relative to 'path'
    nc=1,                  # number of classes
    names=['forged'],      # class names
)

# The YAML file is written next to the generated images/ and labels/ folders.
dataset_yaml_path = os.path.join(OUTPUT_DIR, 'dataset.yaml')

with open(dataset_yaml_path, 'w') as yaml_file:
    yaml.dump(dataset_yaml_content, yaml_file, default_flow_style=None)

print(f"Dataset YAML file created at: {dataset_yaml_path}")