# Imports & Variables

In [None]:
import os
import random
from PIL import Image
import shutil

In [None]:
# Dataset split to process. Run the notebook once per split, setting this to
# "train", "test" and "val" in turn.
split = "train"

# Root folders of the source YOLO dataset and of the derived dataset.
yolo_dataset_root = "D:/Datasets/YOLO_Dataset"
yolo_caps_root = "D:/Datasets/YOLO_Caps"

# Source data: YOLO label files (.txt) and the images they belong to.
original_labels_dir = f"{yolo_dataset_root}/{split}/labels"
original_images_dir = f"{yolo_dataset_root}/{split}/images"

# Full-size images sorted by whether their label file contains annotations.
nodule_dir = f"{yolo_caps_root}/{split}/nodule"
non_nodule_dir = f"{yolo_caps_root}/{split}/non-nodule"

# Destination folders for the cropped and resized patches.
nodule_output_folder = f"{yolo_caps_root}/{split}/cropped_nodule"
non_nodule_output_folder = f"{yolo_caps_root}/{split}/cropped_non_nodule"


# Copy all images into the YOLO_Caps nodule / non-nodule folders (repeat for train, test and val)

In [None]:
# Sort every labelled image into the nodule or the non-nodule folder.
# An image counts as "nodule" when its YOLO label file has any
# non-whitespace content and as "non-nodule" when the label file is empty.

# Ensure target directories exist
os.makedirs(nodule_dir, exist_ok=True)
os.makedirs(non_nodule_dir, exist_ok=True)

# Number of label files for which no image was found
missing_images = 0

# Loop through all label text files in the label folder
for label_file in os.listdir(original_labels_dir):
    if not label_file.endswith(".txt"):  # Process only text files
        continue

    label_path = os.path.join(original_labels_dir, label_file)

    # An empty (or whitespace-only) label file means "no nodule"
    with open(label_path, "r") as file:
        has_annotation = bool(file.read().strip())

    # Swap only the extension; str.replace would also rewrite a ".txt"
    # that happens to occur earlier in the file name.
    image_name = os.path.splitext(label_file)[0] + ".jpeg"
    image_path = os.path.join(original_images_dir, image_name)

    if not os.path.exists(image_path):
        # Keep going, but remember the gap so it can be reported below
        missing_images += 1
        continue

    # Copy the image to the appropriate folder
    shutil.copy(image_path, nodule_dir if has_annotation else non_nodule_dir)

if missing_images:
    print(f"Warning: {missing_images} label file(s) had no matching .jpeg image and were skipped.")
print("Kopieren abgeschlossen.")


Kopieren abgeschlossen.


        non-nodule  	nodule
    test    2670		2700
    val     2654		2714
    train   21520		21430


# Crop images around each nodule, then crop a random non-nodule image at the same position

In [None]:
# Function to crop and resize an image based on a bounding box
def crop_and_resize(image, bbox, target_size=128):
    """Crop a square patch around a bounding box and resize it.

    Args:
        image: PIL image to crop from.
        bbox: ``(x_center, y_center, width, height)`` with all values
            normalised to the image size (YOLO convention).
        target_size: Edge length in pixels of the returned square patch.

    Returns:
        A new PIL image of size ``(target_size, target_size)``.
    """
    x_center, y_center, width, height = bbox

    # Convert the normalised values to pixels using the real image size
    # instead of assuming 512x512, so the conversion agrees with the
    # clamping below for images of any size.
    img_w, img_h = image.width, image.height
    x_center = x_center * img_w
    y_center = y_center * img_h

    # Side length of the square region: the longer edge of the box
    max_side = max(width * img_w, height * img_h)

    # Calculate the square region of interest (ROI)
    x_min = int(x_center - max_side / 2)
    y_min = int(y_center - max_side / 2)
    x_max = int(x_center + max_side / 2)
    y_max = int(y_center + max_side / 2)

    # Ensure the ROI stays within the bounds of the image
    x_min = max(0, x_min)
    y_min = max(0, y_min)
    x_max = min(img_w, x_max)
    y_max = min(img_h, y_max)

    # Sub-pixel or out-of-range boxes can collapse to an empty rectangle
    # after truncation/clamping; fall back to a 1-pixel window inside the
    # image so that crop + resize never operate on an empty region.
    if x_max <= x_min:
        x_min = min(x_min, img_w - 1)
        x_max = x_min + 1
    if y_max <= y_min:
        y_min = min(y_min, img_h - 1)
        y_max = y_min + 1

    # Crop the image using the calculated ROI
    cropped_image = image.crop((x_min, y_min, x_max, y_max))

    # Resize the cropped image to the target size
    return cropped_image.resize((target_size, target_size), Image.LANCZOS)

In [None]:
# Function to load bounding boxes from a YOLO-style text file
def load_bboxes(label_path):
    """Read bounding boxes from a YOLO-style label file.

    Each usable line has the form
    ``class_id x_center y_center width height``. Fields after the fifth
    are ignored, and lines with fewer than five fields (including blank
    lines) are skipped.

    Args:
        label_path: Path to the label text file.

    Returns:
        A list of ``(x_center, y_center, width, height)`` float tuples,
        in file order. Empty if the file has no usable line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the four box fields is not a number.
    """
    bboxes = []
    with open(label_path, 'r') as file:
        for line in file:
            parts = line.split()
            if len(parts) < 5:
                continue
            # Take exactly the four box fields: the class id is not needed
            # and unpacking every field would fail on lines with extras.
            x_center, y_center, width, height = map(float, parts[1:5])
            bboxes.append((x_center, y_center, width, height))
    return bboxes

In [None]:
# For every bounding box of every nodule image, save the cropped nodule
# patch and a patch cut from a random non-nodule image at the same position.

# Counter for non-nodule images (makes every non-nodule file name unique)
non_nodule_counter = 0

# Ensure the output folders exist
os.makedirs(nodule_output_folder, exist_ok=True)
os.makedirs(non_nodule_output_folder, exist_ok=True)

# List the candidate non-nodule images once instead of once per bounding box,
# and keep only .jpeg files so that every random pick yields a sample.
non_nodule_image_names = [
    name for name in os.listdir(non_nodule_dir) if name.endswith(".jpeg")
]
if not non_nodule_image_names:
    print("Warning: no .jpeg files found in non-nodule folder; only nodule crops will be written.")

# Loop through all nodule images
for nodule_image_name in os.listdir(nodule_dir):
    if not nodule_image_name.endswith(".jpeg"):
        continue

    nodule_stem = os.path.splitext(nodule_image_name)[0]

    # Load bounding boxes from the corresponding label file
    label_path = os.path.join(original_labels_dir, nodule_stem + ".txt")
    bboxes = load_bboxes(label_path)

    # The context manager closes the underlying file once all crops are done
    with Image.open(os.path.join(nodule_dir, nodule_image_name)) as nodule_image:
        # Loop through each bounding box
        for i, bbox in enumerate(bboxes):
            # Crop and resize the nodule image based on the bounding box
            cropped_nodule_image = crop_and_resize(nodule_image, bbox)

            # The first box keeps the established file name; further boxes of
            # the same image get an index suffix so they no longer overwrite
            # the first crop.
            if i == 0:
                nodule_filename = f"n{nodule_image_name}"
            else:
                nodule_filename = f"n{nodule_stem}_box{i}.jpeg"
            cropped_nodule_image.save(os.path.join(nodule_output_folder, nodule_filename))

            if not non_nodule_image_names:
                continue

            # Select a random non-nodule image
            random_non_nodule_image_name = random.choice(non_nodule_image_names)
            random_non_nodule_image_path = os.path.join(non_nodule_dir, random_non_nodule_image_name)

            # Crop and resize the random non-nodule image at the same position as nodule image
            with Image.open(random_non_nodule_image_path) as random_non_nodule_image:
                cropped_non_nodule_image = crop_and_resize(random_non_nodule_image, bbox)

            # Save the cropped non-nodule image
            non_nodule_stem = os.path.splitext(random_non_nodule_image_name)[0]
            non_nodule_filename = f"nn{non_nodule_stem}_{non_nodule_counter}.jpeg"
            cropped_non_nodule_image.save(os.path.join(non_nodule_output_folder, non_nodule_filename))
            non_nodule_counter += 1
