In [1]:
!pip install Pillow



In [2]:
from PIL import Image
import os
import cv2
import glob
import numpy as np
import shutil
import random

In [3]:
def resize_images(input_output_folder, target_size=(640, 640), resample=None,
                  extensions=('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff', '.webp')):
    """Resize every image file in a folder in place.

    Each matching file is opened, resized to ``target_size`` and written back
    to the same path, so the originals are overwritten.

    Args:
        input_output_folder: Folder whose images are resized in place. It is
            created if it does not exist.
        target_size: ``(width, height)`` passed to ``Image.resize``.
        resample: Pillow resampling filter. ``None`` (default) uses
            ``Image.LANCZOS``, the filter this function has always used. For
            label/mask images a non-interpolating filter such as
            ``Image.NEAREST`` avoids creating intermediate pixel values.
        extensions: File extensions (case-insensitive) treated as images.
            Sub-directories and files with other extensions are skipped
            instead of being handed to ``Image.open``.

    Returns:
        None. Prints how many images were actually resized.
    """
    # Ensure the folder exists
    os.makedirs(input_output_folder, exist_ok=True)

    allowed_suffixes = tuple(ext.lower() for ext in extensions)
    resized_count = 0

    # Sorted so the processing order does not depend on the filesystem
    for filename in sorted(os.listdir(input_output_folder)):
        path = os.path.join(input_output_folder, filename)

        # Skip sub-directories and anything that is not an image file
        if not os.path.isfile(path) or not filename.lower().endswith(allowed_suffixes):
            continue

        # The default is resolved here rather than in the signature so the
        # filter constant is only looked up when an image is processed.
        resample_filter = Image.LANCZOS if resample is None else resample

        with Image.open(path) as img:
            resized_img = img.resize(target_size, resample=resample_filter)

        # Save only after the source file handle is closed: we overwrite the
        # very file we just read from.
        resized_img.save(path)
        resized_count += 1

    print(f'...finished resizing {resized_count} images')

In [4]:
# Resize the image and label folders of the train and test splits in place.
# NOTE(review): the label folders hold mask PNGs (they are read as masks later
# in this notebook) and are resized here with the same default filter as the
# photos; an interpolating filter can introduce intermediate pixel values in
# masks - confirm this is acceptable.
for split in ('train', 'test'):
    for kind in ('images', 'labels'):
        print(f'resizing {split} {kind}...')
        folder = f'datasets/data/{split}/{kind}'
        resize_images(folder)

resizing train images...
...finished resizing 3916 images
resizing train labels...
...finished resizing 3916 images
resizing test images...
...finished resizing 599 images
resizing test labels...
...finished resizing 599 images


In [5]:
def _contour_to_label_line(contour, width, height, class_id):
    """Format one contour as a normalized polygon label line, or None if degenerate."""
    points = contour.reshape(-1, 2)  # Flatten the nested (N, 1, 2) structure

    # A polygon needs at least three vertices; shorter contours are single
    # pixels or lines and would produce rows that are not valid polygons.
    if len(points) < 3:
        return None

    # Divide x by the image width and y by the image height
    normalized = (points.astype(float) / np.array([width, height])).tolist()
    coords = "".join(" {} {}".format(x, y) for x, y in normalized)
    return "{}{}\n".format(class_id, coords)


def bounding_box_extraction(image_folder, class_id=1):
    """Convert every PNG mask in a folder into a polygon label text file.

    For each ``*.png`` in ``image_folder`` the mask is loaded as grayscale,
    every non-zero pixel is treated as foreground, and the external contours
    of the foreground are written to ``<same name>.txt`` as one line per
    contour: ``<class_id> x1 y1 x2 y2 ...`` with coordinates normalized by the
    image width and height. The mask PNG is deleted afterwards, so the
    function is destructive and cannot be re-run on the same folder.

    Args:
        image_folder: Folder containing the PNG masks; the text files are
            written next to them.
        class_id: Class index written at the start of every line. Defaults to
            ``1``, the value this function has always written.

    Returns:
        None. Prints an error for masks that cannot be loaded (these are left
        on disk) and ``...finished`` at the end.
    """
    # Sorted so the processing order does not depend on the filesystem
    for image_path in sorted(glob.glob(os.path.join(image_folder, '*.png'))):
        # Load the grayscale mask image
        mask_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Check the loaded image
        if mask_image is None:
            print(f"Error: Unable to load the image {image_path}")
            continue

        height, width = mask_image.shape[:2]

        # Threshold the image: values greater than or equal to 1 become 255
        _, binary_mask = cv2.threshold(mask_image, 0.5, 255, cv2.THRESH_BINARY)

        # findContours returns (contours, hierarchy) in OpenCV 4 but
        # (image, contours, hierarchy) in OpenCV 3; the contours are always
        # the second-to-last element.
        contours = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Output file details
        output_file_path = os.path.splitext(image_path)[0] + '.txt'

        # Write one polygon per contour; an empty file means "no objects"
        with open(output_file_path, 'w') as f:
            for contour in contours:
                line = _contour_to_label_line(contour, width, height, class_id)
                if line is not None:
                    f.write(line)

        # Delete the mask only after its label file has been written
        os.remove(image_path)
    print("...finished")

In [6]:
# Convert the mask PNGs of each split into label text files (the PNGs are
# deleted by bounding_box_extraction once converted).
for split in ("train", "test"):
    print(f"extracting bounding boxes for {split}...")
    bounding_box_extraction(f'./datasets/data/{split}/labels')
print("...finished extracting")

extracting bounding boxes for train...
extracting bounding boxes for test...
...finished extracting


In [7]:
def move_related_files(source_dir1, source_dir2, dest_dir1, dest_dir2, percentage, seed=None):
    """Move a random share of matching file pairs from two folders to two others.

    Files are paired by base name without extension (``abc.png`` in
    ``source_dir1`` goes with ``abc.txt`` in ``source_dir2``). Files without a
    partner in the other folder, and sub-directories, are never moved.

    Args:
        source_dir1: First source folder (e.g. images).
        source_dir2: Second source folder (e.g. labels).
        dest_dir1: Destination for files from ``source_dir1``; created if missing.
        dest_dir2: Destination for files from ``source_dir2``; created if missing.
        percentage: Share of the matched pairs to move, 0-100. Values outside
            that range are clamped.
        seed: Optional seed for a reproducible selection. ``None`` (default)
            uses the global ``random`` state.

    Returns:
        None. Prints one ``Moved: ...`` line per pair that was moved, or the
        error for a pair that could not be moved.
    """
    def _files_by_stem(directory):
        # Map "name without extension" -> file name, regular files only.
        return {
            os.path.splitext(name)[0]: name
            for name in sorted(os.listdir(directory))
            if os.path.isfile(os.path.join(directory, name))
        }

    files1 = _files_by_stem(source_dir1)
    files2 = _files_by_stem(source_dir2)

    os.makedirs(dest_dir1, exist_ok=True)
    os.makedirs(dest_dir2, exist_ok=True)

    # Pair by shared stem. os.listdir order is arbitrary, so pairing the two
    # listings positionally could match an image with the wrong label.
    shared_stems = sorted(files1.keys() & files2.keys())
    pairs = [(files1[stem], files2[stem]) for stem in shared_stems]

    # Clamp so random.sample is never asked for a negative count or for more
    # pairs than exist.
    fraction = min(max(percentage, 0), 100) / 100.0
    num_files_to_move = int(len(pairs) * fraction)

    rng = random if seed is None else random.Random(seed)
    files_to_move = rng.sample(pairs, num_files_to_move)

    for file1, file2 in files_to_move:
        source_path1 = os.path.join(source_dir1, file1)
        source_path2 = os.path.join(source_dir2, file2)
        dest_path1 = os.path.join(dest_dir1, file1)
        dest_path2 = os.path.join(dest_dir2, file2)

        try:
            shutil.move(source_path1, dest_path1)
        except Exception as e:
            print(e)
            continue

        try:
            shutil.move(source_path2, dest_path2)
        except Exception as e:
            print(e)
            # Put the first file back so the pair is not split across folders.
            try:
                shutil.move(dest_path1, source_path1)
            except Exception as rollback_error:
                print(rollback_error)
            continue

        # Reported only when both files of the pair were actually moved
        print(f"Moved: {file1} and {file2}")
        

In [8]:
# Carve a validation split out of the training data by moving a share of the
# image/label pairs from train/ to val/.
percentage_to_move = 20  # Adjust this percentage as needed

source_directory1 = "./datasets/data/train/images"
destination_directory1 = "./datasets/data/val/images"
source_directory2 = "./datasets/data/train/labels"
destination_directory2 = "./datasets/data/val/labels"

move_related_files(
    source_dir1=source_directory1,
    source_dir2=source_directory2,
    dest_dir1=destination_directory1,
    dest_dir2=destination_directory2,
    percentage=percentage_to_move,
)


Moved: 01040VANE_seg6_110.png and 01040VANE_seg6_110.txt
Moved: 01016SACH_seg4_076.png and 01016SACH_seg4_076.txt
Moved: 08002CHJE_seg1_088.png and 08002CHJE_seg1_088.txt
Moved: 07043SEME_seg3_150.png and 07043SEME_seg3_150.txt
Moved: 07040DORE_seg1_089.png and 07040DORE_seg1_089.txt
Moved: 08002CHJE_seg2_065.png and 08002CHJE_seg2_065.txt
Moved: 07040DORE_seg1_185.png and 07040DORE_seg1_185.txt
Moved: 07043SEME_seg6_181.png and 07043SEME_seg6_181.txt
Moved: 07040DORE_seg6_167.png and 07040DORE_seg6_167.txt
Moved: 07043SEME_seg1_116.png and 07043SEME_seg1_116.txt
Moved: 08002CHJE_seg3_152.png and 08002CHJE_seg3_152.txt
Moved: 07040DORE_seg6_106.png and 07040DORE_seg6_106.txt
Moved: 01040VANE_seg3_098.png and 01040VANE_seg3_098.txt
Moved: 08002CHJE_seg6_194.png and 08002CHJE_seg6_194.txt
Moved: 08002CHJE_seg5_040.png and 08002CHJE_seg5_040.txt
Moved: 01040VANE_seg1_097.png and 01040VANE_seg1_097.txt
Moved: 08002CHJE_seg1_042.png and 08002CHJE_seg1_042.txt
Moved: 07040DORE_seg2_155.png a