In [2]:
import glob
import os
import shutil
import subprocess

import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from ultralytics import SAM
from ultralytics.data.utils import visualize_image_annotations

In [13]:
# Class folders should be organized as follows:
#
# raw/
# └── screwdriver_kaggle/
#    ├── screwdriver/
#    │   ├── images/
#    │   │   ├── 1.jpg
#    │   │   └── ...
#    │   ├── labels/
#    │   │   ├── 1.txt
#    │   │   └── ...
#    │   ├── masks/
#    │   │   ├── 1_mask.png
#    │   │   └── ...  
#    ├── hammer/
#    │   └── .../
#    ├── .../
#    └── classes.txt

# Darknet label files (labels/*.txt) should contain one bounding box per line,
# with coordinates normalized by the image width and height:
# class_index x_center y_center width height
#
# e.g.
# 0 0.5 0.5 0.2 0.2

# classes.txt should contain the class names, one per line:
# e.g.
# screwdriver
# hammer
# ...

# Name of the dataset folder under ../data/raw/ and the classes it contains.
project_name = 'kaggle_v0'
object_classes = ['screwdriver', 'hammer']

# One raw input folder per class, in the same order as object_classes.
class_dirs = [
    f'../data/raw/{project_name}/{class_name}/'
    for class_name in object_classes
]

# Matching per-class folders inside the cut_and_paste_root directory.
output_dirs = [
    f'../data/raw/{project_name}/cut_and_paste_root/{class_name}/'
    for class_name in object_classes
]


In [28]:
def resize_images(input_dir, output_dir, size=(640//3, 480//3)):
    """Resize every '*.jpg' in ``input_dir`` and save it under the same name in ``output_dir``.

    Args:
        input_dir: Folder searched (non-recursively) for '*.jpg' files.
        output_dir: Destination folder; created together with any missing parents.
        size: Target size passed to ``Image.resize`` (Pillow expects (width, height)).
            The aspect ratio of the source image is not preserved.
    """
    # exist_ok avoids the check-then-create race and makes re-running the cell safe
    os.makedirs(output_dir, exist_ok=True)

    for img_file in glob.glob(os.path.join(input_dir, '*.jpg')):
        # The context manager releases the source file handle once the resized copy exists
        with Image.open(img_file) as img:
            resized = img.resize(size, Image.LANCZOS)
        resized.save(os.path.join(output_dir, os.path.basename(img_file)))

In [None]:
# Write a downscaled copy of each class's images next to the originals.
for class_dir in class_dirs:
    source_dir = os.path.join(class_dir, 'images')
    target_dir = os.path.join(class_dir, 'images_resized')
    resize_images(source_dir, target_dir)

In [None]:
def set_label_class_index(label_path, class_index):
    """Rewrite a Darknet label file in place so every box uses ``class_index``.

    Every non-blank line keeps its remaining fields; only the first field
    (the class index) is replaced. Blank lines are dropped, and an empty file
    stays empty.

    Args:
        label_path: Path of the label file to rewrite.
        class_index: Class index written as the first field of each line.
    """
    with open(label_path, 'r') as f:
        rows = [line.split() for line in f]

    # Blank lines hold no bounding box and would break the field replacement
    rows = [row for row in rows if row]
    for row in rows:
        row[0] = str(class_index)

    with open(label_path, 'w') as f:
        f.writelines(' '.join(row) + '\n' for row in rows)


# Change class index to 1 (hammer) on every box of every hammer label file
for label_path in glob.glob('../data/raw/kaggle_v0/hammer/labels/*.txt'):
    set_label_class_index(label_path, 1)

In [16]:

def read_darknet_bboxes(bbox_path, image_width, image_height):
    """Read bounding boxes from a Darknet label file and convert them to pixel coordinates.

    Each non-blank line must hold five fields:
    ``class_id x_center y_center width height``, with the four coordinates
    normalized by the image size. Blank lines are ignored.

    Args:
        bbox_path: Path of the Darknet label file.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer corner boxes, clamped to
        ``[0, image_width - 1]`` and ``[0, image_height - 1]``. The class id is
        validated but not returned.

    Raises:
        AssertionError: If a non-blank line does not have exactly five fields.
        ValueError: If a field cannot be parsed as a number.
    """
    def clamp(value, upper):
        # Keep a coordinate inside [0, upper]
        return max(0, min(value, upper))

    bboxes = []

    with open(bbox_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                # Tolerate blank lines (e.g. trailing whitespace at end of file)
                continue
            assert len(parts) == 5, f"Invalid bbox line: {line.strip()}"

            # Darknet format: class_id x_center y_center width height (normalized)
            int(parts[0])  # parsed only to reject a malformed class id
            x_center, y_center, width, height = (float(value) for value in parts[1:])

            # Convert from normalized coordinates to pixel coordinates
            x_center_px = x_center * image_width
            y_center_px = y_center * image_height
            half_width_px = width * image_width / 2
            half_height_px = height * image_height / 2

            # Convert to x1, y1, x2, y2 format and keep the corners inside the image
            bboxes.append([
                clamp(int(x_center_px - half_width_px), image_width - 1),
                clamp(int(y_center_px - half_height_px), image_height - 1),
                clamp(int(x_center_px + half_width_px), image_width - 1),
                clamp(int(y_center_px + half_height_px), image_height - 1),
            ])

    return bboxes

def segment_images_from_folder_bbox(class_dir, output_dir):
    """
    Segment the images of one class folder with the SAM model, prompted by bounding boxes.

    Assumes class_dir contains two folders: 'images' and 'labels'. Each image in
    'images' is paired with the label file in 'labels' that has the same base name
    (e.g. '1.jpg' -> '1.txt'). Label files use the Darknet format
    ``class_index x_center y_center width height`` with normalized coordinates,
    as parsed by read_darknet_bboxes.

    For every image that is segmented, two files are written under output_dir:
      * images/<image name>      - the image as re-encoded by OpenCV
      * masks/<stem>_mask.png    - the inverted first mask: pixels where the SAM
                                   mask is set become 0, all others 255
    A 'labels' subfolder is also created but is not populated by this function.

    Only the first mask returned by the model is used, so images with several
    bounding boxes get a mask for the first box only.

    Images are skipped (with a printed message) when they have no label file,
    cannot be read by OpenCV, have no bounding boxes, or yield no mask.
    """
    model = SAM("sam2.1_l.pt")

    # Create the output layout once; makedirs also creates missing parent folders
    for subdir in ['images', 'masks', 'labels']:
        os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

    label_dir = os.path.join(class_dir, 'labels')
    for image_path in sorted(glob.glob(os.path.join(class_dir, 'images', '*'))):
        image_name = os.path.basename(image_path)
        image_stem = os.path.splitext(image_name)[0]

        # Pair by file name rather than by sorted position: with positional pairing,
        # one missing label would silently shift every later image/label pair.
        bbox_path = os.path.join(label_dir, image_stem + '.txt')
        if not os.path.isfile(bbox_path):
            print(f"Skipping {image_path}: no label file {bbox_path}")
            continue

        # cv2.imread returns None instead of raising when the file cannot be decoded
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping {image_path}: could not be read as an image")
            continue
        image_height, image_width = image.shape[:2]

        bboxes = read_darknet_bboxes(bbox_path, image_width, image_height)
        if not bboxes:
            print(f"Skipping {image_path}: no bounding boxes in {bbox_path}")
            continue

        # Predict segmentation using the SAM model with bounding box
        results = model(image_path, bboxes=bboxes)
        masks = results[0].masks if results else None
        if masks is None or len(masks) == 0:
            print(f"Skipping {image_path}: model returned no mask")
            continue

        # Assuming single class segmentation for simplicity: only the first mask is kept
        mask = masks[0].data.squeeze().cpu().numpy().astype(np.uint8)
        if mask.shape[:2] != (image_height, image_width):
            # Nearest-neighbour keeps the mask strictly binary when rescaling
            mask = cv2.resize(mask, (image_width, image_height), interpolation=cv2.INTER_NEAREST)

        # Negate the mask before saving it
        negative_mask = 1 - mask

        cv2.imwrite(os.path.join(output_dir, 'images', image_name), image)
        cv2.imwrite(os.path.join(output_dir, 'masks', image_stem + '_mask.png'), negative_mask * 255)


In [17]:
# Prepare the cut_and_paste_root folder: it needs classes.txt plus one
# segmented sub-folder per class.
cut_and_paste_root = f'../data/raw/{project_name}/cut_and_paste_root/'
os.makedirs(cut_and_paste_root, exist_ok=True)

# shutil.copy raises if classes.txt is missing, instead of failing silently like `cp` via os.system
shutil.copy(f'../data/raw/{project_name}/classes.txt', cut_and_paste_root)

for class_dir, output_dir in zip(class_dirs, output_dirs):
    segment_images_from_folder_bbox(class_dir, output_dir)


image 1/1 /home/decla_5ay7wb/RIPS25-AnalogDevices-ObjectDetection/src/../data/raw/kaggle_v0/screwdriver/images/053e6bbb-40f916dd-8a37-43d1-ae0f-8466096ea779.jpg: 1024x1024 1 0, 243.0ms
Speed: 5.1ms preprocess, 243.0ms inference, 0.7ms postprocess per image at shape (1, 3, 1024, 1024)

image 1/1 /home/decla_5ay7wb/RIPS25-AnalogDevices-ObjectDetection/src/../data/raw/kaggle_v0/screwdriver/images/05c2f1ad-4f3c093b-5186-483c-a5ed-f1c1b379dcb5.jpg: 1024x1024 1 0, 217.9ms
Speed: 4.2ms preprocess, 217.9ms inference, 0.6ms postprocess per image at shape (1, 3, 1024, 1024)

image 1/1 /home/decla_5ay7wb/RIPS25-AnalogDevices-ObjectDetection/src/../data/raw/kaggle_v0/screwdriver/images/1582021c-6a5b5593-e15a-4beb-8d2c-e71ab1f8aaf9.jpg: 1024x1024 1 0, 217.5ms
Speed: 4.0ms preprocess, 217.5ms inference, 0.7ms postprocess per image at shape (1, 3, 1024, 1024)

image 1/1 /home/decla_5ay7wb/RIPS25-AnalogDevices-ObjectDetection/src/../data/raw/kaggle_v0/screwdriver/images/18d1f472-ed195d37-aeac-4039-81

In [18]:
# Run the Cut-and-Paste generator on the segmented objects.
# Arguments are passed as a list (no shell string building), and check=True
# raises CalledProcessError if the generator exits with a non-zero status
# instead of letting the failure go unnoticed.
subprocess.run(
    [
        'python', 'Cut-and-Paste/dataset_generator.py',
        '--scale', '--rotation', '--num', '1',
        f'../data/raw/{project_name}/cut_and_paste_root',
        f'../data/processed/{project_name}/cut_and_paste_data',
    ],
    check=True,
).returncode

Number of background images : 8128
List of distractor files collected: []
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/1_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/6_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/11_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/16_none.jpg


  backgrounds[i] = Image.fromarray(background_array, 'RGB')
  backgrounds[i] = Image.fromarray(background_array, 'RGB')
  backgrounds[i] = Image.fromarray(background_array, 'RGB')
  backgrounds[i] = Image.fromarray(background_array, 'RGB')
  blurred_img = Image.fromarray(blurred_img, 'RGB')


Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/2_none.jpg


  blurred_img = Image.fromarray(blurred_img, 'RGB')


Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/17_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/3_none.jpg


  blurred_img = Image.fromarray(blurred_img, 'RGB')


Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/12_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/18_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/4_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/13_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/5_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/19_none.jpg


  blurred_img = Image.fromarray(blurred_img, 'RGB')


Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/7_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/20_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/21_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/8_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/14_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/26_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/22_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/9_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/23_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/27_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/24_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/28_none.jpg
Working on ../data/processed/kaggle_v0/cut_and_paste_data/images/10_none.jpg
Wo

0

In [None]:
# Print the array shape of one generated image, as loaded by OpenCV.
# NOTE(review): this path uses the 'screwdriver_kaggle' project and a 'train/' split,
# not the project_name output generated above - confirm it is the intended file.
sample_image_path = '../data/processed/screwdriver_kaggle/cut_and_paste_data/train/images/1_box.jpg'
sample_image = cv2.imread(sample_image_path)
if sample_image is None:
    # cv2.imread returns None rather than raising when the file is missing or unreadable
    raise FileNotFoundError(f'Could not read image: {sample_image_path}')
print(sample_image.shape)

True
(1920, 2560, 3)
