In [2840]:
import os
import random
import json
import numpy as np
import cv2
from PIL import Image, ImageDraw
from pycocotools import mask
from skimage import measure

In [2841]:
# Define the paths to the three folders containing the images
fg_folders = [
    ("/home/aldo/Documents/Test_Object_Detection/Starbucks_fotos_resized/","coffee" ),
    ("/home/aldo/Documents/Test_Object_Detection/Ciel_fotos_resized/", "water"),
    ("/home/aldo/Documents/Test_Object_Detection/Cel_fotos_resized/", "cellphone"),
]
bg_folder = "/home/aldo/Documents/Test_Object_Detection/backgrounds/"

In [2842]:
#put manually all the categories
annotations_dict = {
    "coffee": 0,
    "water": 1,
    "cellphone": 2
}
categories=[]

for categor in annotations_dict:
    categories.append({"id": annotations_dict[categor],"name": categor,})
print(categories)

[{'id': 0, 'name': 'coffee'}, {'id': 1, 'name': 'water'}, {'id': 2, 'name': 'cellphone'}]


In [2843]:
# Load the list of files in each of the three folders
fg_files = {}
for folder, category in fg_folders:
    fg_files[category] = os.listdir(folder)

In [2857]:
# Select a random number of objects (0 to 3) to place on the background image
num_objects = random.randint(0, 3)
num_objects

1

In [2858]:
# Select random foreground images from the three folders, with replacement
fg_categories = random.choices(["coffee", "water", "cellphone"], k=num_objects)
fg_files_selected = []
for category in fg_categories:
    fg_files_selected.append([category,random.choice(fg_files[category])])
fg_files_selected

[['coffee', '20230221_123213-transformed.png']]

In [2860]:
# Load the selected foreground images using Pillow
fg_imgs = []
for img in fg_files_selected:
    folder = [f[0] for f in fg_folders if f[1] == img[0]][0]
    fg_imgs.append([img[0],Image.open(folder + img[1]),folder+img[1]])

In [2861]:
# Randomly resize, rotate, and flip the foreground images using Pillow's transform module
for img in fg_imgs:
    fg_img=img[1]
    angle = random.randint(-45, 45)
    scale = random.uniform(0.3, 1.0)

    fg_img = fg_img.rotate(angle, resample=Image.BICUBIC, expand=True)
    fg_img = fg_img.resize((int(fg_img.width * scale), int(fg_img.height * scale)))

    img[1] = fg_img


In [2862]:
for img in fg_imgs:
    fg_img=img[1]
    blur_intensity = random.uniform(0, 1.0)*0.01
    kernel_size = (2 * round(blur_intensity * 20) + 1, 2 * round(blur_intensity * 20) + 1)
    fg_arr = np.array(fg_img)
    fg_arr = cv2.GaussianBlur(fg_arr, kernel_size, 0)
    img[1] = Image.fromarray(fg_arr)
    

In [2863]:
# Load the background image using Pillow
bg_files = os.listdir(bg_folder)
bg_file = random.choice(bg_files)
bg_img = Image.open(bg_folder + bg_file)


In [2864]:
# Define the maximum overlap as a percentage
max_overlap_pct = 10

# Calculate the maximum overlap area
max_overlap_area = (bg_img.width * bg_img.height) * (max_overlap_pct / 100)

# Define an array to keep track of occupied areas
occupied = np.zeros((bg_img.height, bg_img.width))

images=[]
annotations=[]

img_id=0
anno_id=0


for img in fg_imgs:
    fg_img=img[1]
    


    # Load the image with alpha channel
    image = cv2.imread(img[2], cv2.IMREAD_UNCHANGED)

    # Extract the alpha channel and convert it to a binary mask
    alpha = image[:,:,3]
    mask = alpha > 0

    # Find contours and polygonal approximations
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Format the segmentation coordinates in COCO format
    segmentation = []
    aux_segmentation = []
    for contour in contours:
        polygon = contour.flatten().tolist()
        if len(polygon) % 2 != 0:
            print("Error: polygon has an odd number of coordinates")
            continue
        polygon = np.array(polygon).reshape((-1, 2))
        aux_segmentation.append(polygon)
    
    for sublist in aux_segmentation[0]:
        for item in sublist:
            segmentation.append(item)


    # Calculate the maximum allowed position for the top-left corner
    max_x = bg_img.width - fg_img.width
    max_y = bg_img.height - fg_img.height
    max_area = fg_img.width * fg_img.height

        # Generate a random location until an unoccupied area is found that meets the overlap limit

    for i in range (10):
        x = random.randint(0, max_x)
        y = random.randint(0, max_y)

        # Calculate the overlap area
        overlap_area = np.sum(occupied[y:y+fg_img.height, x:x+fg_img.width])

        # Check if the area is unoccupied and the overlap limit is not exceeded
        if (max_overlap_area - max_area) >= np.sum(occupied):
            break
        if i==10:
            continue

    for i in range(0, len(segmentation)):
        if i % 2:
            i=i+x
        else :
            i=i+y
            
    # Update the occupied array
    occupied[y:y+fg_img.height, x:x+fg_img.width] = 1

    bg_img.paste(fg_img, (x, y), fg_img)
    annotations.append({"id": anno_id,"image_id": img_id,"category_id": annotations_dict[img[0]],"bbox": [x, y, fg_img.width, fg_img.height],"segmentation": segmentation,"area": 45969,"iscrowd": 0})
    anno_id=anno_id+1
    draw = ImageDraw.Draw(bg_img)
    draw.rectangle((x, y, x+fg_img.width, y+fg_img.height), outline='red', width=3)
images.append({"id": img_id, "file_name": bg_file,"height": bg_img.height,"width": bg_img.width})
img_id=img_id+1


In [2874]:
images=[]
annotations=[]

img_id=0
anno_id=0
for j in range(10):
    num_objects = random.randint(0, 3)
    print("number of objects",num_objects)
    # Select random foreground images from the three folders, with replacement
    fg_categories = random.choices(["coffee", "water", "cellphone"], k=num_objects)
    fg_files_selected = []
    for category in fg_categories:
        fg_files_selected.append([category,random.choice(fg_files[category])])
    print("seleccion",fg_files_selected)
    # Load the selected foreground images using Pillow
    fg_imgs = []
    for img in fg_files_selected:
        folder = [f[0] for f in fg_folders if f[1] == img[0]][0]
        fg_imgs.append([img[0],Image.open(folder + img[1]),folder+img[1]])

    # Randomly resize, rotate, and flip the foreground images using Pillow's transform module
    for img in fg_imgs:
        fg_img=img[1]
        angle = random.randint(-45, 45)
        scale = random.uniform(0.3, 1.0)

        fg_img = fg_img.rotate(angle, resample=Image.BICUBIC, expand=True)
        fg_img = fg_img.resize((int(fg_img.width * scale), int(fg_img.height * scale)))

        img[1] = fg_img

    #change bluur randomly
    for img in fg_imgs:
        fg_img=img[1]
        blur_intensity = random.uniform(0, 1.0)*0.01
        kernel_size = (2 * round(blur_intensity * 20) + 1, 2 * round(blur_intensity * 20) + 1)
        fg_arr = np.array(fg_img)
        fg_arr = cv2.GaussianBlur(fg_arr, kernel_size, 0)
        img[1] = Image.fromarray(fg_arr)

    # Load the background image using Pillow
    bg_files = os.listdir(bg_folder)
    bg_file = random.choice(bg_files)
    bg_img = Image.open(bg_folder + bg_file)


    # Define the maximum overlap as a percentage
    max_overlap_pct = 10

    # Calculate the maximum overlap area
    max_overlap_area = (bg_img.width * bg_img.height) * (max_overlap_pct / 100)

    # Define an array to keep track of occupied areas
    occupied = np.zeros((bg_img.height, bg_img.width))



    for img in fg_imgs:
        fg_img=img[1]
        


        # Load the image with alpha channel
        image = cv2.imread(img[2], cv2.IMREAD_UNCHANGED)

        # Extract the alpha channel and convert it to a binary mask
        alpha = image[:,:,3]
        mask = alpha > 0

        # Find contours and polygonal approximations
        contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Format the segmentation coordinates in COCO format
        segmentation = []
        aux_segmentation = []
        for contour in contours:
            polygon = contour.flatten().tolist()
            if len(polygon) % 2 != 0:
                print("Error: polygon has an odd number of coordinates")
                continue
            polygon = np.array(polygon).reshape((-1, 2))
            aux_segmentation.append(polygon)
        polygon_area = cv2.contourArea(aux_segmentation[0])
        for sublist in aux_segmentation[0]:
            for item in sublist:
                segmentation.append(item)


        # Calculate the maximum allowed position for the top-left corner
        max_x = bg_img.width - fg_img.width
        max_y = bg_img.height - fg_img.height
        max_area = fg_img.width * fg_img.height

            # Generate a random location until an unoccupied area is found that meets the overlap limit

        for i in range (10):
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)

            # Calculate the overlap area
            overlap_area = np.sum(occupied[y:y+fg_img.height, x:x+fg_img.width])

            # Check if the area is unoccupied and the overlap limit is not exceeded
            if (max_overlap_area - max_area) >= np.sum(occupied):
                break
            if i==10:
                continue

        for i in range(0, len(segmentation)):
            if i % 2:
                i=i+x
            else :
                i=i+y
                
        # Update the occupied array
        occupied[y:y+fg_img.height, x:x+fg_img.width] = 1

        bg_img.paste(fg_img, (x, y), fg_img)
        annotations.append({"id": anno_id,"image_id": img_id,"category_id": annotations_dict[img[0]],"bbox": [x, y, fg_img.width, fg_img.height],"segmentation": segmentation,"area": polygon_area,"iscrowd": 0})
        anno_id=anno_id+1
        #draw = ImageDraw.Draw(bg_img)
        #draw.rectangle((x, y, x+fg_img.width, y+fg_img.height), outline='red', width=3)
    bg_img.save(str(img_id)+".jpg", quality=100)
    images.append({"id": img_id, "file_name": str(img_id)+".jpg","height": bg_img.height,"width": bg_img.width})
    img_id=img_id+1



number of objects 1
seleccion [['coffee', '20230221_123049-transformed.png']]
number of objects 0
seleccion []
number of objects 0
seleccion []
number of objects 0
seleccion []
number of objects 1
seleccion [['coffee', '20230221_123215-transformed.png']]
number of objects 3
seleccion [['cellphone', '20230221_123134-transformed.png'], ['coffee', '20230221_123049-transformed.png'], ['coffee', '20230221_123218-transformed.png']]
number of objects 3
seleccion [['cellphone', '20230221_123137-transformed.png'], ['water', '20230221_123207-transformed.png'], ['coffee', '20230221_123052-transformed.png']]
number of objects 3
seleccion [['water', '20230221_123149-transformed.png'], ['water', '20230221_123201-transformed.png'], ['coffee', '20230221_123213-transformed.png']]
number of objects 2
seleccion [['water', '20230221_123204-transformed.png'], ['water', '20230221_123200-transformed.png']]
number of objects 3
seleccion [['cellphone', '20230221_123135-transformed.png'], ['cellphone', '2023022

In [2875]:
# Define the COCO dictionary
coco_dict = {
    "images": images,
    "annotations": annotations,
    "categories": categories
}

In [2876]:
coco_dict

{'images': [{'id': 9, 'file_name': '9.jpg', 'height': 480, 'width': 740}],
 'annotations': [{'id': 13,
   'image_id': 9,
   'category_id': 2,
   'bbox': [574, 179, 96, 93],
   'segmentation': [6, 68],
   'area': 0.0,
   'iscrowd': 0},
  {'id': 14,
   'image_id': 9,
   'category_id': 2,
   'bbox': [331, 283, 124, 118],
   'segmentation': [29,
    5,
    28,
    6,
    25,
    6,
    24,
    7,
    23,
    7,
    22,
    8,
    21,
    8,
    16,
    13,
    16,
    14,
    15,
    15,
    15,
    17,
    14,
    18,
    14,
    21,
    13,
    22,
    13,
    26,
    12,
    27,
    12,
    31,
    11,
    32,
    11,
    35,
    10,
    36,
    10,
    40,
    9,
    41,
    9,
    44,
    8,
    45,
    8,
    49,
    7,
    50,
    7,
    53,
    6,
    54,
    6,
    58,
    5,
    59,
    5,
    63,
    4,
    64,
    4,
    67,
    5,
    68,
    5,
    70,
    6,
    71,
    6,
    74,
    7,
    75,
    7,
    76,
    12,
    81,
    13,
    81,
    14,
    82,
    15,
    82,
 