### Imports

In [None]:
import os
import random
import cv2
import numpy as np

import sys
sys.path.append('..')
from global_utils import visualize_image

### Parameter Dashboard

In [None]:
# Parameters
# Side length range of the generated canvas (pixels); width and height are
# each drawn from this range.
min_image_size, max_image_size = 800, 1200

# Side length range each component is resized to before pasting (pixels)
min_component_size, max_component_size = 80, 160

# Number of times each component image is used across the dataset
uses_per_component = 2

# Share of the placed components that goes to the validation split (0-1)
val_ratio = 0.15

# Dataset root; the class folders and the generated images/labels trees
# all live underneath it.
components_dataset = '../dataset/components'
images_folder, labels_folder = (
    os.path.join(components_dataset, subfolder)
    for subfolder in ('images', 'labels')
)

# Train / validation image folders
train_images_folder, val_images_folder = (
    os.path.join(components_dataset, relative)
    for relative in ('images/train', 'images/val')
)

# Train / validation label folders
train_labels_folder, val_labels_folder = (
    os.path.join(components_dataset, relative)
    for relative in ('labels/train', 'labels/val')
)

In [None]:
# Output folders written by the generation cell (images and labels, train and val)
output_folders = (
    train_images_folder,
    val_images_folder,
    train_labels_folder,
    val_labels_folder,
)

# Create the whole output tree. `exist_ok=True` also repairs the case where
# a parent folder (e.g. images/) already exists but its train/val subfolder
# does not, which a plain "create only if the parent is missing" check skips.
for output_folder in output_folders:
    os.makedirs(output_folder, exist_ok=True)

# Get component folders: every sub-directory of the dataset root except the
# generated images/labels trees. Stray files (e.g. .DS_Store) are ignored so
# they neither shift the class indices nor break os.listdir() later on.
folders = sorted(
    entry for entry in os.listdir(components_dataset)
    if entry not in ('images', 'labels')
    and os.path.isdir(os.path.join(components_dataset, entry))
)

# Save original (sorted) folders for later: the position of a folder in this
# list is used as its class index.
original_folders = folders.copy()

# Remove the files left over from a previous run in every output folder.
# Only regular files are removed; sub-directories are left untouched.
for output_folder in output_folders:
    for file in os.listdir(output_folder):
        file_path = os.path.join(output_folder, file)
        if os.path.isfile(file_path):
            os.remove(file_path)


### Dataset Generation

In [None]:
# Total number of components placed so far
total_components = 0

# Number of times each component image (keyed by its path) has been placed
placement_counts = {}

# Image number
image_number = 0

# Count the available component images instead of hard-coding the total, so
# the loop terminates for any dataset size.
available_components = sum(
    len(os.listdir(os.path.join(components_dataset, class_folder)))
    for class_folder in original_folders
)
target_components = uses_per_component * available_components

# Components whose path contains one of these substrings may be rotated in
# steps of 90 degrees.
rotatable_keywords = ('battery', 'cap', 'dc_volt_src_2', 'diode', 'inductor', 'resistor')

# All background shapes are drawn in white
shape_color = (255, 255, 255)

while total_components < target_components:
    # The first (1 - val_ratio) share of the placements goes to the train
    # split, the remainder to the validation split.
    if total_components < target_components * (1 - val_ratio):
        folder_type = 'train'
    else:
        folder_type = 'val'

    # Output paths
    output_image_path = os.path.join(components_dataset, 'images', folder_type, f'image_{image_number}.jpg')
    output_label_path = os.path.join(components_dataset, 'labels', folder_type, f'image_{image_number}.txt')

    # Create a bigger (black) image; image_size is (width, height)
    image_size = (random.randint(min_image_size, max_image_size), random.randint(min_image_size, max_image_size))
    bigger_image = np.zeros((image_size[1], image_size[0], 3), dtype=np.uint8)

    # Add random lines, circles, and rectangles as background clutter
    for _ in range(random.randint(10, 15)):
        # Randomly select the shape to draw (lines are the most likely)
        shape = random.randint(0, 10)
        if shape <= 6:
            # Draw a line
            x1 = random.randint(0, image_size[0])
            y1 = random.randint(0, image_size[1])
            x2 = random.randint(0, image_size[0])
            y2 = random.randint(0, image_size[1])
            thickness = random.randint(1, 3)
            cv2.line(bigger_image, (x1, y1), (x2, y2), shape_color, thickness)
        elif shape <= 8:
            # Draw a circle
            x = random.randint(0, image_size[0])
            y = random.randint(0, image_size[1])
            radius = random.randint(10, 50)
            thickness = random.randint(1, 3)
            cv2.circle(bigger_image, (x, y), radius, shape_color, thickness)
        else:
            # Draw a rectangle
            x1 = random.randint(0, image_size[0])
            y1 = random.randint(0, image_size[1])
            x2 = random.randint(0, image_size[0])
            y2 = random.randint(0, image_size[1])
            thickness = random.randint(1, 3)
            cv2.rectangle(bigger_image, (x1, y1), (x2, y2), shape_color, thickness)

    # Randomly select number of components to place on the image
    max_num_components_on_image = random.randint(3, 7)
    num_components_on_image = 0

    # Randomly shuffle the order of the folders
    random.shuffle(folders)

    # Locations (x, y, width, height) of the components already on this
    # image, used to prevent overlapping
    component_locations = []

    # YOLO label lines of this image; written in one go after placement
    label_lines = []

    # Randomly select components from the shuffled folders
    for folder in folders:
        folder_path = os.path.join(components_dataset, folder)
        components = os.listdir(folder_path)
        random.shuffle(components)

        # The class index is the folder's position in the sorted folder list
        class_label = original_folders.index(folder)

        # Paste at most one component of this folder onto the bigger image
        for component in components:
            component_path = os.path.join(folder_path, component)

            # Skip components that already reached their usage quota
            count = placement_counts.get(component_path, 0)
            if count >= uses_per_component:
                continue

            component_image = cv2.imread(component_path)
            if component_image is None:
                # Not a readable image: retire it and lower the target so
                # the outer loop still terminates.
                print(f"skipping unreadable component: {component_path}")
                target_components -= uses_per_component - count
                placement_counts[component_path] = uses_per_component
                continue

            component_image_size = random.randint(min_component_size, max_component_size)
            component_image = cv2.resize(component_image, (component_image_size, component_image_size))

            # Randomly rotate the component (steps of 90 degrees)
            if any(keyword in component_path for keyword in rotatable_keywords):
                component_image = np.rot90(component_image, random.randint(0, 3))

            component_height, component_width = component_image.shape[:2]

            # Randomly select position to paste the component
            x = random.randint(0, image_size[0] - component_width)
            y = random.randint(0, image_size[1] - component_height)

            # Check if the component overlaps with other components
            overlap = any(
                loc_x - component_width < x < loc_x + loc_w
                and loc_y - component_height < y < loc_y + loc_h
                for loc_x, loc_y, loc_w, loc_h in component_locations
            )
            if overlap:
                # Not placed, so it must not count against the usage quota;
                # try the next component of this folder instead.
                continue

            # The component is really placed now: update the bookkeeping
            placement_counts[component_path] = count + 1
            num_components_on_image += 1
            total_components += 1
            component_locations.append((x, y, component_width, component_height))

            # Paste component onto the bigger image
            bigger_image[y:y + component_height, x:x + component_width] = component_image

            # Tight bounding box around the non-black pixels of the component
            rows, cols = np.nonzero(component_image)[:2]
            if rows.size:
                min_x, max_x = int(cols.min()), int(cols.max())
                min_y, max_y = int(rows.min()), int(rows.max())
            else:
                # Completely black component: fall back to the full patch
                min_x, max_x = 0, component_width - 1
                min_y, max_y = 0, component_height - 1

            # Label in YOLO format (class, x_center, y_center, width, height),
            # normalised by the image size. Pixel indices are inclusive, so
            # the box spans max - min + 1 pixels.
            label = [
                class_label,
                (x + (min_x + max_x + 1) / 2) / image_size[0],
                (y + (min_y + max_y + 1) / 2) / image_size[1],
                (max_x - min_x + 1) / image_size[0],
                (max_y - min_y + 1) / image_size[1],
            ]
            label_lines.append(' '.join(map(str, label)))

            break  # Only place one component from each folder

        if num_components_on_image >= max_num_components_on_image:
            break

    # Nothing could be placed: every remaining component turned out to be
    # unreadable, so the (lowered) target is now reached and the loop ends.
    if num_components_on_image == 0:
        continue

    # Write all labels of this image at once; 'w' keeps the file free of
    # stale lines when the cell is re-run.
    with open(output_label_path, 'w') as f:
        f.write('\n'.join(label_lines) + '\n')

    # Now add noise (uint8 so it matches the image depth)
    noise = np.random.randint(0, 256, bigger_image.shape, dtype=np.uint8)

    # Add noise to the image
    bigger_image = cv2.addWeighted(bigger_image, 0.75, noise, 0.25, 0, dtype=cv2.CV_8U)

    # To grayscale
    bigger_image = cv2.cvtColor(bigger_image, cv2.COLOR_BGR2GRAY)

    # Save the output image
    print(f"saving image to: {output_image_path}")
    if image_number % 20 == 0:
        print(f"uses_per_component {uses_per_component} and total_components {total_components}")
    # cv2.imwrite reports failure through its return value
    if not cv2.imwrite(output_image_path, bigger_image):
        raise IOError(f"could not write image to: {output_image_path}")

    image_number += 1

### Show labels

In [None]:
# Get image and label folders of the validation split
images_folder = os.path.join(components_dataset, 'images', 'val')
labels_folder = os.path.join(components_dataset, 'labels', 'val')

# Randomly select one of the generated images; other files in the folder
# are ignored because they have no matching label file.
image_files = [name for name in os.listdir(images_folder) if name.endswith('.jpg')]
if not image_files:
    raise FileNotFoundError(f"no .jpg images found in: {images_folder}")
image_file = random.choice(image_files)

# Get the image and label paths; the label shares the image's base name
output_image_path = os.path.join(images_folder, image_file)
output_label_path = os.path.join(labels_folder, os.path.splitext(image_file)[0] + '.txt')

# Visualize the output image
visualize_image(output_image_path, output_label_path)