# Dataset generator

This notebook contains the code required to segment photos of a deck of cards taken against a black background. The segmented cards are then saved to another directory.

We start by installing dependencies (the code below also imports `cv2`, which is provided by the `opencv-python` package):

In [1]:
pip install numpy


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


Verify imports:

In [3]:
import os
import numpy as np
import cv2

The only region we are interested in is the one that belongs to the biggest "blob" (the card), so we simply find the biggest blob and ignore the rest.
This works because the background is black to begin with (dataset preparation).

In [17]:

# Isolate the largest bright blob of an image and crop the image to it
def remove_black_background_and_trim(image):
    """Keep only the largest bright region of ``image`` and crop to it.

    The image is converted from BGR to grayscale and binarised with Otsu's
    method. The external contour with the largest area is filled into a mask,
    every pixel outside that mask is set to zero, and the result is cropped
    to the bounding rectangle of the mask.

    If no contour is found, a message is printed and ``image`` is returned
    unchanged.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # THRESH_OTSU picks the threshold automatically; the 0 is only a placeholder.
    _, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Outer contours only: holes inside a blob are not reported separately.
    blobs, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Guard clause: nothing bright enough was detected.
    if not blobs:
        print("No blobs found in the image.")
        return image

    biggest_blob = max(blobs, key=cv2.contourArea)

    # Start from an all-black mask and paint the biggest blob solid white.
    blob_mask = np.zeros_like(grayscale)
    cv2.drawContours(blob_mask, [biggest_blob], -1, 255, thickness=cv2.FILLED)

    # Pixels outside the mask become black; pixels inside are kept as they are.
    masked = cv2.bitwise_and(image, image, mask=blob_mask)

    # Trim to the bounding rectangle of the mask's non-zero pixels.
    left, top, width, height = cv2.boundingRect(blob_mask)
    return masked[top:top + height, left:left + width]




Then, we process these images and store them under a four-character name built, in this order, from the following convention:

* **c/r/w:** Cylinder / Rhombus / Wave
* **l/g/d:** Light / Gradient / Dark
* **g/r/v:** Green / Red / Violet
* **1/2/3:** Number of figures on the card

In [None]:
# Function to process images from Dataset folder and save to Segmented folder
def process_images(dataset_folder, output_folder):
    """Segment every card image in ``dataset_folder`` and save it under its card code.

    Image files (.png, .jpg, .jpeg, .bmp) are visited in sorted filename
    order. The image at position ``n`` (0-based, counting image files only)
    is saved to ``output_folder`` as ``<figure><filling><color><number>.png``,
    where figure cycles fastest (``n % 3``) and number slowest (``n // 27``).

    Files that are not images are ignored and do not consume a position, so
    stray files in the folder cannot shift the naming of later cards. An
    image that cannot be read is reported and still consumes its position.
    Images beyond the number of distinct card codes are reported and skipped.
    """
    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Characters used for each attribute of the card code
    figure = ['c', 'r', 'w']
    filling = ['l', 'g', 'd']
    color = ['g', 'r', 'v']
    number = ['1', '2', '3']

    # Number of distinct codes the convention can express (3 * 3 * 3 * 3)
    max_cards = len(figure) * len(filling) * len(color) * len(number)

    cardnum = 0

    # Loop through all files in the dataset folder
    for filename in sorted(os.listdir(dataset_folder)):
        # Only image files take part in the numbering (you can add more formats if needed)
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            continue

        img_path = os.path.join(dataset_folder, filename)

        # Every card code is already taken: there is no valid name left for this image
        if cardnum >= max_cards:
            print(f"Skipping {img_path}: more than {max_cards} images in the dataset folder.")
            continue

        image = cv2.imread(img_path)

        if image is not None:
            # Process the image to remove black background and trim
            processed_image = remove_black_background_and_trim(image)

            # Build the card code from the position of the image in the dataset
            card_name = (
                figure[cardnum % 3]
                + filling[cardnum // 3 % 3]
                + color[cardnum // 9 % 3]
                + number[cardnum // 27]
                + ".png"
            )
            output_path = os.path.join(output_folder, card_name)
            cv2.imwrite(output_path, processed_image)
            print(f"Processed and saved: {output_path}")
        else:
            print(f"Could not read image, skipping: {img_path}")

        # The position advances once per image file, readable or not
        cardnum += 1

# Script entry point: segment every image in "Dataset" into "Segmented"
if __name__ == "__main__":
    # Input folder with the raw photos, then the folder receiving the segmented cards
    dataset_folder, output_folder = "Dataset", "Segmented"

    process_images(dataset_folder, output_folder)