# Artificial dataset generation
Since in this project I wasn't going to take thousands of images of the same playing cards, I only took one and the rest will be generated in this Jupyter notebook

In [1]:
import os
import numpy as np
import cv2
import random

Iterate through every image, make copies of it with several deformations, and store its location in a train.yaml file for YOLO to train with. Original pictures are saved as validation.

In [2]:
def add_salt_and_pepper_noise(image, prob):
    noisy_image = np.copy(image)
    black = 0
    white = 255
    probs = np.random.rand(image.shape[0], image.shape[1])
    noisy_image[probs < (prob / 2)] = black
    noisy_image[probs > 1 - (prob / 2)] = white
    return noisy_image

def add_gaussian_noise(image, mean=0, var=0.01):
    row, col, ch = image.shape
    sigma = var ** 0.5
    gaussian = np.random.normal(mean, sigma, (row, col, ch))
    noisy_image = image + gaussian * 255
    noisy_image = np.clip(noisy_image, 0, 255)
    return noisy_image.astype(np.uint8)

# https://stackoverflow.com/questions/43892506/opencv-python-rotate-image-without-cropping-sides 
def rotate_image(img, angle):
    size_reverse = np.array(img.shape[1::-1]) # swap x with y
    M = cv2.getRotationMatrix2D(tuple(size_reverse / 2.), angle, 1.)
    MM = np.absolute(M[:,:2])
    size_new = MM @ size_reverse
    M[:,-1] += (size_new - size_reverse) / 2.
    return cv2.warpAffine(img, M, tuple(size_new.astype(int)))

def apply_perspective_transform(image):
    h, w = image.shape[:2]
    pts1 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
    pts2 = np.float32([[random.randint(0, w//4), random.randint(0, h//4)], 
                       [w - random.randint(0, w//4), random.randint(0, h//4)], 
                       [random.randint(0, w//4), h - random.randint(0, h//4)], 
                       [w - random.randint(0, w//4), h - random.randint(0, h//4)]])
    
    matrix = cv2.getPerspectiveTransform(pts1, pts2)
    transformed_image = cv2.warpPerspective(image, matrix, (w, h))
    return transformed_image

In [3]:

# This function is used to deform the image via rotation or perspective transform
def move_around(image):
    noisetype = random.randint(0, 1)
    if noisetype == 0:
        noisyImg = rotate_image(image, angle=random.uniform(-45, 45))
    else:
        noisyImg = apply_perspective_transform(image)
    return noisyImg

# This function is used to add noises to the image
def add_noises(image):
    noisetype = random.randint(0, 2)
    if noisetype == 0:
        noisyImg = add_salt_and_pepper_noise(image, prob=0.05)
    elif noisetype == 1:
        noisyImg = add_gaussian_noise(image, mean = 0, var = 0.01)
    else:
        noisyImg = image * random.random()*2.0
        noisyImg = np.clip(noisyImg, 0, 255)

    more_noise = random.randint(0, 1)
    if more_noise == 0:
        return add_noises(noisyImg)
    else:
        return noisyImg

In [8]:
# Function to process images from Dataset folder and save to Segmented folder
def process_images(dataset_folder, output_folder, background_folder):
    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Generate a list of background images
    background_images = []
    for root, _, filelist in os.walk(background_folder):
        for filename in filelist:
            # Check if the file is an image (you can add more formats if needed)
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
                img_path = os.path.join(root, filename)
                img_path = os.path.join(background_folder, img_path)
                image = cv2.imread(img_path)
                if image is not None:
                    height, width, channels = image.shape
                    if height > 100 and width > 100:
                        background_images.append(image)

    # Initialize the card number
    cardnum = 0

    # Loop through all files in the dataset folder
    for filename in sorted(os.listdir(dataset_folder)):
        # Check if the file is an image (you can add more formats if needed)
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            # Read the image
            img_path = os.path.join(dataset_folder, filename)
            image = cv2.imread(img_path)

            if image is not None:
                # Process the image to remove black background and trim
                processed_image = move_around(image)
                gray = cv2.cvtColor(processed_image, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
                contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


                #Remove content outside the bounding box
                cv2.drawContours(thresh, contours, -1, color=(255, 255, 255), thickness=cv2.FILLED)
                processed_image = cv2.bitwise_and(processed_image, processed_image, mask=thresh)



                # Place noisy image on a random background on a random position
                background = random.choice(background_images)
                background = cv2.resize(background, (640, 640))

                rand_size = random.randint(100, 430)
                resized_img = cv2.resize(processed_image, (rand_size, int(rand_size*1.5)))

                #Find new bounding box
                gray = cv2.cvtColor(resized_img, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
                contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                x, y, w, h = cv2.boundingRect(contours[0])
                cv2.drawContours(thresh, contours, -1, color=(255, 255, 255), thickness=cv2.FILLED)

                # cv2.rectangle(processed_image, (x, y), (x+w, y+h), (0, 255, 0), 2)

                x_offset = random.randint(0, background.shape[1] - resized_img.shape[1])
                y_offset = random.randint(0, background.shape[0] - resized_img.shape[0])
                            
                mask = thresh != 0
                background[y_offset:y_offset+resized_img.shape[0], x_offset:x_offset+resized_img.shape[1]][mask]= resized_img[mask]
                processed_image = background

                cv2.rectangle(processed_image, (x+x_offset, y+y_offset), (x+x_offset+w, y+y_offset+h), (0, 255, 0), 2)

                # Print thresh on screen
                # resized_img = cv2.bitwise_and(resized_img, resized_img, mask=thresh)
                cv2.imshow('thresh', processed_image)
                cv2.waitKey(0)
                cv2.destroyAllWindows()


                # # Process the image to remove black background and trim
                # processed_image = add_noises(processed_image)
                
                # Save the processed image to the output folder
                output_path = os.path.join(output_folder, str(cardnum)+".png")
                cv2.imwrite(output_path, processed_image)
                print(f"Processed and saved: {output_path}")
                if cardnum == 8:
                    raise Exception("Stop")

        cardnum += 1

# Main execution
if __name__ == "__main__":
    dataset_folder = "Segmented"      # Folder containing the input images
    output_folder = "Training_Dataset"     # Folder to save the processed images
    background_folder = "/media/jaume/1C8E63228E62F3A4/Users/Jaume/Documents/Youtube" # Folder containing the background images

    process_images(dataset_folder, output_folder, background_folder)



Processed and saved: Training_Dataset/0.png
Processed and saved: Training_Dataset/1.png
Processed and saved: Training_Dataset/2.png
Processed and saved: Training_Dataset/3.png
Processed and saved: Training_Dataset/4.png
Processed and saved: Training_Dataset/5.png
Processed and saved: Training_Dataset/6.png
Processed and saved: Training_Dataset/7.png
Processed and saved: Training_Dataset/8.png


Exception: Stop