# Dataset Creation
This notebook guides you through creating a dataset from the previously extracted card images. The dataset will be used to train a model to detect cards in images.

Let's get started!

## Hull Tests
**Run the `test_hull.py` script to check that the bounding boxes and the hulls inside them are correct. If no cards are shown, change the TWEAK values in the `hull.py` script until `test_hull.py` returns an image with bounding boxes and hulls every time it is run (run it several times; don't stop at the first correct run).**

**Now that you have verified that the hulls work, you can run this entire Jupyter Notebook file.**

In [1]:
import os

# Directory the notebook is launched from (the current working directory).
ROOT = os.getcwd()
# Parent directory of ROOT; the project paths in the cells below are built from it.
HOME = os.path.split(ROOT)[0]

# Hull cards
Now we can find the hull of each card and save it in a pickle file.

*Some images may be discarded if their hull is not found. Don't panic, this is normal.*

In [2]:
import pickle
from glob import glob
from global_variables import *
from functions import *
from hull import findHull

# The directory where the card images are stored (one sub-directory per card name)
imgs_dir = os.path.join(HOME, "dataset_creation/data/cards")

# The file where the card data will be saved
cards_pck_fn = os.path.join(HOME, "dataset_creation/data/cards.pck")

# Maps each card name to its list of (image, hullHL, hullLR) tuples
cards = {}

# Number of images discarded for the card currently being processed
counter = 0

# Loop over each suit and value to process each card
for suit in card_suits:
    for value in card_values:
        # The name of the card is the value followed by the suit
        card_name = value + suit

        # The directory where the images for this card are stored
        card_dir = os.path.join(imgs_dir, card_name)

        # If the directory does not exist, print a warning and skip to the next card
        if not os.path.isdir(card_dir):
            print(f"!!! {card_dir} does not exist !!!")
            continue

        # Initialize an empty list to store the images for this card
        cards[card_name] = []

        # Loop over each image file in the card's directory
        for f in glob(card_dir + "/*.png"):
            # Read the image file, keeping all of its channels
            img = cv2.imread(f, cv2.IMREAD_UNCHANGED)

            # cv2.imread returns None (it does not raise) when a file cannot be
            # read; count such a file as discarded instead of passing None on
            if img is None:
                counter += 1
                continue

            # Find the convex hull for the top-left corner of the card
            hullHL = findHull(img, refCornerHL, debug="no")

            # If no hull was found, increment the counter and skip to the next image
            if hullHL is None:
                counter += 1
                continue

            # Find the convex hull for the bottom-right corner of the card
            hullLR = findHull(img, refCornerLR, debug="no")

            # If no hull was found, increment the counter and skip to the next image
            if hullLR is None:
                counter += 1
                continue

            # Reorder the channels from OpenCV's BGRA to RGBA
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)

            # Add the image and its hulls to the list for this card
            cards[card_name].append((img, hullHL, hullLR))

        # Print the number of images used and discarded for this card
        print(f"Images used for {card_name} : {len(cards[card_name])}")
        print(f"Images discarded for {card_name} : {counter}")

        # Reset the counter for the next card
        counter = 0

# Save the card data; the context manager flushes and closes the file even
# if pickling fails part-way through
with open(cards_pck_fn, 'wb') as pck_file:
    pickle.dump(cards, pck_file)

# Only report success once the file has actually been written
print("Saved in :", cards_pck_fn)

# Close any openCV windows
cv2.destroyAllWindows()

Images used for 1a : 144
Images discarded for 1a : 6
Images used for 2a : 128
Images discarded for 2a : 5
Images used for 3a : 133
Images discarded for 3a : 2
Images used for 4a : 114
Images discarded for 4a : 3
Images used for 5a : 150
Images discarded for 5a : 0
Images used for 1p : 150
Images discarded for 1p : 0
Images used for 2p : 138
Images discarded for 2p : 0
Images used for 3p : 150
Images discarded for 3p : 0
Images used for 4p : 138
Images discarded for 4p : 0
Images used for 5p : 150
Images discarded for 5p : 0
Images used for 1o : 79
Images discarded for 1o : 12
Images used for 2o : 150
Images discarded for 2o : 0
Images used for 3o : 110
Images discarded for 3o : 40
Images used for 4o : 129
Images discarded for 4o : 21
Images used for 5o : 133
Images discarded for 5o : 17
Images used for 1b : 57
Images discarded for 1b : 0
Images used for 2b : 141
Images discarded for 2b : 0
Images used for 3b : 112
Images discarded for 3b : 0
Images used for 4b : 94
Images discarded for

# Load the cards
We load the contents of the `cards.pck` file into a `cards` variable.

In [3]:
import pickle
from global_variables import *

# The file where the card data is stored.
# This is the same path the hull-extraction cell above wrote the pickle to.
cards_pck_fn = os.path.join(HOME, "dataset_creation/data/cards.pck")

class Cards():
    """
    Load card data from a pickle file and hand out random cards from it.

    Attributes:
        _cards (dict): A dictionary where keys are card names (ex: '1a') and values are lists of (img, hullHL, hullLR).
        _nb_cards_by_value (dict): A dictionary where keys are card names and values are the number of images stored for that name.
    """

    def __init__(self, cards_pck_fn=cards_pck_fn):
        """
        Load the card dictionary from disk and print how many images each card has.

        Parameters:
            cards_pck_fn (str): The path to the pickle file containing the card data.
        """
        # NOTE: unpickling can execute arbitrary code; only load a file that
        # was produced by this notebook.
        # The context manager closes the file as soon as loading is done.
        with open(cards_pck_fn, 'rb') as pck_file:
            self._cards = pickle.load(pck_file)
        self._nb_cards_by_value = {name: len(images) for name, images in self._cards.items()}
        print("Cards loaded per name :", self._nb_cards_by_value)

    def get_random(self, card_name=None, display=False):
        """
        Return a randomly chosen image of a card together with its hulls.

        Parameters:
            card_name (str): The name of the card to get. If None, a card is
                chosen at random among the cards that have at least one image.
                Default is None.
            display (bool): Whether to display the card image. Default is False.

        Returns:
            tuple: (card image, card name, first hull, second hull).

        Raises:
            KeyError: If `card_name` is not a known card name.
            ValueError: If no image is available for the requested card, or
                for any card when `card_name` is None.
        """
        if card_name is None:
            # Skip cards whose images were all discarded, so a random draw
            # can never land on an empty list
            available = [name for name, images in self._cards.items() if images]
            if not available:
                raise ValueError("No card images are available")
            card_name = random.choice(available)

        images = self._cards[card_name]
        if not images:
            raise ValueError(f"No images available for card '{card_name}'")

        card, hull1, hull2 = random.choice(images)
        if display:
            display_img(card, [hull1, hull2], "rgb")
        return card, card_name, hull1, hull2

# Create an instance of the Cards class.
# This rebinds `cards` (the plain dictionary built in the hull cell) to the
# loader object whose get_random() the scenario cells below call.
cards = Cards()

Cards loaded per name : {'1a': 144, '2a': 128, '3a': 133, '4a': 114, '5a': 150, '1p': 150, '2p': 138, '3p': 150, '4p': 138, '5p': 150, '1o': 79, '2o': 150, '3o': 110, '4o': 129, '5o': 133, '1b': 57, '2b': 141, '3b': 112, '4b': 94, '5b': 148}


# Creation of 2-cards and 3-cards scenarios
Next, we create the 2-card and 3-card scenarios that will be used to train the model.

In [None]:
import os
from tqdm import tqdm
from background_random import backgrounds
from cards_scenario import Scene

# Two-card scene generator
def generate_scenarios(num_scenarios, save_dir):
    """
    Build and save the requested number of two-card scenes.

    Parameters:
    num_scenarios (int): How many scenes to create.
    save_dir (str): The directory handed to each scene's write_files().
    """
    for _ in tqdm(range(num_scenarios)):
        # Pick the backdrop first, then draw the cards one after the other
        background = backgrounds.get_random()
        # Each draw is an (image, card name, hull a, hull b) tuple
        first_card = cards.get_random()
        second_card = cards.get_random()
        # Scene takes the background followed by the four fields of each card
        scene = Scene(background, *first_card, *second_card)
        # Persist the composed scene
        scene.write_files(save_dir)

# The directory where the model training data is stored (named after `dataset_name`)
model_training_dir = os.path.join(HOME, "model_training/" + dataset_name)
# The types of scenarios to generate (one sub-directory per dataset split)
scenario_types = ['train', 'val', 'test']
# The number of scenarios to generate for each type, in the same order as scenario_types.
# The 3-card cell further down reads this same list.
num_scenarios = [14000, 3000, 3000]  # Number of scenarios for train, val, test respectively

# Loop over each scenario type, paired with its number of scenarios
for scenario_type, num in zip(scenario_types, num_scenarios):
    # The directory where the scenarios of this type should be saved
    save_dir = os.path.join(model_training_dir, scenario_type, "images")
    # Create the directory if it does not exist
    os.makedirs(save_dir, exist_ok=True)
    # Generate the scenarios
    generate_scenarios(num, save_dir)
    
# Print a message indicating that the scenarios have been generated
print("Scenarios with 2 cards generated")

C:\Users\macma\PycharmProjects\yolo-card-trainer\src\dataset_creation
Nb of images loaded : 5640


  8%|▊         | 1187/14000 [02:00<18:20, 11.65it/s] 

In [None]:
import os
from tqdm import tqdm
from background_random import backgrounds
from cards_scenario import Scene

def generate_scenarios(num_scenarios, save_dir):
    """
    Build and save the requested number of three-card scenes.

    Parameters:
    num_scenarios (int): How many scenes to create.
    save_dir (str): The directory handed to each scene's write_files().
    """
    for _ in tqdm(range(num_scenarios)):
        # Pick the backdrop first, then draw the cards one after the other
        background = backgrounds.get_random()
        # Each draw is an (image, card name, hull a, hull b) tuple
        first_card = cards.get_random()
        second_card = cards.get_random()
        third_card = cards.get_random()
        # Scene takes the background followed by the four fields of each card
        scene = Scene(background, *first_card, *second_card, *third_card)
        # Persist the composed scene
        scene.write_files(save_dir)

# The directory where the model training data is stored (named after `dataset_name`)
model_training_dir = os.path.join(HOME, "model_training/" + dataset_name)
# The types of scenarios to generate (one sub-directory per dataset split)
scenario_types = ['train', 'val', 'test']

# Loop over each scenario type.
# NOTE: `num_scenarios` is not defined in this cell; it is the list created by
# the 2-card cell above, so that cell must have been run first.
for scenario_type, num in zip(scenario_types, num_scenarios):
    # The directory where the scenarios of this type should be saved
    save_dir = os.path.join(model_training_dir, scenario_type, "images")
    # Create the directory if it does not exist
    os.makedirs(save_dir, exist_ok=True)
    # Generate the scenarios (this calls the 3-card generate_scenarios defined in this cell)
    generate_scenarios(num, save_dir)

# Print a message indicating that the scenarios have been generated
print("Scenarios with 3 cards generated")

# Creating labels
Creating labels from the XML file of each image: now that we have the scenarios with their respective `.xml` files, we can translate them into YOLO `.txt` files and create the labels accordingly. We then remove all the XML files, which are no longer needed.

In [None]:
import os
import subprocess
import shutil
import glob

def create_dir_if_not_exists(directory):
    """
    Create a directory, including any missing parents, if it does not already exist.

    Parameters:
    directory (str): The path of the directory to create.

    Raises:
    FileExistsError: If the path already exists but is not a directory.
    """
    # exist_ok=True replaces the separate isdir() check, so there is no window
    # between "check" and "create" in which another process can interfere
    os.makedirs(directory, exist_ok=True)

def run_command(command):
    """
    Run a command as a child process and wait for it to finish.

    The command is executed directly from its argument list (no shell is
    involved).

    Parameters:
    command (list): The command to run, as a list of strings.

    Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    # check=True turns a failing command into an exception instead of letting
    # the notebook carry on as if the step had succeeded
    subprocess.run(command, check=True)

import sys

# Directories where the labels will be stored
labels_dirs = [os.path.join(model_training_dir, "train/labels"), os.path.join(model_training_dir, "val/labels"),
               os.path.join(model_training_dir, "test/labels")]

# Path to the script that converts VOC annotations to YOLO format
convert_voc_yolo_dir = os.path.join(HOME, "dataset_creation/convert_voc_yolo.py")

# Directories where the images are stored
images_dirs = [os.path.join(model_training_dir, "train/images"), os.path.join(model_training_dir, "val/images"),
               os.path.join(model_training_dir, "test/images")]

# Path to the file containing the names of the cards
cards_names_dir = os.path.join(HOME, "dataset_creation/data/cards.names")

# One conversion command per images directory.
# sys.executable is the interpreter running this notebook, so the script runs
# in the same environment instead of whichever "python" is first on PATH.
commands = [
    [sys.executable, convert_voc_yolo_dir, images_dir, cards_names_dir]
    for images_dir in images_dirs
]

# Run each command
for command in commands:
    run_command(command)

def move_txt_files(source_dir, destination_dir):
    """
    Relocate every file whose name ends in ".txt" from one directory to another.

    The destination directory is created first when it is missing.

    Parameters:
    source_dir (str): The directory to move the files from.
    destination_dir (str): The directory to move the files to.
    """
    # Make sure the target directory is there before anything is moved into it
    os.makedirs(destination_dir, exist_ok=True)

    # Keep only the .txt entries of the source directory
    txt_names = [name for name in os.listdir(source_dir) if name.endswith(".txt")]

    for name in txt_names:
        shutil.move(os.path.join(source_dir, name), os.path.join(destination_dir, name))

# Create the labels directories if they do not exist
for directory in labels_dirs:
    create_dir_if_not_exists(directory)

# Move the .txt files to the labels directories.
# images_dirs and labels_dirs are listed in the same order (train, val, test),
# so zip pairs each images directory with its matching labels directory.
for source_dir, destination_dir in zip(images_dirs, labels_dirs):
    move_txt_files(source_dir, destination_dir)

def delete_xml_files(directory):
    """
    Remove every file matching "*.xml" directly inside a directory.

    Parameters:
    directory (str): The directory to delete the files from.
    """
    # Pattern matching the XML files at the top level of the directory
    pattern = os.path.join(directory, '*.xml')

    # The full list of matches is collected before anything is deleted
    for path in glob.glob(pattern):
        os.remove(path)

# Delete the XML files from the images directories.
# This runs after the .txt labels have been moved out, so once it finishes
# the images directories no longer contain the XML annotations.
for directory in images_dirs:
    delete_xml_files(directory)

# Your dataset is now ready! 
**You can now move on to the next step: training the model.**

*proceed to the next notebook: [Model Training.ipynb]*