Mount the drive folder containing all required files

In [1]:
# mount Google Drive under /content/drive (force_remount=True requests a fresh mount)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# move into the project folder: the following cells build every path from os.getcwd()
%cd /content/drive/My Drive/Colab environments/Risiko! DL/
# list the content of the folder to check that we are in the desired place and that all required files are present
%ls

Mounted at /content/drive
/content/drive/My Drive/Colab environments/Risiko! DL
 [0m[01;34m3D_models[0m/
 [01;34mbackgrounds[0m/
 coco_risiko.yaml
 Complete_tanks_flags_detection.ipynb
 custom_complete_yolo.yaml
 custom_yolo.yaml
 [01;34mpre_trained_weights[0m/
 [01;34mreal_images[0m/
'Risiko!_Synthetic_Dataset_Creator.ipynb'
'Risiko! Test.ipynb'
 [01;34mruns[0m/
 Split_train_test_val.ipynb
 [01;34msynthetic_dataset[0m/
 [01;34msynthetic_images[0m/
 Tanks_flags_detection_training.ipynb
 Test_detection.ipynb
 test_example.txt
 test.txt


Import required libraries


In [2]:
import os
import cv2
import numpy as np
import random
import shutil

## Creation of the folders that will contain the split dataset
We decided to create three folders:
- train
- validation
- test

Each folder then contains two subfolders:
- real
- synthetic

Each of the two above subfolders then contains two folders:
- images
- labels

In [3]:
# name of the folder containing the datasets (train, validation, test)
datasets_folder = "datasets"

# path from the current position to the folder containing the datasets
datasets_folder_path = os.path.join(os.getcwd(), datasets_folder)

# names of the datasets
datasets_names = ["train", "validation", "test"]

# names of the subfolders of each dataset folder
synthetic_real_names = ["synthetic", "real"]

# names of the subfolders of each of the above subfolder
images_labels_names = ["images", "labels"]

# check if the folders containing the datasets exist, otherwise create them all
if not os.path.isdir(datasets_folder):
    os.mkdir(datasets_folder)
    print(f"created folder {datasets_folder}")

    # paths for the datasets folders inside datasets
    train_validation_test = []
    # paths for the subfolders of each dataset folder
    synthetic_real = []
    # paths for the subfolders of each subfolder
    images_labels = []

    # create each dataset folder (train, validation, test) with its subfolders (images, labels)
    for name in datasets_names:
        path = os.path.join(datasets_folder_path, name)
        train_validation_test.append(path)
        os.mkdir(path)
        print(f"created folder {path}")
        # for each created dataset folder, create its subfolders (synthetic, real)
        for sub_dir in synthetic_real_names:
            sub_path = os.path.join(path, sub_dir)
            synthetic_real.append(sub_path)
            os.mkdir(sub_path)
            print(f"created folder {sub_path}")
            # for each created subfolder, create its subfolders (images, labels)
            for sub_sub_dir in images_labels_names:
                sub_sub_path = os.path.join(sub_path, sub_sub_dir)
                images_labels.append(sub_sub_path)
                os.mkdir(sub_sub_path)
                print(f"created folder {sub_sub_path}")

created folder datasets
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train/synthetic
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train/synthetic/images
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train/synthetic/labels
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train/real
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train/real/images
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/train/real/labels
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/validation
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/validation/synthetic
created folder /content/drive/My Drive/Colab environments/Risiko! DL/datasets/validation/synthetic/images
created folder /conte

## Save the names of the images
We load the names of the images in the three folders:
- synthetic_images
- synthetic_dataset
- real_images

In [4]:
# Return the names, without extension, of the files in $images_dir that end with $extension.
# The names are sorted because os.listdir returns them in arbitrary order.
def _list_image_names(images_dir, extension=".jpg"):
    return sorted(
        file_name[:len(file_name) - len(extension)]  # remove the extension
        for file_name in os.listdir(images_dir)
        if file_name.endswith(extension)
    )


# load images names in the synthetic_images folder
synthetic_images_path = os.path.join(os.getcwd(), "synthetic_images", "images")
synthetic_images_names = _list_image_names(synthetic_images_path)

# load images names in the synthetic_dataset folder
synthetic_dataset_path = os.path.join(os.getcwd(), "synthetic_dataset", "images")
synthetic_dataset_names = _list_image_names(synthetic_dataset_path)

# load images names in the real_images folder
real_images_path = os.path.join(os.getcwd(), "real_images", "images")
real_images_names = _list_image_names(real_images_path)

## Clean dataset: remove unlabeled images
If there are images in our datasets that are not labeled, we remove them.

In [5]:
# The function checks if each image in $images_path_dir has a corresponding label in $labels_path_dir.
# If this is not the case, the given image is removed from $images_path_dir and its name
# is dropped from $names_list, so that the list never refers to a deleted image.
# $names_list contains the names of the images (that are equal to the names of the labels) without extension;
#   when it is a list it is updated in place
# $images_path_dir is the path to the directory containing the images (.jpg)
# $labels_path_dir is the path to the directory containing the labels of the images (.txt)
# return $True if some image has been removed, $False otherwise
def removeUnlabeledImages(names_list, images_path_dir, labels_path_dir):
    # image removed flag
    removed = False
    # names of the images that have a label
    labeled_names = []
    # check each name
    for name in names_list:
        # path to the label
        label_path = os.path.join(labels_path_dir, name + ".txt")
        # the label exists: keep the image
        if os.path.exists(label_path):
            labeled_names.append(name)
            continue
        # path to the unlabeled image
        image_path = os.path.join(images_path_dir, name + ".jpg")
        # remove the image (it may already be missing if the list is stale)
        if os.path.exists(image_path):
            os.remove(image_path)
            removed = True

    # keep only the labeled names; done after the loop so that the list
    # is not modified while it is being iterated
    if isinstance(names_list, list):
        names_list[:] = labeled_names

    return removed


In [6]:
# each dataset folder is paired with the list of the names of its images,
# in the order: $synthetic_dataset_names, $synthetic_images_names, $real_images_names
cleanup_targets = [
    ("synthetic_dataset", synthetic_dataset_names),
    ("synthetic_images", synthetic_images_names),
    ("real_images", real_images_names),
]

for dataset_folder, image_names in cleanup_targets:
    images_path_dir = os.path.join(os.getcwd(), dataset_folder, "images")
    labels_path_dir = os.path.join(os.getcwd(), dataset_folder, "labels")
    removed_last = removeUnlabeledImages(image_names, images_path_dir, labels_path_dir)

# show the result of the last cleanup ($real_images_names)
removed_last

False

## Lists split

- The list $synthetic\_dataset\_names$ is split into:
    - train 70%
    - validation 15%
    - test 15%
- The list $synthetic\_images\_names$ is split into:
    - train 70%
    - validation 15%
    - test 15%
- The list $real\_images\_names$ is split into:
    - train 70%
    - test 30%

In [7]:
# Split the list $lst into two lists, after shuffling a copy of it:
# - l_1 has $fraction * (# elements in $lst) elements of $lst (rounded down);
# - l_2 has the remaining elements of $lst.
# $lst itself is left untouched.
# $fraction must be between 0 and 1, otherwise a ValueError is raised
# $seed, if given, makes the shuffle reproducible; if None the global random generator is used
def split_list(lst, fraction=0.5, seed=None):

    if not 0 <= fraction <= 1:
        raise ValueError(f"fraction must be between 0 and 1, got {fraction}")

    # shuffle a copy, so that the list of the caller keeps its order
    shuffled = list(lst)
    if seed is None:
        random.shuffle(shuffled)
    else:
        # a dedicated generator does not disturb the state of the global one
        random.Random(seed).shuffle(shuffled)

    # index in $shuffled of the first element of $l_2
    index = int(fraction * len(shuffled))

    # first list
    l_1 = shuffled[:index]
    # second list
    l_2 = shuffled[index:]

    return l_1, l_2

In [8]:
# fix the seed of the random generator, so that the same files always give the same split
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

# the names are sorted before each split because they come from os.listdir,
# which returns them in arbitrary order; sorted() also passes a copy to split_list

# $synthetic_dataset: 70% train, 15% validation, 15% test
train_synthetic_dataset, test_synthetic_dataset = split_list(sorted(synthetic_dataset_names), fraction=0.7)
validation_synthetic_dataset, test_synthetic_dataset = split_list(test_synthetic_dataset, fraction=0.5)

# $synthetic_images: 70% train, 15% validation, 15% test
train_synthetic_images, test_synthetic_images = split_list(sorted(synthetic_images_names), fraction=0.7)
validation_synthetic_images, test_synthetic_images = split_list(test_synthetic_images, fraction=0.5)

# $real_images: 70% train, 30% test
train_real_images, test_real_images = split_list(sorted(real_images_names), fraction=0.7)

# check that no name has been lost or duplicated by the splits
assert (len(train_synthetic_dataset) + len(validation_synthetic_dataset)
        + len(test_synthetic_dataset)) == len(synthetic_dataset_names)
assert (len(train_synthetic_images) + len(validation_synthetic_images)
        + len(test_synthetic_images)) == len(synthetic_images_names)
assert len(train_real_images) + len(test_real_images) == len(real_images_names)

## Copy images and labels according to the above splits

In [9]:
# Copy the files whose names are in $list_filenames from $source_dir_path to $dest_dir_path.
# $list_filenames contains names of files without extension
# $source_dir_path is the path to the directory containing the files to be copied
# $dest_dir_path is the path to the directory where the files have to be copied
# $is_image selects the extension added to each name: .jpg if True, .txt if False
# $suff is the suffix added to the name of the destination file, just before the extension
def copyFileList(list_filenames, source_dir_path, dest_dir_path, is_image=True, suff=""):
    # the extension is the same for every file of the list
    extension = ".jpg" if is_image else ".txt"
    for base_name in list_filenames:
        origin = os.path.join(source_dir_path, base_name + extension)
        target = os.path.join(dest_dir_path, base_name + suff + extension)
        shutil.copy(origin, target)

#### $synthetic\_dataset$
Fill the train\\synthetic, validation\\synthetic, test\\synthetic folders with the images and labels whose names are in the $synthetic\_dataset$ list.

In [10]:
# source path: synthetic_dataset folder
source_path_synthetic_dataset = os.path.join(os.getcwd(), "synthetic_dataset")
source_path_synthetic_dataset_images = os.path.join(source_path_synthetic_dataset, "images")
source_path_synthetic_dataset_labels = os.path.join(source_path_synthetic_dataset, "labels")

# TRAIN\SYNTHETIC: destination folder inside train, with its images and labels subfolders
dest_train_path_synthetic = os.path.join(os.getcwd(), "datasets", "train", "synthetic")
dest_train_path_synthetic_images = os.path.join(dest_train_path_synthetic, "images")
dest_train_path_synthetic_labels = os.path.join(dest_train_path_synthetic, "labels")

# VALIDATION\SYNTHETIC: destination folder inside validation, with its images and labels subfolders
dest_validation_path_synthetic = os.path.join(os.getcwd(), "datasets", "validation", "synthetic")
dest_validation_path_synthetic_images = os.path.join(dest_validation_path_synthetic, "images")
dest_validation_path_synthetic_labels = os.path.join(dest_validation_path_synthetic, "labels")

# TEST\SYNTHETIC: destination folder inside test, with its images and labels subfolders
dest_test_path_synthetic = os.path.join(os.getcwd(), "datasets", "test", "synthetic")
dest_test_path_synthetic_images = os.path.join(dest_test_path_synthetic, "images")
dest_test_path_synthetic_labels = os.path.join(dest_test_path_synthetic, "labels")

# for each split copy first the images and then the labels (train, validation, test)
for split_names, split_images_dir, split_labels_dir in (
    (train_synthetic_dataset, dest_train_path_synthetic_images, dest_train_path_synthetic_labels),
    (validation_synthetic_dataset, dest_validation_path_synthetic_images, dest_validation_path_synthetic_labels),
    (test_synthetic_dataset, dest_test_path_synthetic_images, dest_test_path_synthetic_labels),
):
    copyFileList(split_names, source_path_synthetic_dataset_images, split_images_dir, is_image=True)
    copyFileList(split_names, source_path_synthetic_dataset_labels, split_labels_dir, is_image=False)

#### $synthetic\_images$
Fill the train\\synthetic, validation\\synthetic, test\\synthetic folders with the images and labels whose names are in the $synthetic\_images$ list.

In [11]:
# source path: synthetic_images folder
source_path_synthetic_images = os.path.join(os.getcwd(), "synthetic_images")
source_path_synthetic_images_images = os.path.join(source_path_synthetic_images, "images")
source_path_synthetic_images_labels = os.path.join(source_path_synthetic_images, "labels")

# TRAIN\SYNTHETIC: destination folder inside train, with its images and labels subfolders
dest_train_path_synthetic = os.path.join(os.getcwd(), "datasets", "train", "synthetic")
dest_train_path_synthetic_images = os.path.join(dest_train_path_synthetic, "images")
dest_train_path_synthetic_labels = os.path.join(dest_train_path_synthetic, "labels")

# VALIDATION\SYNTHETIC: destination folder inside validation, with its images and labels subfolders
dest_validation_path_synthetic = os.path.join(os.getcwd(), "datasets", "validation", "synthetic")
dest_validation_path_synthetic_images = os.path.join(dest_validation_path_synthetic, "images")
dest_validation_path_synthetic_labels = os.path.join(dest_validation_path_synthetic, "labels")

# TEST\SYNTHETIC: destination folder inside test, with its images and labels subfolders
dest_test_path_synthetic = os.path.join(os.getcwd(), "datasets", "test", "synthetic")
dest_test_path_synthetic_images = os.path.join(dest_test_path_synthetic, "images")
dest_test_path_synthetic_labels = os.path.join(dest_test_path_synthetic, "labels")

# for each split copy first the images and then the labels (train, validation, test);
# the "_1" suffix is added in order not to overwrite the files coming from synthetic_dataset
for split_names, split_images_dir, split_labels_dir in (
    (train_synthetic_images, dest_train_path_synthetic_images, dest_train_path_synthetic_labels),
    (validation_synthetic_images, dest_validation_path_synthetic_images, dest_validation_path_synthetic_labels),
    (test_synthetic_images, dest_test_path_synthetic_images, dest_test_path_synthetic_labels),
):
    copyFileList(split_names, source_path_synthetic_images_images, split_images_dir, is_image=True, suff="_1")
    copyFileList(split_names, source_path_synthetic_images_labels, split_labels_dir, is_image=False, suff="_1")

#### $real\_images$
Fill the train\\real and test\\real folders with the images and labels whose names are in the $real\_images$ list.

In [12]:
# source path: real_images folder
source_path_real_images = os.path.join(os.getcwd(), "real_images")
source_path_real_images_images = os.path.join(source_path_real_images, "images")
source_path_real_images_labels = os.path.join(source_path_real_images, "labels")

# TRAIN\REAL: destination folder inside train, with its images and labels subfolders
dest_train_path_real = os.path.join(os.getcwd(), "datasets", "train", "real")
dest_train_path_real_images = os.path.join(dest_train_path_real, "images")
dest_train_path_real_labels = os.path.join(dest_train_path_real, "labels")

# TEST\REAL: destination folder inside test, with its images and labels subfolders
dest_test_path_real = os.path.join(os.getcwd(), "datasets", "test", "real")
dest_test_path_real_images = os.path.join(dest_test_path_real, "images")
dest_test_path_real_labels = os.path.join(dest_test_path_real, "labels")

# for each split copy first the images and then the labels (train, test)
for split_names, split_images_dir, split_labels_dir in (
    (train_real_images, dest_train_path_real_images, dest_train_path_real_labels),
    (test_real_images, dest_test_path_real_images, dest_test_path_real_labels),
):
    copyFileList(split_names, source_path_real_images_images, split_images_dir, is_image=True)
    copyFileList(split_names, source_path_real_images_labels, split_labels_dir, is_image=False)