Mount the drive folder containing all required files

In [None]:
# mount Google Drive under /content/drive (Colab-only import; force_remount=True is passed to drive.mount)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# move into the project folder: the cells below build every path from os.getcwd(),
# so they only work when the working directory is this folder
%cd /content/drive/My Drive/Colab environments/Risiko! DL/
# list the folder content to check that we are in the desired folder and that all required files are present
%ls

Import required libraries


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import shutil

## Creation of the folders that will contain the split dataset
We decided to create three folders:
- train
- validation
- test

Each folder then contains two subfolders:
- real
- synthetic

Each of the two above subfolders then contains two folders:
- images
- labels

In [None]:
# name of the folder containing the datasets (train, validation, test)
datasets_folder = "datasets"

# path from the current position to the folder containing the datasets
datasets_folder_path = os.path.join(os.getcwd(), datasets_folder)

# names of the datasets
datasets_names = ["train", "validation", "test"]

# names of the subfolders of each dataset folder
synthetic_real_names = ["synthetic", "real"]

# names of the subfolders of each of the above subfolder
images_labels_names = ["images", "labels"]

# paths for the datasets folders inside datasets
train_validation_test = []
# paths for the subfolders of each dataset folder
synthetic_real = []
# paths for the subfolders of each subfolder
images_labels = []


def _ensure_folder(path, shown_name=None):
    """Create the folder at path if it is missing and report it; do nothing otherwise."""
    if not os.path.isdir(path):
        # exist_ok guards against the folder appearing between the check and the creation
        os.makedirs(path, exist_ok=True)
        print(f"created folder {shown_name if shown_name is not None else path}")


# Every level is checked on its own, so a tree left incomplete by an interrupted
# run (or by a manually deleted subfolder) is completed instead of being skipped
# just because the top-level folder already exists.
_ensure_folder(datasets_folder_path, shown_name=datasets_folder)

# create each dataset folder (train, validation, test) with its subfolders (synthetic, real)
for name in datasets_names:
    path = os.path.join(datasets_folder_path, name)
    train_validation_test.append(path)
    _ensure_folder(path)
    # for each dataset folder, create its subfolders (synthetic, real)
    for sub_dir in synthetic_real_names:
        sub_path = os.path.join(path, sub_dir)
        synthetic_real.append(sub_path)
        _ensure_folder(sub_path)
        # for each of the above subfolders, create its subfolders (images, labels)
        for sub_sub_dir in images_labels_names:
            sub_sub_path = os.path.join(sub_path, sub_sub_dir)
            images_labels.append(sub_sub_path)
            _ensure_folder(sub_sub_path)

## Save the names of the images
We load the names of the images in the three folders:
- synthetic_images
- synthetic_dataset
- real_images

In [None]:
def _list_jpg_names(folder_path):
    """Return the sorted names, without the .jpg extension, of the .jpg files in folder_path.

    os.listdir returns entries in arbitrary order. The names are sorted so that
    the seeded train/validation/test splits computed from these lists come out
    the same on every run and on every filesystem.
    """
    extension = ".jpg"
    return sorted(
        file_name[:-len(extension)]  # remove .jpg extension
        for file_name in os.listdir(folder_path)
        if file_name.endswith(extension)
    )


# load images names in the synthetic_images folder
synthetic_images_path = os.path.join(os.getcwd(), "synthetic_images", "images")
synthetic_images_names = _list_jpg_names(synthetic_images_path)

# load images names in the synthetic_dataset folder
synthetic_dataset_path = os.path.join(os.getcwd(), "synthetic_dataset", "images")
synthetic_dataset_names = _list_jpg_names(synthetic_dataset_path)

# load images names in the real_images folder
real_images_path = os.path.join(os.getcwd(), "real_images", "images")
real_images_names = _list_jpg_names(real_images_path)

## Lists split

- The list $synthetic\_dataset\_names$ is split into:
    - train 70%
    - validation 15%
    - test 15%
- The list $synthetic\_images\_names$ is split into:
    - train 70%
    - validation 15%
    - test 15%
- The list $real\_images\_names$ is split into:
    - train 70%
    - test 30%

In [None]:
# share of every list held out from training, and the seed reused by all the splits
holdout_fraction = 0.3
split_seed = 27

# synthetic_dataset: hold out 30% of the names, then halve the hold-out into validation and test
train_synthetic_dataset, holdout_synthetic_dataset = train_test_split(
    synthetic_dataset_names, test_size=holdout_fraction, random_state=split_seed
)
validation_synthetic_dataset, test_synthetic_dataset = train_test_split(
    holdout_synthetic_dataset, test_size=0.5, random_state=split_seed
)

# synthetic_images: same scheme as synthetic_dataset
train_synthetic_images, holdout_synthetic_images = train_test_split(
    synthetic_images_names, test_size=holdout_fraction, random_state=split_seed
)
validation_synthetic_images, test_synthetic_images = train_test_split(
    holdout_synthetic_images, test_size=0.5, random_state=split_seed
)

# real_images: train and test only, no validation part
train_real_images, test_real_images = train_test_split(
    real_images_names, test_size=holdout_fraction, random_state=split_seed
)

## Move images and labels according to the above splits

In [None]:
def copyFileList(list_filenames, source_dir_path, dest_dir_path, is_image=True):
    """Copy a list of files, given by name without extension, between two directories.

    list_filenames:  names of the files to copy, without extension
    source_dir_path: path to the directory containing the files to be copied
    dest_dir_path:   path to the directory where the files have to be copied
    is_image:        if true, ".jpg" is appended to every name (image files);
                     otherwise ".txt" is appended (text files)

    Each file keeps its name in the destination directory.
    """
    # the extension is the same for the whole list, so pick it once
    extension = ".jpg" if is_image else ".txt"
    for base_name in list_filenames:
        full_name = base_name + extension
        shutil.copy(
            os.path.join(source_dir_path, full_name),
            os.path.join(dest_dir_path, full_name),
        )

#### $synthetic\_dataset$
Fill the train\\synthetic, validation\\synthetic, test\\synthetic folders with the images and labels whose names are in the lists obtained by splitting $synthetic\_dataset$.

In [None]:
# source folders: synthetic_dataset with its images and labels subfolders
source_path_synthetic_dataset = os.path.join(os.getcwd(), "synthetic_dataset")
source_path_synthetic_dataset_images = os.path.join(source_path_synthetic_dataset, "images")
source_path_synthetic_dataset_labels = os.path.join(source_path_synthetic_dataset, "labels")

# destination folders inside train\synthetic
dest_train_path_synthetic = os.path.join(os.getcwd(), "datasets", "train", "synthetic")
dest_train_path_synthetic_images = os.path.join(dest_train_path_synthetic, "images")
dest_train_path_synthetic_labels = os.path.join(dest_train_path_synthetic, "labels")

# destination folders inside validation\synthetic
dest_validation_path_synthetic = os.path.join(os.getcwd(), "datasets", "validation", "synthetic")
dest_validation_path_synthetic_images = os.path.join(dest_validation_path_synthetic, "images")
dest_validation_path_synthetic_labels = os.path.join(dest_validation_path_synthetic, "labels")

# destination folders inside test\synthetic
dest_test_path_synthetic = os.path.join(os.getcwd(), "datasets", "test", "synthetic")
dest_test_path_synthetic_images = os.path.join(dest_test_path_synthetic, "images")
dest_test_path_synthetic_labels = os.path.join(dest_test_path_synthetic, "labels")

# for each split (train, validation, test) copy first the images, then the labels
for split_names, images_dest, labels_dest in (
    (train_synthetic_dataset, dest_train_path_synthetic_images, dest_train_path_synthetic_labels),
    (validation_synthetic_dataset, dest_validation_path_synthetic_images, dest_validation_path_synthetic_labels),
    (test_synthetic_dataset, dest_test_path_synthetic_images, dest_test_path_synthetic_labels),
):
    copyFileList(split_names, source_path_synthetic_dataset_images, images_dest, is_image=True)
    copyFileList(split_names, source_path_synthetic_dataset_labels, labels_dest, is_image=False)

#### $synthetic\_images$
Fill the train\\synthetic, validation\\synthetic, test\\synthetic folders with the images and labels whose names are in the lists obtained by splitting $synthetic\_images$.

In [None]:
# source path: synthetic_images folder
source_path_synthetic_images = os.path.join(os.getcwd(), "synthetic_images")
source_path_synthetic_images_images = os.path.join(source_path_synthetic_images, "images")
source_path_synthetic_images_labels = os.path.join(source_path_synthetic_images, "labels")

# TRAIN\SYNTHETIC

# destination folder inside train
dest_train_path_synthetic = os.path.join(os.getcwd(), "datasets", "train", "synthetic")

# $train_synthetic_images: images copy
dest_train_path_synthetic_images = os.path.join(dest_train_path_synthetic, "images")
copyFileList(train_synthetic_images, source_path_synthetic_images_images, dest_train_path_synthetic_images, is_image=True)

# $train_synthetic_images: labels copy
dest_train_path_synthetic_labels = os.path.join(dest_train_path_synthetic, "labels")
copyFileList(train_synthetic_images, source_path_synthetic_images_labels, dest_train_path_synthetic_labels, is_image=False)

# VALIDATION\SYNTHETIC

# destination folder inside validation
dest_validation_path_synthetic = os.path.join(os.getcwd(), "datasets", "validation", "synthetic")

# $validation_synthetic_images: images copy
# The list and the source folder must both be the synthetic_images ones: copying
# $validation_synthetic_dataset from the synthetic_dataset folder here would leave
# the validation labels copied below without their images.
dest_validation_path_synthetic_images = os.path.join(dest_validation_path_synthetic, "images")
copyFileList(validation_synthetic_images, source_path_synthetic_images_images, dest_validation_path_synthetic_images, is_image=True)

# $validation_synthetic_images: labels copy
dest_validation_path_synthetic_labels = os.path.join(dest_validation_path_synthetic, "labels")
copyFileList(validation_synthetic_images, source_path_synthetic_images_labels, dest_validation_path_synthetic_labels, is_image=False)

# TEST\SYNTHETIC

# destination folder inside test
dest_test_path_synthetic = os.path.join(os.getcwd(), "datasets", "test", "synthetic")

# $test_synthetic_images: images copy
dest_test_path_synthetic_images = os.path.join(dest_test_path_synthetic, "images")
copyFileList(test_synthetic_images, source_path_synthetic_images_images, dest_test_path_synthetic_images, is_image=True)

# $test_synthetic_images: labels copy
dest_test_path_synthetic_labels = os.path.join(dest_test_path_synthetic, "labels")
copyFileList(test_synthetic_images, source_path_synthetic_images_labels, dest_test_path_synthetic_labels, is_image=False)

#### $real\_images$
Fill the train\\real and test\\real folders with the images and labels whose names are in the lists obtained by splitting $real\_images$.

In [None]:
# source folders: real_images with its images and labels subfolders
source_path_real_images = os.path.join(os.getcwd(), "real_images")
source_path_real_images_images = os.path.join(source_path_real_images, "images")
source_path_real_images_labels = os.path.join(source_path_real_images, "labels")

# destination folders inside train\real
dest_train_path_real = os.path.join(os.getcwd(), "datasets", "train", "real")
dest_train_path_real_images = os.path.join(dest_train_path_real, "images")
dest_train_path_real_labels = os.path.join(dest_train_path_real, "labels")

# destination folders inside test\real
dest_test_path_real = os.path.join(os.getcwd(), "datasets", "test", "real")
dest_test_path_real_images = os.path.join(dest_test_path_real, "images")
dest_test_path_real_labels = os.path.join(dest_test_path_real, "labels")

# for each split (train, test) copy first the images, then the labels
for split_names, images_dest, labels_dest in (
    (train_real_images, dest_train_path_real_images, dest_train_path_real_labels),
    (test_real_images, dest_test_path_real_images, dest_test_path_real_labels),
):
    copyFileList(split_names, source_path_real_images_images, images_dest, is_image=True)
    copyFileList(split_names, source_path_real_images_labels, labels_dest, is_image=False)