Mount the drive folder containing all required files

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
# force_remount=True asks Colab to mount again even if the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Move into the project folder that holds the files required to run the project.
# Later cells build their paths from os.getcwd(), so they rely on this cell
# having been executed first.
%cd /content/drive/My Drive/Colab environments/Risiko! DL/
# List the folder contents to check that we are in the desired folder and that
# all required files are present.
%ls

Mounted at /content/drive
/content/drive/My Drive/Colab environments/Risiko! DL
 [0m[01;34m3D_models[0m/            'Risiko!_Synthetic_Dataset_Creator.ipynb'
 [01;34mbackgrounds[0m/          'Risiko! Test.ipynb'
 coco_risiko.yaml       Split_train_test_val.ipynb
 [01;34mdatasets[0m/              [01;34msynthetic_dataset[0m/
 [01;34mpre_trained_weights[0m/   [01;34msynthetic_images[0m/
 [01;34mreal_images[0m/           test_example.txt


Import required libraries


In [2]:
import os
import cv2
from sklearn.model_selection import train_test_split

## Creation of the folders that will contain the split dataset
We decided to create three folders:
- train
- validation
- test

Each folder then contains two subfolders:
- real
- synthetic

Each of these two subfolders in turn contains two folders:
- images
- labels

In [3]:
# name of the folder containing the datasets (train, validation, test)
datasets_folder = "datasets"

# path from the current position to the folder containing the datasets
datasets_folder_path = os.path.join(os.getcwd(), datasets_folder)

# names of the datasets
datasets_names = ["train", "validation", "test"]

# names of the subfolders of each dataset folder
synthetic_real_names = ["synthetic", "real"]

# names of the subfolders of each of the above subfolders
images_labels_names = ["images", "labels"]

# The three path lists are always built, even when the folders already exist,
# so that this cell can be re-run (or run on an already prepared drive) without
# leaving them undefined.

# paths for the datasets folders inside datasets
train_validation_test = []
# paths for the subfolders of each dataset folder
synthetic_real = []
# paths for the subfolders of each subfolder
images_labels = []


def _ensure_folder(path, shown_name=None):
    """Create the folder `path` if it does not exist yet.

    Parameters:
        path: folder to create (missing parent folders are created as well).
        shown_name: text printed in the "created folder" message;
            defaults to `path` itself.

    Returns:
        True if the folder was created by this call, False if it already existed.
    """
    if os.path.isdir(path):
        return False
    os.makedirs(path, exist_ok=True)
    print(f"created folder {path if shown_name is None else shown_name}")
    return True


# create the folder containing the datasets, if missing
_ensure_folder(datasets_folder_path, shown_name=datasets_folder)

# create each dataset folder (train, validation, test) with its subfolders (synthetic, real)
for name in datasets_names:
    path = os.path.join(datasets_folder_path, name)
    train_validation_test.append(path)
    _ensure_folder(path)
    # for each dataset folder, create its subfolders (synthetic, real)
    for sub_dir in synthetic_real_names:
        sub_path = os.path.join(path, sub_dir)
        synthetic_real.append(sub_path)
        _ensure_folder(sub_path)
        # for each subfolder, create its subfolders (images, labels)
        for sub_sub_dir in images_labels_names:
            sub_sub_path = os.path.join(sub_path, sub_sub_dir)
            images_labels.append(sub_sub_path)
            _ensure_folder(sub_sub_path)

## Load the images
We load the images from the three folders:
- synthetic_images
- synthetic_dataset
- real_images

In [4]:
def load_jpg_images(folder_path):
    """Load every ".jpg" file found directly inside `folder_path` with OpenCV.

    Parameters:
        folder_path: path of the folder whose images are loaded.

    Returns:
        A list with one cv2.imread result per ".jpg" file, in the order
        returned by os.listdir.

    Raises:
        IOError: if cv2.imread cannot read one of the files (it returns None
            in that case), so that an unreadable image is reported here with
            its path instead of being stored as None in the list.
    """
    # NOTE(review): os.listdir gives no ordering guarantee; the listing order is
    # kept as-is on purpose, in case other cells rely on it — confirm.
    images = []
    for file_name in os.listdir(folder_path):
        if not file_name.endswith(".jpg"):
            continue
        image_path = os.path.join(folder_path, file_name)
        image = cv2.imread(image_path)
        if image is None:
            raise IOError(f"could not read image {image_path}")
        images.append(image)
    return images


# all image folders are resolved from the current working directory
current_folder = os.getcwd()
print(current_folder)

# load images in the synthetic_images folder
synthetic_images_path = os.path.join(current_folder, "synthetic_images", "images")
synthetic_images = load_jpg_images(synthetic_images_path)

# load images in the synthetic_dataset folder
synthetic_dataset_path = os.path.join(current_folder, "synthetic_dataset", "images")
synthetic_dataset = load_jpg_images(synthetic_dataset_path)

# load images in the real_images folder
real_images_path = os.path.join(current_folder, "real_images", "images")
real_images = load_jpg_images(real_images_path)

/content/drive/My Drive/Colab environments/Risiko! DL


## Datasets split into the above folders
- Images and labels from $synthetic\_dataset$ are split into:
    - train 70%
    - validation 15%
    - test 15%
- Images and labels from $synthetic\_images$ are split into:
    - train 70%
    - validation 15%
    - test 15%
- Images and labels from $real\_images$ are split into:
    - train 70%
    - test 30%