In [4]:
import os
import re

base_dir = '/home/umar/Pictures/datasets/FLAME2/'

def get_id_from_path(path):
    base = os.path.basename(path)
    og_filename, extension = os.path.splitext(base)
    id = og_filename.split(" ")[-1]
    id = re.sub(r'[()]', '', id)
    return int(id)

rgb_base_dir = os.path.join(base_dir, "RGB")
ir_base_dir = os.path.join(base_dir, "Thermal")

rgb_list = [os.path.join(rgb_base_dir, i) for i in os.listdir(rgb_base_dir) if i.endswith(".jpg")]
ir_list = [os.path.join(ir_base_dir, i) for i in os.listdir(ir_base_dir) if i.endswith(".jpg")]

rgb_list.sort(key=get_id_from_path)
ir_list.sort(key=get_id_from_path)

print("renamed_rgb_list:", rgb_list[:4])
print("renamed_ir_list:", ir_list[:4])

renamed_rgb_list: []
renamed_ir_list: []


In [2]:
len(rgb_list), len(ir_list)

(53451, 53451)

In [4]:
label_dict_ = {
    "NN": (1, 13700),
    "YY": [(13701, 14699), (15981, 19802), (19900, 27183), (27515, 31294), (31510, 33597), (33930, 36550), (38031, 38153), (41642, 45279), (51207, 52286)],
    "YN": [(14700, 15980), (19803, 19899), (27184, 27514), (31295, 31509), (33598, 33929), (36551, 38030), (38154, 41642), (45280, 51206), (52287, 53451)],
}
# rewrite the label_dict so that we can easily lookup the label of a given frame based on the id
label_dict = {}
for key, value in label_dict_.items():
    if type(value) == tuple:
        for i in range(value[0], value[1]+1):
            label_dict[i] = key
    else:
        for i in value:
            for j in range(i[0], i[1]+1):
                label_dict[j] = key

# DO NOT RUN ANY OF THE BELOW CODE AGAIN IT WAS ONLY TO REORGANIZE THE DATASET FILES PREVIOUSLY

In [6]:
# lets split our path lists into lists for each class label, so that we can have different classes in different folders
# this will make it easier to train our model

# create the folders
for key in label_dict_.keys():
    os.makedirs(os.path.join(base_dir, key), exist_ok=True)

# move the files
for i in range(len(rgb_list)):
    rgb_path = rgb_list[i]
    ir_path = ir_list[i]
    id = get_id_from_path(rgb_path)
    label = label_dict[id]
    os.rename(rgb_path, os.path.join(base_dir, label, os.path.basename(rgb_path)))
    os.rename(ir_path, os.path.join(base_dir, label, os.path.basename(ir_path)))

# lets check if the files were moved correctly
for key in label_dict_.keys():
    rgb_list = [os.path.join(base_dir, key, i) for i in os.listdir(os.path.join(base_dir, key)) if i.endswith(".jpg")]
    ir_list = [os.path.join(base_dir, key, i) for i in os.listdir(os.path.join(base_dir, key)) if i.endswith(".jpg")]
    rgb_list.sort(key=get_id_from_path)
    ir_list.sort(key=get_id_from_path)
    print("renamed_rgb_list:", rgb_list[:4])
    print("renamed_ir_list:", ir_list[:4])

# Now take the thermal images out of the folders and put them in a new folder that is organized by class label the same way as the rgb images
# thermal images are identified by the basename of the path which looks like this: "254p Thermal Frame (37793).jpg", where the number in the brackets is the id
# therefore, we will need to check if a file is thermal or rgb before moving it, and only reorganize the thermal images

# create the folders
for key in label_dict_.keys():
    os.makedirs(os.path.join(base_dir, key, "thermal"), exist_ok=True)

# move the files
for key in label_dict_.keys():
    rgb_list = [os.path.join(base_dir, key, i) for i in os.listdir(os.path.join(base_dir, key)) if i.endswith(".jpg")]
    ir_list = [os.path.join(base_dir, key, i) for i in os.listdir(os.path.join(base_dir, key)) if i.endswith(".jpg")]
    rgb_list.sort(key=get_id_from_path)
    ir_list.sort(key=get_id_from_path)
    for i in range(len(rgb_list)):
        rgb_path = rgb_list[i]
        ir_path = ir_list[i]
        if get_id_from_path(rgb_path) == get_id_from_path(ir_path):
            os.rename(ir_path, os.path.join(base_dir, key, "thermal", os.path.basename(ir_path)))


renamed_rgb_list: ['/home/umar/Pictures/datasets/FLAME2/NN/254p Thermal Frame (1).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/254p RGB Frame (1).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/254p Thermal Frame (2).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/254p RGB Frame (2).jpg']
renamed_ir_list: ['/home/umar/Pictures/datasets/FLAME2/NN/254p Thermal Frame (1).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/254p RGB Frame (1).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/254p Thermal Frame (2).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/254p RGB Frame (2).jpg']
renamed_rgb_list: ['/home/umar/Pictures/datasets/FLAME2/YY/254p Thermal Frame (13701).jpg', '/home/umar/Pictures/datasets/FLAME2/YY/254p RGB Frame (13701).jpg', '/home/umar/Pictures/datasets/FLAME2/YY/254p RGB Frame (13702).jpg', '/home/umar/Pictures/datasets/FLAME2/YY/254p Thermal Frame (13702).jpg']
renamed_ir_list: ['/home/umar/Pictures/datasets/FLAME2/YY/254p Thermal Frame (13701).jpg', '/home/umar/Pictures/datasets/FLA

In [15]:
# create lists of paths to the images for each class
nn_dir = os.path.join(base_dir, "NN", "thermal")
yy_dir = os.path.join(base_dir, "YY", "thermal")
yn_dir = os.path.join(base_dir, "YN", "thermal")

print("nn_dir:", nn_dir)
print("yy_dir:", yy_dir)
print("yn_dir:", yn_dir)


nn_list = [os.path.join(nn_dir, i) for i in os.listdir(nn_dir) if i.endswith(".jpg")]
yy_list = [os.path.join(yy_dir, i) for i in os.listdir(yy_dir) if i.endswith(".jpg")]
yn_list = [os.path.join(yn_dir, i) for i in os.listdir(yn_dir) if i.endswith(".jpg")]

print("nn_list:", nn_list[:4])
print("yy_list:", yy_list[:4])
print("yn_list:", yn_list[:4])

# for each path in the above lists, first check if the image is rgb or thermal, then reorganize them so the directory structure is as follows:
# base_dir
#   RGB
#       NN
#       YY
#       YN
#   Thermal
#       NN
#       YY
#       YN

# create the folders
os.makedirs(os.path.join(base_dir, "RGB", "NN"), exist_ok=True)
os.makedirs(os.path.join(base_dir, "RGB", "YY"), exist_ok=True)
os.makedirs(os.path.join(base_dir, "RGB", "YN"), exist_ok=True)
os.makedirs(os.path.join(base_dir, "Thermal", "NN"), exist_ok=True)
os.makedirs(os.path.join(base_dir, "Thermal", "YY"), exist_ok=True)
os.makedirs(os.path.join(base_dir, "Thermal", "YN"), exist_ok=True)

# move the files
for i in range(len(nn_list)):
    nn_path = nn_list[i]
    if "Thermal" in nn_path:
        os.rename(nn_path, os.path.join(base_dir, "Thermal", "NN", os.path.basename(nn_path)))
    else:
        os.rename(nn_path, os.path.join(base_dir, "RGB", "NN", os.path.basename(nn_path)))

for i in range(len(yy_list)):
    yy_path = yy_list[i]
    if "Thermal" in yy_path:
        os.rename(yy_path, os.path.join(base_dir, "Thermal", "YY", os.path.basename(yy_path)))
    else:
        os.rename(yy_path, os.path.join(base_dir, "RGB", "YY", os.path.basename(yy_path)))

for i in range(len(yn_list)):
    yn_path = yn_list[i]
    if "Thermal" in yn_path:
        os.rename(yn_path, os.path.join(base_dir, "Thermal", "YN", os.path.basename(yn_path)))
    else:
        os.rename(yn_path, os.path.join(base_dir, "RGB", "YN", os.path.basename(yn_path)))

# Check if the files were moved correctly
nn_list = [os.path.join(base_dir, "RGB", "NN", i) for i in os.listdir(os.path.join(base_dir, "RGB", "NN")) if i.endswith(".jpg")]
yy_list = [os.path.join(base_dir, "RGB", "YY", i) for i in os.listdir(os.path.join(base_dir, "RGB", "YY")) if i.endswith(".jpg")]
yn_list = [os.path.join(base_dir, "RGB", "YN", i) for i in os.listdir(os.path.join(base_dir, "RGB", "YN")) if i.endswith(".jpg")]
print("nn_list:", nn_list[:4])
print("yy_list:", yy_list[:4])
print("yn_list:", yn_list[:4])

# check if the length of the lists is correct and adds up to the original number of images
print("len(nn_list):", len(nn_list))    
print("len(yy_list):", len(yy_list))
print("len(yn_list):", len(yn_list))
print("len(nn_list) + len(yy_list) + len(yn_list):", len(nn_list) + len(yy_list) + len(yn_list))



nn_dir: /home/umar/Pictures/datasets/FLAME2/NN/thermal
yy_dir: /home/umar/Pictures/datasets/FLAME2/YY/thermal
yn_dir: /home/umar/Pictures/datasets/FLAME2/YN/thermal
nn_list: ['/home/umar/Pictures/datasets/FLAME2/NN/thermal/254p Thermal Frame (7815).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/thermal/254p RGB Frame (7423).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/thermal/254p RGB Frame (2417).jpg', '/home/umar/Pictures/datasets/FLAME2/NN/thermal/254p RGB Frame (2306).jpg']
yy_list: ['/home/umar/Pictures/datasets/FLAME2/YY/thermal/254p RGB Frame (34931).jpg', '/home/umar/Pictures/datasets/FLAME2/YY/thermal/254p Thermal Frame (27084).jpg', '/home/umar/Pictures/datasets/FLAME2/YY/thermal/254p Thermal Frame (22825).jpg', '/home/umar/Pictures/datasets/FLAME2/YY/thermal/254p Thermal Frame (35869).jpg']
yn_list: ['/home/umar/Pictures/datasets/FLAME2/YN/thermal/254p RGB Frame (37193).jpg', '/home/umar/Pictures/datasets/FLAME2/YN/thermal/254p Thermal Frame (48470).jpg', '/home/umar/Picture

In [None]:
import os
# Currently the directory structure is as follows:
# base_dir
#   RGB
#       NN
#       YY
#       YN
#   Thermal
#       NN
#       YY
#       YN

# We want to reorganize the directory structure to be as follows:
# base_dir
#   RGB
#       train
#           NN
#           YY
#           YN
#       val
#           NN
#           YY
#           YN
#       test
#           NN
#           YY
#           YN
#   Thermal
#       train
#           NN
#           YY
#           YN
#       val
#           NN
#           YY
#           YN
#       test
#           NN
#           YY
#           YN

base_dir = '/home/umar/Pictures/datasets/FLAME2/'

# use a loop to create the folders
for i in ["RGB", "Thermal"]:
    for j in ["train", "val", "test"]:
        for k in ["NN", "YY", "YN"]:
            os.makedirs(os.path.join(base_dir, i, j, k), exist_ok=True)

# move the files
for i in ["RGB", "Thermal"]:
    for j in ["NN", "YY", "YN"]:
        for k in ["train", "val", "test"]:
            for l in os.listdir(os.path.join(base_dir, i, j)):
                os.rename(os.path.join(base_dir, i, j, l), os.path.join(base_dir, i, k, j, l))


In [9]:
# check if the files were moved correctly
nn_list = [os.path.join(base_dir, "RGB", "train", "NN", i) for i in os.listdir(os.path.join(base_dir, "RGB", "train", "NN")) if i.endswith(".jpg")]
yy_list = [os.path.join(base_dir, "RGB", "train", "YY", i) for i in os.listdir(os.path.join(base_dir, "RGB", "train", "YY")) if i.endswith(".jpg")]
yn_list = [os.path.join(base_dir, "RGB", "train", "YN", i) for i in os.listdir(os.path.join(base_dir, "RGB", "train", "YN")) if i.endswith(".jpg")]
print("nn_list:", nn_list[:4])
print("yy_list:", yy_list[:4])
print("yn_list:", yn_list[:4])

# check if the length of the lists is correct and adds up to the original number of images
print("len(nn_list):", len(nn_list))
print("len(yy_list):", len(yy_list))
print("len(yn_list):", len(yn_list))
print("len(nn_list) + len(yy_list) + len(yn_list):", len(nn_list) + len(yy_list) + len(yn_list))

nn_list: ['/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (7423).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (2417).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (2306).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (5061).jpg']
yy_list: ['/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (34931).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (17842).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (33190).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (34393).jpg']
yn_list: ['/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (37193).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (47123).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (46828).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (45918).jpg']
len(nn_list): 13700
len(yy

In [6]:
# In total there should be 53451 images in the RGB folder and 53451 images in the Thermal folder
# Calculate the number of images in the rgb and thermal folders, to see if they add up to 53451
rgb_count = 0
thermal_count = 0
i = "RGB"
for j in ["train", "val", "test"]:
    for k in ["NN", "YY", "YN"]:
        rgb_count += len(os.listdir(os.path.join(base_dir, i, j, k)))
print("rgb_count:", rgb_count)

i = "Thermal"
for j in ["train", "val", "test"]:
    for k in ["NN", "YY", "YN"]:
        thermal_count += len(os.listdir(os.path.join(base_dir, i, j, k)))
print("thermal_count:", thermal_count)

rgb_count: 53451
thermal_count: 53451


In [None]:
# Unfortunately, only the Train folder in each RGB and Thermal folder has 53451 images, the val and test folders have 0 images
# The train folder is divided into 3 subfolders, NN, YY and YN, each of which has a different number of images in it
# We need to create a function that will move images from the train folder to the val and test folders, while keeping the ratio of NN, YY and YN images the same
# We will use the same function for both the RGB and Thermal folders

import os
import random
import shutil

def move_images(source_dir, dest_dir, ratio):
    """
    This function moves images from the source_dir to the dest_dir, while keeping the ratio of NN, YY and YN images the same
    """
    # the image files are further within the 3 subfolders, NN, YY and YN of the source_dir
    # we need to get the list of images in each of the 3 subfolders
    nn_list = [os.path.join(source_dir, "NN", i) for i in os.listdir(os.path.join(source_dir, "NN")) if i.endswith(".jpg")]
    yy_list = [os.path.join(source_dir, "YY", i) for i in os.listdir(os.path.join(source_dir, "YY")) if i.endswith(".jpg")]
    yn_list = [os.path.join(source_dir, "YN", i) for i in os.listdir(os.path.join(source_dir, "YN")) if i.endswith(".jpg")]

    # calculate the number of images to move from each subfolder
    nn_count = int(len(nn_list) * ratio)
    yy_count = int(len(yy_list) * ratio)
    yn_count = int(len(yn_list) * ratio)
    
    # calculate the number of images to move to the dest_dir
    nn_move = int(nn_count * ratio)
    yy_move = int(yy_count * ratio)
    yn_move = int(yn_count * ratio)

    # move the images from the source_dir to the dest_dir
    for i in random.sample(nn_list, nn_move):
        shutil.move(i, os.path.join(dest_dir, "NN", os.path.basename(i)))
    for i in random.sample(yy_list, yy_move):
        shutil.move(i, os.path.join(dest_dir, "YY", os.path.basename(i)))
    for i in random.sample(yn_list, yn_move):
        shutil.move(i, os.path.join(dest_dir, "YN", os.path.basename(i)))

# move the images from the train folder to the val folder
for i in ["RGB", "Thermal"]:
    source_dir = os.path.join(base_dir, i, "train")
    dest_dir = os.path.join(base_dir, i, "val")
    move_images(source_dir, dest_dir, 0.15)
    
# move the images from the train folder to the test folder
for i in ["RGB", "Thermal"]:
    source_dir = os.path.join(base_dir, i, "train")
    dest_dir = os.path.join(base_dir, i, "test")
    move_images(source_dir, dest_dir, 0.15)


In [15]:
# check if the images were moved correctly
for i in ["RGB", "Thermal"]:
    for j in ["train", "val", "test"]:
        nn_list = [os.path.join(base_dir, i, j, "NN", k) for k in os.listdir(os.path.join(base_dir, i, j, "NN")) if k.endswith(".jpg")]
        yy_list = [os.path.join(base_dir, i, j, "YY", k) for k in os.listdir(os.path.join(base_dir, i, j, "YY")) if k.endswith(".jpg")]
        yn_list = [os.path.join(base_dir, i, j, "YN", k) for k in os.listdir(os.path.join(base_dir, i, j, "YN")) if k.endswith(".jpg")]
        print("nn_list:", nn_list[:4])
        print("yy_list:", yy_list[:4])
        print("yn_list:", yn_list[:4])
        print("len(nn_list):", len(nn_list))
        print("len(yy_list):", len(yy_list))
        print("len(yn_list):", len(yn_list))
        print("len(nn_list) + len(yy_list) + len(yn_list):", len(nn_list) + len(yy_list) + len(yn_list))

# check if the number of images in the train, val and test folders add up to 53451
for i in ["RGB", "Thermal"]:
    for j in ["train", "val", "test"]:
        count = 0
        for k in ["NN", "YY", "YN"]:
            count += len(os.listdir(os.path.join(base_dir, i, j, k)))
        print("count:", count)

nn_list: ['/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (7423).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (2417).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (2306).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/NN/254p RGB Frame (5061).jpg']
yy_list: ['/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (34931).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (17842).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (33190).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YY/254p RGB Frame (34393).jpg']
yn_list: ['/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (37193).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (47123).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (46828).jpg', '/home/umar/Pictures/datasets/FLAME2/RGB/train/YN/254p RGB Frame (45918).jpg']
len(nn_list): 13091
len(yy