In [1]:
import tensorflow as tf
import cv2
import os

# Make tf.function-wrapped code execute eagerly, so tensors produced in the
# cells below hold concrete values (the save step reads them with .numpy()).
tf.config.run_functions_eagerly(True)
# TF1-compatibility switch for eager execution.
# NOTE(review): eager execution is already the default in TensorFlow 2.x, so
# this call is probably redundant — confirm against the installed TF version.
tf.compat.v1.enable_eager_execution()



## Load Data and Train Test Split

To check whether the modelling succeeded (i.e. that the model did not overfit), a train/test split is made at the beginning of the process. The validation set generated below is held out and used to simulate unseen data.

In [2]:
# Root folder of the raw face images that are loaded and split below.
data_dir='../../data/raw/FaceImages/'
# Load the images and split them into a training and a validation dataset in
# a single call; the two datasets are unpacked in that order.
# batch_size is not passed here, so the library default applies.
train_set, validation_set = tf.keras.utils.image_dataset_from_directory(
        data_dir, # images directory
        image_size=(256, 256), # size each image is resized to when loaded
        seed=1, # fixed seed for the randomisation, so the random train/validation split is reproducible across runs
        validation_split=0.2, # fraction of the data reserved for validation
        subset='both', # return both subsets (training and validation), divided according to validation_split
    )

Found 6797 files belonging to 2 classes.
Using 5438 files for training.
Using 1359 files for validation.




# Save Data

Save the completed split to disk, with the training and validation images in separate directories.

In [3]:
# Function to create class folders within the specified directory
def create_class_folders(output_dir, classes):
    """Ensure ``output_dir`` holds one sub-directory per entry of ``classes``.

    Args:
        output_dir: Directory under which the class sub-directories are made.
        classes: Iterable of sub-directory names.

    Missing parent directories are created as needed, and directories that
    already exist are left untouched.
    """
    class_dirs = (os.path.join(output_dir, name) for name in classes)
    for class_dir in class_dirs:
        os.makedirs(class_dir, exist_ok=True)

# Function to save images from a dataset to class-specific folders
def save_images(dataset, output_dir, classes):
    """Write every image of a batched dataset to ``output_dir/<class>/image_<k>.png``.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches whose two
            elements expose ``.numpy()``. Each label is used as an index
            into ``classes``.
        output_dir: Root directory that receives one sub-folder per class.
        classes: Sequence of class folder names, indexed by label.

    Raises:
        OSError: If OpenCV reports that an image could not be written.

    File numbering continues from the number of entries already present in
    each class folder, so calling this again on a populated folder adds new
    files rather than restarting at ``image_1.png``.
    """
    create_class_folders(output_dir, classes)

    # Count the existing entries of each class folder once, up front, and keep
    # a running index afterwards. Listing the folder again for every single
    # image makes the export quadratic in the number of files written.
    next_index = {
        class_name: len(os.listdir(os.path.join(output_dir, class_name))) + 1
        for class_name in classes
    }

    for images, labels in dataset:
        # Process each batch of images
        for image, label in zip(images.numpy(), labels.numpy()):
            class_name = classes[label]
            filename = f'image_{next_index[class_name]}.png'
            filepath = os.path.join(output_dir, class_name, filename)

            # OpenCV stores channels in BGR order; assumes the dataset yields
            # RGB images — TODO confirm.
            # NOTE(review): if the images are floating point, imwrite has to
            # convert them to 8-bit for PNG — confirm the saved pixel values.
            written = cv2.imwrite(filepath, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
            if not written:
                # imwrite signals failure through its return value; surfacing
                # it avoids silently dropping images from the saved split.
                raise OSError(f'cv2.imwrite failed to save {filepath}')

            next_index[class_name] += 1

In [None]:
# Folder names used for the two classes; save_images picks the folder by
# indexing this list with each image's label.
# NOTE(review): hard-coded — confirm the order matches the label order of the
# datasets being saved.
class_names = ['0', '1']

# Batch size (not referenced by anything else in this cell)
batch_size = 32

# Destination roots: training split and validation split
train_output_dir = '../../data/raw/TrainTest/train/'
val_output_dir = '../../data/raw/TrainTest/test/'

# Make sure both roots exist before anything is written
os.makedirs(train_output_dir, exist_ok=True)
os.makedirs(val_output_dir, exist_ok=True)

# Export the training split first, then the validation split
save_images(train_set, train_output_dir, class_names)
save_images(validation_set, val_output_dir, class_names)

# Report where each split was written
print(
    f"Train set images saved to: {train_output_dir}",
    f"Validation set images saved to: {val_output_dir}",
    sep='\n',
)