## Preparing the Training Data

In [None]:
import os
import random
import shutil

# Set your dataset directory
base_dir = "dataset/train"

# The target number of files per class
target_file_count = 1000

# Cap every class folder at target_file_count entries by deleting a random
# surplus in place. Trimming in place (rather than moving the keepers to a
# "temp_<class>" folder, wiping the class folder and renaming) means an
# interrupted run can never leave a stray temp folder inside base_dir that a
# later run would mistake for a class, and classes already under the cap are
# not touched at all.
for class_dir in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_dir)

    # Ignore anything next to the class folders that is not a folder itself
    if not os.path.isdir(class_path):
        continue

    # Get all entry names of this class
    files = os.listdir(class_path)

    # Nothing to trim when the class is already at or below the cap
    if len(files) <= target_file_count:
        continue

    # Shuffle in place so that everything past the cap is a random selection
    random.shuffle(files)

    for f in files[target_file_count:]:
        excess_path = os.path.join(class_path, f)
        # os.remove() cannot delete a real directory, so nested folders are
        # removed recursively; files and symlinks go through os.remove()
        if os.path.isdir(excess_path) and not os.path.islink(excess_path):
            shutil.rmtree(excess_path)
        else:
            os.remove(excess_path)

# Report the configured cap instead of a hard-coded number
print(
    f"Truncation complete. Each class now has a maximum of {target_file_count} files."
)

## Preparing the Validation Data


In [None]:
import os
import shutil
import numpy as np

# Root folder that contains "train" and receives the new "validation" folder
base_dir = "dataset"

# Class folder names to process; add or modify according to your dataset
class_labels = ["happy", "sad", "angry", "surprise"]

# Split fractions. Only validation_split is read below: it is the share of
# each class's files currently in "train" that gets moved to "validation".
# train_split is defined here but not used by this cell.
train_split = 0.70
validation_split = 0.15

# Make sure the validation root exists before filling it
validation_dir = os.path.join(base_dir, "validation")
os.makedirs(validation_dir, exist_ok=True)

train_dir = os.path.join(base_dir, "train")

for label in class_labels:
    # Destination folder for this class inside the validation root
    label_validation_dir = os.path.join(validation_dir, label)
    os.makedirs(label_validation_dir, exist_ok=True)

    # Everything currently stored for this class in the train folder
    source_dir = os.path.join(train_dir, label)
    files = os.listdir(source_dir)

    # Number of entries to move, rounded down
    validation_count = int(len(files) * validation_split)

    # Draw that many distinct names at random and move each one over
    validation_files = np.random.choice(files, validation_count, replace=False)
    for file_name in validation_files:
        origin = os.path.join(source_dir, file_name)
        destination = os.path.join(label_validation_dir, file_name)
        shutil.move(origin, destination)

print("Files successfully moved to the validation folder.")

## Preparing the Test Data

In [None]:
import os
import random

# Folder that holds one sub-folder per class for the test set
test_dir = "dataset/test"

# Upper bound on the number of files kept in each test class
target_test_file_count = 250

# Walk the class folders and randomly delete whatever exceeds the cap
for class_dir in os.listdir(test_dir):
    class_path = os.path.join(test_dir, class_dir)

    # Skip anything that is not a folder
    if not os.path.isdir(class_path):
        continue

    # Names of all entries in this class folder
    files = os.listdir(class_path)

    # Classes at or below the cap are left untouched
    if len(files) <= target_test_file_count:
        continue

    # Shuffle in place so the surplus past the cap is a random selection
    random.shuffle(files)
    files_to_remove = files[target_test_file_count:]

    # Delete the surplus entries
    for f in files_to_remove:
        os.remove(os.path.join(class_path, f))

print("Test dataset truncation complete.")