In [2]:
import os

# --- CONFIGURATION ---
# Root of the dataset tree. remove_junk_files() walks this folder recursively,
# so the path must be valid relative to the notebook's working directory.
base_dir = "fer2013"  # Your main folder
# ---------------------

def remove_junk_files(root_dir=None, image_exts=('.png', '.jpg', '.jpeg', '.bmp')):
    """Delete every file under a folder tree whose name lacks an image extension.

    Args:
        root_dir: Folder to clean, walked recursively. Defaults to the
            notebook-level ``base_dir`` when omitted.
        image_exts: File-name suffixes (compared case-insensitively) that mark
            a file as an image to keep. A single string is also accepted.

    Returns:
        None. Progress and the final count are printed.

    Deletion is permanent (``os.remove``). A file that cannot be removed is
    reported and skipped so the rest of the tree is still processed.
    """
    target_dir = base_dir if root_dir is None else root_dir

    # str.endswith() needs a tuple; lower-casing both sides keeps the match
    # case-insensitive even for caller-supplied suffixes such as ".JPG".
    if isinstance(image_exts, str):
        image_exts = (image_exts,)
    keep_suffixes = tuple(ext.lower() for ext in image_exts)

    # os.walk() silently yields nothing for a missing path, which would
    # otherwise be reported as a successful run that removed 0 files.
    if not os.path.isdir(target_dir):
        print(f"Folder '{target_dir}' not found. Nothing to clean.")
        return

    print(f"Cleaning junk files from '{target_dir}'...")
    deleted_count = 0

    # Walk through every folder and subfolder
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if file.lower().endswith(keep_suffixes):
                continue  # it is an image: keep it

            file_path = os.path.join(root, file)
            try:
                os.remove(file_path)
            except OSError as e:
                # Only filesystem failures are expected here (permissions,
                # file vanished, ...); anything else should surface as a bug.
                print(f"  Error deleting {file}: {e}")
            else:
                print(f"  Deleted: {file}")
                deleted_count += 1

    print(f"\nDone! Removed {deleted_count} non-image files.")

# Run the cleanup. Destructive: matching files are removed with os.remove()
# and cannot be restored, so double-check base_dir before executing this cell.
remove_junk_files()

Cleaning junk files from 'fer2013'...
  Deleted: Training_3792144.jpg:Zone.Identifier
  Deleted: Training_7984662.jpg:Zone.Identifier
  Deleted: Training_83759368.jpg:Zone.Identifier
  Deleted: Training_94314499.jpg:Zone.Identifier
  Deleted: Training_86916447.jpg:Zone.Identifier
  Deleted: Training_2821982.jpg:Zone.Identifier
  Deleted: Training_65918810.jpg:Zone.Identifier
  Deleted: Training_20601290.jpg:Zone.Identifier
  Deleted: Training_44403549.jpg:Zone.Identifier
  Deleted: Training_24027041.jpg:Zone.Identifier
  Deleted: Training_77555432.jpg:Zone.Identifier
  Deleted: Training_73386715.jpg:Zone.Identifier
  Deleted: Training_60333662.jpg:Zone.Identifier
  Deleted: Training_53706978.jpg:Zone.Identifier
  Deleted: Training_17117229.jpg:Zone.Identifier
  Deleted: Training_15485793.jpg:Zone.Identifier
  Deleted: Training_99581423.jpg:Zone.Identifier
  Deleted: Training_15216755.jpg:Zone.Identifier
  Deleted: Training_14259954.jpg:Zone.Identifier
  Deleted: Training_79375757.jpg:Z

In [3]:
import os
import shutil
import random

# --- CONFIGURATION ---
base_dir = "fer2013"  # Your main folder
# Class subfolders are read from train_dir; create_valid_from_train() creates
# val_dir itself and refuses to run if it already exists.
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "valid")
# Fraction of each class folder moved to validation; the per-class count is
# truncated with int(), so very small classes may contribute fewer files.
split_ratio = 0.2  # 20% for validation
# ---------------------

def create_valid_from_train(src_root=None, dst_root=None, ratio=None, seed=None):
    """Carve a validation set out of the training set by moving files.

    For every class subfolder of the training folder, a random sample of its
    files is moved into a same-named subfolder of the validation folder.

    Args:
        src_root: Training folder containing one subfolder per class.
            Defaults to the notebook-level ``train_dir``.
        dst_root: Validation folder to create. Defaults to the notebook-level
            ``val_dir``.
        ratio: Fraction (0..1) of each class to move. Defaults to the
            notebook-level ``split_ratio``. The per-class count is truncated
            with ``int()``.
        seed: Optional seed. When given, the same files are selected on every
            run for the same folder contents; when omitted, the module-level
            ``random`` state is used as before.

    Returns:
        None. Progress is printed.

    Raises:
        ValueError: If ``ratio`` is outside the range 0..1.
        FileNotFoundError: If the training folder does not exist.
    """
    src_root = train_dir if src_root is None else src_root
    dst_root = val_dir if dst_root is None else dst_root
    ratio = split_ratio if ratio is None else ratio

    # Safety check: never split twice into an existing validation folder.
    if os.path.exists(dst_root):
        print(f"Stop! The folder '{dst_root}' already exists.")
        return

    # Validate everything BEFORE creating dst_root. Otherwise a failure would
    # leave an empty validation folder behind and the safety check above
    # would block every retry.
    if not 0 <= ratio <= 1:
        # A negative ratio would turn the slice below into images[:-k] and
        # move almost the whole class.
        raise ValueError(f"split ratio must be between 0 and 1, got {ratio!r}")
    if not os.path.isdir(src_root):
        raise FileNotFoundError(f"Training folder '{src_root}' not found.")

    print(f"Creating validation set by moving {ratio:.0%} of training data...")
    os.makedirs(dst_root)

    # A private generator keeps a seeded split from disturbing (or being
    # disturbed by) other users of the global random state.
    rng = random if seed is None else random.Random(seed)

    # List emotions (angry, happy, etc.). Sorted because os.listdir() order is
    # arbitrary and the shared generator is consumed class by class.
    emotions = sorted(
        d for d in os.listdir(src_root) if os.path.isdir(os.path.join(src_root, d))
    )

    for emotion in emotions:
        src_path = os.path.join(src_root, emotion)
        dst_path = os.path.join(dst_root, emotion)
        os.makedirs(dst_path, exist_ok=True)

        # Regular files only: nested folders must not be counted or moved as
        # if they were images. Sorting gives the shuffle a stable input.
        images = sorted(
            name for name in os.listdir(src_path)
            if os.path.isfile(os.path.join(src_path, name))
        )
        rng.shuffle(images)

        # Calculate how many to move
        num_to_move = int(len(images) * ratio)
        files_to_move = images[:num_to_move]

        print(f"  {emotion}: Moving {num_to_move} images to valid...")

        # Move them
        for img in files_to_move:
            shutil.move(os.path.join(src_path, img), os.path.join(dst_path, img))

    print("\nSuccess! Your 'train' folder is smaller, and 'valid' is ready.")

# Perform the split. Files are moved (not copied) out of train, and the
# function refuses to run again once the valid folder exists.
create_valid_from_train()

Creating validation set by moving 20% of training data...
  neutral: Moving 993 images to valid...
  happiness: Moving 1443 images to valid...
  disgust: Moving 87 images to valid...
  surprise: Moving 634 images to valid...
  anger: Moving 799 images to valid...
  fear: Moving 819 images to valid...
  sadness: Moving 966 images to valid...

Success! Your 'train' folder is smaller, and 'valid' is ready.
