In [None]:
import os
import shutil

In [14]:
# Defining paths
image_base_path = "../data/images/"
categories = ["GoogleEmoji", "JoyPixelsEmoji", "OpenMojiEmoji", "TwitterEmoji"]

# Checking total files in each path
for category in categories:
    folder_path = os.path.join(image_base_path, category)
    if os.path.exists(folder_path):
        files = os.listdir(folder_path)
        print(f"Category: {category} - Total files: {len(files)}")
    else:
        print(f"Folder not found: {folder_path}")

Category: GoogleEmoji - Total files: 3584
Category: JoyPixelsEmoji - Total files: 3820
Category: OpenMojiEmoji - Total files: 4284
Category: TwitterEmoji - Total files: 873


In [15]:
# Define processed image path
processed_image_path = "../data/processed_images/"

# Ensure the processed images folder exists
os.makedirs(processed_image_path, exist_ok=True)

def copy_and_rename_images(folder_path, category):
    """Copy images to a new folder with modified names."""
    category_processed_path = os.path.join(processed_image_path, category)
    os.makedirs(category_processed_path, exist_ok=True)  # Create subfolder for each category
    
    for filename in os.listdir(folder_path):
        # Remove "emoji_u" prefix if it exists
        if filename.startswith("emoji_u"):
            new_name = filename.replace("emoji_u", "", 1)  # Remove "emoji_u" prefix
        else:
            new_name = filename  # Keep as is if no "emoji_u"
        
        # Replace all "_" with "-"
        new_name = new_name.replace("_", "-").lower()
        
        # Remove '-fe0f' to standardize all button/text emoji hex codes
        new_name = new_name.replace("-fe0f", "")
        
        # Remove '-200d' to standardize all skin-tone emoji hex codes
        new_name = new_name.replace("-200d", "")
        
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(category_processed_path, new_name)

        # Copy the file instead of renaming
        shutil.copy(old_path, new_path)
        print(f"Copied: {filename} -> {new_name}")

# Process each category folder
for category in categories:
    folder_path = os.path.join(image_base_path, category)
    if os.path.exists(folder_path):
        print(f"Processing {category}...")
        copy_and_rename_images(folder_path, category)
    else:
        print(f"Folder not found: {folder_path}")

Processing GoogleEmoji...
Copied: emoji_u1f3c7_1f3fd.png -> 1f3c7-1f3fd.png
Copied: emoji_u261d_1f3fe.png -> 261d-1f3fe.png
Copied: emoji_u1f469_1f3fd_200d_2764_200d_1f48b_200d_1f468_1f3fd.png -> 1f469-1f3fd-2764-1f48b-1f468-1f3fd.png
Copied: emoji_u1f575_1f3fd_200d_2640.png -> 1f575-1f3fd-2640.png
Copied: emoji_u1f9ce_1f3ff_200d_2640.png -> 1f9ce-1f3ff-2640.png
Copied: emoji_u1f468_200d_1f692.png -> 1f468-1f692.png
Copied: emoji_u1f239.png -> 1f239.png
Copied: emoji_u1f468_1f3fd_200d_2764_200d_1f48b_200d_1f468_1f3fc.png -> 1f468-1f3fd-2764-1f48b-1f468-1f3fc.png
Copied: emoji_u1f977_1f3fd.png -> 1f977-1f3fd.png
Copied: emoji_u1f469_1f3fc_200d_1f9b3.png -> 1f469-1f3fc-1f9b3.png
Copied: emoji_u1f57a.png -> 1f57a.png
Copied: emoji_u1f647_200d_2640.png -> 1f647-2640.png
Copied: emoji_u1f9d1_1f3ff_200d_1f9b3.png -> 1f9d1-1f3ff-1f9b3.png
Copied: emoji_u1f563.png -> 1f563.png
Copied: emoji_u1f91c_1f3fb.png -> 1f91c-1f3fb.png
Copied: emoji_u1f9cd_1f3fb_200d_2640.png -> 1f9cd-1f3fb-2640.png
Cop