### COMPILE DIR

In [15]:
import os
import shutil
from tqdm import tqdm

# Source: raw captures, one folder per contributor
# (layout used below: <contributor>/<leaf class>/<subfolder>/<image>)
original_root_dir = r'D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia'

# Destination: compiled dataset, resolved relative to the working directory
new_dataset_root_dir = r'DS_COMPILED'

# Leaf classes to collect from every contributor folder
leaf_classes = [
    "Apitong",
    "Balete",
    "Bayabas",
    "Kamagong",
    "Langka",
    "Mahogany",
    "Mangga",
    "Palo Maria",
]

# Subfolders expected inside each leaf class folder
subfolders = [
    "Back",
    "Discard",
    "Front",
    "Tilt",
]

# Make sure the destination root exists, then add one folder per leaf class
os.makedirs(new_dataset_root_dir, exist_ok=True)
for leaf_class in leaf_classes:
    leaf_class_dir = os.path.join(new_dataset_root_dir, leaf_class)
    os.makedirs(leaf_class_dir, exist_ok=True)

def copy_files_to_new_structure(original_root, new_root, leaf_classes, subfolders):
    """Flatten per-user leaf photos into one directory per leaf class.

    Walks ``original_root/<user>/<leaf class>/<subfolder>/`` and copies every
    regular file found there into ``new_root/<leaf class>/``.

    A file keeps its original name whenever that name is still free in the
    destination. If the name is already taken:

    * by a byte-identical file, the copy is skipped (so re-running the cell
      does not create duplicates);
    * by a different file (e.g. two phones both producing ``IMG_0001.jpg``),
      the copy is stored as ``<stem>__<user>__<subfolder><ext>`` instead of
      silently overwriting the earlier image.

    Args:
        original_root: Directory containing one sub-directory per user.
        new_root: Destination root; ``new_root/<leaf class>`` is created on demand.
        leaf_classes: Class directory names to look for under each user.
        subfolders: Sub-directory names to look for under each class directory.

    Returns:
        None. Progress and notices about missing or skipped entries are
        reported through ``tqdm``.
    """
    import filecmp  # function-scope import so the cell's import block stays unchanged

    for user_dir in tqdm(os.listdir(original_root), desc="Processing users"):
        user_dir_path = os.path.join(original_root, user_dir)
        if not os.path.isdir(user_dir_path):
            # Stray files next to the user folders are ignored.
            continue

        tqdm.write(f"Processing files in {user_dir_path}")
        for leaf_class in leaf_classes:
            leaf_class_dir_path = os.path.join(user_dir_path, leaf_class)
            if not os.path.isdir(leaf_class_dir_path):
                tqdm.write(f"Leaf class directory {leaf_class_dir_path} does not exist in {user_dir_path}.")
                continue

            # Loop-invariant for every file of this class, so create it once here.
            dest_dir = os.path.join(new_root, leaf_class)
            os.makedirs(dest_dir, exist_ok=True)

            for subfolder in subfolders:
                subfolder_path = os.path.join(leaf_class_dir_path, subfolder)
                if not os.path.isdir(subfolder_path):
                    tqdm.write(f"Subfolder {subfolder_path} does not exist in {leaf_class_dir_path}.")
                    continue

                for filename in os.listdir(subfolder_path):
                    src_file = os.path.join(subfolder_path, filename)
                    if not os.path.isfile(src_file):
                        # shutil.copy cannot copy directories; report and move on.
                        tqdm.write(f"Skipping non-file entry {src_file}")
                        continue

                    dest_file = os.path.join(dest_dir, filename)
                    if os.path.exists(dest_file):
                        if filecmp.cmp(src_file, dest_file, shallow=False):
                            # Same bytes already present (duplicate or a re-run).
                            continue
                        # Different image under the same name: derive a name
                        # that is unique per (user, subfolder, filename).
                        stem, ext = os.path.splitext(filename)
                        dest_file = os.path.join(dest_dir, f"{stem}__{user_dir}__{subfolder}{ext}")
                    shutil.copy(src_file, dest_file)

# Run the compilation step with the configuration defined above
copy_files_to_new_structure(
    original_root=original_root_dir,
    new_root=new_dataset_root_dir,
    leaf_classes=leaf_classes,
    subfolders=subfolders,
)

# Report where the compiled dataset was written
print(f"Files copied to new directory structure under {new_dataset_root_dir}")


Processing users:   0%|                                                                         | 0/19 [00:00<?, ?it/s]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\AC Narvaez - Itel RS4


Processing users:   0%|                                                                         | 0/19 [00:05<?, ?it/s]

Leaf class directory D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\AC Narvaez - Itel RS4\Mangga does not exist in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\AC Narvaez - Itel RS4.


Processing users:   5%|███▍                                                             | 1/19 [00:06<01:53,  6.31s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Angelo Castillo - Redmi 10 (2022)


Processing users:  11%|██████▊                                                          | 2/19 [00:22<03:29, 12.33s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Carlo Parducho - Infinix 05G (2023)


Processing users:  16%|██████████▎                                                      | 3/19 [00:51<05:13, 19.62s/it]

Leaf class directory D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Carlo Parducho - Infinix 05G (2023)\Palo Maria does not exist in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Carlo Parducho - Infinix 05G (2023).
Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\CBenedict Gutierrez - Huawei Nova10


Processing users:  21%|█████████████▋                                                   | 4/19 [01:12<05:04, 20.32s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Cloud Bagtas - Infinix 05G


Processing users:  26%|█████████████████                                                | 5/19 [01:25<04:07, 17.66s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Daniel Mercado - Infinix Smart 8


Processing users:  32%|████████████████████▌                                            | 6/19 [01:37<03:25, 15.82s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Dhan Mabilangan - iPhone 13


Processing users:  37%|███████████████████████▉                                         | 7/19 [01:49<02:53, 14.42s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Franco Villamor - Nubia Z50S


Processing users:  42%|███████████████████████████▎                                     | 8/19 [01:56<02:14, 12.26s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Hans Fernando - Tecno Pova Neo 2


Processing users:  47%|██████████████████████████████▊                                  | 9/19 [02:20<02:38, 15.89s/it]

Leaf class directory D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Hans Fernando - Tecno Pova Neo 2\Palo Maria does not exist in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Hans Fernando - Tecno Pova Neo 2.
Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Jester Cruz - iPhone 13 Pro


Processing users:  53%|█████████████████████████████████▋                              | 10/19 [02:37<02:26, 16.26s/it]

Leaf class directory D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Jester Cruz - iPhone 13 Pro\Palo Maria does not exist in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Jester Cruz - iPhone 13 Pro.
Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Kurt Magcawas - Redmi Note 10


Processing users:  58%|█████████████████████████████████████                           | 11/19 [03:00<02:25, 18.15s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Marvin Buquis - Redmi Note 11


Processing users:  63%|████████████████████████████████████████▍                       | 12/19 [03:20<02:11, 18.83s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Neil Ongsinco - Realme 7


Processing users:  68%|███████████████████████████████████████████▊                    | 13/19 [03:37<01:49, 18.32s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Nikka Farofaldane - iPhone 6s


Processing users:  68%|███████████████████████████████████████████▊                    | 13/19 [03:38<01:49, 18.32s/it]

Subfolder D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Nikka Farofaldane - iPhone 6s\Apitong\Discard does not exist in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Nikka Farofaldane - iPhone 6s\Apitong.


Processing users:  74%|███████████████████████████████████████████████▏                | 14/19 [04:00<01:38, 19.70s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Prince Mampusti - Huawei Y7


Processing users:  79%|██████████████████████████████████████████████████▌             | 15/19 [04:34<01:35, 23.83s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Reymer Unciano - Honor 8x


Processing users:  84%|█████████████████████████████████████████████████████▉          | 16/19 [05:05<01:18, 26.09s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\Yvonne Galicia - Samsung Galaxy A32


Processing users:  89%|█████████████████████████████████████████████████████████▎      | 17/19 [05:33<00:53, 26.77s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\z Alren Tobias - Huawei Nova 7


Processing users:  95%|████████████████████████████████████████████████████████████▋   | 18/19 [05:58<00:26, 26.05s/it]

Processing files in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\z Clark Abutal - iPhone 11 ata


Processing users:  95%|████████████████████████████████████████████████████████████▋   | 18/19 [06:09<00:26, 26.05s/it]

Leaf class directory D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\z Clark Abutal - iPhone 11 ata\Mangga does not exist in D:\SnapfoliaV2\OJT SNAPFOLIA\June 10 2024 - Snapfolia\z Clark Abutal - iPhone 11 ata.


Processing users: 100%|████████████████████████████████████████████████████████████████| 19/19 [06:11<00:00, 19.55s/it]

Files copied to new directory structure under DS_COMPILED





### GET SPECIFIC NUMBER OF IMAGES PER CLASS

In [None]:
import os
import shutil
import random
from tqdm import tqdm

# Source: the compiled dataset. The "COMPILE DIR" cell writes it to the
# relative path 'DS_COMPILED', so the same relative path is used here; an
# absolute path would only match when the working directory happens to be
# the folder that absolute path points into.
original_root_dir = r'DS_COMPILED'

# Destination: reduced dataset with a fixed number of images per class
new_dataset_root_dir = r'DS_100'

# Create the destination root directory if it doesn't exist
os.makedirs(new_dataset_root_dir, exist_ok=True)

# Number of images to select from each leaf class directory
num_images_per_class = 100

def select_and_copy_images(original_root, new_root, num_images, seed=None):
    """Copy a random sample of files from every class directory.

    For each sub-directory of ``original_root`` a directory of the same name
    is created under ``new_root`` and up to ``num_images`` regular files are
    copied into it.

    Args:
        original_root: Directory containing one sub-directory per class.
        new_root: Destination root; class directories are created on demand.
        num_images: Maximum number of files to copy per class. Classes with
            fewer files contribute all of them; values <= 0 copy nothing.
        seed: Optional seed for a private random generator. With a seed the
            same files are selected on every call for the same directory
            contents. With ``None`` (the default) the module-level ``random``
            state is used, so a prior ``random.seed(...)`` is still honoured.

    Returns:
        None. One line per copied file is reported through ``tqdm.write``.

    Note:
        Files already present in the destination are never removed, so
        repeated unseeded runs can leave more than ``num_images`` files per
        class in ``new_root``.
    """
    # A private generator keeps a seeded call from disturbing global state.
    rng = random if seed is None else random.Random(seed)

    # Sorted listings make the selection independent of the OS listing order.
    leaf_classes = sorted(os.listdir(original_root))

    for leaf_class in tqdm(leaf_classes, desc="Copying images", unit="class"):
        class_dir = os.path.join(original_root, leaf_class)
        if not os.path.isdir(class_dir):
            continue

        new_class_dir = os.path.join(new_root, leaf_class)
        os.makedirs(new_class_dir, exist_ok=True)

        # Only regular files are candidates; shutil.copy cannot copy directories.
        files = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )

        # Clamp to [0, len(files)] so small classes and non-positive requests are safe.
        num_files_to_copy = max(0, min(num_images, len(files)))

        for filename in rng.sample(files, num_files_to_copy):
            src_file = os.path.join(class_dir, filename)
            dest_file = os.path.join(new_class_dir, filename)
            shutil.copy(src_file, dest_file)

            # tqdm.write keeps the progress bar intact, unlike a bare print.
            tqdm.write(f"Copied {filename} from {leaf_class}")

# Seed the module-level RNG so the random selection can be repeated.
# (Exact repeatability also assumes the directory listing order is stable.)
# Change or remove the seed to draw a different sample.
random.seed(42)

# Select and copy the images
select_and_copy_images(original_root_dir, new_dataset_root_dir, num_images_per_class)

# "up to": classes holding fewer files than requested contribute all they have
print(f"Selected up to {num_images_per_class} images from each leaf class directory under {original_root_dir} and copied them to {new_dataset_root_dir}")

### Copy to Leaf Dir

In [3]:
import os
import shutil
from tqdm import tqdm

# Source: the per-class dataset to flatten
original_root_dir = r'DS_100'

# Leaf classes whose images are gathered
leaf_classes = [
    "Apitong",
    "Balete",
    "Bayabas",
    "Kamagong",
    "Langka",
    "Mahogany",
    "Mangga",
    "Palo Maria",
]

# Destination: a single "leaf" directory inside the DS_100 directory
leaf_dir = os.path.join(original_root_dir, "leaf")
os.makedirs(leaf_dir, exist_ok=True)

def copy_files_to_leaf_directory(original_root, new_leaf_dir, leaf_classes):
    """Copy the files of every leaf class into one shared directory.

    Each regular file in ``original_root/<leaf class>/`` is copied into
    ``new_leaf_dir``. A file keeps its name when that name is free. If the
    name is already taken:

    * by a byte-identical file, the copy is skipped (re-runs stay idempotent);
    * by a different file (same filename in two classes), the copy is stored
      as ``<stem>__<leaf class><ext>`` instead of overwriting the earlier one.

    Args:
        original_root: Directory containing one sub-directory per leaf class.
        new_leaf_dir: Destination directory; created if it does not exist.
        leaf_classes: Class directory names to read from ``original_root``.

    Returns:
        None. Missing class directories and skipped entries are reported
        through ``tqdm.write``.
    """
    import filecmp  # function-scope import so the cell's import block stays unchanged

    os.makedirs(new_leaf_dir, exist_ok=True)

    for leaf_class in tqdm(leaf_classes, desc="Copying files for leaf classes"):
        leaf_class_dir_path = os.path.join(original_root, leaf_class)
        if not os.path.isdir(leaf_class_dir_path):
            tqdm.write(f"Leaf class directory {leaf_class_dir_path} does not exist.")
            continue

        for filename in os.listdir(leaf_class_dir_path):
            src_file = os.path.join(leaf_class_dir_path, filename)
            if not os.path.isfile(src_file):
                # shutil.copy cannot copy directories; report and move on.
                tqdm.write(f"Skipping non-file entry {src_file}")
                continue

            dest_file = os.path.join(new_leaf_dir, filename)
            if os.path.exists(dest_file):
                if filecmp.cmp(src_file, dest_file, shallow=False):
                    # Same bytes already present (duplicate or a re-run).
                    continue
                # Different image under the same name: tag it with its class.
                stem, ext = os.path.splitext(filename)
                dest_file = os.path.join(new_leaf_dir, f"{stem}__{leaf_class}{ext}")
            shutil.copy(src_file, dest_file)

# Gather every class's images into the shared leaf directory
copy_files_to_leaf_directory(
    original_root=original_root_dir,
    new_leaf_dir=leaf_dir,
    leaf_classes=leaf_classes,
)

# Report where the files were written
print(f"Files copied to new directory structure under {leaf_dir}")

Copying files for leaf classes: 100%|██████████████████████████████████| 8/8 [00:46<00:00,  5.87s/it]

Files copied to new directory structure under DS_100\leaf



