In [2]:
import os
from pathlib import Path
import shutil
from google.colab import drive

def mount_drive():
    """Mount Google Drive"""
    drive.mount('/content/drive', force_remount=True)
    print("Drive mounted successfully")

def get_new_filename(category, filename):
    """
    Generate new filename based on category while preserving unique identifiers
    """
    base, ext = os.path.splitext(filename)
    try:
        # Find the UUID part (assuming it's the last part after a hyphen)
        uuid_part = base.split('-')[-1]
        new_name = f"{category}_musa-acuminata-overripe-{uuid_part}{ext}"
        return new_name
    except:
        return f"{category}_musa-acuminata-overripe-{base}{ext}"

def sort_bananas(source_dir, output_dir):
    """
    Sort and rename banana images based on their categories
    """
    # List of all categories
    categories = [
        'VeryRipe', 'Unripe', 'UnfreshUnripe',
        'UncertainRipe', 'Ripe', 'OverRipe', 'Mold'
    ]

    # Process each split (valid, train, test)
    for split in ['valid', 'train', 'test']:
        print(f"\nProcessing {split} split...")

        # Setup paths
        split_source = os.path.join(source_dir, split)
        split_output = os.path.join(output_dir, split)

        if not os.path.exists(split_source):
            print(f"Warning: Source directory {split_source} not found")
            continue

        # Create output category directories
        for category in categories:
            os.makedirs(os.path.join(split_output, category), exist_ok=True)

        # Process each category in the source
        for category in os.listdir(split_source):
            category_path = os.path.join(split_source, category)
            if not os.path.isdir(category_path):
                continue

            print(f"Processing category: {category}")

            # Process each image in the category
            for filename in os.listdir(category_path):
                if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue

                source_path = os.path.join(category_path, filename)
                dest_folder = os.path.join(split_output, category)

                # Generate new filename using the category
                new_filename = get_new_filename(category, filename)
                dest_path = os.path.join(dest_folder, new_filename)

                try:
                    # Copy the file with new name
                    shutil.copy2(source_path, dest_path)
                    print(f"Copied: {filename} → {new_filename}")
                except Exception as e:
                    print(f"Error copying {filename}: {str(e)}")

def print_summary(output_dir):
    """Print summary of sorted images"""
    print("\nSorting Summary:")
    print("=" * 50)

    for split in ['valid', 'train', 'test']:
        split_dir = os.path.join(output_dir, split)
        if not os.path.exists(split_dir):
            continue

        print(f"\n{split.upper()} Split:")
        print("-" * 20)

        total_images = 0
        for category in os.listdir(split_dir):
            category_path = os.path.join(split_dir, category)
            if os.path.isdir(category_path):
                image_count = len([f for f in os.listdir(category_path)
                                 if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
                print(f"{category}: {image_count} images")
                total_images += image_count
        print(f"Total {split} images: {total_images}")

def main():
    # Mount Google Drive
    mount_drive()

    # Define paths
    source_dir = "/content/drive/MyDrive/SeniorProject/Tomato/RipenessClassification_Sorted"
    output_dir = "/content/drive/MyDrive/SeniorProject/Tomato/BananaRipenessClassification_Copy"

    print(f"\nStarting image sorting process...")
    print(f"Source directory: {source_dir}")
    print(f"Output directory: {output_dir}")

    # Create main output directory
    os.makedirs(output_dir, exist_ok=True)

    # Sort the images
    sort_bananas(source_dir, output_dir)

    # Print final summary
    print_summary(output_dir)

    print("\nSorting completed!")

if __name__ == "__main__":
    main()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copied: Ripe_healthy_(48)_aca1a_dup14b.jpg → Ripe_musa-acuminata-overripe-Ripe_healthy_(48)_aca1a_dup14b.jpg
Copied: Ripe_healthy_(16)_dup15b_dup15b.jpg → Ripe_musa-acuminata-overripe-Ripe_healthy_(16)_dup15b_dup15b.jpg
Copied: Ripe_healthy_(55)_aca1a_dup16b.jpg → Ripe_musa-acuminata-overripe-Ripe_healthy_(55)_aca1a_dup16b.jpg
Copied: Ripe_healthy_(47)_aca1a_dup17b.jpg → Ripe_musa-acuminata-overripe-Ripe_healthy_(47)_aca1a_dup17b.jpg
Copied: Ripe_healthy_(22)_aca1a_dup18b.jpg → Ripe_musa-acuminata-overripe-Ripe_healthy_(22)_aca1a_dup18b.jpg
Copied: Ripe_healthy_(40)_aca1a_dup2b.jpg → Ripe_musa-acuminata-overripe-Ripe_healthy_(40)_aca1a_dup2b.jpg
Copied: Ripe_IMG_1842_2_aca1a_dup11b.jpg → Ripe_musa-acuminata-overripe-Ripe_IMG_1842_2_aca1a_dup11b.jpg
Copied: Ripe_IMG_1826_aca1a_dup12b.jpg → Ripe_musa-acuminata-overripe-Ripe_IMG_1826_aca1a_dup12b.jpg
Copied: Ripe_IMG_1826_2_aca1a_dup13b.jpg → Ripe_musa-acuminata-overripe-Rip