In [1]:
!pip install seaborn



In [2]:
!pip install pillow



In [3]:
!pip install pillow-heif

Collecting pillow-heif
  Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pillow-heif
Successfully installed pillow-heif-0.22.0


In [4]:
from posix import read
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [5]:
import shutil
from PIL import Image
import io
import subprocess
from google.colab import drive

def mount_drive():
    """Attach Google Drive to the Colab runtime.

    The drive is mounted at ``/content/drive`` with ``force_remount=True``
    and a confirmation line is printed once ``drive.mount`` returns.
    """
    mount_point = '/content/drive'
    drive.mount(mount_point, force_remount=True)
    print("Drive mounted successfully")

def clean_filename(filename):
    """Return ``filename`` with spaces and hyphens in its stem replaced by underscores.

    Only the part before the final extension is rewritten; the extension is
    passed through untouched. Any other character (parentheses, dots inside
    the stem, ...) is kept exactly as it was.
    """
    stem, suffix = os.path.splitext(filename)
    # One translation table handles both substitutions in a single pass.
    underscore_map = str.maketrans({' ': '_', '-': '_'})
    return stem.translate(underscore_map) + suffix

def generate_new_filename(category, filename):
    """Build the destination filename for an image of the given category.

    The name is first normalised with ``clean_filename``; its extension is
    then dropped, the stem is prefixed with ``"<category>_"`` unless it
    already starts with that prefix, and ``.jpg`` is appended.

    Returns:
        A string of the form ``<category>_<cleaned stem>.jpg``.
    """
    stem = os.path.splitext(clean_filename(filename))[0]
    prefix = f"{category}_"

    # Avoid doubling the prefix on files that were already renamed.
    if not stem.startswith(prefix):
        stem = prefix + stem

    return f"{stem}.jpg"

def _save_as_jpeg(img, dest_path):
    """Write a PIL image to ``dest_path`` as an RGB JPEG without leaving partial files."""
    if img.mode != 'RGB':
        # JPEG cannot store alpha or palette data; PIL raises on e.g. RGBA input.
        img = img.convert('RGB')

    # Save under a temporary name first so a failed encode never leaves a
    # truncated file under the final .jpg name.
    tmp_path = f"{dest_path}.part"
    try:
        img.save(tmp_path, 'JPEG', quality=90)
        os.replace(tmp_path, dest_path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def _open_heic(source_path):
    """Decode a HEIC file into a PIL image using pillow-heif."""
    # Imported lazily: the package is optional and only needed for HEIC input.
    import pillow_heif

    heif_file = pillow_heif.read_heif(source_path)
    return Image.frombytes(
        heif_file.mode,
        heif_file.size,
        heif_file.data,
        "raw",
        heif_file.mode
    )


def _transfer_image(source_path, dest_path, filename):
    """Convert (HEIC/PNG) or copy (JPG/JPEG) one image to ``dest_path``."""
    lowered = filename.lower()
    if lowered.endswith('.heic'):
        _save_as_jpeg(_open_heic(source_path), dest_path)
        print(f"Converted {filename} to JPG using pillow-heif")
    elif lowered.endswith('.png'):
        with Image.open(source_path) as img:
            _save_as_jpeg(img, dest_path)
        print(f"Converted {filename} to JPG")
    else:
        # Already a JPEG: a plain copy under the new name is enough.
        shutil.copy2(source_path, dest_path)


def _copy_unconverted(source_path, dest_path, filename):
    """Copy a file whose conversion failed, keeping its true extension.

    Returns the path the file was copied to.
    """
    original_ext = os.path.splitext(filename)[1].lower()
    if original_ext in ('.jpg', '.jpeg'):
        fallback_path = dest_path
    else:
        # Do not label HEIC/PNG bytes as .jpg: keep the real format visible.
        fallback_path = os.path.splitext(dest_path)[0] + original_ext
    shutil.copy2(source_path, fallback_path)
    return fallback_path


def sort_mangous(source_dir, output_dir):
    """
    Sort and rename mango images based on their categories.

    For each of the ``test``, ``train`` and ``valid`` sub-directories of
    ``source_dir``, every category folder is mirrored under ``output_dir``.
    Images (.jpg, .jpeg, .png, .heic) are renamed with
    ``generate_new_filename``; PNG and HEIC files are re-encoded as RGB JPEG,
    JPEG files are copied as-is.

    If a conversion fails the file is still copied so the dataset stays
    complete, but it keeps its original extension instead of being stored
    as a mislabelled ``.jpg``.

    Args:
        source_dir: Root folder containing the split sub-directories.
        output_dir: Root folder that receives the sorted copy.

    Returns:
        None. Progress and errors are reported with ``print``; per-file
        failures never abort the run.
    """
    # Categories whose output folders are always created, even when empty
    categories = [
        'OverRipe', 'Ripe',
        'Unripe', 'VeryRipe'
    ]
    image_exts = ('.jpg', '.jpeg', '.png', '.heic')

    # Process each split (valid, train, test)
    for split in ['test', 'train', 'valid']:
        print(f"\nProcessing {split} split...")

        split_source = os.path.join(source_dir, split)
        split_output = os.path.join(output_dir, split)

        if not os.path.exists(split_source):
            print(f"Warning: Source directory {split_source} not found")
            continue

        for category in categories:
            os.makedirs(os.path.join(split_output, category), exist_ok=True)

        # Process each category actually present in the source
        for category in os.listdir(split_source):
            category_path = os.path.join(split_source, category)
            if not os.path.isdir(category_path):
                continue

            print(f"Processing category: {category}")

            # Source folders outside the expected list still need a
            # destination, otherwise every copy for them would fail.
            dest_folder = os.path.join(split_output, category)
            os.makedirs(dest_folder, exist_ok=True)

            for filename in os.listdir(category_path):
                if not filename.lower().endswith(image_exts):
                    continue

                source_path = os.path.join(category_path, filename)
                new_filename = generate_new_filename(category, filename)

                if new_filename != filename:
                    print(f"Renaming: {filename} → {new_filename}")

                dest_path = os.path.join(dest_folder, new_filename)

                try:
                    _transfer_image(source_path, dest_path, filename)
                except Exception as e:
                    print(f"Error processing {filename}: {str(e)}")
                    # Last resort: keep the file in the dataset unconverted.
                    try:
                        kept_path = _copy_unconverted(source_path, dest_path, filename)
                        print(f"Copied {filename} without processing as {os.path.basename(kept_path)}")
                    except Exception as copy_err:
                        print(f"Failed to copy {filename}: {str(copy_err)}")

def print_summary(output_dir):
    """Print per-category and per-split image counts for a sorted dataset.

    For each of the ``test``, ``train`` and ``valid`` folders that exist
    under ``output_dir``, every sub-directory is treated as a category and
    the files ending in .jpg, .jpeg, .png or .heic (case-insensitive) are
    counted. Splits that do not exist are skipped silently.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.heic')

    print("\nSorting Summary:")
    print("=" * 50)

    for split in ('test', 'train', 'valid'):
        split_dir = os.path.join(output_dir, split)
        if not os.path.exists(split_dir):
            continue

        print(f"\n{split.upper()} Split:")
        print("-" * 20)

        running_total = 0
        for entry in os.listdir(split_dir):
            entry_path = os.path.join(split_dir, entry)
            # Loose files directly inside a split are not categories.
            if not os.path.isdir(entry_path):
                continue

            n_images = sum(
                1
                for name in os.listdir(entry_path)
                if name.lower().endswith(image_suffixes)
            )
            print(f"{entry}: {n_images} images")
            running_total += n_images

        print(f"Total {split} images: {running_total}")

def install_heic_support():
    """Make sure the ``pillow_heif`` package is available for HEIC decoding.

    If ``pillow_heif`` can already be imported nothing is installed, so
    re-running the notebook does not repeat ``apt-get update`` and the
    package installs. Otherwise ``libheif-dev`` is installed with apt and
    ``pillow-heif`` with pip.

    Installation is best-effort: any failure is reported with ``print`` and
    swallowed, and the function returns None in every case.
    """
    # Local imports keep this block self-contained.
    import importlib.util
    import sys

    if importlib.util.find_spec("pillow_heif") is not None:
        print("HEIC support already available")
        return

    print("Installing HEIC support packages...")
    try:
        # Install libheif for HEIC support
        subprocess.run(['apt-get', 'update'], check=True)
        subprocess.run(['apt-get', 'install', '-y', 'libheif-dev'], check=True)
        # Use the running interpreter's pip so the package lands in the
        # environment this kernel actually imports from.
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'pillow-heif'], check=True)
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        print("HEIC support installed successfully")
    except Exception as e:
        print(f"Warning: Could not install HEIC support: {str(e)}")
        print("Will copy HEIC files without conversion")

def main():
    """Run the whole sorting pipeline on the Drive-hosted mango dataset.

    Steps, in order: mount Google Drive, install HEIC support, create the
    output root, sort/rename the images, then print the per-split summary.
    """
    # Dataset locations on the mounted drive
    source_dir = "/content/drive/MyDrive/SeniorProject/Mango/RipenessClassification"
    output_dir = "/content/drive/MyDrive/SeniorProject/Mango/RipenessClassification_Sorted_One"

    # Environment setup
    mount_drive()
    install_heic_support()

    print(
        "\nStarting image sorting process...",
        f"Source directory: {source_dir}",
        f"Output directory: {output_dir}",
        sep="\n",
    )

    # The output root must exist before the per-split folders are created.
    os.makedirs(output_dir, exist_ok=True)

    sort_mangous(source_dir, output_dir)
    print_summary(output_dir)

    print("\nSorting completed!")


# Run the pipeline only when executed directly (e.g. as a notebook cell).
if __name__ == "__main__":
    main()


Mounted at /content/drive
Drive mounted successfully
Installing HEIC support packages...
HEIC support installed successfully

Starting image sorting process...
Source directory: /content/drive/MyDrive/SeniorProject/Mango/RipenessClassification
Output directory: /content/drive/MyDrive/SeniorProject/Mango/RipenessClassification_Sorted_One

Processing test split...
Processing category: OverRipe
Renaming: OverRipe_Copy_of_OverRipe_IMG_2737 (1).jpg → OverRipe_Copy_of_OverRipe_IMG_2737_(1).jpg
Renaming: OverRipe_Copy_of_OverRipe_IMG_1892_copy_6 (1).jpg → OverRipe_Copy_of_OverRipe_IMG_1892_copy_6_(1).jpg
Renaming: OverRipe_Copy_of_OverRipe_IMG_1891_copy_2 (1).jpg → OverRipe_Copy_of_OverRipe_IMG_1891_copy_2_(1).jpg
Renaming: OverRipe_IMG_1897_copy_11 (1).jpg → OverRipe_IMG_1897_copy_11_(1).jpg
Renaming: OverRipe_IMG_1897_copy_8 (1).jpg → OverRipe_IMG_1897_copy_8_(1).jpg
Renaming: OverRipe_IMG_1897_copy_9 (1).jpg → OverRipe_IMG_1897_copy_9_(1).jpg
Renaming: OverRipe_IMG_1897_copy_10 (1).jpg → O