In [19]:
import os
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans
from skimage import color, feature
from collections import Counter
from scipy.stats import skew, kurtosis
import cv2
import piexif
import json
import sys
import time

def is_similar(row, tolerance=10):
    """
    Report whether a row of pixels is close to a single flat color.

    For each channel the spread (maximum minus minimum along axis 0) is
    measured; the row counts as similar only when every channel's spread is
    strictly below `tolerance`.
    """
    channel_high = row.max(axis=0)
    channel_low = row.min(axis=0)
    spread = channel_high - channel_low
    return np.all(spread < tolerance)

def find_letterbox_height(image_np, tolerance=10, min_height=10, from_top=True):
    """
    Measure the letterbox bar at one edge of the image.

    Rows are scanned inward from the top edge (or from the bottom edge when
    `from_top` is False) and counted for as long as each row passes
    `is_similar` with the given tolerance. The count is returned only when it
    reaches `min_height`; shorter runs are reported as 0.
    """
    # Unpacking three values keeps the requirement of a (rows, cols, channels) array.
    n_rows, _, _ = image_np.shape
    scan_order = range(n_rows) if from_top else reversed(range(n_rows))

    uniform_rows = 0
    for y in scan_order:
        if not is_similar(image_np[y], tolerance):
            # First non-uniform row marks the end of the bar.
            break
        uniform_rows += 1

    # Runs shorter than min_height are treated as no letterbox at all.
    return uniform_rows if uniform_rows >= min_height else 0

def remove_letterbox(image_np, tolerance=10, min_letterbox_height=10):
    """
    Detect and remove letterboxing from the top and bottom of the image.

    The bars are cropped only when all of the following hold:
      * at least one bar was detected,
      * the top and bottom bar heights differ by less than
        `min_letterbox_height` (i.e. the bars are roughly symmetric),
      * cropping leaves at least one row of content.

    Otherwise the input array is returned unchanged. The result of a crop is
    a slice of `image_np`, not a copy.
    """
    total_height = image_np.shape[0]
    top_height = find_letterbox_height(image_np, tolerance, min_letterbox_height, from_top=True)
    bottom_height = find_letterbox_height(image_np, tolerance, min_letterbox_height, from_top=False)

    bars_found = top_height > 0 or bottom_height > 0
    bars_symmetric = abs(top_height - bottom_height) < min_letterbox_height
    # On a uniformly colored image both scans claim every row; cropping would
    # then produce an empty array and break every later processing step.
    leaves_content = top_height + bottom_height < total_height

    if bars_found and bars_symmetric and leaves_content:
        cropped_image = image_np[top_height: total_height - bottom_height, :, :]
        print(f"Removed letterbox: Top={top_height}px, Bottom={bottom_height}px")
        return cropped_image

    print("No letterbox detected.")
    return image_np

def embed_metrics_into_exif(image_path, output_path, metrics_dict):
    """
    Embed the metrics dictionary into the JPEG's EXIF UserComment field.

    Parameters:
        image_path: path of the image to read (pixels and existing EXIF).
        output_path: path to write; may be the same as `image_path`.
        metrics_dict: JSON-serializable dictionary stored as the comment.

    When the source is a JPEG, only the EXIF segment is replaced via
    `piexif.insert`, so the compressed pixel data is not re-encoded and
    repeated runs do not degrade the image. Any other format is written as a
    JPEG through Pillow, as before.

    Errors are reported with `print` and never raised; nothing is returned.
    """
    try:
        # Load existing EXIF data so other tags are preserved
        exif_dict = piexif.load(image_path)
    except Exception as e:
        print(f"Error loading EXIF data for {image_path}: {e}")
        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}

    # EXIF UserComment is an 8-byte character-code header followed by the text.
    # json.dumps escapes non-ASCII characters by default, so the payload
    # really is ASCII as the header declares.
    metrics_json = json.dumps(metrics_dict)
    user_comment_bytes = b"ASCII\x00\x00\x00" + metrics_json.encode("ascii")
    exif_dict['Exif'][piexif.ExifIFD.UserComment] = user_comment_bytes

    try:
        exif_bytes = piexif.dump(exif_dict)
    except Exception as e:
        # Malformed tags in the existing EXIF can make dump() fail; leave the
        # file untouched rather than aborting the caller.
        print(f"Error building EXIF data for {image_path}: {e}")
        return

    try:
        # The context manager releases the file handle before the same path
        # is (possibly) rewritten by piexif below.
        with Image.open(image_path) as image:
            source_is_jpeg = image.format == "JPEG"
            if not source_is_jpeg:
                image.save(output_path, "jpeg", exif=exif_bytes)
        if source_is_jpeg:
            # Swap the EXIF segment in place; pixel data is copied verbatim.
            piexif.insert(exif_bytes, image_path, output_path)
        print(f"Saved image with embedded metrics to {output_path}")
    except Exception as e:
        print(f"Error saving image with EXIF data for {image_path}: {e}")

def get_all_image_paths(root_dir, supported_extensions=None):
    """
    Traverse the root_dir and return a list of all image file paths.

    Parameters:
        root_dir: directory to walk recursively.
        supported_extensions: a single extension string or any iterable of
            extension strings (tuple, list, set, ...). Matching is
            case-insensitive. Defaults to common raster image extensions.

    Returns:
        A list of paths (each `dirpath` joined with the file name) in the
        order produced by `os.walk`. Empty when nothing matches or when
        `root_dir` does not exist.
    """
    if supported_extensions is None:
        supported_extensions = ('.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif')
    elif isinstance(supported_extensions, str):
        # A bare string must not be split into single characters below.
        supported_extensions = (supported_extensions,)

    # str.endswith only accepts a str or a tuple, and the file name is
    # lowercased before matching, so normalize to a lowercase tuple.
    suffixes = tuple(ext.lower() for ext in supported_extensions)

    image_paths = []
    for dirpath, _, filenames in os.walk(root_dir):
        image_paths.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.lower().endswith(suffixes)
        )
    return image_paths

def has_user_comment(image_path):
    """
    Check if the image already has a UserComment in its EXIF data.

    Returns True only when the UserComment tag is present and, after the
    8-byte character-code header, contains something other than whitespace
    or NUL padding. Any failure to read or interpret the EXIF data is
    treated as "no comment" and returns False.
    """
    try:
        exif_dict = piexif.load(image_path)
        user_comment = exif_dict['Exif'].get(piexif.ExifIFD.UserComment, None)
        if not user_comment:
            return False
        # Drop the 8-byte character-code header before decoding the text.
        payload = user_comment[8:].decode('utf-8', errors='ignore')
        # NUL bytes are padding, not content.
        return bool(payload.replace("\x00", "").strip())
    except Exception:
        # Best-effort check: unreadable or malformed EXIF means no usable
        # comment. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return False

def process_image(image_path):
    """
    Process a single image: remove letterbox, perform k-means clustering,
    compute metrics, and embed them into EXIF.
    Returns True if processed successfully, False otherwise.

    Steps:
      1. Load the image as an RGB array and crop detected letterbox bars
         (the crop affects the analysis only, not the saved pixels).
      2. Cluster the pixels in LAB space with k-means (k=8) and rank the
         clusters by pixel count.
      3. Compute saturation, luminance, contrast, GLCM texture, sharpness
         and luminance-shape statistics.
      4. Store everything as JSON in the image's EXIF UserComment via
         `embed_metrics_into_exif`, overwriting `image_path`.

    Loading or analysis failures are printed and reported as False rather
    than raised, so one bad file does not abort a batch run.
    """
    # The metrics are written back into the source file itself.
    # NOTE(review): embed_metrics_into_exif writes JPEG data; non-JPEG inputs
    # are therefore rewritten as JPEG under their original file name --
    # confirm this is intended.
    output_path = image_path

    try:
        # Decode inside the context manager so pixel-decoding errors are
        # handled here and the file handle is closed before the file is
        # rewritten. convert('RGB') also drops any alpha channel.
        with Image.open(image_path) as image:
            image_np = np.array(image.convert('RGB'))
    except FileNotFoundError:
        print(f"Image not found at path: {image_path}")
        return False
    except Exception as e:
        print(f"Error opening image {image_path}: {e}")
        return False

    # *** Detect and Remove Letterbox ***
    # Parameters can be adjusted based on the expected letterbox characteristics
    tolerance = 2              # Tolerance for color similarity (0-255)
    min_letterbox_height = 10  # Minimum height in pixels to consider as letterbox
    image_np = remove_letterbox(image_np, tolerance, min_letterbox_height)

    k = 8  # Number of k-means clusters
    if image_np.shape[0] * image_np.shape[1] < k:
        # k-means needs at least k samples; also covers an empty crop.
        print(f"Image {image_path} has too few pixels to analyze.")
        return False

    try:
        # Normalize to [0, 1] and convert to LAB color space
        image_normalized = image_np / 255.0
        image_lab = color.rgb2lab(image_normalized)

        # K-means over every pixel in LAB space
        pixels_lab = image_lab.reshape(-1, 3)
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(pixels_lab)

        # Convert cluster centers back to 0-255 RGB integers
        cluster_centers_lab = kmeans.cluster_centers_
        cluster_centers_rgb = np.squeeze(color.lab2rgb(cluster_centers_lab.reshape(1, -1, 3)))
        cluster_centers_rgb_uint8 = np.clip(cluster_centers_rgb * 255, 0, 255).astype(int)

        # Rank clusters by pixel count, largest first
        counts = Counter(kmeans.labels_)
        total_pixels = sum(counts.values())
        sorted_counts = counts.most_common()
        sorted_cluster_indices = [label for label, _ in sorted_counts]
        sorted_cluster_sizes = [size for _, size in sorted_counts]
        sorted_cluster_percentages = [(size / total_pixels) * 100 for size in sorted_cluster_sizes]
        # Reorder the centers the same way so entry i of the centers, counts
        # and percentages lists all describe the same cluster.
        sorted_colors_rgb = np.array([cluster_centers_rgb_uint8[i] for i in sorted_cluster_indices])

        # Saturation metrics from the HSV representation of the RGB image
        saturation = color.rgb2hsv(image_normalized)[:, :, 1]
        mean_saturation = np.mean(saturation)
        median_saturation = np.median(saturation)
        std_saturation = np.std(saturation)

        # Luminance metrics (L channel of LAB)
        l_channel = image_lab[:, :, 0]
        l_min = l_channel.min()
        l_max = l_channel.max()
        mean_luminance = np.mean(l_channel)
        median_luminance = np.median(l_channel)
        std_luminance = np.std(l_channel)
        dynamic_range = l_max - l_min

        # Contrast metrics
        # NOTE(review): this is the root-mean-square of L itself, not the
        # standard-deviation form often called RMS contrast; kept unchanged
        # so values stay comparable with already-processed images.
        rms_contrast = np.sqrt(np.mean(l_channel**2))
        # An all-black image has max + min == 0; report 0 instead of NaN.
        luminance_sum = l_max + l_min
        michelson_contrast = dynamic_range / luminance_sum if luminance_sum > 0 else 0.0

        # Texture features using GLCM
        glcm = feature.graycomatrix(l_channel.astype(np.uint8), distances=[5], angles=[0], levels=256, symmetric=True, normed=True)
        glcm_contrast = feature.graycoprops(glcm, 'contrast')[0, 0]
        glcm_correlation = feature.graycoprops(glcm, 'correlation')[0, 0]

        # Sharpness using variance of the Laplacian
        laplacian_var = cv2.Laplacian(l_channel, cv2.CV_64F).var()

        # Skewness and kurtosis of luminance
        flat_luminance = l_channel.flatten()
        lum_skew = skew(flat_luminance)
        lum_kurt = kurtosis(flat_luminance)

        # *** Combine All Metrics ***
        all_metrics = {
            'Mean Luminance': f"{mean_luminance:.2f}",
            'Median Luminance': f"{median_luminance:.2f}",
            'Std Luminance': f"{std_luminance:.2f}",
            'Dynamic Range': f"{dynamic_range:.2f}",
            'RMS Contrast': f"{rms_contrast:.2f}",
            'Michelson Contrast': f"{michelson_contrast:.2f}",
            'Mean Saturation': f"{mean_saturation:.2f}",
            'Median Saturation': f"{median_saturation:.2f}",
            'Std Saturation': f"{std_saturation:.2f}",
            'GLCM Contrast': f"{glcm_contrast:.2f}",
            'GLCM Correlation': f"{glcm_correlation:.2f}",
            'Laplacian Variance': f"{laplacian_var:.2f}",
            'Luminance Skewness': f"{lum_skew:.2f}",
            'Luminance Kurtosis': f"{lum_kurt:.2f}",
            'KMeans Clustering': {
                'Number of Clusters': k,
                'Cluster Centers (RGB)': sorted_colors_rgb.tolist(),
                'Cluster Counts': sorted_cluster_sizes,
                'Cluster Percentages': [f"{pct:.2f}%" for pct in sorted_cluster_percentages]
            }
        }
    except Exception as e:
        print(f"Error computing metrics for {image_path}: {e}")
        return False

    # *** Embed Metrics into EXIF Metadata ***
    embed_metrics_into_exif(image_path=image_path, output_path=output_path, metrics_dict=all_metrics)

    return True

def _format_duration(seconds):
    """Render a duration in seconds as e.g. '1h 2m 3s', omitting hour/minute parts below 1."""
    hrs, rem = divmod(seconds, 3600)
    mins, secs = divmod(rem, 60)
    parts = []
    if hrs >= 1:
        parts.append(f"{int(hrs)}h")
    if mins >= 1:
        parts.append(f"{int(mins)}m")
    parts.append(f"{int(secs)}s")
    return " ".join(parts)


def main(root_images_dir="/Users/greyson/Projects/custom_gallery/gallery/public/img/", skip_existing=False):
    """
    Process every image found under `root_images_dir` and print progress.

    Parameters:
        root_images_dir: directory scanned recursively for images.
        skip_existing: when True, images for which `has_user_comment`
            returns True are skipped instead of being reprocessed.

    Prints a per-image status line and a running estimate of the remaining
    time, then a summary of processed, skipped and failed images. Exits the
    interpreter with status 0 when no images are found.
    """
    # Get all image file paths
    print("Collecting image files...")
    image_paths = get_all_image_paths(root_images_dir)
    total_images = len(image_paths)
    print(f"Found {total_images} image(s) to process.")

    if total_images == 0:
        print("No images found. Exiting.")
        sys.exit(0)

    # Initialize counters and timers
    processed_count = 0
    skipped_count = 0
    failed_count = 0
    start_time = time.time()

    for idx, image_path in enumerate(image_paths, start=1):
        print(f"\nProcessing image {idx}/{total_images}: {image_path}")

        if skip_existing and has_user_comment(image_path):
            print("Image already has EXIF UserComment. Skipping.")
            skipped_count += 1
            continue

        image_start_time = time.time()
        success = process_image(image_path)
        image_end_time = time.time()

        if success:
            processed_count += 1
            print(f"Processed in {image_end_time - image_start_time:.2f} seconds.")
        else:
            failed_count += 1
            print("Failed to process this image.")

        # Estimate remaining time from the average time per image so far
        elapsed_time = time.time() - start_time
        avg_time_per_image = elapsed_time / idx
        estimated_remaining_time = avg_time_per_image * (total_images - idx)
        print(f"Estimated time remaining: {_format_duration(estimated_remaining_time)}")

    total_time_str = _format_duration(time.time() - start_time)

    print("\nProcessing Complete!")
    print(f"Total images processed: {processed_count}")
    print(f"Total images skipped: {skipped_count}")
    print(f"Total images failed: {failed_count}")
    print(f"Total time taken: {total_time_str}")

if __name__ == "__main__":
    # Script entry point: confirm piexif can be imported, then run the batch.
    try:
        import piexif
    except ImportError:
        # Without piexif the metrics cannot be written; explain and stop.
        print("The 'piexif' library is required to embed metadata. Install it using 'pip install piexif'.")
        sys.exit(1)
    else:
        main()


Collecting image files...
Found 1844 image(s) to process.

Processing image 1/1844: /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2023-02-13_17-37-29_ConHictJyXu/330257550_869215664158675_3796246925761316888_n.jpg
No letterbox detected.
Saved image with embedded metrics to /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2023-02-13_17-37-29_ConHictJyXu/330257550_869215664158675_3796246925761316888_n.jpg
Processed in 0.95 seconds.
Estimated time remaining: 29m 8s

Processing image 2/1844: /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2023-02-13_17-37-29_ConHictJyXu/330655364_215130921052217_1176247351636244488_n.jpg
No letterbox detected.
Saved image with embedded metrics to /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2023-02-13_17-37-29_ConHictJyXu/330655364_215130921052217_1176247351636244488_n.jpg
Processed in 0.89 seconds.
Estimated time remaining: 28m 10s

Processing image

KeyboardInterrupt: 