In [14]:
import os
import sys
import sqlite3
import numpy as np
from PIL import Image
import time
from datetime import datetime, timezone
import matplotlib.pyplot as plt

# Root directory where images are stored
IMAGE_DIRECTORY = "../gallery/public/img"

# Full path of the SQLite database file used by this script
DATABASE_PATH = os.path.join(
    "/Users/greyson/Projects/custom_gallery/gallery/prisma",
    'image_analysis.db',
)

def connect_db():
    """Open a connection to the database at DATABASE_PATH and return it.

    Foreign key support is switched on for the connection before it is
    handed back to the caller.
    """
    connection = sqlite3.connect(DATABASE_PATH)
    # Turn on foreign key enforcement for this connection
    connection.execute('PRAGMA foreign_keys = ON;')
    return connection

def remove_letterbox(image_np, std_threshold=5, min_letterbox_height=10):
    """
    Detect and remove letterboxing from the top and bottom of the image
    using the standard deviation of pixel intensities along rows.

    Args:
        image_np: Image as a NumPy array, either (height, width, channels)
            or a 2-D (height, width) grayscale array.
        std_threshold: A row counts as content when its standard deviation,
            rescaled to the 0-100 range across all rows, exceeds this value.
        min_letterbox_height: Bars shorter than this many rows are ignored;
            their height is reported as 0 and they are not cropped.

    Returns:
        A tuple (top_height, bottom_height, cropped_image). The heights are
        plain Python ints. When no content rows are found, the result is
        (0, 0, image_np) with the input array returned unchanged.
    """
    # Collapse colour channels to a single intensity per pixel
    if image_np.ndim == 3:
        grayscale = np.mean(image_np, axis=2)
    else:
        grayscale = np.asarray(image_np, dtype=float)

    # Compute the standard deviation along each row
    std_dev = np.std(grayscale, axis=1)

    # If every row has the same deviation (e.g. a flat image) there is
    # nothing to tell bars from content, and normalising would divide by
    # zero. An array with no rows is handled the same way.
    if std_dev.size == 0:
        std_range = 0
    else:
        std_min = np.min(std_dev)
        std_range = np.max(std_dev) - std_min
    if std_range == 0:
        print("No content detected in the image.")
        return 0, 0, image_np

    # Normalize the standard deviation to the 0-100 range
    std_dev_normalized = (std_dev - std_min) / std_range * 100

    # Find indices where the normalized deviation exceeds the threshold
    content_indices = np.where(std_dev_normalized > std_threshold)[0]

    if content_indices.size == 0:
        print("No content detected in the image.")
        return 0, 0, image_np

    # Convert to built-in ints so callers can pass the heights on to code
    # that does not understand NumPy scalar types.
    total_rows = image_np.shape[0]
    top_height = int(content_indices[0])
    bottom_height = int(total_rows - content_indices[-1] - 1)

    # Ignore bars that are thinner than min_letterbox_height
    if top_height < min_letterbox_height:
        top_height = 0
    if bottom_height < min_letterbox_height:
        bottom_height = 0

    # Crop the rows only; works for both 2-D and 3-D arrays
    cropped_image = image_np[top_height:total_rows - bottom_height]
    print(f"Removed letterbox: Top={top_height}px, Bottom={bottom_height}px")
    return top_height, bottom_height, cropped_image

def process_image(image_path, image_id, conn):
    """
    Process a single image: remove letterbox bars, overwrite the image file
    when rows were cropped, and record the result in the SQLite database.

    The file is only rewritten when rows are actually cropped, so images
    without letterboxing are not re-encoded. If the cropped image consists
    of a single colour, the file and its Images row are deleted instead.

    Args:
        image_path: Path of the image file to process.
        image_id: Value of the `id` column of the matching Images row.
        conn: Open sqlite3 connection; changes are committed on it.

    Returns:
        True if the image was processed and its row updated, False if the
        image could not be read or was deleted for being a single colour.
    """
    try:
        # Decode fully inside the context manager so the file handle is
        # released before the path is overwritten or deleted below.
        with Image.open(image_path) as image:
            image_np = np.array(image.convert('RGB'))
    except FileNotFoundError:
        print(f"Image not found at path: {image_path}")
        return False
    except Exception as e:
        # Covers unreadable and truncated files; skip and keep the batch going
        print(f"Error opening image {image_path}: {e}")
        return False

    # *** Detect and Remove Letterbox ***
    std_threshold = 6            # Standard deviation threshold
    min_letterbox_height = 20    # Minimum height in pixels to consider as letterbox

    top_height, bottom_height, image_np = remove_letterbox(image_np, std_threshold, min_letterbox_height)
    # sqlite3 does not bind NumPy integer scalars as integers, so convert
    # to built-in ints before they reach the UPDATE statement.
    top_height = int(top_height)
    bottom_height = int(bottom_height)

    cursor = conn.cursor()

    # Check for a single-colour result before touching the file or the row:
    # such images are removed entirely, so saving them first is wasted work.
    pixels = image_np.reshape(-1, image_np.shape[2])
    if pixels.shape[0] > 0 and bool((pixels == pixels[0]).all()):
        os.remove(image_path)
        print(f"Image {image_path} is entirely one color after cropping. Deleted.")
        # Delete the record from the Images table
        cursor.execute('DELETE FROM Images WHERE id = ?', (image_id,))
        conn.commit()
        return False  # Skip further processing

    # Overwrite the original only when something was actually cropped
    if top_height or bottom_height:
        Image.fromarray(image_np).save(image_path)

    # Width and height of the (possibly cropped) image
    height, width = image_np.shape[:2]

    # Update the Images table with letterbox heights and width/height
    cursor.execute('''
    UPDATE Images
    SET letterbox_top = ?, letterbox_bottom = ?, width = ?, height = ?
    WHERE id = ?
    ''', (top_height, bottom_height, width, height, image_id))
    conn.commit()

    print(f"Processed image {image_path}")
    return True

def main():
    """
    Run letterbox removal over every row of the Images table.

    Adds the `width` and `height` columns to Images if they are missing,
    then processes each image in turn, printing progress and an estimate of
    the remaining time. Rows whose file path is empty or no longer exists
    on disk are deleted. The database connection is closed even if the run
    is interrupted or an error is raised.
    """
    conn = connect_db()
    processed_count = 0
    try:
        cursor = conn.cursor()

        # Check if 'width' and 'height' columns exist in Images table, add them if not
        cursor.execute("PRAGMA table_info(Images)")
        columns = [info[1] for info in cursor.fetchall()]
        if 'width' not in columns:
            cursor.execute("ALTER TABLE Images ADD COLUMN width INTEGER")
        if 'height' not in columns:
            cursor.execute("ALTER TABLE Images ADD COLUMN height INTEGER")
        # Commit regardless of which column (if any) was added
        conn.commit()

        # Get all images from the Images table
        cursor.execute('SELECT id, absolute_file_path FROM Images')
        images = cursor.fetchall()

        total_images = len(images)
        print(f"Total images to process: {total_images}")

        start_time = time.time()

        for index, (image_id, image_path) in enumerate(images, start=1):
            # A NULL/empty path is treated like a missing file;
            # os.path.exists would raise TypeError on None.
            if not image_path or not os.path.exists(image_path):
                print(f"Image file does not exist: {image_path}")
                # Remove the orphaned record from the database
                cursor.execute('DELETE FROM Images WHERE id = ?', (image_id,))
                conn.commit()
                continue

            # Process the image
            if process_image(image_path, image_id, conn):
                processed_count += 1

            # Print progress and a remaining-time estimate based on the
            # average time per row handled so far
            elapsed_time = time.time() - start_time
            avg_time_per_image = elapsed_time / index
            remaining_images = total_images - index
            estimated_remaining_time = avg_time_per_image * remaining_images
            print(f"Processed {index}/{total_images} images.")
            print(f"Estimated remaining time: {time.strftime('%H:%M:%S', time.gmtime(estimated_remaining_time))}")
            print("-------------------------------------------------------")
    finally:
        # Release the database even on KeyboardInterrupt or an error
        conn.close()

    total_time_spent = time.time() - start_time
    print("All images processed.")
    print(f"Total images processed successfully: {processed_count}")
    print(f"Total time spent: {time.strftime('%H:%M:%S', time.gmtime(total_time_spent))}")

# Run the batch job only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Total images to process: 3444
Removed letterbox: Top=0px, Bottom=0px
Processed image /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2021-07-06_16-54-00_CQ_j8VorM1y/209772164_1970922413072622_4634697276599521314_n.jpg
Processed 1/3444 images.
Estimated remaining time: 00:28:15
-------------------------------------------------------
Removed letterbox: Top=0px, Bottom=0px
Processed image /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2021-07-06_16-54-00_CQ_j8VorM1y/210547912_1953988728089169_980237342589702264_n.jpg
Processed 2/3444 images.
Estimated remaining time: 00:27:19
-------------------------------------------------------
Removed letterbox: Top=9px, Bottom=0px
Processed image /Users/greyson/Projects/custom_gallery/gallery/public/img/cameronmarygold/2021-07-06_16-54-00_CQ_j8VorM1y/210568208_331453215355133_1912118937781263435_n.jpg
Processed 3/3444 images.
Estimated remaining time: 00:26:53
--------------------------------------

KeyboardInterrupt: 