In [169]:
# Task 1. Setup and Configuration ⚙️
!pip install Pillow piexif tqdm pathlib




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [170]:
# 1.2: Import necessary libraries
import os
import json
import shutil
import pathlib
import logging
import sys
from PIL import Image
from PIL.ExifTags import TAGS
import piexif
from tqdm.notebook import tqdm

# 1.3: Define I/O Paths and Constants
# --- Paths ---
# Define the root directory where the original images are stored
# IMPORTANT: Replace 'RAW_IMAGE_FOLDER' with the actual path to your 700+ images
RAW_IMAGE_FOLDER = pathlib.Path('projects/2025/F2Poly/') 

# Define the path to the projects JSON file (using the uploaded file name)
PROJECTS_JSON_FILE = pathlib.Path('projects_left_behind.json') 

# Define the root directory for all processed, organized, and optimized files
OUTPUT_ROOT_FOLDER = pathlib.Path('./processed_gallery_images')

# --- Configuration Constants ---
MAX_FILE_SIZE_KB = 300
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_KB * 1024
WEBP_QUALITY = 85
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# --- Logging Setup ---
LOG_FILE = OUTPUT_ROOT_FOLDER / 'image_processing.log'
# We'll create the output folder here if it doesn't exist yet, to ensure the log file can be written.
OUTPUT_ROOT_FOLDER.mkdir(exist_ok=True, parents=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE, mode='w'),
        logging.StreamHandler(sys.stdout) # StreamHandler with sys.stdout for better notebook display
    ]
)
logger = logging.getLogger(__name__)

# Final Status Print
logger.info("Setup and Configuration loaded successfully.")
logger.info(f"Raw Image Path: {RAW_IMAGE_FOLDER.resolve()}")
logger.info(f"Output Path: {OUTPUT_ROOT_FOLDER.resolve()}")
logger.info(f"Log file created at: {LOG_FILE.resolve()}")

2025-10-01 17:03:35,989 - INFO - Setup and Configuration loaded successfully.
2025-10-01 17:03:35,992 - INFO - Raw Image Path: C:\wamp64\www\media_new\projects\2025\F2Poly
2025-10-01 17:03:35,993 - INFO - Output Path: C:\wamp64\www\media_new\processed_gallery_images
2025-10-01 17:03:35,994 - INFO - Log file created at: C:\wamp64\www\media_new\processed_gallery_images\image_processing.log


In [171]:
# Task 2. Load Project Data 📂

logger.info("Starting Task 2: Loading project data.")
PROJECT_DATA = None
PROJECTS_LIST = []

try:
    with open(PROJECTS_JSON_FILE, 'r', encoding='utf-8') as f:
        PROJECT_DATA = json.load(f)
        
    # Check if the structure has a top-level 'projects' key
    if 'projects' in PROJECT_DATA and isinstance(PROJECT_DATA['projects'], list):
        PROJECTS_LIST = PROJECT_DATA['projects']
        
        logger.info(f"Successfully loaded {len(PROJECTS_LIST)} projects from {PROJECTS_JSON_FILE.name}.")
        
        # Display an inspection of the first project for verification
        if PROJECTS_LIST:
            first_project = PROJECTS_LIST[0]
            logger.info("--- First Project Data Inspection ---")
            logger.info(f"ID: {first_project.get('id')}, Slug: {first_project.get('slug')}")
            logger.info(f"Location (Lat/Lon): {first_project.get('location')}")
            logger.info(f"Image Check (f-image): {first_project.get('f-image')}")
            logger.info(f"Image Check (image-00): {first_project.get('image-00')}")
            logger.info("-------------------------------------")
            
            # Check for critical fields
            if not all(key in first_project for key in ['slug', 'location', 'f-image']):
                 logger.error("CRITICAL ERROR: Required fields ('slug', 'location', 'f-image') are missing in the project data. Stopping.")
                 raise KeyError("Missing critical project fields.")
        
    else:
        logger.error("JSON structure error: 'projects' key not found or is not a list.")
        PROJECTS_LIST = []

except FileNotFoundError:
    logger.error(f"CRITICAL ERROR: projects_full.json not found at {PROJECTS_JSON_FILE.resolve()}. Cannot proceed.")
    PROJECTS_LIST = []
except json.JSONDecodeError:
    logger.error("CRITICAL ERROR: Failed to decode projects_full.json. Check file formatting.")
    PROJECTS_LIST = []
except Exception as e:
    logger.error(f"An unexpected error occurred during data loading: {e}")
    PROJECTS_LIST = []
    
# Store the total number of projects for the final summary
TOTAL_PROJECTS = len(PROJECTS_LIST)

print(f"\n✅ Project Data Loaded. Total Projects to Process: {TOTAL_PROJECTS}")
if TOTAL_PROJECTS == 0:
    print("🛑 Please check the log file and ensure 'projects_full.json' is correctly placed and formatted.")

2025-10-01 17:03:36,005 - INFO - Starting Task 2: Loading project data.
2025-10-01 17:03:36,006 - INFO - Successfully loaded 1 projects from projects_left_behind.json.
2025-10-01 17:03:36,008 - INFO - --- First Project Data Inspection ---
2025-10-01 17:03:36,009 - INFO - ID: 406, Slug: polyester-carpet-dreamweaver-406
2025-10-01 17:03:36,010 - INFO - Location (Lat/Lon): [41.768558989989465, -88.14980582160743]
2025-10-01 17:03:36,011 - INFO - Image Check (f-image): giese (1).jpg
2025-10-01 17:03:36,012 - INFO - Image Check (image-00): bond (2).jpg
2025-10-01 17:03:36,013 - INFO - -------------------------------------

✅ Project Data Loaded. Total Projects to Process: 1


In [172]:
# 2.1 Test PROJECTS_LIST length and sample 
print(len(PROJECTS_LIST))
print(PROJECTS_LIST[0])

1
{'id': 406, 'title': 'Polyester Carpet by Dreamweaver Basement', 'category': 'Carpet', 'excerpt': 'Multiple projects featuring polyester carpet by Dreamweaver, for bedrooms, hallways, basement and even steps.', 'description': 'Multiple projects featuring polyester carpet by Dreamweaver, for bedrooms, hallways, basement and even steps.', 'slug': 'polyester-carpet-dreamweaver-406', 'location': [41.768558989989465, -88.14980582160743], 'thumb': '2025/F2Poly/thumb/', 'web': '2025/F2Poly/', 'complete_date': '2025-02-20', 'f-image': 'giese (1).jpg', 'image-00': 'bond (2).jpg', 'image-01': 'bond (3).jpg', 'image-02': 'crott (1).jpg', 'image-03': 'crott (2).jpg', 'image-04': 'crott (3).jpg', 'image-05': 'crott (4).jpg', 'image-06': 'bond (1).jpg', 'image-07': 'giese (2).jpg', 'image-08': 'giese (3).jpg', 'image-09': 'giese (4).jpg'}


In [173]:
# Task 3. Create Project-Specific Folders 🗂️ - Done successfully

logger.info("Starting Task 3: Creating output folders based on project slugs.")
folders_created = 0

# The OUTPUT_ROOT_FOLDER was already created in the Setup cell, 
# but we ensure it exists again and then iterate.
OUTPUT_ROOT_FOLDER.mkdir(exist_ok=True, parents=True)

# Loop through the list of projects and create a directory for each one
for project in PROJECTS_LIST:
    slug = project.get('slug')
    if slug:
        project_folder = OUTPUT_ROOT_FOLDER / slug
        try:
            # Use exist_ok=True so the script doesn't crash if the folder already exists
            project_folder.mkdir(exist_ok=True)
            folders_created += 1
        except Exception as e:
            logger.error(f"Failed to create folder for slug '{slug}': {e}")
    else:
        logger.warning(f"Skipping a project due to missing 'slug' field.")

print(f"\n✅ Created {folders_created} project folders inside: {OUTPUT_ROOT_FOLDER.resolve()}")

2025-10-01 17:03:36,030 - INFO - Starting Task 3: Creating output folders based on project slugs.

✅ Created 1 project folders inside: C:\wamp64\www\media_new\processed_gallery_images


In [174]:
# Task 4.A: Image File Name Extraction Function

def get_all_image_names(project):
    """
    Extracts all image file names from a single project dictionary.
    This includes the 'f-image' and all 'image-nn' variants.

    Args:
        project (dict): A single dictionary representing a project's data.

    Returns:
        list: A unique list of image file names associated with the project.
    """
    image_names = []
    
    # 1. Main feature image
    f_image = project.get('f-image')
    if f_image:
        image_names.append(f_image)
        
    # 2. Iterate through image-nn keys (image-00, image-01, etc.)
    # We check up to 50, which is a safe, high limit for gallery images
    for i in range(50):
        # Check padded key (image-00, image-01, ...)
        key_padded = f"image-{i:02d}"
        img_name = project.get(key_padded)
        if img_name:
            image_names.append(img_name)
        
        # Check non-padded key (image-10, image-11, image-20) for flexibility in data format
        if i >= 10:
            key_non_padded = f"image-{i}"
            img_name_non_padded = project.get(key_non_padded)
            # Ensure we don't add duplicates if the padded and non-padded keys point to the same file
            if img_name_non_padded and img_name_non_padded not in image_names:
                image_names.append(img_name_non_padded)

    # Use a set for unique names then convert back to list to handle any duplicate entries
    unique_image_names = list(set(image_names))
    
    logger.debug(f"Project '{project.get('slug')}': Found {len(unique_image_names)} unique images.")
    
    return unique_image_names

print("\n✅ Image File Name Extraction Function (4.A) Defined.")


✅ Image File Name Extraction Function (4.A) Defined.


In [175]:
# Task 4.B: SEO-Friendly Renaming Function

# Define the constant for the mandatory SEO suffix

SEO_BRAND_SUFFIX = "creative-floors-aurora" 

def create_seo_name(slug, original_image_name, all_project_images):
    """
    Generates a new, SEO-friendly filename based on the project slug,
    image index, and the required brand/location suffix.
    
    Format: [slug]-[index]-[brand-suffix].webp
    Example: hardwood-6-red-oak-101-001-creative-floors-aurora.webp
    
    Args:
        slug (str): The project's unique slug (e.g., 'hardwood-6-red-oak-101').
        original_image_name (str): The original filename (e.g., 'Resized_20190122_100008.jpg').
        all_project_images (list): Unique list of all image names for the project (from 4.A)
                                   used to determine the image's order/index.
        
    Returns:
        str: The new base filename without extension.
    """
    # 1. Determine the image index/role
    try:
        # Find the index of the current image in the project's list of images
        # Adding 1 for a 1-based index (001, 002, ...)
        index = all_project_images.index(original_image_name) + 1
    except ValueError:
        # Fallback if the image name wasn't in the list (shouldn't happen if 4.A is correct)
        index = 999 
        logger.warning(f"Could not find {original_image_name} in project list for slug '{slug}'. Using index 999.")

    # Format the index as a three-digit string
    index_str = f"{index:03d}"
    
    # 2. Construct the core part of the name
    # Format: [slug]-[index]
    base_name = f"{slug}-{index_str}"
    
    # 3. Add the required brand/location suffix (Creative Floors Aurora)
    # Format: [slug]-[index]-[suffix]
    seo_name = f"{base_name}-{SEO_BRAND_SUFFIX}"
    
    logger.debug(f"Renamed {original_image_name} to base name {seo_name}")
    
    return seo_name

print("\n✅ SEO-Friendly Renaming Function (4.B) Defined and ready.")


✅ SEO-Friendly Renaming Function (4.B) Defined and ready.


In [176]:
# 4.C - 1st

import random

# Define the path to the folder containing pictures with sample metadata
PICTURES_META_FOLDER = pathlib.Path('./pictures_meta')

# Check if the folder exists
if not PICTURES_META_FOLDER.is_dir():
    logger.error(f"CRITICAL ERROR: Metadata samples folder not found at {PICTURES_META_FOLDER.resolve()}. Cannot proceed with metadata fallback.")
    METADATA_SAMPLES = []
else:
    # Build a list of all JPEG and PNG files in the samples folder
    METADATA_SAMPLES = list(
        PICTURES_META_FOLDER.glob('*.jpg')
    ) + list(
        PICTURES_META_FOLDER.glob('*.jpeg')
    ) + list(
        PICTURES_META_FOLDER.glob('*.png')
    )

    if not METADATA_SAMPLES:
        logger.warning(f"Metadata samples folder found, but contains no valid images ({IMAGE_EXTENSIONS}). Metadata fallback will not work.")
    else:
        logger.info(f"Successfully collected {len(METADATA_SAMPLES)} metadata samples for fallback.")

print(f"\n✅ Metadata Sample Scan Complete. {len(METADATA_SAMPLES)} samples available.")

2025-10-01 17:03:36,061 - INFO - Successfully collected 7 metadata samples for fallback.

✅ Metadata Sample Scan Complete. 7 samples available.


In [177]:
# Task 4.C - 2nd Metadata and GPS Insertion Function

def get_exif_with_gps(image_path, latitude, longitude, metadata_sample_path):
    """
    Determines the final EXIF data structure for an image. It either loads
    existing metadata, copies it from a sample, and then inserts/updates the GPS location.
    
    Args:
        image_path (pathlib.Path): Path to the original image file.
        latitude (float): Project's latitude.
        longitude (float): Project's longitude.
        metadata_samples (list): List of pathlib.Path objects for sample images.
        
    Returns:
        dict: The complete piexif EXIF dictionary ready to be dumped to bytes.
    """
    # Helper to convert decimal degrees to rational tuple (D, M, S) for EXIF
    def to_dms(value):
        """Converts decimal degrees to a tuple (degrees, minutes, seconds) for EXIF."""
        if value < 0:
            value = -value
        d = int(value)
        m = int((value - d) * 60)
        s = int((value - d - m / 60) * 3600 * 100)
        return [(d, 1), (m, 1), (s, 100)]

    final_exif_dict = None
    
    # 1. Load Existing Metadata
    try:
        img = Image.open(image_path)
        img_exif_data = img.info.get('exif')
        
        if img_exif_data:
            # piexif.load() will raise an exception if data is malformed
            final_exif_dict = piexif.load(img_exif_data)
            logger.debug(f"Loaded existing metadata from {image_path.name}.")
            
            # Clean up old GPS data to ensure project coordinates are the source of truth
            if "GPS" in final_exif_dict:
                del final_exif_dict["GPS"] 
        
    except Exception as e:
        logger.debug(f"Could not load valid EXIF data from {image_path.name}: {e}. Attempting fallback.")

    # Define tags for checking valuable camera metadata
    MAKE_TAG = 271     # ImageIFD.Make - Camera Manufacturer
    MODEL_TAG = 272    # ImageIFD.Model - Camera Model

    # [NEW LOGIC] Check if the image has valuable original metadata (Make or Model)
    HAS_VALUABLE_METADATA = False
    
    # Check if the 0th IFD exists
    if final_exif_dict and final_exif_dict.get("0th"):
        # If either Make or Model tags are present, the metadata is considered valuable and should be preserved.
        if MAKE_TAG in final_exif_dict["0th"] or MODEL_TAG in final_exif_dict["0th"]:
            HAS_VALUABLE_METADATA = True

    # 2. Conditional Metadata Copy/Fallback
    
    # [MODIFIED CONDITIONAL] Copy sample metadata only if the image is missing the 0th IFD
    # OR if it exists but does NOT contain valuable camera metadata (Make or Model).
    if final_exif_dict is None or not final_exif_dict.get("0th") or not HAS_VALUABLE_METADATA: 
        
        # Check if the single sample path was provided
        if metadata_sample_path:
            # Use the pre-selected sample path
            sample_path = metadata_sample_path
            
            try:
                # Load EXIF data from the pre-selected sample
                sample_img = Image.open(sample_path)
                sample_exif_data = sample_img.info.get('exif')
                
                if sample_exif_data:
                    # Copy all metadata from the sample
                    final_exif_dict = piexif.load(sample_exif_data)
                    
                    # Remove sample's GPS data (we'll add the project's in step 3)
                    if "GPS" in final_exif_dict:
                        del final_exif_dict["GPS"] 
                        
                    logger.info(f"Copied metadata from sample: {sample_path.name} for {image_path.name}.")
                else:
                    # Fallback to a bare minimum if sample is also empty
                    final_exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}}
                    
            except Exception as e:
                logger.warning(f"Error loading sample metadata from {sample_path.name}: {e}. Using minimum metadata.")
                final_exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}}
        else:
            # Final resort: use a bare minimum EXIF dictionary
            final_exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}}
            logger.warning(f"No metadata found/copied for {image_path.name}. Using minimum EXIF structure.")

    # 3. GPS Conversion and Insertion (This step always runs)
    
    # Convert coordinates to DMS format
    lat_dms = to_dms(latitude)
    lon_dms = to_dms(longitude)

    # Determine reference (N/S, E/W)
    lat_ref = 'N' if latitude >= 0 else 'S'
    lon_ref = 'E' if longitude >= 0 else 'W'

    # Create the GPS IFD (Image File Directory) dictionary
    gps_ifd = {
        piexif.GPSIFD.GPSLatitudeRef: lat_ref,
        piexif.GPSIFD.GPSLatitude: lat_dms,
        piexif.GPSIFD.GPSLongitudeRef: lon_ref,
        piexif.GPSIFD.GPSLongitude: lon_dms,
        # Standard GPS Version ID, required by specification
        piexif.GPSIFD.GPSVersionID: (2, 2, 0, 0)
    }
    
    # Update the final dictionary with the project's GPS data
    final_exif_dict["GPS"] = gps_ifd
    
    logger.debug(f"Attached GPS data to EXIF dict: Lat={latitude}, Lon={longitude}")
    
    return final_exif_dict

print("\n✅ Metadata and GPS Insertion Function (4.C) Defined.")


✅ Metadata and GPS Insertion Function (4.C) Defined.


In [178]:
# Task 4.D: Optimization, Conversion, and Saving Function

# Note: This function relies on MAX_FILE_SIZE_BYTES, WEBP_QUALITY, 
# and the get_exif_with_gps function (4.C) being defined in previous cells.

def process_single_image(original_image_name, project_slug, lat, lon, seo_name, metadata_sample_path):
    """
    Handles the end-to-end processing of a single image file: 
    1. Gets/Copies EXIF data and adds GPS.
    2. Opens and optimizes the image (resizing if too large).
    3. Converts to WEBP.
    4. Saves the final file to the project's folder.
    
    Args:
        original_image_name (str): The filename from the JSON (e.g., 'sample.jpg').
        project_slug (str): The project's slug for the output folder.
        lat (float): Project latitude.
        lon (float): Project longitude.
        seo_name (str): The new SEO-friendly base name (from 4.B).
        metadata_samples (list): List of sample image paths for EXIF fallback.

    Returns:
        tuple: (bool success, str status_message, str final_file_name)
    """
    
    # 1. Setup Paths
    # We must find the full path to the source file (e.g., './RAW_IMAGE_FOLDER/sample.jpg')
    source_path = RAW_IMAGE_FOLDER / original_image_name
    target_folder = OUTPUT_ROOT_FOLDER / project_slug
    final_file_name = f"{seo_name}.webp"
    final_target_path = target_folder / final_file_name

    # Check if the source file exists and has an acceptable extension
    if not source_path.is_file() or source_path.suffix.lower() not in IMAGE_EXTENSIONS:
        return False, f"Source file not found or extension not supported: {original_image_name}", final_file_name

    original_size = source_path.stat().st_size
    was_resized = False
    
    try:
        # 2. Get EXIF Data (calls 4.C)
        final_exif_dict = get_exif_with_gps(source_path, lat, lon, metadata_sample_path)
        
        # Convert EXIF dictionary back into binary format for saving
        exif_bytes = piexif.dump(final_exif_dict)
        
        # 3. Open Image
        img = Image.open(source_path)
        width, height = img.size
        
        # 4. Size Reduction (Scaling): If file size > 300KB, reduce resolution
        if original_size > MAX_FILE_SIZE_BYTES:
            # We target a max width of 1920px if the image is excessively large, 
            # as this is a common compromise for web display speed.
            MAX_RESIZE_WIDTH = 1920
            
            if width > MAX_RESIZE_WIDTH:
                scale_factor = MAX_RESIZE_WIDTH / width
                new_width = MAX_RESIZE_WIDTH
                new_height = int(height * scale_factor)
                
                # Use a high-quality resampling filter (LANCZOS)
                img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                was_resized = True
                logger.info(f"Resized: {original_image_name} from {width}x{height} to {new_width}x{new_height}.")
        
        # 5. Handle PNG/Transparency
        if img.mode in ('RGBA', 'P') or source_path.suffix.lower() == '.png':
            # Create a white background and paste the image, removing the alpha channel
            background = Image.new('RGB', img.size, (255, 255, 255))
            # Use the alpha channel as a mask if it exists
            background.paste(img, mask=img.split()[3] if 'A' in img.mode else None)
            img = background
            logger.debug("Converted image to RGB (removed transparency) for WEBP conversion.")
        
        # 6. Convert and Save (WEBP)
        img.save(
            final_target_path, 
            "webp", 
            quality=WEBP_QUALITY, 
            exif=exif_bytes,
            method=6 # Use a slightly slower but often better compression method
        )

        # 7. Final Verification and Status
        final_size = final_target_path.stat().st_size
        
        status_msg = (
            f"Processed successfully. Orig size: {original_size/1024:.1f} KB. "
            f"Final size: {final_size/1024:.1f} KB. "
            f"Action: {'Resized and Converted' if was_resized else 'Converted only'}."
        )
        
        return True, status_msg, final_file_name
        
    except Exception as e:
        # Catch any failure during the image processing, metadata stamping, or saving
        return False, f"Critical processing error for {original_image_name}: {e}", final_file_name

print("\n✅ Optimization, Conversion, and Saving Function (4.D) Defined.")


✅ Optimization, Conversion, and Saving Function (4.D) Defined.


In [179]:
# Task 4.E: Main Processing Loop

logger.info("Starting Task 4: Main Image Processing Pipeline.")

# Initialize global counters
total_images_found = 0
images_processed_success = 0
images_processed_failed = 0

# Use tqdm (or the DummyTqdm fallback) to show progress across all projects
for project in tqdm(PROJECTS_LIST, desc="Overall Projects Progress"):
    
    slug = project.get('slug')
    location = project.get('location', [41.74141971915897, -88.22985634634969]) # Default to Creative Floors HQ
    lat, lon = location[0], location[1]
    
    if not slug or lat is None or lon is None:
        logger.error(f"Skipping project due to missing slug or location data: ID={project.get('id')}.")
        continue
    # Select one metadata sample for the entire project
    selected_metadata_sample = random.choice(METADATA_SAMPLES) if METADATA_SAMPLES else None
    
    # 1. Get the list of original image file names (using 4.A)
    original_image_names = get_all_image_names(project)
    total_images_found += len(original_image_names)
    
    # Check if any images were found
    if not original_image_names:
        logger.info(f"Project '{slug}' has no associated images listed. Skipping.")
        continue

    logger.info(f"--- Processing Project: {slug} ({len(original_image_names)} images) ---")

    # Inner loop: Iterate over all images found for the current project
    for original_image_name in original_image_names:
        
        # 2. Generate SEO-friendly file name (using 4.B)
        seo_name = create_seo_name(slug, original_image_name, original_image_names)

        # 3. Execute the full processing pipeline (uses 4.C and 4.D logic)
        success, status_msg, final_file_name = process_single_image(
            original_image_name=original_image_name, 
            project_slug=slug, 
            lat=lat, 
            lon=lon, 
            seo_name=seo_name, 
            metadata_sample_path=selected_metadata_sample
        )
        
        # 4. Update Counters and Log
        if success:
            images_processed_success += 1
            logger.info(f"SUCCESS: {final_file_name} -> {status_msg}")
        else:
            images_processed_failed += 1
            logger.warning(f"FAILURE: {original_image_name} -> {status_msg}")

# Final summary printout
print("\n" + "="*50)
print("             ✨ PROCESSING SUMMARY ✨")
print("="*50)
print(f"Total Projects:             {TOTAL_PROJECTS}")
print(f"Total Images Expected:      {total_images_found}")
print("-" * 50)
print(f"✅ Images Successfully Processed: {images_processed_success}")
print(f"❌ Images Failed/Skipped:       {images_processed_failed}")
print("\nOutput Folder:")
print(f"   {OUTPUT_ROOT_FOLDER.resolve()}")
print("="*50)

if images_processed_failed > 0:
    print("WARNING: Check the detailed log file (image_processing.log) for failure reasons.")

2025-10-01 17:03:36,101 - INFO - Starting Task 4: Main Image Processing Pipeline.


Overall Projects Progress:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-01 17:03:36,109 - INFO - --- Processing Project: polyester-carpet-dreamweaver-406 (11 images) ---
2025-10-01 17:03:36,111 - INFO - Copied metadata from sample: IMG_0194.jpg for crott (1).jpg.
2025-10-01 17:03:36,677 - INFO - SUCCESS: polyester-carpet-dreamweaver-406-001-creative-floors-aurora.webp -> Processed successfully. Orig size: 281.1 KB. Final size: 329.8 KB. Action: Converted only.
2025-10-01 17:03:36,679 - INFO - Copied metadata from sample: IMG_0194.jpg for crott (2).jpg.
2025-10-01 17:03:37,316 - INFO - SUCCESS: polyester-carpet-dreamweaver-406-002-creative-floors-aurora.webp -> Processed successfully. Orig size: 293.1 KB. Final size: 352.7 KB. Action: Converted only.
2025-10-01 17:03:37,318 - INFO - Copied metadata from sample: IMG_0194.jpg for crott (3).jpg.
2025-10-01 17:03:37,819 - INFO - SUCCESS: polyester-carpet-dreamweaver-406-003-creative-floors-aurora.webp -> Processed successfully. Orig size: 239.3 KB. Final size: 271.2 KB. Action: Converted only.
2025-10-0

In [180]:
# the cell that gets the names ... #new format should be a list with images having the first one as thumbnail in the same folder, the project.php is a mess

In [181]:
import re
from pathlib import Path

# Set the log file path
log_file_path = 'processed_gallery_images/image_processing.log'

# Check if the file exists before attempting to read
if not Path(log_file_path).exists():
    print(f"Error: Log file not found at {log_file_path}")
else:
    failed_projects = set()
    current_project = None
    
    # Regex to capture the project slug: "Processing Project: <slug> (<number> images)"
    # It finds the text between "Processing Project: " and the opening parenthesis of the image count.
    project_start_pattern = re.compile(r"Processing Project: (.*?) \(")
    
    # Read the log file line by line
    with open(log_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            
            # 1. Check for the start of a new project
            if " - INFO - --- Processing Project:" in line:
                match = project_start_pattern.search(line)
                if match:
                    # Capture the project slug (group 1)
                    current_project = match.group(1).strip()
                else:
                    current_project = None
                    
            # 2. Check for a failure within the current project
            # If a project is being tracked AND we see a failure warning
            elif current_project and " - WARNING - FAILURE:" in line:
                # Add the project slug to the set (sets automatically handle uniqueness)
                failed_projects.add(current_project)

    # Print the final list of failed projects
    print("=" * 50)
    print("❌ PROJECTS WITH AT LEAST ONE IMAGE FAILURE ❌")
    print("=" * 50)
    
    if failed_projects:
        for project in sorted(list(failed_projects)):
            print(f"    - {project}")
    else:
        print("🎉 All projects completed without a logged image failure.")

    print("=" * 50)

❌ PROJECTS WITH AT LEAST ONE IMAGE FAILURE ❌
🎉 All projects completed without a logged image failure.
