# In [None]:  (notebook cell marker, kept as a comment so the file parses as Python)
import os
import json
import random
import shutil
from PIL import Image, ImageFile
from datetime import datetime
from typing import List, Optional

import rasterio
from rasterio.transform import Affine
import geopandas
from shapely.geometry import box

# Let Pillow decode images whose data stream ends early instead of raising;
# this can be common with some TIFFs.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Setting the limit to None switches off Pillow's maximum-pixel-count check
# (its "decompression bomb" guard) rather than merely raising it, so very large
# rasters can be opened. NOTE(review): only appropriate for trusted input files.
Image.MAX_IMAGE_PIXELS = None

def _is_finite_number(value) -> bool:
    """Return True when *value* is an int/float that is neither NaN nor infinite."""
    return (
        isinstance(value, (int, float))
        and value == value  # NaN is the only value that is not equal to itself
        and abs(value) != float("inf")
    )


def _empty_geodataframe(crs):
    """Return an empty GeoDataFrame with a 'geometry' column in *crs*."""
    return geopandas.GeoDataFrame(columns=['geometry'], crs=crs)


def _clip_polygons_to_raster(polygons_gdf, raster_bounds, raster_crs):
    """Return the parts of *polygons_gdf* that fall inside the raster's bounding box."""
    footprint = box(raster_bounds.left, raster_bounds.bottom, raster_bounds.right, raster_bounds.top)
    footprint_gdf = geopandas.GeoDataFrame({'geometry': [footprint]}, crs=raster_crs)
    return geopandas.clip(polygons_gdf, footprint_gdf)


def _pixel_area_in_map_units(affine_transform) -> float:
    """Return the map-unit area covered by one pixel, or 0.0 when it cannot be derived.

    The area is |a*e - b*d|, i.e. the absolute determinant of the affine matrix,
    so rotated/sheared rasters are handled as well as north-up ones. No minimum
    threshold is applied: rasters in geographic CRSs legitimately have pixel
    areas far below 1e-9 square degrees.
    """
    a, b, d, e = affine_transform.a, affine_transform.b, affine_transform.d, affine_transform.e
    if not all(_is_finite_number(component) for component in (a, b, d, e)):
        return 0.0
    pixel_area = abs(a * e - b * d)
    return pixel_area if _is_finite_number(pixel_area) else 0.0


def _ring_to_scaled_pixels(ring_coords, inverse_transform, scale_factor, max_x, max_y):
    """Convert one ring from map coordinates to a flat COCO list [x0, y0, x1, y1, ...].

    Each vertex is mapped to (col, row) with *inverse_transform*, multiplied by
    *scale_factor* and clamped to the rectangle [0, max_x] x [0, max_y].
    """
    flat_pixels = []
    for coord_tuple in ring_coords:
        geo_x, geo_y = coord_tuple[:2]  # drop Z when the shapefile is 3D
        col, row = inverse_transform * (geo_x, geo_y)
        flat_pixels.append(max(0.0, min(col * scale_factor, max_x)))
        flat_pixels.append(max(0.0, min(row * scale_factor, max_y)))
    return flat_pixels


def create_coco_dataset_no_arcpy(
    tif_folder_path: str,
    polygon_shapefile_path: str,
    output_directory_path: str,
    include_tifs_list: Optional[List[str]] = None,
    scale_factor: float = 1.0,
    coco_category_name: str = "tree",
    coco_supercategory_name: str = "natural_object",
    coco_license_name: str = "User Defined",
    coco_license_url: str = "",
    dataset_description: str = "Custom COCO Dataset",
    contributor_name: str = "GIS to COCO Script (No ArcPy)",
    resampling_method = Image.Resampling.LANCZOS, # Ensure Pillow 9.1.0+
    delete_tifs_without_polygons: bool = True,
) -> Optional[str]:
    """
    Convert TIF images and polygon annotations (from a Shapefile) into a
    zipped COCO dataset without using ArcPy.

    Workflow:
      1. Load all polygons and select the candidate TIFs (every ``.tif``/``.tiff``
         in ``tif_folder_path``, or only the names in ``include_tifs_list``).
      2. Pre-filter: keep the TIFs whose bounding box intersects at least one
         polygon. WARNING: when ``delete_tifs_without_polygons`` is True (the
         default) the other TIFs are DELETED from ``tif_folder_path``.
      3. For every kept TIF, convert the clipped polygons to pixel-space COCO
         annotations (category id 1), add one full-frame "unlabelled"
         annotation (category id 2), and copy or rescale the image into
         ``<output>/images``.
      4. Write ``<output>/annotations/instances.json`` and zip the output
         directory next to it as ``<output name>_coco_dataset.zip``.

    An image and its annotations are only recorded in the JSON once the image
    file has actually been written, so the JSON never references missing files.

    NOTE(review): interior rings (holes) are emitted as additional polygons in
    the ``segmentation`` list, as before; COCO polygon lists carry no hole
    semantics, so consumers may rasterise holes as filled - confirm this is
    acceptable for the downstream trainer.

    Args:
        tif_folder_path: Folder containing the source TIF files.
        polygon_shapefile_path: Path of the polygon layer read with geopandas.
        output_directory_path: Folder that receives ``images/`` and ``annotations/``.
        include_tifs_list: Optional list of TIF file names (relative to
            ``tif_folder_path``) to restrict processing to.
        scale_factor: Positive factor applied to image size and annotation
            coordinates. Values within 1e-9 of 1.0 mean "copy unchanged".
        coco_category_name: Name of the main category (id 1).
        coco_supercategory_name: Supercategory of the main category.
        coco_license_name: Name stored in the COCO ``licenses`` entry.
        coco_license_url: URL stored in the COCO ``licenses`` entry.
        dataset_description: Text for ``info.description``.
        contributor_name: Text for ``info.contributor``.
        resampling_method: Pillow resampling filter used when rescaling.
        delete_tifs_without_polygons: Delete source TIFs that intersect no
            polygon (historic behaviour). Pass False to leave them on disk.

    Returns:
        The path of the created ZIP archive, or None when the inputs are
        unusable, no TIF overlaps any polygon, or the archive cannot be created.
    """
    print(f"Starting COCO dataset creation (No ArcPy) at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"  TIF Folder: {tif_folder_path}")
    print(f"  Polygon Shapefile: {polygon_shapefile_path}")
    print(f"  Output Directory: {output_directory_path}")
    if include_tifs_list:
        print(f"  Including only specified TIFs: {len(include_tifs_list)} files listed.")
    else:
        print("  Processing all TIFs found in the folder.")

    # A zero, negative, NaN or infinite factor can only produce unusable images.
    if not _is_finite_number(scale_factor) or scale_factor <= 0:
        print(f"ERROR: scale_factor must be a positive, finite number (got {scale_factor!r}).")
        return None

    # Single source of truth for "no scaling", so reported dimensions, annotation
    # coordinates and the copy-vs-resize decision can never disagree.
    no_scaling = abs(scale_factor - 1.0) <= 1e-9
    effective_scale = 1.0 if no_scaling else scale_factor
    if no_scaling:
        print(f"  Scaling Factor: {scale_factor} (No significant scaling will be applied)")
    else:
        print(f"  Scaling Factor: {scale_factor}")

    images_output_dir = os.path.join(output_directory_path, "images")
    annotations_output_dir = os.path.join(output_directory_path, "annotations")

    for dir_path in [output_directory_path, images_output_dir, annotations_output_dir]:
        os.makedirs(dir_path, exist_ok=True)

    main_category_id = 1
    unlabelled_category_id = 2
    unlabelled_category_name = "unlabelled"
    unlabelled_supercategory_name = "background"

    coco_data = {
        "info": {
            "description": dataset_description, "version": "1.0", "year": datetime.now().year,
            "contributor": contributor_name, "date_created": datetime.now().isoformat()
        },
        "licenses": [{"id": 1, "name": coco_license_name, "url": coco_license_url}],
        "categories": [
            {"id": main_category_id, "name": coco_category_name, "supercategory": coco_supercategory_name},
            {"id": unlabelled_category_id, "name": unlabelled_category_name, "supercategory": unlabelled_supercategory_name}
        ],
        "images": [],
        "annotations": []
    }

    try:
        print(f"  Loading polygons from: {polygon_shapefile_path}")
        all_polygons_gdf = geopandas.read_file(polygon_shapefile_path)
        print(f"    Loaded {len(all_polygons_gdf)} polygons.")
        if all_polygons_gdf.crs is None:
            print("    WARNING: Polygon shapefile has no CRS defined. Assuming it matches raster CRS.")
    except Exception as e:
        print(f"    ERROR: Could not read polygon shapefile: {e}")
        return None

    def crs_differs_from_polygons(raster_crs) -> bool:
        """True when both CRSs are known and they are not equivalent."""
        return bool(
            all_polygons_gdf.crs
            and raster_crs
            and not all_polygons_gdf.crs.equals(raster_crs)
        )

    # Reprojecting the whole layer is expensive and most TIFs share one CRS,
    # so keep one reprojected copy per distinct raster CRS (keyed by its WKT).
    reprojected_polygons_cache = {}

    def reproject_polygons(raster_crs):
        """Return the polygons in *raster_crs*; raises if reprojection fails."""
        cache_key = raster_crs.to_wkt()
        if cache_key not in reprojected_polygons_cache:
            reprojected_polygons_cache[cache_key] = all_polygons_gdf.to_crs(raster_crs)
        return reprojected_polygons_cache[cache_key]

    # Determine which TIF files to consider
    if include_tifs_list:
        # User provided a specific list of TIF basenames
        all_tif_files_to_check = [
            f for f in include_tifs_list if os.path.exists(os.path.join(tif_folder_path, f))
        ]
        if len(all_tif_files_to_check) != len(include_tifs_list):
            print("  WARNING: Some TIFs in include_tifs_list were not found in tif_folder_path.")
        if not all_tif_files_to_check:
            print(f"ERROR: No TIF files from the provided list found in {tif_folder_path}.")
            return None
        print(f"  Will attempt to process {len(all_tif_files_to_check)} TIFs from the provided list.")
    else:
        # Scan the folder for all TIF files
        all_tif_files_to_check = [f for f in os.listdir(tif_folder_path) if f.lower().endswith((".tif", ".tiff"))]
        if not all_tif_files_to_check:
            print(f"ERROR: No TIF files found in {tif_folder_path}.")
            return None
        print(f"  Found {len(all_tif_files_to_check)} TIF files in the folder to pre-filter.")

    valid_tif_files = []

    if delete_tifs_without_polygons:
        print("\nPre-filtering TIF files and deleting those without polygons...")
    else:
        print("\nPre-filtering TIF files (TIFs without polygons are kept on disk)...")

    for tif_filename_check in all_tif_files_to_check:
        tif_full_path_check = os.path.join(tif_folder_path, tif_filename_check)
        try:
            with rasterio.open(tif_full_path_check) as src_raster_check:
                raster_bounds_check = src_raster_check.bounds
                raster_crs_check = src_raster_check.crs

            polygons_gdf_for_raster_check = all_polygons_gdf
            if crs_differs_from_polygons(raster_crs_check):
                try:
                    polygons_gdf_for_raster_check = reproject_polygons(raster_crs_check)
                except Exception as reproj_err_check:
                    print(f"    WARNING: Could not reproject polygons for pre-filtering {tif_filename_check}: {reproj_err_check}. Skipping this file.")
                    continue

            try:
                clipped_polygons_gdf_check = _clip_polygons_to_raster(
                    polygons_gdf_for_raster_check, raster_bounds_check, raster_crs_check
                )
            except Exception as clip_e:
                print(f"    WARNING: Error during geopandas.clip for pre-filtering {tif_filename_check}: {clip_e}. Skipping this file.")
                continue

            if clipped_polygons_gdf_check.empty:
                print(f"  INFO: {tif_filename_check} has ZERO existing polygons from the shapefile.")
                if delete_tifs_without_polygons:
                    try:
                        os.remove(tif_full_path_check)
                        print(f"    DELETED original TIF: {tif_full_path_check}")
                    except OSError as e_remove:
                        print(f"    ERROR: Could not delete TIF {tif_full_path_check}: {e_remove}")
            else:
                print(f"  INFO: {tif_filename_check} has {len(clipped_polygons_gdf_check)} polygons. Will be included.")
                valid_tif_files.append(tif_filename_check)

        except rasterio.errors.RasterioIOError as rio_e:
            print(f"    ERROR: Rasterio could not open or read {tif_filename_check} during pre-filtering: {rio_e}. Skipping.")
        except Exception as e_check:
            print(f"    ERROR pre-filtering file {tif_filename_check}: {e_check.__class__.__name__} - {e_check}. Skipping.")

    if not valid_tif_files:
        print("ERROR: No TIF files found with overlapping polygons (or none left after filtering). Cannot create dataset.")
        return None

    print(f"\nTotal TIF files considered for processing: {len(all_tif_files_to_check)}")
    print(f"TIF files to be included in COCO dataset: {len(valid_tif_files)}")

    random.shuffle(valid_tif_files) # Shuffle for potentially random processing order

    # Both counters only advance once an image has been fully written, so the
    # JSON ids stay contiguous and never describe a file that does not exist.
    global_image_id_counter, global_annotation_id_counter = 0, 0

    print(f"\nProcessing {len(valid_tif_files)} images for the dataset...")

    for tif_filename in valid_tif_files:
        current_image_id = global_image_id_counter + 1       # tentative until committed
        next_annotation_id = global_annotation_id_counter    # tentative until committed
        pending_annotations = []
        print(f"  Processing TIF: {tif_filename} (Image ID: {current_image_id})")

        tif_full_path = os.path.join(tif_folder_path, tif_filename)

        try:
            with rasterio.open(tif_full_path) as src_raster:
                original_img_width = src_raster.width
                original_img_height = src_raster.height
                print(f"DEBUG: For {tif_filename} - Rasterio original dimensions: {original_img_width}x{original_img_height}")
                raster_transform = src_raster.transform
                raster_bounds = src_raster.bounds
                raster_crs = src_raster.crs

            polygons_gdf_for_raster = all_polygons_gdf
            if crs_differs_from_polygons(raster_crs):
                print(f"    WARNING: CRS mismatch. Polygon CRS: {all_polygons_gdf.crs}, Raster CRS: {raster_crs}. Attempting to reproject polygons.")
                try:
                    polygons_gdf_for_raster = reproject_polygons(raster_crs)
                    print("      Polygons reprojected successfully.")
                except Exception as reproj_e:
                    print(f"      ERROR: Failed to reproject polygons for {tif_filename}: {reproj_e}. Skipping specific annotations for this TIF, but image and 'unlabelled' will be added.")
                    # Replace the *input* of the clip step with an empty layer; leaving the
                    # un-reprojected polygons here would clip them in the wrong CRS.
                    polygons_gdf_for_raster = _empty_geodataframe(raster_crs)
            elif all_polygons_gdf.crs is None and raster_crs is not None:
                print(f"    WARNING: Polygons have no CRS. Assuming they match raster CRS: {raster_crs}.")

            scaled_img_width = int(original_img_width * effective_scale)
            scaled_img_height = int(original_img_height * effective_scale)
            print(f"DEBUG: For {tif_filename} - Calculated scaled dimensions: {scaled_img_width}x{scaled_img_height} (using scale_factor: {scale_factor})")
            if scaled_img_width < 1 or scaled_img_height < 1:
                print(f"    ERROR: scale_factor {scale_factor} shrinks {tif_filename} to {scaled_img_width}x{scaled_img_height}. Skipping this image.")
                continue

            image_entry = {
                "id": current_image_id, "file_name": tif_filename,
                "width": scaled_img_width, "height": scaled_img_height,
                "license": 1, "date_captured": ""
            }

            try:
                # Ensure polygons_gdf_for_raster is valid before clipping
                if not polygons_gdf_for_raster.empty and 'geometry' in polygons_gdf_for_raster.columns:
                    clipped_polygons_gdf = _clip_polygons_to_raster(polygons_gdf_for_raster, raster_bounds, raster_crs)
                else:
                    print(f"    INFO: No polygons to clip for {tif_filename} (possibly due to earlier reprojection failure).")
                    clipped_polygons_gdf = _empty_geodataframe(raster_crs)
            except Exception as clip_e:
                print(f"    ERROR during geopandas.clip for {tif_filename}: {clip_e}. No specific annotations will be added.")
                clipped_polygons_gdf = _empty_geodataframe(raster_crs)

            # Process existing polygons if any (category_id = main_category_id)
            if not clipped_polygons_gdf.empty:
                print(f"     Found {len(clipped_polygons_gdf)} specific polygons after clipping for {tif_filename}.")

                # Per-raster invariants, computed once instead of once per vertex/polygon.
                inverse_transform = ~raster_transform
                max_x, max_y = float(scaled_img_width), float(scaled_img_height)
                pixel_area_on_map_sq = _pixel_area_in_map_units(raster_transform)
                if pixel_area_on_map_sq == 0.0:
                    print(f"     WARNING: Could not derive a valid pixel area from the raster transform of {tif_filename} "
                          f"(a={raster_transform.a}, b={raster_transform.b}, d={raster_transform.d}, e={raster_transform.e}). "
                          f"Annotation areas will be 0.")

                for polygon_geom_shapely in clipped_polygons_gdf.geometry:
                    if (polygon_geom_shapely is None
                            or polygon_geom_shapely.is_empty
                            or polygon_geom_shapely.geom_type not in ('Polygon', 'MultiPolygon')):
                        print(f"DEBUG: Skipping empty or invalid geometry type ({getattr(polygon_geom_shapely, 'geom_type', None)}) in {tif_filename}.")
                        continue

                    map_unit_area = polygon_geom_shapely.area
                    if not _is_finite_number(map_unit_area):
                        print(f"     WARNING: map_unit_area is NaN or invalid for a polygon in {tif_filename}. Setting to 0.")
                        map_unit_area = 0.0

                    # The id is only consumed if the annotation is actually appended below.
                    current_annotation_id = next_annotation_id + 1

                    if polygon_geom_shapely.geom_type == 'Polygon':
                        geoms_to_process = [polygon_geom_shapely]
                    else:
                        geoms_to_process = list(polygon_geom_shapely.geoms)

                    coco_segments_for_ann = []
                    for single_poly_shapely in geoms_to_process:
                        if single_poly_shapely.is_empty: # Additional check for parts of MultiPolygon
                            continue

                        exterior_pixels = _ring_to_scaled_pixels(
                            single_poly_shapely.exterior.coords, inverse_transform, effective_scale, max_x, max_y
                        )
                        # A valid COCO polygon segment needs at least 3 points (6 coordinates)
                        if len(exterior_pixels) >= 6:
                            coco_segments_for_ann.append(exterior_pixels)
                        else:
                            print(f"DEBUG: Exterior segment for ann ID {current_annotation_id} in {tif_filename} has < 3 points after scaling/clamping. Skipping this segment part.")

                        for interior_ring in single_poly_shapely.interiors:
                            if interior_ring.is_empty:
                                continue
                            interior_pixels = _ring_to_scaled_pixels(
                                interior_ring.coords, inverse_transform, effective_scale, max_x, max_y
                            )
                            if len(interior_pixels) >= 6:
                                coco_segments_for_ann.append(interior_pixels)
                            else:
                                print(f"DEBUG: Interior segment for ann ID {current_annotation_id} in {tif_filename} has < 3 points. Skipping this segment part.")

                    if not coco_segments_for_ann:
                        print(f"     INFO: Ann ID {current_annotation_id} for {tif_filename} resulted in no valid segmentation data or bbox coords after processing. Skipping this annotation.")
                        continue

                    # --- Bounding Box Calculation (from the segments actually emitted) ---
                    all_scaled_pixel_coords_x = [x for segment in coco_segments_for_ann for x in segment[0::2]]
                    all_scaled_pixel_coords_y = [y for segment in coco_segments_for_ann for y in segment[1::2]]
                    min_x_s = min(all_scaled_pixel_coords_x)
                    min_y_s = min(all_scaled_pixel_coords_y)
                    bbox_coco_scaled = [
                        min_x_s,
                        min_y_s,
                        max(all_scaled_pixel_coords_x) - min_x_s,
                        max(all_scaled_pixel_coords_y) - min_y_s,
                    ]

                    # --- Area Calculation: map-unit area -> original pixels -> scaled pixels ---
                    if pixel_area_on_map_sq > 0.0:
                        scaled_pixel_area = max(0.0, (map_unit_area / pixel_area_on_map_sq) * (effective_scale ** 2))
                    else:
                        scaled_pixel_area = 0.0

                    # NaN/Inf would make the JSON invalid for strict parsers, so drop such annotations.
                    if not all(_is_finite_number(value) for value in bbox_coco_scaled + [scaled_pixel_area]):
                        print(f"   ERROR: NaN detected in bbox or area for ann ID {current_annotation_id} in {tif_filename} before appending.")
                        print(f"     bbox: {bbox_coco_scaled}, area: {scaled_pixel_area}")
                        continue

                    print(f"DEBUG: Adding annotation ID {current_annotation_id} for Image ID {current_image_id}:")
                    print(f"DEBUG:   Bbox: {bbox_coco_scaled}")
                    print(f"DEBUG:   Area: {scaled_pixel_area}")
                    print(f"DEBUG:   Number of segmentation parts: {len(coco_segments_for_ann)}")

                    pending_annotations.append({
                        "id": current_annotation_id,
                        "image_id": current_image_id,
                        "category_id": main_category_id,
                        "segmentation": coco_segments_for_ann,
                        "area": scaled_pixel_area,
                        "bbox": bbox_coco_scaled,
                        "iscrowd": 0
                    })
                    next_annotation_id = current_annotation_id
            else:
                print(f"    No specific polygons from shapefile for {tif_filename} (this is expected if pre-filtering removed them or reprojection failed).")

            # Add the default "unlabelled" polygon covering the whole image
            next_annotation_id += 1
            unlabelled_segmentation = [[
                0.0, 0.0,
                float(scaled_img_width), 0.0,
                float(scaled_img_width), float(scaled_img_height),
                0.0, float(scaled_img_height)
            ]]
            unlabelled_bbox = [0.0, 0.0, float(scaled_img_width), float(scaled_img_height)]
            unlabelled_area = float(scaled_img_width * scaled_img_height)

            pending_annotations.append({
                "id": next_annotation_id, "image_id": current_image_id,
                "category_id": unlabelled_category_id,
                "segmentation": unlabelled_segmentation,
                "area": unlabelled_area,
                "bbox": unlabelled_bbox,
                "iscrowd": 0
            })

            destination_image_path = os.path.join(images_output_dir, tif_filename)
            image_written = False

            # The context manager closes the file handle even when copy/resize/save raises.
            with Image.open(tif_full_path) as pil_img_original:
                print(f"DEBUG: For {tif_filename} - Pillow opened original dimensions: {pil_img_original.width}x{pil_img_original.height}")

                if no_scaling:
                    print(f"DEBUG: For {tif_filename} - Copying image, no scaling. Path: {destination_image_path}")
                    shutil.copy2(tif_full_path, destination_image_path)
                    image_written = True
                else:
                    print(f"DEBUG: For {tif_filename} - Entering scaling block. Target: {scaled_img_width}x{scaled_img_height}. Resampling: {resampling_method}")
                    scaled_img_pil = pil_img_original.resize((scaled_img_width, scaled_img_height), resampling_method)
                    print(f"DEBUG: For {tif_filename} - Pillow resized dimensions (before save): {scaled_img_pil.width}x{scaled_img_pil.height}")
                    try:
                        print(f"DEBUG: For {tif_filename} - Attempting to save SCALED image WITHOUT tiffinfo to {destination_image_path}")
                        scaled_img_pil.save(destination_image_path)
                        print(f"DEBUG: For {tif_filename} - Successfully saved SCALED image WITHOUT tiffinfo to {destination_image_path}")
                        image_written = True
                    except Exception as save_e:
                        print(f"    ERROR: Could not save {tif_filename} even without TIFF tags after scaling: {save_e}.")

            if not image_written:
                print(f"    WARNING: {tif_filename} was not written to {images_output_dir}; leaving it out of the COCO JSON.")
                continue

            # Commit: the image exists on disk, so its entries and ids become final.
            coco_data["images"].append(image_entry)
            coco_data["annotations"].extend(pending_annotations)
            global_image_id_counter = current_image_id
            global_annotation_id_counter = next_annotation_id
            print(f"    Added default 'unlabelled' annotation for {tif_filename}.")

        except rasterio.errors.RasterioIOError as rio_e:
            print(f"    ERROR: Rasterio could not open or read {tif_filename} in main processing: {rio_e}")
        except FileNotFoundError:
            print(f"    ERROR: File not found for Pillow processing (was it moved/deleted after pre-filter?): {tif_full_path}")
        except Exception as e:
            print(f"    ERROR processing file {tif_filename}: {e.__class__.__name__} - {e}")
            import traceback
            traceback.print_exc()

    output_json_filename = os.path.join(annotations_output_dir, "instances.json")
    with open(output_json_filename, 'w') as f:
        json.dump(coco_data, f, indent=4)
    print(f"  Saved all annotations to {output_json_filename}")

    # The archive is placed next to (not inside) the output directory.
    parent_dir_of_output = os.path.dirname(output_directory_path.rstrip(os.sep))
    if not parent_dir_of_output:
        parent_dir_of_output = "."
    archive_base_name = os.path.join(parent_dir_of_output, os.path.basename(output_directory_path.rstrip(os.sep)) + "_coco_dataset")

    print(f"\nCreating ZIP archive from contents of: {output_directory_path}")
    print(f"Archive will be saved as: {archive_base_name}.zip")

    try:
        zip_output_path = shutil.make_archive(
            base_name=archive_base_name,
            format='zip',
            root_dir=output_directory_path
        )
        print(f"Successfully created ZIP archive: {zip_output_path}")
        return zip_output_path
    except Exception as e:
        print(f"ERROR creating ZIP archive: {e}")
        import traceback
        traceback.print_exc()
        return None
    finally:
        print(f"COCO dataset creation finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

if __name__ == '__main__':
    # Script entry point: edit the configuration values below, then run the file.
    # The conversion result (a ZIP path, or a falsy value on failure) is reported on stdout.

    # --- USER CONFIGURATION ---
    # Source imagery folder, polygon layer and destination folder.
    input_tifs_folder = r"C:\Users\kevin\dev\tornado-tree-destruction-ef\tornado-tree-destruction-ef\data\2021_08_19_lac_murray\TIFFs"  # MODIFY THIS
    input_polygon_shp = r"C:\Users\kevin\dev\tornado-tree-destruction-ef\tornado-tree-destruction-ef\data\2021_08_19_lac_murray\Lac_Murray ArcGIS\Polygons_Merge.shp"
    output_coco_dir = r"C:\Users\kevin\dev\tornado-tree-destruction-ef\tornado-tree-destruction-ef\data\2021_08_19_lac_murray\coco"  # MODIFY THIS

    # Optional whitelist of TIF file names. Set to None to consider every TIF
    # in input_tifs_folder, e.g. specific_tifs_to_include = None
    specific_tifs_to_include: Optional[List[str]] = [
        "2019_Lac Murray_597700_5262700.tif",
        "2019_Lac Murray_599700_5261200.tif",
        "2019_Lac Murray_598700_5262200.tif",
        "2019_Lac Murray_599200_5262200.tif",
        "2019_Lac Murray_599700_5262200.tif",
        "2019_Lac Murray_606200_5264200.tif",
    ]

    # 1.0 keeps the original size; e.g. 0.5 halves both image dimensions.
    image_annotation_scale_factor = 1.0

    # COCO metadata.
    category = "fallen"
    super_category = "super"
    description = "Custom COCO dataset with specific objects and unlabelled background."
    contributor = "Your Name/Organization"
    license_nm = "Your License Name (e.g., CC BY 4.0)"
    license_link = "Link to your license (e.g., https://creativecommons.org/licenses/by/4.0/)"
    # --- END USER CONFIGURATION ---

    current_resampling_method = Image.Resampling.LANCZOS

    # Gather every keyword argument in one place so the call below stays short.
    dataset_options = dict(
        tif_folder_path=input_tifs_folder,
        polygon_shapefile_path=input_polygon_shp,
        output_directory_path=output_coco_dir,
        include_tifs_list=specific_tifs_to_include,
        scale_factor=image_annotation_scale_factor,
        coco_category_name=category,
        coco_supercategory_name=super_category,
        dataset_description=description,
        contributor_name=contributor,
        coco_license_name=license_nm,
        coco_license_url=license_link,
        resampling_method=current_resampling_method,
    )

    try:
        generated_zip_file = create_coco_dataset_no_arcpy(**dataset_options)

        if not generated_zip_file:
            # A falsy result means the function bailed out or the archive step failed.
            print("\n--- SCRIPT FINISHED WITH ERRORS OR NO VALID IMAGES ---")
            print("COCO dataset generation may have failed or no images met the criteria. Please review the logs.")
            print(f"Intermediate COCO structure (if any) is at: {output_coco_dir}")
        else:
            print("\n--- SCRIPT FINISHED SUCCESSFULLY ---")
            print(f"COCO dataset ZIP archive created at: {generated_zip_file}")
            print(f"Intermediate COCO structure (before zipping) is at: {output_coco_dir}")

    except ImportError as ie:
        print(f"An import error occurred: {ie}")
        print("Please ensure you have installed all required libraries:")
        print("pip install rasterio geopandas shapely Pillow pyproj")
    except Exception as e:
        # Last-resort boundary: report the failure with a full traceback.
        import traceback
        print(f"An unexpected error occurred in the main execution block: {e}")
        traceback.print_exc()
