In [9]:
# --- Imports ---
import os
import glob
import re
from datetime import datetime
import numpy as np
import rasterio
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
from PIL import Image, ImageDraw, ImageFont
import imageio
import warnings
import csv
import platform # To help find system fonts

# --- Optional dependencies: Geopandas and Rasterio features ---
# These imports back the country-masking feature. If any of them fails,
# GEOPANDAS_AVAILABLE is set to False and a warning is printed; code that
# wants to mask by country checks this flag before touching `gpd`/`rasterize`.
try:
    import geopandas as gpd
    from rasterio.features import rasterize
    from rasterio.transform import from_bounds
    GEOPANDAS_AVAILABLE = True
except ImportError:
    # Any of the three imports above can land here, not only geopandas itself.
    GEOPANDAS_AVAILABLE = False
    print("WARNING: Geopandas not found. Country masking feature will be disabled.")


# --- Helper function to parse date from filename ---
def get_date_from_filename(filename):
    """Extract the year/month encoded at the end of a GeoTIFF filename.

    Two naming schemes are recognised, tried in this order against the
    basename of *filename*:

    1. ``..._YYYY_MM.tif``
    2. ``...YYYYMM.tif``

    The ``.tif`` extension is matched case-insensitively so that files
    returned by ``glob`` on case-insensitive filesystems (``.TIF``) are
    handled as well.

    Args:
        filename: Path (or bare name) of the file to inspect.

    Returns:
        A ``datetime`` for the first day of the encoded month, or ``None``
        when no pattern matches or the encoded year/month is not a valid
        calendar value. A warning is printed in every ``None`` case.
    """
    base_name = os.path.basename(filename)

    # The two patterns are mutually exclusive (the first needs an underscore
    # right before the month digits, the second needs a digit there), so a
    # simple "first that matches" chain keeps the original precedence.
    match = (
        re.search(r'_(\d{4})_(\d{2})\.tif$', base_name, re.IGNORECASE)
        or re.search(r'(\d{4})(\d{2})\.tif$', base_name, re.IGNORECASE)
    )
    if match is None:
        print(f"Warning: Could not parse date from filename: {filename} (tried YYYY_MM and YYYYMM patterns). Skipping.")
        return None

    year, month = map(int, match.groups())

    # Validate for BOTH patterns: previously only the YYYYMM branch checked
    # the month, so e.g. "x_2020_13.tif" raised ValueError instead of being
    # skipped.
    if not 1 <= month <= 12:
        print(f"Warning: Invalid month '{month}' found in filename: {filename}. Skipping.")
        return None

    try:
        return datetime(year, month, 1)
    except ValueError:
        # Four digits can still be out of datetime's range (year 0000).
        print(f"Warning: Invalid year '{year}' found in filename: {filename}. Skipping.")
        return None

# --- Helper function to find fonts ---
def find_system_font(font_names):
    """Locate a TrueType/OpenType font that Pillow is able to load.

    Each entry of *font_names* is tried in order. For a given name the
    function first asks Pillow to load it directly by name; if that fails it
    scans a platform-specific list of font directories (plus the current
    directory) for ``.ttf``/``.otf`` files whose name starts with the
    requested name (case-insensitive).

    Within a directory, candidates are ranked so the result is deterministic
    and the plain face is preferred: a file whose stem equals the requested
    name comes first, then shorter filenames, then alphabetical order. This
    avoids returning e.g. ``arialbd.ttf`` for ``Arial`` just because
    ``os.listdir`` happened to yield it first.

    Args:
        font_names: Iterable of font names (without extension) in order of
            preference.

    Returns:
        The font name (when Pillow resolves it directly) or the full path of
        the first loadable matching file; ``None`` if nothing usable is found.
    """
    system = platform.system()
    if system == "Windows":
        search_dirs = [os.path.join(os.environ.get("SystemRoot", "C:\\Windows"), "Fonts")]
    elif system == "Linux":
        search_dirs = ["/usr/share/fonts/truetype/dejavu",
                       "/usr/share/fonts/truetype/msttcorefonts",
                       "/usr/share/fonts/truetype/liberation",
                       "/usr/share/fonts/truetype",
                       "/usr/local/share/fonts",
                       os.path.expanduser("~/.fonts")]
    elif system == "Darwin":  # macOS
        search_dirs = ["/System/Library/Fonts",
                       "/Library/Fonts",
                       os.path.expanduser("~/Library/Fonts")]
    else:
        search_dirs = []

    # Current directory is always searched last as a fallback.
    search_dirs.append(".")

    font_extensions = (".ttf", ".otf")

    for name in font_names:
        # 1. Let Pillow resolve the name itself (works well on Windows).
        try:
            ImageFont.truetype(name, 10)  # Test load only
        except OSError:
            pass  # Fall through to the directory scan
        else:
            print(f"  Found font '{name}' directly by name.")
            return name

        # 2. Scan the known font directories.
        wanted = name.lower()
        for directory in search_dirs:
            if not os.path.isdir(directory):
                continue
            try:
                entries = os.listdir(directory)
            except OSError:  # Permission problems etc. -- just skip this dir
                continue

            candidates = [
                entry for entry in entries
                if entry.lower().startswith(wanted)
                and entry.lower().endswith(font_extensions)
            ]
            # Exact stem match first, then shortest name, then alphabetical.
            candidates.sort(
                key=lambda entry: (
                    os.path.splitext(entry)[0].lower() != wanted,
                    len(entry),
                    entry.lower(),
                )
            )

            for fname in candidates:
                potential_path = os.path.join(directory, fname)
                if not os.path.isfile(potential_path):
                    continue
                # Only accept the file if Pillow can really open it.
                try:
                    ImageFont.truetype(potential_path, 10)
                except OSError:
                    print(f"  Found potential font file, but failed to load: {potential_path}")
                    continue  # Try next match
                print(f"  Found font at: {potential_path}")
                return potential_path

    # Nothing usable was found for any of the requested names.
    return None

# --- Main Function ---
def create_nightlight_timelapse_and_graph(
    input_folder: str,
    output_path_base: str,
    output_format: str = 'gif',
    cmap_name: str = 'plasma',
    fps: int = 6,
    normalize_animation: bool = False,
    mask_outside_country: bool = True,
    country_boundary_shapefile_path: str = None,
    target_country_name: str = None,
    outside_mask_color: tuple = (0, 0, 0, 0),
    mp4_crf: int = 23,
    text_size_category: str = 'medium',
    font_path: str = None,
    graph_title: str = 'Average Night Light Intensity Over Time',
    watermark_text: str = 'My Custom Watermark',
    watermark_position: tuple = (10, 10), # Note: This position isn't actually used, it's always top-left
    debug_masking: bool = False
    ):
    """
    Creates timelapse, graph, and CSV with multiple night light metrics from GeoTIFFs,
    optionally masking areas outside a specified country boundary.

    Args:
        input_folder (str): Path to the folder containing GeoTIFF files.
        output_path_base (str): Base path and filename for output files (e.g., 'output/country_lights').
                                 Format extensions (.gif, .mp4, .csv, .png) will be added.
        output_format (str, optional): Animation format ('gif' or 'mp4'). Defaults to 'gif'.
        cmap_name (str, optional): Matplotlib colormap name for the animation. Defaults to 'plasma'.
        fps (int, optional): Frames per second for the animation. Defaults to 6.
        normalize_animation (bool, optional): If True, normalize colors based on the 0.5-99.5 percentile
                                             of *lit* pixel values across all frames. If False, use a fixed
                                             range from 0 to the 99.5 percentile. Defaults to False.
        mask_outside_country (bool, optional): If True, mask areas outside the specified country.
                                              Requires geopandas and a valid shapefile/country name. Defaults to True.
        country_boundary_shapefile_path (str, optional): Path to the country boundaries shapefile.
                                                       Required if mask_outside_country is True. Defaults to None.
        target_country_name (str, optional): Name of the country to keep (case-insensitive).
                                            Must match a name in the shapefile's attribute table.
                                            Required if mask_outside_country is True. Defaults to None.
        outside_mask_color (tuple, optional): RGBA color (0-255) for masked-out areas.
                                             (0, 0, 0, 0) means transparent. Defaults to (0, 0, 0, 0).
        mp4_crf (int, optional): Constant Rate Factor for MP4 encoding (lower means better quality, larger file).
                                Only used if output_format is 'mp4'. Defaults to 23.
        text_size_category (str, optional): Relative size for date/watermark text ('small', 'medium', 'large').
                                            Actual size depends on image height. Defaults to 'medium'.
        font_path (str, optional): Path to a specific .ttf or .otf font file to use for text overlay.
                                   If None, attempts to find common system fonts. Defaults to None.
        graph_title (str, optional): Title for the generated graph. Defaults to 'Average Night Light Intensity Over Time'.
        watermark_text (str, optional): Text to display as a watermark (top-left). Set to None or '' to disable.
                                        Defaults to 'My Custom Watermark'.
        watermark_position (tuple, optional): DEPRECATED. Position is fixed to top-left with padding.
        debug_masking (bool, optional): If True, print extra information during the masking process. Defaults to False.
    """

    print(f"Starting analysis for folder: {input_folder}")
    print(f"Output base: {output_path_base}")
    print(f"Format: {output_format}, FPS: {fps}, Colormap: {cmap_name}")
    print(f"Normalize Animation Frames: {normalize_animation}")
    print(f"Mask Outside Country: {mask_outside_country}")
    if mask_outside_country: print(f"Target Country Name: '{target_country_name}'")
    if output_format.lower() == 'mp4': print(f"MP4 CRF Value: {mp4_crf}")
    print(f"Text Size Category: {text_size_category}")
    if debug_masking: print("!!! Mask Debugging Enabled !!!")

    # --- Validate Inputs ---
    if mask_outside_country:
        if not GEOPANDAS_AVAILABLE:
            print("WARN: Geopandas library is missing. Disabling country mask.")
            mask_outside_country=False
        elif not country_boundary_shapefile_path:
            print("WARN: Country shapefile path is missing. Disabling country mask.")
            mask_outside_country=False
        elif not target_country_name:
            print("WARN: Target country name is missing. Disabling country mask.")
            mask_outside_country=False
        elif not os.path.exists(country_boundary_shapefile_path):
            print(f"WARN: Country shapefile not found at: {country_boundary_shapefile_path}. Disabling country mask.")
            mask_outside_country=False


    # --- 1. Find/Sort Files & Get Raster Metadata ---
    search_pattern = os.path.join(input_folder, '*.tif')
    tif_files = glob.glob(search_pattern)
    if not tif_files:
        print(f"Error: No .tif files found in {input_folder}")
        return

    file_date_pairs = []
    for f in tif_files:
        date = get_date_from_filename(f)
        if date:
            file_date_pairs.append((date, f))

    if not file_date_pairs:
        print(f"Error: Could not parse dates from any filenames in {input_folder}")
        return

    file_date_pairs.sort() # Sort by date
    sorted_files = [f[1] for f in file_date_pairs]
    sorted_dates = [f[0] for f in file_date_pairs]
    date_labels = [d.strftime('%Y-%m') for d in sorted_dates]
    print(f"Found and sorted {len(sorted_files)} TIFF files.")

    # --- Global Variables & Load Raster Metadata ---
    raster_transform = None
    raster_shape = None
    raster_crs = None
    country_mask_array = None
    country_mask_available = False
    try:
        with rasterio.open(sorted_files[0]) as src:
            raster_transform = src.transform
            raster_shape = src.shape
            raster_crs = src.crs
            if not raster_crs:
                print(f"WARNING: Raster {os.path.basename(sorted_files[0])} has no CRS defined. Masking might fail or be inaccurate.")
            print(f"Raster Properties: Shape={raster_shape}, CRS={raster_crs}")
    except Exception as e:
        print(f"ERROR reading metadata from {os.path.basename(sorted_files[0])}: {e}")
        return


    # --- Load Font ---
    font = None
    calculated_font_size = 10 # Default fallback size
    ttf_font_found = False
    print("\nAttempting to load font...")
    # Determine base size based on image height
    base_size = max(12, int(raster_shape[0] / 60)) # Adjusted divisor for potentially larger text
    size_multiplier = {'small': 0.7, 'medium': 1.0, 'large': 1.4}.get(text_size_category.lower(), 1.0)
    target_font_size = int(base_size * size_multiplier)

    # 1. Try user path
    if font_path and os.path.exists(font_path):
        try:
            font = ImageFont.truetype(font_path, target_font_size)
            calculated_font_size = target_font_size
            print(f"  Loaded specified font: {font_path} size {calculated_font_size}")
            ttf_font_found = True
        except Exception as e:
            print(f"Warn: Failed to load specified font '{font_path}': {e}")

    # 2. Try system fonts if user font failed or wasn't provided
    if not ttf_font_found:
        # Prioritize common sans-serif fonts
        preferred_fonts = ['DejaVuSans', 'Arial', 'LiberationSans-Regular', 'Helvetica', 'Verdana', 'arial']
        found_path = find_system_font(preferred_fonts)
        if found_path:
            try:
                font = ImageFont.truetype(found_path, target_font_size)
                calculated_font_size = target_font_size
                print(f"  Loaded system font: {found_path} size {calculated_font_size}")
                ttf_font_found = True
            except Exception as e:
                print(f"Warn: Failed to load system font '{found_path}': {e}")

    # 3. Fallback to default PIL font if no TTF found
    if not ttf_font_found:
        try:
            # Try loading default with a size hint (may not work well)
            font = ImageFont.load_default(size=int(target_font_size * 0.8)) # Default font is bitmap, size is tricky
            calculated_font_size = 10 # Reset size estimate for default font
            print("  Warn: Loaded default PIL bitmap font. Text quality and size will be limited.")
        except Exception as e: # Changed from IOError to Exception for broader catch
             print(f"ERROR: Default font loading failed: {e}. Text overlay disabled.")
             font = None


    # --- Prepare Country Mask ---
    if mask_outside_country:
        print(f"\nPreparing country mask for '{target_country_name}'...")
        try:
            world_gdf = gpd.read_file(country_boundary_shapefile_path)
            if debug_masking: print(f" DBG: Loaded SHP, columns: {world_gdf.columns.to_list()}")

            # Search common name columns case-insensitively
            possible_name_cols = ['NAME', 'ADMIN', 'SOVEREIGNT', 'name', 'admin', 'sovereignt', 'NAME_0', 'COUNTRY', 'Name', 'Country']
            target_geom = None
            country_gdf = None
            target_found = False
            for col in possible_name_cols:
                 if col in world_gdf.columns:
                      # Use str.contains for flexibility, but require exact match after filtering? No, exact match better.
                      # Ensure column is string type before lower()
                      if world_gdf[col].dtype == 'object':
                          country_gdf_candidate = world_gdf[world_gdf[col].str.lower() == target_country_name.lower()]
                          if not country_gdf_candidate.empty:
                              country_gdf = country_gdf_candidate
                              target_geom = country_gdf.geometry # Get geometries
                              print(f"  Found '{target_country_name}' in shapefile column '{col}'. ({len(country_gdf)} feature(s))")
                              target_found = True
                              break # Stop searching once found
                      else:
                           if debug_masking: print(f"  DBG: Skipping non-string column '{col}' for name search.")

            if not target_found:
                 # Try searching again with contains as a fallback, warning the user
                 print(f"  WARN: Exact match for '{target_country_name}' not found. Trying partial match (case-insensitive)...")
                 for col in possible_name_cols:
                     if col in world_gdf.columns and world_gdf[col].dtype == 'object':
                          country_gdf_candidate = world_gdf[world_gdf[col].str.contains(target_country_name, case=False, na=False)]
                          if not country_gdf_candidate.empty:
                              country_gdf = country_gdf_candidate
                              target_geom = country_gdf.geometry
                              print(f"  WARN: Found partial match(es) for '{target_country_name}' in column '{col}': {country_gdf[col].tolist()}. Using these features.")
                              target_found = True
                              break

            if not target_found:
                raise ValueError(f"Could not find country '{target_country_name}' in specified columns of the shapefile.")

            # Check and Reproject CRS if necessary
            if debug_masking: print(f" DBG: Shapefile CRS: {country_gdf.crs}")
            if not raster_crs:
                 print(" WARN: Raster CRS is undefined. Assuming shapefile CRS is compatible. Mask may be incorrect if CRSs differ.")
            elif country_gdf.crs != raster_crs:
                print(f"  Reprojecting country geometry from {country_gdf.crs} to {raster_crs}...")
                country_gdf = country_gdf.to_crs(raster_crs)
                target_geom = country_gdf.geometry # Update geometry after reprojection
                if debug_masking: print(f" DBG: Reprojected Shapefile CRS: {country_gdf.crs}")

            # Rasterize the geometry
            print("  Rasterizing country polygon(s)...")
            # Ensure geometries are valid before rasterizing
            geoms_to_rasterize = [(g, 1) for g in target_geom if g is not None and g.is_valid]
            if len(geoms_to_rasterize) < len(target_geom):
                 print(f" WARN: Skipped {len(target_geom) - len(geoms_to_rasterize)} invalid or null geometries.")

            if not geoms_to_rasterize:
                 raise ValueError("No valid geometries found for the target country after filtering.")

            country_mask_array = rasterize(
                shapes=geoms_to_rasterize,
                out_shape=raster_shape,
                transform=raster_transform,
                fill=0,       # Fill value for areas outside the shapes
                default_value=1, # Value for areas inside the shapes
                dtype='uint8'
            )

            country_mask_array = country_mask_array.astype(bool) # Convert to boolean mask (True means INSIDE country)
            country_mask_available = True

            # Debugging and Sanity Checks
            num_masked_pixels = np.sum(country_mask_array)
            total_pixels = country_mask_array.size
            if num_masked_pixels == 0:
                print("  WARNING: The generated country mask is entirely FALSE. No pixels are marked as inside the country.")
                print("           Check CRS alignment, country name spelling, and shapefile contents.")
                if debug_masking: print(f" DBG: Raster Bounds: {rasterio.transform.array_bounds(raster_shape[0], raster_shape[1], raster_transform)}")
                if debug_masking: print(f" DBG: Shapefile Bounds (after potential reproj): {country_gdf.total_bounds}")
                country_mask_available = False # Disable masking if mask is empty
            elif num_masked_pixels == total_pixels:
                print("  WARNING: The generated country mask is entirely TRUE. All pixels are marked as inside the country.")
                print("           This might happen if the country covers the entire raster extent or if rasterization failed.")
                # Continue with mask, but it might not have the intended effect
            else:
                 print(f"  Country mask created ({num_masked_pixels} pixels inside / {total_pixels} total).")

            if debug_masking:
                 unique_vals, counts = np.unique(country_mask_array, return_counts=True)
                 print(f"  DBG: Mask unique values & counts: {dict(zip(unique_vals, counts))}")

        except ImportError:
             print("ERROR: Geopandas is required for masking but not installed properly.")
             mask_outside_country = False # Should have been caught earlier, but double-check
        except Exception as e:
            print(f"ERROR creating country mask: {e}")
            print("  Disabling country masking for this run.")
            mask_outside_country = False
            country_mask_available = False


    # --- 2. Process Files: Calculate Multiple Statistics ---
    monthly_metrics_list = []
    all_lit_pixels_for_range = [] # Store only lit pixels for percentile calculation
    max_raw_value_overall = -np.inf # Track max value for potential fixed scaling
    min_raw_value_overall = np.inf # Track min value (less commonly used for NTL)

    print("\n--- Pass 1: Calculating Metrics & Determining Value Range ---")
    for i, filepath in enumerate(sorted_files):
        date = sorted_dates[i]
        print(f"  Processing {os.path.basename(filepath)} ({date.strftime('%Y-%m')})...", end='\r') # Overwrite line
        metrics = {'date': date}
        try:
            with rasterio.open(filepath) as src:
                # Read data, handle potential NoData values specified in metadata
                nodata_val = src.nodata
                raw_data = src.read(1).astype(np.float32)
                if nodata_val is not None:
                    raw_data[raw_data == nodata_val] = np.nan # Standardize NoData to NaN

                # Suppress warnings for all-NaN slices etc. during max/min calculation
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=RuntimeWarning)
                    current_max_raw = np.nanmax(raw_data)
                    current_min_raw = np.nanmin(raw_data) # Also track min

                if np.isfinite(current_max_raw) and current_max_raw > max_raw_value_overall:
                    max_raw_value_overall = current_max_raw
                if np.isfinite(current_min_raw) and current_min_raw < min_raw_value_overall:
                    min_raw_value_overall = current_min_raw

                # --- Define Masks ---
                # Base masks (independent of country)
                nan_mask = np.isnan(raw_data)
                # 'Lit' usually means > 0, exclude NaNs
                lit_pixel_mask = ~nan_mask & (raw_data > 0)
                # 'Non-negative' includes 0, excludes NaNs
                non_negative_mask = ~nan_mask & (raw_data >= 0)

                # Apply country mask if available and enabled
                if country_mask_available:
                    # Pixels must be both inside the country AND meet the light condition
                    final_lit_mask = country_mask_array & lit_pixel_mask
                    final_nonneg_mask = country_mask_array & non_negative_mask
                    # Also calculate total valid pixels *within the mask* for percentages later?
                    # total_valid_pixels_in_mask = np.sum(country_mask_array & ~nan_mask) # Optional
                else:
                    # No country mask, use the base masks
                    final_lit_mask = lit_pixel_mask
                    final_nonneg_mask = non_negative_mask
                    # total_valid_pixels_in_mask = np.sum(~nan_mask) # Optional

                # --- Calculate Metrics ---
                # 1. Lit Pixels (> 0) within mask
                lit_pixels_in_mask_values = raw_data[final_lit_mask]
                count_lit = lit_pixels_in_mask_values.size
                metrics['count_lit'] = count_lit
                if count_lit > 0:
                    with warnings.catch_warnings(): # Suppress mean/median of empty slice
                        warnings.simplefilter("ignore", category=RuntimeWarning)
                        metrics['sum_lit'] = np.nansum(lit_pixels_in_mask_values) # Sum of Lights (SOL)
                        metrics['mean_lit'] = np.nanmean(lit_pixels_in_mask_values)
                        metrics['median_lit'] = np.nanmedian(lit_pixels_in_mask_values)
                        metrics['max_lit'] = np.nanmax(lit_pixels_in_mask_values) # Max value among lit pixels
                    # Store these values for overall range calculation if needed
                    all_lit_pixels_for_range.append(lit_pixels_in_mask_values)
                else:
                    metrics['sum_lit'] = 0.0
                    metrics['mean_lit'] = np.nan
                    metrics['median_lit'] = np.nan
                    metrics['max_lit'] = np.nan

                # 2. Non-Negative Pixels (>= 0) within mask
                nonneg_pixels_in_mask_values = raw_data[final_nonneg_mask]
                count_nonneg = nonneg_pixels_in_mask_values.size
                metrics['count_non_negative'] = count_nonneg
                if count_nonneg > 0:
                     with warnings.catch_warnings():
                        warnings.simplefilter("ignore", category=RuntimeWarning)
                        metrics['sum_all'] = np.nansum(nonneg_pixels_in_mask_values) # Sum including zeros
                        metrics['mean_all'] = np.nanmean(nonneg_pixels_in_mask_values) # Mean including zeros
                        metrics['median_all'] = np.nanmedian(nonneg_pixels_in_mask_values) # Median including zeros
                else:
                    metrics['sum_all'] = 0.0
                    metrics['mean_all'] = np.nan
                    metrics['median_all'] = np.nan

        except Exception as e:
            print(f"\n  ERROR reading/processing {os.path.basename(filepath)}: {e}")
            # Ensure metrics dictionary has default null values if processing fails
            metrics.setdefault('count_lit', 0); metrics.setdefault('sum_lit', 0.0)
            metrics.setdefault('mean_lit', np.nan); metrics.setdefault('median_lit', np.nan); metrics.setdefault('max_lit', np.nan)
            metrics.setdefault('count_non_negative', 0); metrics.setdefault('sum_all', 0.0)
            metrics.setdefault('mean_all', np.nan); metrics.setdefault('median_all', np.nan)

        monthly_metrics_list.append(metrics)

    print("\n--- Pass 1 Summary ---") # Clear the processing line
    valid_months_count = sum(1 for m in monthly_metrics_list if m.get('count_lit', 0) > 0)
    print(f" Found lit pixels (within mask, if applied) in {valid_months_count} out of {len(sorted_files)} months.")
    if not np.isfinite(max_raw_value_overall): max_raw_value_overall = 1.0 # Handle case where all data might be NaN/invalid
    if not np.isfinite(min_raw_value_overall): min_raw_value_overall = 0.0
    print(f" Overall raw value range observed (across all pixels): {min_raw_value_overall:.4f} to {max_raw_value_overall:.4f}")

    # --- Determine Visualization Range (vmin, vmax) ---
    vis_vmin, vis_vmax = 0.0, 1.0 # Default fallback

    if all_lit_pixels_for_range:
        print(" Determining visualization range using percentiles of 'lit' pixel values (within mask)...")
        try:
            # Filter out any potential non-array entries (though append should only add arrays)
            all_lit_pixels_filt = [arr for arr in all_lit_pixels_for_range if isinstance(arr, np.ndarray) and arr.size > 0]
            if all_lit_pixels_filt:
                 # Concatenate all valid lit pixel values into one large array
                 concatenated_data = np.concatenate(all_lit_pixels_filt)
                 del all_lit_pixels_for_range # Free memory
                 if concatenated_data.size > 0:
                     with warnings.catch_warnings(): # Ignore warnings from percentile on potentially weird data
                         warnings.simplefilter("ignore", category=RuntimeWarning)
                         # Use 0.5 and 99.5 percentile for robustness against outliers
                         p_low = np.nanpercentile(concatenated_data, 0.5)
                         p_high = np.nanpercentile(concatenated_data, 99.5)

                     if np.isfinite(p_low) and np.isfinite(p_high) and p_high > p_low:
                         vis_vmin = max(0.0, p_low) # Ensure vmin is not negative
                         vis_vmax = p_high
                         print(f" Using 0.5% - 99.5% range of lit pixels: {vis_vmin:.4f} - {vis_vmax:.4f}")
                     else:
                         # Fallback if percentiles are weird (e.g., all data is the same)
                         print(" Warn: Percentile calculation resulted in non-finite or invalid range. Falling back.")
                         vis_vmin = max(0.0, min_raw_value_overall) # Use observed min (>=0)
                         vis_vmax = max(vis_vmin + 1e-6, max_raw_value_overall) # Use observed max, ensure > vmin
                         print(f" Using observed min/max range (fallback): {vis_vmin:.4f} - {vis_vmax:.4f}")
                 else:
                      print(" Warn: No valid lit pixel data found after filtering for range calculation. Using default range [0, 1].")
                 if 'concatenated_data' in locals(): del concatenated_data # Free memory
            else:
                 print(" Warn: No arrays with lit pixel data found for range calculation. Using default range [0, 1].")
        except Exception as e:
            print(f" ERROR during visualization range calculation: {e}. Using default range [0, 1].")
            vis_vmin, vis_vmax = 0.0, 1.0
    else:
        print(" Warn: No lit pixels recorded across all files (or mask excluded all). Using default range [0, 1].")
        vis_vmin, vis_vmax = 0.0, 1.0

    # Final check to ensure vmax > vmin
    if vis_vmax <= vis_vmin:
        vis_vmax = vis_vmin + 1.0 # Add a small amount if they are equal or inverted

    # Define the normalization based on user choice
    if normalize_animation:
        print(f"--- Using NORMALIZED animation range: {vis_vmin:.4f} to {vis_vmax:.4f} ---")
        norm = mcolors.Normalize(vmin=vis_vmin, vmax=vis_vmax)
    else:
        # Use 0 as the minimum, and the calculated percentile/max as the maximum
        fixed_vmin = 0.0
        fixed_vmax = vis_vmax # Use the previously determined upper bound
        print(f"--- Using FIXED animation scaling: {fixed_vmin:.4f} to {fixed_vmax:.4f} ---")
        norm = mcolors.Normalize(vmin=fixed_vmin, vmax=fixed_vmax)


    # --- 3. Save Calculated Statistics to CSV ---
    csv_filename = f"{output_path_base}_statistics.csv"
    # Add sum_lit and max_lit to headers
    csv_headers = ['Date', 'Mean_Lit', 'Median_Lit', 'Max_Lit', 'Sum_Lit', 'Count_Lit',
                   'Mean_AllNonNeg', 'Median_AllNonNeg', 'Sum_AllNonNeg', 'Count_AllNonNeg']
    print(f"\nSaving calculated statistics to {csv_filename}...")
    try:
        with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(csv_headers)
            count_written = 0
            for stats in monthly_metrics_list:
                # Helper to format numbers or return empty string for NaN/None
                def fmt(val, precision=6):
                    if val is None or not np.isfinite(val): return ''
                    return f"{val:.{precision}f}"

                row = [
                    stats['date'].strftime('%Y-%m-%d'),
                    fmt(stats.get('mean_lit')),
                    fmt(stats.get('median_lit')),
                    fmt(stats.get('max_lit')),      # Added Max Lit
                    fmt(stats.get('sum_lit')),       # Added Sum Lit
                    stats.get('count_lit', 0),
                    fmt(stats.get('mean_all')),
                    fmt(stats.get('median_all')),
                    fmt(stats.get('sum_all')),       # Added Sum All Non-Negative
                    stats.get('count_non_negative', 0)
                ]
                csvwriter.writerow(row)
                count_written += 1
            print(f"  Successfully wrote {count_written} rows to CSV.")
    except Exception as e:
        print(f"  ERROR writing CSV file: {e}")


    # --- 4. Prepare Frames ---
    frames = []
    cmap = cm.get_cmap(cmap_name)
    print("\n--- Pass 2: Creating animation frames ---")
    if len(outside_mask_color) == 4:
        # Normalize color to 0-1 range for numpy operations
        outside_mask_color_np = np.array(outside_mask_color[:4]) / 255.0
    else:
        print(f"Warn: Invalid outside_mask_color {outside_mask_color}. Must be RGBA tuple. Using transparent.")
        outside_mask_color_np = np.array([0.0, 0.0, 0.0, 0.0])

    for i, filepath in enumerate(sorted_files):
        print(f"  Creating frame {i+1}/{len(sorted_files)}...", end='\r')
        try:
            with rasterio.open(filepath) as src:
                nodata_val = src.nodata
                data = src.read(1).astype(np.float32)
                if nodata_val is not None:
                    data[data == nodata_val] = np.nan

                # Apply colormap: map NaN to a transparent/specific color *before* normalization if possible
                # or handle it after. Let's handle NaNs *after* normalization.
                # Values <= 0 are often treated as "no light", map them to the bottom of cmap or make transparent
                # Let's map NaNs and <=0 to vmin for coloring purposes, then handle transparency separately if needed.
                processed_data = data.copy()
                # Treat non-positive values as the minimum for color mapping (often black/dark)
                non_positive_mask = (~np.isnan(processed_data)) & (processed_data <= 0)
                processed_data[non_positive_mask] = norm.vmin # Map 0 or negative to min color
                # Map NaN explicitly to vmin as well, or could map to a separate color index if cmap supported bad values well
                nan_mask_frame = np.isnan(processed_data)
                processed_data[nan_mask_frame] = norm.vmin

                # Apply normalization and colormap
                # Use np.clip to ensure data stays within norm range after manipulation
                normalized_data = norm(np.clip(processed_data, norm.vmin, norm.vmax))
                rgba_image = cmap(normalized_data) # Shape (H, W, 4)

                # --- Apply Masks (Transparency/Color) ---
                # 1. Make original NaN values transparent (override cmap color)
                rgba_image[nan_mask_frame, 3] = 0.0 # Set alpha to 0 for NaN areas

                # 2. Apply country mask if available
                if mask_outside_country and country_mask_available:
                    # Identify pixels *outside* the country boundary
                    outside_pixels_mask = ~country_mask_array
                    if np.any(outside_pixels_mask): # Check if there's anything to mask
                        # Apply the specified outside color/transparency
                        # If alpha is 0, just make transparent
                        if outside_mask_color_np[3] == 0.0:
                             rgba_image[outside_pixels_mask, 3] = 0.0
                        else:
                             # Apply the RGBA color
                             rgba_image[outside_pixels_mask, :] = outside_mask_color_np

                # Convert to PIL Image for text drawing
                pil_image = Image.fromarray((rgba_image * 255).astype(np.uint8), 'RGBA')
                draw = ImageDraw.Draw(pil_image)

                # Add Text Overlays if font loaded successfully
                if font:
                    # Use calculated font size for padding estimate
                    padding = max(5, int(calculated_font_size * 0.4)) # Smaller padding relative to font size

                    # Watermark (Top-Left)
                    if watermark_text:
                        try:
                            # Use textbbox if available for better positioning (requires Pillow >= 8.0.0)
                            try:
                                wm_bbox = draw.textbbox((padding, padding), watermark_text, font=font)
                                # Draw text at the calculated top-left based on bbox
                                draw.text((padding, padding), watermark_text, fill=(255, 255, 255, 180), font=font)
                            except AttributeError: # Fallback for older Pillow versions
                                draw.text((padding, padding), watermark_text, fill=(255, 255, 255, 180), font=font)
                        except Exception as text_err:
                            if i == 0: print(f"\nWarn: Watermark draw error: {text_err}") # Print only once

                    # Date (Bottom-Right)
                    date_str = date_labels[i]
                    try:
                        # Use textbbox if available for accurate width/height
                        try:
                            date_bbox = draw.textbbox((0, 0), date_str, font=font, anchor='lt') # Use (0,0) as reference
                            text_width = date_bbox[2] - date_bbox[0]
                            text_height = date_bbox[3] - date_bbox[1]
                            # Position based on bottom-right corner of the text box
                            pos_x = pil_image.width - text_width - padding
                            pos_y = pil_image.height - text_height - padding
                            draw.text((pos_x, pos_y), date_str, fill=(255, 255, 255, 200), font=font)
                        except AttributeError: # Fallback for older Pillow versions (less accurate size)
                             # Estimate size based on font size and string length (less accurate)
                             est_width = int(len(date_str) * calculated_font_size * 0.6)
                             est_height = int(calculated_font_size * 1.2)
                             pos_x = pil_image.width - est_width - padding
                             pos_y = pil_image.height - est_height - padding
                             draw.text((pos_x, pos_y), date_str, fill=(255, 255, 255, 200), font=font)

                    except Exception as text_err:
                         if i == 0: print(f"\nWarn: Date draw error: {text_err}") # Print only once

                # Append the final frame (as numpy array)
                frames.append(np.array(pil_image))
        except Exception as e:
            print(f"\nERROR creating frame {i+1} for {os.path.basename(filepath)}: {e}")
            # Optionally append a blank or placeholder frame, or just skip
            # For now, we just skip the problematic frame

    print("\nFinished creating frames.") # Clear the progress line


    # --- 5. Create Animation ---
    if frames:
        output_filename_anim = f"{output_path_base}.{output_format.lower()}"
        print(f"\nSaving animation: {output_filename_anim}...")
        # Ensure duration is at least a minimum value (e.g., 10ms)
        frame_duration_ms = max(10, int(1000 / fps))

        try:
            if output_format.lower() == 'gif':
                # Use Pillow plugin for GIF for potentially better transparency handling
                imageio.mimsave(output_filename_anim, frames, format='GIF', duration=frame_duration_ms, loop=0) # loop=0 means infinite loop
                print(" GIF animation saved (using Pillow).")
            elif output_format.lower() == 'mp4':
                print(" INFO: Saving MP4. Transparency may require specific players (like VLC) or might be lost depending on codec.")
                # Use yuva420p for alpha channel support with libx264
                # Note: Compatibility of yuva420p can vary. yuv420p is more common but lacks alpha.
                # Ensure macro_block_size is compatible with dimensions if needed (often 16)
                # Adjust preset for speed vs compression ('ultrafast', 'medium', 'slow')
                imageio.mimsave(output_filename_anim, frames, format='FFMPEG', fps=fps,
                                output_params=[
                                    '-vcodec', 'libx264',
                                    '-crf', str(mp4_crf),
                                    '-preset', 'medium',
                                    '-pix_fmt', 'yuva420p' # Pixel format supporting alpha
                                    ])
                print(f" MP4 animation saved (CRF={mp4_crf}, Preset=medium, PixFmt=yuva420p).")
            else:
                # General case for other formats if imageio supports them
                imageio.mimsave(output_filename_anim, frames, format=output_format, fps=fps)
                print(f" Animation saved in format '{output_format}'.")
        except Exception as e:
            print(f"\nError saving animation: {e}")
            print("  Troubleshooting tips:")
            print("  - Ensure ffmpeg is installed and accessible in your system PATH (for MP4).")
            print("  - Try a different `output_format` like 'gif'.")
            print("  - If using MP4 with transparency, ensure your player supports the 'yuva420p' pixel format.")
            print("  - Check image dimensions; some codecs have restrictions (e.g., width/height divisible by 2).")
            print(f"  - Frame dimensions: {frames[0].shape if frames else 'N/A'}")

    else:
        print("\nNo frames were generated, skipping animation saving.")


    # --- 6. Create and Save Graph ---
    output_filename_graph = f"{output_path_base}_graph.png"
    print(f"\nGenerating and saving statistics graph: {output_filename_graph}...")

    # --- Plotting Setup ---
    # Choose metrics to plot. Let's plot Mean Lit and Sum Lit for comparison.
    plot_metrics = {
        'mean_lit': {'label': 'Mean Radiance (Lit Pixels)', 'color': 'tab:blue', 'marker': 'o'},
        'sum_lit': {'label': 'Sum of Radiance (Lit Pixels)', 'color': 'tab:red', 'marker': 's'}
    }
    # You could also plot 'median_lit', 'mean_all', 'sum_all', 'count_lit' etc.

    plt.style.use('seaborn-v0_8-darkgrid') # Use a nice style
    fig, ax1 = plt.subplots(figsize=(15, 7))
    ax2 = ax1.twinx() # Create a second y-axis for potentially different scales (like Sum vs Mean)
    lines = []
    labels = []
    axes = {'mean_lit': ax1, 'sum_lit': ax2} # Assign metrics to axes

    plot_successful = False
    for metric_key, props in plot_metrics.items():
        # Extract data points, filtering out NaNs
        plot_dates = [s['date'] for s in monthly_metrics_list if np.isfinite(s.get(metric_key, np.nan))]
        plot_values = [s[metric_key] for s in monthly_metrics_list if np.isfinite(s.get(metric_key, np.nan))]

        print(f"  Graph points for '{metric_key}': {len(plot_dates)}")

        if len(plot_dates) > 1: # Need at least 2 points to draw a line
            current_ax = axes.get(metric_key, ax1) # Default to ax1 if not specified
            line, = current_ax.plot(plot_dates, plot_values,
                                    marker=props.get('marker', '.'),
                                    linestyle='-',
                                    markersize=5,
                                    color=props.get('color', None),
                                    label=props.get('label', metric_key))
            lines.append(line)
            labels.append(props.get('label', metric_key))
            current_ax.set_ylabel(props.get('label', metric_key), color=props.get('color', 'black'))
            current_ax.tick_params(axis='y', labelcolor=props.get('color', 'black'))
            # Ensure y-axis starts at 0 for radiance values
            current_ax.set_ylim(bottom=0)
            plot_successful = True
        elif len(plot_dates) == 1:
             # Plot single points if only one data point exists
             current_ax = axes.get(metric_key, ax1)
             current_ax.plot(plot_dates, plot_values,
                             marker=props.get('marker', 'o'),
                             markersize=6, linestyle='', # No line for single point
                             color=props.get('color', None),
                             label=props.get('label', metric_key) + " (single point)")
             # Need to handle labels manually for single points if using combined legend
             lines.append(current_ax.get_lines()[-1]) # Get the plotted object
             labels.append(props.get('label', metric_key) + " (single point)")
             current_ax.set_ylabel(props.get('label', metric_key), color=props.get('color', 'black'))
             current_ax.tick_params(axis='y', labelcolor=props.get('color', 'black'))
             current_ax.set_ylim(bottom=0)
             plot_successful = True
        else:
            print(f"  No valid data found for '{metric_key}', skipping plot for this metric.")

    if plot_successful:
        # --- Formatting ---
        # X-axis formatting
        ax1.xaxis.set_major_locator(mdates.YearLocator(base=1)) # Tick every year
        ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        ax1.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1, 7])) # Minor ticks Jan/Jul
        ax1.xaxis.set_minor_formatter(mdates.DateFormatter('%b')) # Show month abbr for minor
        plt.setp(ax1.xaxis.get_minorticklabels(), rotation=45, ha='right') # Rotate minor ticks
        plt.setp(ax1.xaxis.get_majorticklabels(), rotation=0, ha='center')

        ax1.set_xlabel("Time")
        ax1.set_title(graph_title)
        # Combine legends from both axes
        fig.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=len(labels), frameon=False) # Place legend below plot

        # Adjust layout
        fig.tight_layout(rect=[0, 0.05, 1, 1]) # Adjust rect to make space for legend below x-axis if needed
        # plt.subplots_adjust(bottom=0.2) # Alternative way to add space at bottom

        # Save the figure
        try:
            plt.savefig(output_filename_graph, dpi=300, bbox_inches='tight')
            print("  Graph saved successfully.")
        except Exception as e:
            print(f"ERROR saving graph: {e}")

        plt.close(fig) # Close the plot figure to free memory
    else:
        print(f"  No valid data found for any specified metrics ({', '.join(plot_metrics.keys())}). Skipping graph generation.")


    print("\nProcessing finished.")



In [17]:
# --- Example Usage (replace with your actual paths and settings) ---
if __name__ == '__main__':
    # Example driver: edit the configuration constants below, then run this
    # cell/script to produce the CSV, the animation and the statistics graph.
    print("Running example usage...")

    # --- Configuration ---
    INPUT_DIR = r'C:/Users/rodri/My Drive/GEE_Nightlights_Japan'  # REQUIRED: Set this path
    OUTPUT_BASE = r'C:/Users/rodri/Desktop/Nightlights/All_Nightlights'     # REQUIRED: Set base output path/name
    SHAPEFILE_PATH = "C:/Users/rodri/Desktop/NIghtlights/Natural_Earth_Level_0/ne_10m_admin_0_countries.shp" # REQUIRED if masking: Set this path
    COUNTRY_NAME = "Japan"          # REQUIRED if masking: Set this name (e.g., "Nigeria")

    # Optional settings
    MASK_COUNTRY = True          # Set to False to disable masking
    OUTPUT_FMT = 'mp4'           # 'gif' or 'mp4'
    ANIM_FPS = 5                 # Frames per second
    NORMALIZE = False            # Use fixed 0-max scaling (False) or percentile scaling (True)
    CMAP = 'plasma'             # Colormap (e.g., 'plasma', 'viridis', 'magma', 'inferno', 'hot')
    MP4_QUALITY = 22             # Lower = better quality, larger file (for MP4)
    TEXT_SIZE = 'medium'         # 'small', 'medium', 'large'
    FONT_OVERRIDE = None         # Set to a path like "C:/Windows/Fonts/arial.ttf" or "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" to force a font
    WATERMARK = "Anaplian.com" # Set to None or "" to disable
    GRAPH_PLOT_TITLE = f"Night Light Trends for {COUNTRY_NAME}" if MASK_COUNTRY else "Night Light Trends"
    DEBUG_MODE = False           # Set to True for extra debug prints, especially for masking

    # --- Basic Input Validation ---
    # Validation runs before anything touches the filesystem, so a
    # misconfigured run does not leave an empty output directory behind.
    if not os.path.isdir(INPUT_DIR):
        print(f"ERROR: Input directory not found: {INPUT_DIR}")
    elif MASK_COUNTRY and (not SHAPEFILE_PATH or not os.path.exists(SHAPEFILE_PATH)):
        print(f"ERROR: Masking enabled, but shapefile not found or path not set: {SHAPEFILE_PATH}")
    elif MASK_COUNTRY and not COUNTRY_NAME:
        print("ERROR: Masking enabled, but country name not set.")
    else:
        # --- Create output directory if it doesn't exist ---
        # dirname() is '' when OUTPUT_BASE is a bare file name; nothing to create then.
        output_dir = os.path.dirname(OUTPUT_BASE)
        if output_dir and not os.path.exists(output_dir):
            print(f"Creating output directory: {output_dir}")
            # exist_ok avoids a crash if the directory appears between the
            # existence check above and this call.
            os.makedirs(output_dir, exist_ok=True)

        # --- Run the main function ---
        create_nightlight_timelapse_and_graph(
            input_folder=INPUT_DIR,
            output_path_base=OUTPUT_BASE,
            output_format=OUTPUT_FMT,
            cmap_name=CMAP,
            fps=ANIM_FPS,
            normalize_animation=NORMALIZE,
            mask_outside_country=MASK_COUNTRY,
            country_boundary_shapefile_path=SHAPEFILE_PATH,
            target_country_name=COUNTRY_NAME,
            outside_mask_color=(0, 0, 0, 128),  # RGBA, 0-255 per channel: black at ~50% opacity
            mp4_crf=MP4_QUALITY,
            text_size_category=TEXT_SIZE,
            font_path=FONT_OVERRIDE,
            graph_title=GRAPH_PLOT_TITLE,
            watermark_text=WATERMARK,
            debug_masking=DEBUG_MODE,
        )

        print("\nExample script finished.")


Running example usage...
Starting analysis for folder: C:/Users/rodri/My Drive/GEE_Nightlights_Japan
Output base: C:/Users/rodri/Desktop/Nightlights/All_Nightlights
Format: mp4, FPS: 5, Colormap: plasma
Normalize Animation Frames: False
Mask Outside Country: True
Target Country Name: 'Japan'
MP4 CRF Value: 22
Text Size Category: medium
Found and sorted 155 TIFF files.
Raster Properties: Shape=(4785, 6914), CRS=EPSG:4326

Attempting to load font...
  Found font 'DejaVuSans' directly by name.
  Loaded system font: DejaVuSans size 79

Preparing country mask for 'Japan'...
  Found 'Japan' in shapefile column 'NAME'. (1 feature(s))
  Rasterizing country polygon(s)...
  Country mask created (1892109 pixels inside / 33083490 total).

--- Pass 1: Calculating Metrics & Determining Value Range ---
  Processing Japan_VIIRS_2025_02.tif (2025-02)...
--- Pass 1 Summary ---
 Found lit pixels (within mask, if applied) in 155 out of 155 months.
 Overall raw value range observed (across all pixels): -1.

  cmap = cm.get_cmap(cmap_name)


  Creating frame 155/155...




Finished creating frames.

Saving animation: C:/Users/rodri/Desktop/Nightlights/All_Nightlights.mp4...
 INFO: Saving MP4. Transparency may require specific players (like VLC) or might be lost depending on codec.
 MP4 animation saved (CRF=22, Preset=medium, PixFmt=yuva420p).

Generating and saving statistics graph: C:/Users/rodri/Desktop/Nightlights/All_Nightlights_graph.png...
  Graph points for 'mean_lit': 155
  Graph points for 'sum_lit': 155
  Graph saved successfully.

Processing finished.

Example script finished.
