In [1]:
import os
from datetime import datetime

# Directory whose entries are scanned for ".TIF" files by the loop below.
# NOTE(review): this name shadows the built-in dir(); it is kept as-is
# because the scan loop reads it under this name.
dir = "/mnt/csdrive/landsat/combined/"

def string_to_date(date_string) -> datetime:
    '''
    Parse a compact YYYYMMDD string into a datetime object.

    Raises ValueError (chained to the parser's own error) when the string
    does not match that format.
    '''
    date_format = "%Y%m%d"
    try:
        parsed = datetime.strptime(date_string, date_format)
    except ValueError as parse_error:
        message = f"Invalid date string format: {date_string}. Expected format is YYYYMMDD."
        raise ValueError(message) from parse_error
    return parsed

def update_date_range(current_earliest, current_latest, new_date):
    '''
    Widen an (earliest, latest) pair so that it includes new_date.

    A bound that is None is treated as unset and is replaced by new_date.
    Returns the resulting (earliest, latest) tuple.
    '''
    extends_lower = current_earliest is None or new_date < current_earliest
    extends_upper = current_latest is None or new_date > current_latest
    earliest = new_date if extends_lower else current_earliest
    latest = new_date if extends_upper else current_latest
    return earliest, latest


def _format_date_range(earliest, latest):
    '''
    Format a date range as "YYYY-MM-DD to YYYY-MM-DD".

    Returns a placeholder string when either bound is None (no matching
    file was seen), because formatting None with a date format spec
    raises TypeError.
    '''
    if earliest is None or latest is None:
        return "n/a (no matching files)"
    return f"{earliest:%Y-%m-%d} to {latest:%Y-%m-%d}"


# Running totals and date bounds, filled in by the scan loop below.
# Each bound stays None until a matching file has been seen.
tif_file_count = 0
mask_earliest_satellite_date = mask_latest_satellite_date = None
mask_earliest_process_date = mask_latest_process_date = None
rgb_earliest_satellite_date = rgb_latest_satellite_date = None
rgb_earliest_process_date = rgb_latest_process_date = None
conus_count = alaska_count = hawaii_count = 0
for file in os.listdir(dir):
    if not file.endswith(".TIF"):
        continue

    # Count total number of TIF files
    tif_file_count += 1

    # Count number of TIF files with CONUS, ALASKA, and HAWAII
    # NOTE(review): these are substring tests on the whole file name and
    # the first match wins; presumably the codes cannot occur elsewhere in
    # a name -- confirm against the naming convention.
    if "CU" in file:
        conus_count += 1
    elif "AK" in file:
        alaska_count += 1
    elif "HI" in file:
        hawaii_count += 1

    # Get earliest and latest date for mask and RGB.
    # The 4th and 5th underscore-separated fields of the name are parsed
    # as the satellite date and the process date; split once and reuse.
    fields = file.split("_")
    satellite_date = string_to_date(fields[3])
    process_date = string_to_date(fields[4])
    if "B2" in file:
        rgb_earliest_satellite_date, rgb_latest_satellite_date = update_date_range(rgb_earliest_satellite_date, rgb_latest_satellite_date, satellite_date)
        rgb_earliest_process_date, rgb_latest_process_date = update_date_range(rgb_earliest_process_date, rgb_latest_process_date, process_date)
    elif "BC" in file:
        mask_earliest_satellite_date, mask_latest_satellite_date = update_date_range(mask_earliest_satellite_date, mask_latest_satellite_date, satellite_date)
        mask_earliest_process_date, mask_latest_process_date = update_date_range(mask_earliest_process_date, mask_latest_process_date, process_date)

print(f"Total number of TIF files: {tif_file_count}")
print(f"Number of CONUS TIF files: {conus_count}")
print(f"Number of ALASKA TIF files: {alaska_count}")
print(f"Number of HAWAII TIF files: {hawaii_count}")


# The helper keeps the report from crashing with TypeError when a category
# (mask or RGB) had no files and its bounds are still None.
print(f"Mask satellite date range: {_format_date_range(mask_earliest_satellite_date, mask_latest_satellite_date)}")
print(f"RGB satellite date range:  {_format_date_range(rgb_earliest_satellite_date, rgb_latest_satellite_date)}")

print(f"Mask process date range:   {_format_date_range(mask_earliest_process_date, mask_latest_process_date)}")
print(f"RGB process date range:    {_format_date_range(rgb_earliest_process_date, rgb_latest_process_date)}")

Total number of TIF files: 10265
Number of CONUS TIF files: 9176
Number of ALASKA TIF files: 913
Number of HAWAII TIF files: 176
Mask satellite date range: 2024-10-19 to 2024-11-08
RGB satellite date range:  2024-10-28 to 2024-11-11
Mask process date range:   2024-10-25 to 2024-11-13
RGB process date range:    2024-11-02 to 2024-11-19
