In [1]:
!pip install rasterio
!pip install rasterstats
!pip install exactextract

from monthly_ntl_clean_int_v3_base import downloadh5_dates, mask_to_zero, download_file, geturl, extract_offnadir_and_quality, apply_quality_mask, mosaic, zonalStats, interpolate_ntl_1d, apply_ephemeral_mask, resample_to_black_marble

from pathlib import Path
import os
import datetime
import pandas as pd
import time
import re
import shutil
from collections import defaultdict
import gzip
from exactextract import exact_extract
import io
import tempfile
import requests
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from pathlib import Path
from bs4 import BeautifulSoup
from getpass import getpass
import urllib3
import numpy as np
from rasterio.enums import Resampling
from rasterio.warp import reproject, calculate_default_transform
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
##set-up token and local directories
token = ''
#token from NASA Get it here: https://ladsweb.modaps.eosdis.nasa.gov/tools-and-services/data-download-scripts/#tokens

loc_dir = Path(os.getcwd())

#do not change these lines
h5_folder = "01 H5" #name of location folder within local directory where h5 will be downloaded
geotiff_folder = "02 Geotiff" #name of location folder within local directory where geotiff will be stored
masked_folder = "03 Masked" #name of location folder within local directory where mosaic geotiff will be stored
average_folder = "04 Average" #name of location folder within local directory where averaged mosaic geotiff will be stored
mosaic_folder = "05 Mosaic" #name of location folder within local directory where mosaic geotiff will be stored
csv_folder = "06 CSV" #name of location folder within local directory where csv will be stored
shapefile_folder = "Shapefile" #name of location folder within local directory where shapefile is stored

#add your shapefile here
zones = "ph.shp"
#this corresponds to your adm bound code
zones_field = "ADM1_PCODE"
#this corresponds to the tiles needed to be downloaded based on a specific area of interest
h5tiles_csv = "PH_TileList3.csv"

loc_dir = Path(os.getcwd())

# Define paths
h5_dir = loc_dir / h5_folder
geotiff_dir = loc_dir / geotiff_folder
masked_dir = loc_dir / masked_folder
mosaic_dir = loc_dir / mosaic_folder
shapefile_dir = loc_dir / shapefile_folder / zones
csv_dir = loc_dir / csv_folder

# Read the CSV of tiles
tiles = pd.read_csv(loc_dir / h5tiles_csv)
h5tiles_list = tiles['TileID'].tolist()

# Define the year for which data will be downloaded and processed
year = 2023  # Example: User input for the year

# angles = ["OffNadir_Composite_Snow_Free", "NearNadir_Composite_Snow_Free", "AllAngle_Composite_Snow_Free"]
angles = ["AllAngle_Composite_Snow_Free"]

In [None]:
#optional. for auto-upload to your google drive.

import os
import zipfile
from google.colab import drive
from shutil import move

# --- MOUNT GOOGLE DRIVE ---
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Replace with your actual EOG credentials; Register at: https://eogauth-new.mines.edu/realms/eog/protocol/openid-connect/auth
os.environ["EOG_USERNAME"] = ""
os.environ["EOG_PASSWORD"] = ""

def get_access_token(username, password):
    try:
        token_url = "https://eogauth-new.mines.edu/realms/eog/protocol/openid-connect/token"
        params = {
            "client_id": "",
            "client_secret": "",
            "username": username,
            "password": password,
            "grant_type": "password"
        }
        response = requests.post(token_url, data=params)
        response.raise_for_status()
        return response.json().get("access_token")
    except Exception as e:
        print(f"❌ Failed to get access token: {e}")
        return None

def download_and_clip_lit_mask(year: int, output_dir: str, aoi_shapefile: str):
    """
    Downloads and clips annual lit_mask.dat.tif.gz from NOAA for a selected year.
    Saves the clipped GeoTIFF to output_dir.
    """

    # Determine correct version
    version = "v21" if year <= 2021 else "v22"
    base_url = f"https://eogdata.mines.edu/nighttime_light/annual/{version}"
    product_keyword = "lit_mask.dat.tif.gz"

    # Auth
    username = os.getenv("EOG_USERNAME") or input("EOG Username: ")
    password = os.getenv("EOG_PASSWORD") or getpass("EOG Password: ")
    token = get_access_token(username, password)
    if not token:
        return

    headers = {"Authorization": f"Bearer {token}"}
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Read AOI
    aoi = gpd.read_file(aoi_shapefile).to_crs("EPSG:4326")
    geometry = [aoi.unary_union]

    # Construct folder URL
    folder_url = f"{base_url}/{year}/"
    print(f"\n🌐 Searching: {folder_url}")

    session = requests.Session()
    session.headers.update(headers)

    try:
        resp = session.get(folder_url, verify=False)
        if resp.status_code != 200:
            print(f"❌ Cannot access {folder_url}")
            return

        soup = BeautifulSoup(resp.text, "html.parser")
        file_links = [a['href'] for a in soup.find_all('a', href=True) if product_keyword in a['href']]
        if not file_links:
            print(f"❌ No lit_mask file found for {year}")
            return

        filename = file_links[0]
        clipped_outfile = output_path / f"{year}_lit_mask_clipped.tif"
        if clipped_outfile.exists():
            print(f"⏩ Skipping existing: {clipped_outfile.name}")
            return

        full_url = folder_url + filename
        print(f"⬇️ Downloading and clipping: {filename}")

        with session.get(full_url, stream=True, verify=False) as r:
            r.raise_for_status()

            with tempfile.NamedTemporaryFile(delete=False, suffix=".tif") as temp_tif:
                with gzip.open(io.BytesIO(r.content)) as gz:
                    shutil.copyfileobj(gz, temp_tif)

        # Clip the temporary .tif to AOI
        with rasterio.open(temp_tif.name) as src:
            out_image, out_transform = mask(src, geometry, crop=True)
            out_meta = src.meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform
            })

            with rasterio.open(clipped_outfile, "w", **out_meta) as dest:
                dest.write(out_image)

        os.remove(temp_tif.name)
        print(f"✅ Saved: {clipped_outfile.name}")

    except Exception as e:
        print(f"❌ Error for year {year}: {e}")

# function to resampling mask to match ref raster

def resample_mask_to_match(mask_path, reference_path, output_path):
    """Resample mask raster to match the dimensions and projection of a reference raster."""

    # Check if required files exist
    if not os.path.exists(mask_path):
        raise FileNotFoundError(f"⚠️ Mask file not found: {mask_path}")
    if not os.path.exists(reference_path):
        raise FileNotFoundError(f"⚠️ Reference file not found: {reference_path}")

    with rasterio.open(reference_path) as ref:
        ref_meta = ref.meta.copy()
        ref_transform = ref.transform
        ref_crs = ref.crs
        ref_height = ref.height
        ref_width = ref.width

        with rasterio.open(mask_path) as mask:
            mask_data = mask.read(1)  # Read single-band data
            mask_transform = mask.transform
            mask_crs = mask.crs

            # Prepare resampled array
            mask_resampled = np.empty((ref_height, ref_width), dtype=mask_data.dtype)

            # Perform the resampling
            reproject(
                source=mask_data,
                destination=mask_resampled,
                src_transform=mask_transform,
                src_crs=mask_crs,
                dst_transform=ref_transform,
                dst_crs=ref_crs,
                resampling=Resampling.nearest
            )

            # Update metadata
            ref_meta.update({
                "height": ref_height,
                "width": ref_width,
                "transform": ref_transform,
                "count": 1,  # Single band
                "dtype": mask_data.dtype
            })

            # Save resampled mask
            with rasterio.open(output_path, "w", **ref_meta) as dest:
                dest.write(mask_resampled, 1)  # Write to the first band

    print(f"✅ Resampling complete. Output saved at: {output_path}")
    return output_path

download_and_clip_lit_mask(
    year=year,
    output_dir="ntl_clipped_outputs",
    aoi_shapefile=shapefile_dir
)

ephemeral_mask_path = f"ntl_clipped_outputs/{year}_lit_mask_clipped.tif"

  geometry = [aoi.unary_union]



🌐 Searching: https://eogdata.mines.edu/nighttime_light/annual/v22/2023/
⬇️ Downloading and clipping: VNL_v22_npp_2023_global_vcmslcfg_c202402131000.lit_mask.dat.tif.gz
✅ Saved: 2023_lit_mask_clipped.tif


In [3]:
### main processor

# Step 1: Download H5 files for the specified year
try:
    downloadh5_dates(year, loc_dir, h5_folder, token, h5tiles_list)
except Exception as e:
    print(f"Unable to download. Error: {e}")

# Step 2: Convert H5 files to GeoTIFF and filter quality
paths = h5_dir.glob('**/*.h5')  # Locate all H5 files in the specified directory

for path in paths:
    a = path.name.split('.')
    # dest = Path(str(path.parents[1]).replace(h5_folder, geotiff_folder)) / a[2]
    for angle in angles:
        dest = Path(str(path.parents[1]).replace(h5_folder, geotiff_folder)) / a[2] / angle

        if not os.path.exists(dest):
            os.makedirs(dest)

        print(f'Extracting {angle} and Quality layers for:', path)
        start = time.perf_counter()
        composite_output, quality_output = extract_offnadir_and_quality(path, dest, angle)
        print("Extraction finished in %.4f seconds." % (time.perf_counter() - start))

        if composite_output and quality_output:
            start = time.perf_counter()
            quality_filtered_output = apply_quality_mask(Path(composite_output), Path(quality_output), Path(dest))
            # filtered_output = apply_quality_mask(Path(composite_output), Path(quality_output), Path(dest))
            print("Filtering finished in %.4f seconds." % (time.perf_counter() - start))

            # ✅ Ensure `quality_filtered_output` exists before calling resampling
            if not os.path.exists(quality_filtered_output):
                raise RuntimeError("❌ Failed to generate quality_filtered_output. Stopping execution.")

            # # Step 3: Resampling
            resampled_mask_path = dest / "resampled_mask.tif"
            print("📌 Running Resampling Step...")
            resample_mask_to_match(ephemeral_mask_path, quality_filtered_output, resampled_mask_path)

            # Step 4: Apply ephemeral mask
            ephemeral_output_path = dest / f"{Path(quality_filtered_output).stem}_EphemeralMasked.tif"
            print("📌 Applying Ephemeral Mask...")
            ephemeral_filtered_output = apply_ephemeral_mask(quality_filtered_output, resampled_mask_path, ephemeral_output_path)

            # Step 5: Resample to Black Marble
            resampled_output_path = dest / f"{Path(ephemeral_filtered_output).stem}_Resampled.tif"
            print("📌 Resampling to Black Marble...")
            final_output = resample_to_black_marble(ephemeral_filtered_output, quality_filtered_output, resampled_output_path)

        else:
            print(f"⚠️ Extraction failed for {path}, skipping filtering!")
            print(f"  - Composite Output: {composite_output}")
            print(f"  - Quality Output: {quality_output}")

paths = geotiff_dir.glob('**/*/*/*Resampled*.tif')

# Step 6: Apply linear imputation
# Organize by (tile_id, angle)
paths_by_id = defaultdict(lambda: defaultdict(list))
for path in paths:
    tile_id = path.parent.parent.name
    angle = path.parent.name
    parts = path.stem.split('.')
    if len(parts) > 4:
        unique_id = parts[1]  # e.g., A2021035
        paths_by_id[(tile_id, angle)][unique_id] = path

from datetime import datetime

# Iterate per (tile_id, angle)
for (tile_id, angle), tif_dict in paths_by_id.items():
    # Map from datetime → path
    date_map = {}
    for k, p in tif_dict.items():
        try:
            dt = datetime.strptime(k[1:], "%Y%j")  # strip 'A'
            # dt = datetime.datetime.strptime(k[1:], "%Y%j")
            date_map[dt] = p
        except Exception as e:
            print(f"❌ Failed to parse date from {k}: {e}")

    years = sorted(set(d.year for d in date_map))
    for year in years:
        if year == min(years):
            continue  # skip first year — no Dec from previous year

        # Build rolling window: Dec of prev year + Jan–Dec of current year
        window_months = [datetime(year - 1, 12, 1)] + [datetime(year, m, 1) for m in range(1, 13)]
        tif_paths = [date_map.get(dt) for dt in window_months if dt in date_map]

        if len(tif_paths) < 13:
            print(f"⚠️ Skipping {tile_id}-{angle} {year}: incomplete 13-month window.")
            continue

        print(f"⏳ Interpolating pixel time series for {tile_id} - {angle} ({year})")
        output_dir = tif_paths[0].parent  # output stays in angle folder
        start = time.perf_counter()

        interpolated_paths = interpolate_ntl_1d(
            tif_paths=tif_paths,
            output_dir=output_dir,
            target_year=year,
            masked_value=6553.5
        )

        target_dates = [d for d in [datetime.strptime(p.stem.split('.')[1][1:], "%Y%j") for p in tif_paths]
                        if d.year == year]

        # Sanity check
        if len(target_dates) != len(interpolated_paths):
            print(f"⚠️ Mismatch: {len(target_dates)} dates vs {len(interpolated_paths)} interpolated files")

        # Rename properly
        for date, interp_path in zip(target_dates, interpolated_paths):
            date_str = f"A{date.year}{date.timetuple().tm_yday:03d}"
            orig_match = [p for p in tif_paths if date_str in p.name]

            if orig_match:
                expected_name = orig_match[0].name.replace('Resampled', 'Interpolated')
                renamed_path = interp_path.parent / expected_name
                interp_path.rename(renamed_path)
                print(f"📝 Renamed {interp_path.name} → {renamed_path.name}")
            else:
                print(f"⚠️ No match for {date_str}")


# Step 7: Apply Zonal Statistics
paths_by_id = defaultdict(lambda: defaultdict(list))

for tif in geotiff_dir.glob('**/*/*/*Interpolated*.tif'):

    filename = tif.stem  # no .tif
    parts = filename.split('_')

    print(f"🔍 Scanning file: {filename}")  # Add this

    if len(parts) >= 5 and 'VNP46A3' in parts[4]:
        main_block = parts[4]  # e.g., 'VNP46A3.A2022274.h29v06...'
        try:
            unique_id = main_block.split('.')[1]  # extracts 'A2022274'
            angle = Path(tif).parent.name  # keep this as is
            if angle in angles:
                paths_by_id[unique_id][angle].append(tif)
        except IndexError:
            print(f"⚠️ Could not extract unique_id from {filename}")

for unique_id, angles_dict in paths_by_id.items():
    for angle, tif_list in angles_dict.items():
        print(f"Processing {unique_id} for {angle}: {len(tif_list)} files")

        mosaic_output = mosaic_dir / unique_id / angle / f"Mosaic_{angle}_{unique_id}.tif"

        output_csv = csv_dir / unique_id / angle / f"{mosaic_output.stem}_{year}_v3.csv"

        if output_csv.exists():
            print(f"Skipping {unique_id} - {angle}, CSV already exists: {output_csv}")
            continue

        print(f"Processing {unique_id} for {angle}: {len(tif_list)} files")

        os.makedirs(mosaic_output.parent, exist_ok=True)

        if tif_list:
            print(f"Mosaicking {len(tif_list)} files into {mosaic_output}")
            start = time.perf_counter()
            mosaic(tif_list, str(mosaic_output))
            print("Finished in %.4f seconds." % (time.perf_counter() - start))

            os.makedirs(csv_dir, exist_ok=True)
            output_csv = csv_dir / unique_id / angle / f"{mosaic_output.stem}_{year}_v3.csv"
            os.makedirs(output_csv.parent, exist_ok=True)


            if output_csv.exists():
                print(f"Skipping Zonal Statistics: {output_csv} already exists.")
            else:
                print(f'Performing Zonal Statistics: {output_csv}')
                start = time.perf_counter()
                zonalStats(mosaic_output, shapefile_dir, output_csv, zones_field)
                print("Finished in %.4f seconds." % (time.perf_counter() - start))

            print(f'Performing Zonal Statistics: {output_csv}')
            start = time.perf_counter()
            print("Finished in %.4f seconds." % (time.perf_counter() - start))
        else:
            print(f"No files found for {unique_id} - {angle}, skipping.")

# uncomment if needed
# # --- CONFIGURATION ---
# GDRIVE_FOLDER_ID = "id_regency_bmi_2017"  # Change to your actual Google Drive folder ID

# GDRIVE_UPLOAD_FOLDER = f"/content/drive/MyDrive/{GDRIVE_FOLDER_ID}"

# # Ensure the upload folder exists in Google Drive
# if not os.path.exists(GDRIVE_UPLOAD_FOLDER):
#     os.makedirs(GDRIVE_UPLOAD_FOLDER)

# # Define the specific folder you want to upload
# TARGET_FOLDER = "06 CSV"  # Change this if necessary

# # Check if the target folder exists in Colab
# folder_path = os.path.join("/content", TARGET_FOLDER)
# if not os.path.exists(folder_path):
#     print(f"❌ Folder '{TARGET_FOLDER}' not found in Colab environment.")
# else:
#     zip_filename = f"{TARGET_FOLDER}.zip"

#     # Compress the target folder into a ZIP file
#     with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
#         for root, dirs, files in os.walk(folder_path):
#             for file in files:
#                 if file.endswith((".gsheet", ".gdoc", ".gslides")):
#                     print(f"Skipping Google file: {file}")
#                     continue

#                 file_path = os.path.join(root, file)
#                 arcname = os.path.relpath(file_path, folder_path)
#                 zipf.write(file_path, arcname)

#     print(f"📦 Folder '{TARGET_FOLDER}' compressed as '{zip_filename}'.")

#     # Move ZIP file to Google Drive folder
#     drive_path = os.path.join(GDRIVE_UPLOAD_FOLDER, zip_filename)
#     move(zip_filename, drive_path)
#     print(f"✅ Uploaded '{zip_filename}' to Google Drive at: {GDRIVE_UPLOAD_FOLDER}")

# print("🎉 Upload completed! Check your Google Drive folder.")

🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023091.h29v08.001.2023128224032.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023305.h29v08.001.2024005193108.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023152.h29v08.001.2023199005155.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023213.h29v08.001.2023290162442.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023060.h29v08.001.2023098211823.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023001.h29v08.001.2023039142538.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023182.h29v08.001.2023240145712.h5_Filtered_EphemeralMasked_Interpolated
🔍 Scanning file: AllAngle_Composite_Snow_Free_VNP46A3.A2023335.h29v08.001.2024016035159.h5_Filter

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  shell = type(geom.exterior)(zip(*func(*zip(*geom.exterior.coords))))
  return lib.area(geometry, **kwargs)
  type(ring)(zip(*func(*zip(*ring.coords))))


Zonal statistics saved to /content/06 CSV/A2023091/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023091_2023_v3.csv
Finished in 84.1213 seconds.
Performing Zonal Statistics: /content/06 CSV/A2023091/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023091_2023_v3.csv
Finished in 0.0000 seconds.
Processing A2023305 for AllAngle_Composite_Snow_Free: 1 files
Processing A2023305 for AllAngle_Composite_Snow_Free: 1 files
Mosaicking 1 files into /content/05 Mosaic/A2023305/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023305.tif
Finished in 0.0581 seconds.
Performing Zonal Statistics: /content/06 CSV/A2023305/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023305_2023_v3.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  shell = type(geom.exterior)(zip(*func(*zip(*geom.exterior.coords))))
  return lib.area(geometry, **kwargs)
  type(ring)(zip(*func(*zip(*ring.coords))))


Zonal statistics saved to /content/06 CSV/A2023305/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023305_2023_v3.csv
Finished in 86.2911 seconds.
Performing Zonal Statistics: /content/06 CSV/A2023305/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023305_2023_v3.csv
Finished in 0.0000 seconds.
Processing A2023152 for AllAngle_Composite_Snow_Free: 1 files
Processing A2023152 for AllAngle_Composite_Snow_Free: 1 files
Mosaicking 1 files into /content/05 Mosaic/A2023152/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023152.tif
Finished in 0.0547 seconds.
Performing Zonal Statistics: /content/06 CSV/A2023152/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023152_2023_v3.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  shell = type(geom.exterior)(zip(*func(*zip(*geom.exterior.coords))))
  return lib.area(geometry, **kwargs)
  type(ring)(zip(*func(*zip(*ring.coords))))


Zonal statistics saved to /content/06 CSV/A2023152/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023152_2023_v3.csv
Finished in 85.2430 seconds.
Performing Zonal Statistics: /content/06 CSV/A2023152/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023152_2023_v3.csv
Finished in 0.0000 seconds.
Processing A2023213 for AllAngle_Composite_Snow_Free: 1 files
Processing A2023213 for AllAngle_Composite_Snow_Free: 1 files
Mosaicking 1 files into /content/05 Mosaic/A2023213/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023213.tif
Finished in 0.0525 seconds.
Performing Zonal Statistics: /content/06 CSV/A2023213/AllAngle_Composite_Snow_Free/Mosaic_AllAngle_Composite_Snow_Free_A2023213_2023_v3.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  shell = type(geom.exterior)(zip(*func(*zip(*geom.exterior.coords))))
  return lib.area(geometry, **kwargs)
  type(ring)(zip(*func(*zip(*ring.coords))))


KeyboardInterrupt: 

In [None]:
# shutil.rmtree(h5_dir)
# shutil.rmtree(geotiff_dir)
# # shutil.rmtree(masked_dir)
# # shutil.rmtree(average_dir)
shutil.rmtree(mosaic_dir)
shutil.rmtree(csv_dir)