In [2]:
"""
lidar_pipeline.py

A unified pipeline for LiDAR metric differencing, overview generation, and VRT construction.
This module can be imported into Jupyter or run as a script without needing CLI arguments; simply modify the
configuration at the bottom of this file and call `run()`.
"""
import os
import glob
import subprocess
import time
import traceback

Done: 0 processed, 0 skipped.
Done: 0 processed, 0 skipped.
Done: 0 processed, 0 skipped.
Skipping 449000_3802000: no match in second folder.
Skipping 449000_3803000: no match in second folder.
Skipping 449000_3804000: no match in second folder.
Skipping 449000_3805000: no match in second folder.
Skipping 449000_3806000: no match in second folder.
Skipping 449000_3807000: no match in second folder.
Skipping 449000_3808000: no match in second folder.
Skipping 450000_3798000: no match in second folder.
Skipping 450000_3799000: no match in second folder.
Skipping 450000_3800000: no match in second folder.
Skipping 450000_3801000: no match in second folder.
Skipping 450000_3802000: no match in second folder.
Skipping 450000_3803000: no match in second folder.
Skipping 450000_3804000: no match in second folder.
Skipping 450000_3805000: no match in second folder.
Skipping 450000_3806000: no match in second folder.
Processed 450000_3807000 (dsm).
Processed 450000_3808000 (dsm).
Skipping 45100

In [None]:
def index_files(folder, year_tag, suffix_key):
    """
    Index raster files in a folder by tile base name.

    A file is indexed when its name ends with ``_{year_tag}_{suffix_key}.tif``
    and does not contain "hillshade" (case-insensitive). The tile base name is
    the filename with that trailing suffix removed.
    Args:
        folder (str): Path to the directory containing raster files.
        year_tag (str): Year identifier in filenames (e.g., '2017').
        suffix_key (str): Metric suffix key in filenames (e.g., 'chm').
    Returns:
        dict: Mapping from tile base name to full file path.
    """
    suffix = f"_{year_tag}_{suffix_key}.tif"
    index = {}
    for file in os.listdir(folder):
        if not file.endswith(suffix) or "hillshade" in file.lower():
            continue
        # Slice off only the trailing suffix. str.replace would also delete
        # any earlier occurrence of the same text inside the name and so
        # produce a wrong tile key.
        base = file[:-len(suffix)]
        index[base] = os.path.join(folder, file)
    return index


def parse_coords_from_filename(tile_name):
    """
    Read the two leading integer coordinates out of an 'X_Y' style tile name.

    Only the first two underscore-separated fields are used; any further
    fields are ignored.
    Args:
        tile_name (str): Tile identifier string.
    Returns:
        tuple: (x_min, y_min) as integers.
    Raises:
        ValueError: If the name has fewer than two underscore-separated
            fields, or if either of the first two fields is not an integer.
    """
    pieces = tile_name.split("_")
    if len(pieces) >= 2:
        x_text, y_text = pieces[0], pieces[1]
        return int(x_text), int(y_text)
    raise ValueError(f"Invalid tile name format: {tile_name}")


def compute_extent_from_tile(tile_name, tile_size, buffer):
    """
    Build the buffered bounding box of a tile from its name.

    The tile origin is taken from the tile name via
    parse_coords_from_filename; the box is grown by `buffer` on every side.
    Args:
        tile_name (str): Tile identifier string.
        tile_size (tuple): (width, height) in same units as coordinates.
        buffer (float): Buffer distance to extend the extent.
    Returns:
        list: [xmin, ymin, xmax, ymax]
    """
    origin_x, origin_y = parse_coords_from_filename(tile_name)
    lower_left = [origin_x - buffer, origin_y - buffer]
    upper_right = [
        origin_x + tile_size[0] + buffer,
        origin_y + tile_size[1] + buffer,
    ]
    return lower_left + upper_right


def warp_if_needed(src_path, extent, resolution, dest_path):
    """
    Run gdalwarp to resample a raster onto a given extent.

    The warp is always executed (overwriting dest_path), using nearest
    neighbour resampling, NaN as source and destination NoData, and a tiled,
    LZW-compressed GeoTIFF as output.
    Args:
        src_path (str): Source raster file path.
        extent (list): [xmin, ymin, xmax, ymax].
        resolution (tuple or None): (x_res, y_res) or None to keep source resolution.
        dest_path (str): Path to save warped raster.
    Returns:
        str: Path to the warped raster (dest_path).
    Raises:
        subprocess.CalledProcessError: If gdalwarp exits with a non-zero status.
    """
    bounds = [str(value) for value in extent]

    args = ["gdalwarp", "-overwrite", "-of", "GTiff", "-te"]
    args.extend(bounds)
    args.extend(["-r", "near"])
    args.extend(["-srcnodata", "nan"])
    args.extend(["-dstnodata", "nan"])
    args.extend(["-co", "TILED=YES"])
    args.extend(["-co", "COMPRESS=LZW"])

    if resolution:
        x_res = str(resolution[0])
        # The y resolution is passed as a positive number regardless of sign.
        y_res = str(abs(resolution[1]))
        args.extend(["-tr", x_res, y_res])

    args.extend([src_path, dest_path])
    subprocess.run(args, check=True)
    return dest_path


def compute_difference(folder1, tag1, folder2, tag2, output_dir, tile_size, buffer, resolution, suffix_key):
    """
    Compute a masked difference raster for every tile present in both folders.

    Both inputs are warped onto the same buffered tile extent, then gdal_calc
    computes (folder2 tile) - (folder1 tile). Differences outside [-100, 100]
    are replaced with the NoData value -9999. Each result is written to
    ``{output_dir}/{tile}_{suffix_key}_diff.tif``.

    Tiles found only in folder1 are reported and counted as skipped; tiles
    found only in folder2 are not visited. A failure on one tile is printed
    with its traceback, counted as skipped, and processing continues with the
    next tile. Intermediate files are removed whether or not the tile succeeds.
    Args:
        folder1 (str): Path to first timepoint metric folder.
        tag1 (str): Year tag for first dataset.
        folder2 (str): Path to second timepoint metric folder.
        tag2 (str): Year tag for second dataset.
        output_dir (str): Directory to store difference outputs (created if missing).
        tile_size (tuple): (width, height) of each tile.
        buffer (float): Buffer distance around each tile.
        resolution (tuple or None): Target resolution for warping.
        suffix_key (str): Key suffix for metric (e.g., 'chm').
    """
    os.makedirs(output_dir, exist_ok=True)
    idx1 = index_files(folder1, tag1, suffix_key)
    idx2 = index_files(folder2, tag2, suffix_key)

    processed, skipped = 0, 0
    for key in sorted(idx1):
        if key not in idx2:
            print(f"Skipping {key}: no match in second folder.")
            skipped += 1
            continue
        f1, f2 = idx1[key], idx2[key]
        out_file = os.path.join(output_dir, f"{key}_{suffix_key}_diff.tif")
        # Derive sibling names from the path stem so that a ".tif" occurring
        # elsewhere in the path (e.g. in a directory name) is never rewritten.
        stem = os.path.splitext(out_file)[0]
        temp1 = f"{stem}_tmp_{tag1}.tif"
        temp2 = f"{stem}_tmp_{tag2}.tif"
        raw_diff = f"{stem}_raw.tif"
        filt = f"{stem}_filtered.tif"
        try:
            ext = compute_extent_from_tile(key, tile_size, buffer)
            warp_if_needed(f1, ext, resolution, temp1)
            warp_if_needed(f2, ext, resolution, temp2)

            # A is the second dataset and B the first, so A-B is
            # "second minus first".
            subprocess.run([
                "gdal_calc",
                "-A", temp2,
                "-B", temp1,
                "--calc=A-B",
                "--outfile", raw_diff,
                "--type", "Float32",
                "--overwrite"
            ], check=True)

            # Mask implausible differences by sending them to NoData.
            subprocess.run([
                "gdal_calc",
                "-A", raw_diff,
                "--calc", "where((A>=-100)&(A<=100),A,-9999)",
                "--NoDataValue", "-9999",
                "--type", "Float32",
                "--co", "TILED=YES",
                "--co", "COMPRESS=LZW",
                "--overwrite",
                "--outfile", filt
            ], check=True)

            os.replace(filt, out_file)
            subprocess.run(["gdal_edit", "-a_nodata", "-9999", out_file], check=True)

            print(f"Processed {key} ({suffix_key}).")
            processed += 1
        except Exception as e:
            # Per-tile boundary: report the failure and keep the batch going.
            print(f"Error processing {key}: {e}")
            traceback.print_exc()
            skipped += 1
        finally:
            # Always clean up intermediates; otherwise a failed tile leaves
            # stray .tif files in output_dir that later steps would treat as
            # real outputs.
            for leftover in (raw_diff, filt, temp1, temp2):
                try:
                    os.remove(leftover)
                except FileNotFoundError:
                    pass
                except OSError as cleanup_error:
                    print(f"Could not remove temporary file {leftover}: {cleanup_error}")

    print(f"Done: {processed} processed, {skipped} skipped.")


def add_overviews(parent_dir, levels):
    """
    Add internal overviews to all TIFFs under a base directory.

    Walks parent_dir recursively and runs ``gdaladdo -r average`` on every
    file whose name ends with '.tif' (case-insensitive) and does not contain
    'vrt'. A failing file is reported together with gdaladdo's error output
    and the walk continues with the next file.
    Args:
        parent_dir (str): Root directory containing metric/Diff subfolders.
        levels (list[int]): Powers-of-two overview levels (e.g., [2,4,8,16,32]).
    """
    # Convert once up front; the level arguments are the same for every file.
    level_args = [str(level) for level in levels]
    for root, _, files in os.walk(parent_dir):
        for f in files:
            if not f.lower().endswith('.tif') or 'vrt' in f:
                continue
            path = os.path.join(root, f)
            cmd = ['gdaladdo', '-r', 'average', path] + level_args
            try:
                # text=True so captured stderr is a str; errors="replace"
                # keeps undecodable bytes from raising while reporting.
                subprocess.run(
                    cmd, check=True, capture_output=True,
                    text=True, errors="replace",
                )
                print(f"Overviews: {f}")
            except subprocess.CalledProcessError as e:
                detail = (e.stderr or "").strip()
                print(f"Failed overview for {f}: {detail}")


def build_vrt(parent_dir, metric_folder, prefix, tag, nodata='-9999'):
    """
    Build a VRT mosaic from all TIFFs in a subfolder.

    Writes ``{prefix}_{tag}_filelist.txt`` (one forward-slash path per line)
    and ``{prefix}_{tag}.vrt`` into parent_dir using gdalbuildvrt. Nothing is
    written when the subfolder is missing or contains no '.tif' files. A
    gdalbuildvrt failure is reported with its error output rather than raised.
    Args:
        parent_dir (str): Root directory containing metric/Diff subfolders.
        metric_folder (str): Subfolder name (e.g., 'CHM_Diff').
        prefix (str): Prefix for VRT filename.
        tag (str): Tag (e.g., '2019') appended to VRT name.
        nodata (str): NoData value to hide.
    """
    sub = os.path.join(parent_dir, metric_folder)
    if not os.path.isdir(sub):
        # A metric whose folder was never produced should not abort the
        # remaining mosaics.
        print(f"VRT skipped for {metric_folder}: folder not found.")
        return
    # Match the extension case-insensitively, as add_overviews does.
    files = sorted(
        os.path.join(sub, f) for f in os.listdir(sub)
        if f.lower().endswith('.tif')
    )
    if not files:
        return
    filelist = os.path.join(parent_dir, f"{prefix}_{tag}_filelist.txt")
    with open(filelist, 'w') as fh:
        for p in files:
            # Normalise Windows separators to forward slashes in the list file.
            fh.write(p.replace('\\', '/') + '\n')
    vrt = os.path.join(parent_dir, f"{prefix}_{tag}.vrt")
    cmd = [
        'gdalbuildvrt',
        '-srcnodata', nodata,
        '-vrtnodata', nodata,
        '-hidenodata',
        '-input_file_list', filelist,
        vrt
    ]
    try:
        # text=True so the captured stderr prints as readable text instead of
        # a bytes repr; errors="replace" avoids decode failures while reporting.
        subprocess.run(
            cmd, check=True, capture_output=True,
            text=True, errors="replace",
        )
        print(f"VRT created: {vrt}")
    except subprocess.CalledProcessError as e:
        print(f"VRT error for {metric_folder}: {(e.stderr or '').strip()}")


def run(
    folder1_base,
    folder2_base,
    tag1,
    tag2,
    output_base,
    tile_size=(1000, 1000),
    buffer=20,
    resolution=None,
    overview_levels=(2, 4, 8, 16, 32),
    metrics=None
):
    """
    Execute the full pipeline end-to-end without requiring CLI args.

    For each metric folder name, differences are computed between
    ``folder1_base/<metric>`` and ``folder2_base/<metric>`` and written to
    ``output_base/<metric with '_tiles' replaced by '_Diff'>``. Overviews are
    then added to every TIFF under output_base, and one VRT per metric is
    built in output_base, tagged with tag2.
    Args:
        folder1_base (str): Root folder holding the first timepoint's metric subfolders.
        folder2_base (str): Root folder holding the second timepoint's metric subfolders.
        tag1 (str): Year tag for first dataset.
        tag2 (str): Year tag for second dataset.
        output_base (str): Root folder for difference outputs and VRTs.
        tile_size (tuple): (width, height) of each tile.
        buffer (float): Buffer distance around each tile.
        resolution (tuple or None): Target resolution for warping.
        overview_levels (sequence of int): Overview levels passed to add_overviews.
        metrics (list[str] or None): Metric subfolder names; None selects the
            built-in list.
    """
    # Function-scope import: the module's import block does not provide `re`.
    import re

    if metrics is None:
        metrics = [
            'CHM_normalized_tiles','Canopy_Cover_tiles','Density_Tiles',
            'DSM_tiles','DTM_tiles','Rumple_tiles'
        ]

    # Match '_tiles' regardless of case. A case-sensitive replace leaves
    # 'Density_Tiles' untouched, which yields the suffix key 'density_tiles'
    # and an output folder that never gets the '_Diff' name.
    tiles_marker = re.compile('_tiles', re.IGNORECASE)

    diff_names = []
    for metric in metrics:
        diff_name = tiles_marker.sub('_Diff', metric)
        suffix_key = tiles_marker.sub('', metric).lower()
        diff_names.append(diff_name)
        compute_difference(
            os.path.join(folder1_base, metric), tag1,
            os.path.join(folder2_base, metric), tag2,
            os.path.join(output_base, diff_name),
            tile_size, buffer, resolution, suffix_key
        )

    add_overviews(output_base, overview_levels)

    for diff_name in diff_names:
        build_vrt(output_base, diff_name, diff_name, tag2)


if __name__ == '__main__':
    # === Configuration ===
    # Edit the values below, then run this file (or this cell) to launch the
    # whole pipeline.

    # Year tags embedded in the input filenames.
    tag1 = "2017"
    tag2 = "2019"

    # Input roots (one per timepoint) and the root for all outputs.
    folder1_base = r"C:/Users/sreeja/Documents/AZ Dataset Analysis/extracted_2017_metrics"
    folder2_base = r"C:/Users/sreeja/Documents/AZ Dataset Analysis/extracted_2019_metrics"
    output_base = r"C:/Users/sreeja/Documents/AZ Dataset Analysis/Differences"

    # Tiling and resampling parameters.
    tile_size = (1000, 1000)
    buffer = 20
    resolution = None  # e.g. (1.0, 1.0) or None for native
    overview_levels = [2,4,8,16,32]

    # Metric subfolders to process under each input root.
    metrics = [
        'CHM_normalized_tiles','Canopy_Cover_tiles','Density_Tiles',
        'DSM_tiles','DTM_tiles','Rumple_tiles'
    ]

    run(
        folder1_base=folder1_base,
        folder2_base=folder2_base,
        tag1=tag1,
        tag2=tag2,
        output_base=output_base,
        tile_size=tile_size,
        buffer=buffer,
        resolution=resolution,
        overview_levels=overview_levels,
        metrics=metrics,
    )
