In [1]:
from datetime import datetime
start_time = datetime.now()
print(f"Start time: {start_time}")

Start time: 2024-05-16 12:54:53.036834


# The folder

In [2]:
THEFOLDER = "C:\\Users\\smdur\\OneDrive\\Desktop\\GlobalPCL17"

# Tile western Conus PCL

In [3]:
import os
from itertools import product
import rasterio as rio
from rasterio import windows

in_path = "C:\\Users\\smdur\\OneDrive\\Desktop\\PCLCONUS\\Input\\PCL\\"
input_filename = 'pcl_west_wgs.tif'

out_path = f"{THEFOLDER}\\PCLTILES\\"
output_filename = 'pcltile_{}-{}.tif'

widthtile = 5000
heighttile = 5000

def get_tiles(ds, width=widthtile, height=heighttile):
    nols, nrows = ds.meta['width'], ds.meta['height']
    offsets = product(range(0, nols, width), range(0, nrows, height))
    big_window = windows.Window(col_off=0, row_off=0, width=nols, height=nrows)
    for col_off, row_off in offsets:
        window = windows.Window(col_off=col_off, row_off=row_off, width=width, height=height).intersection(big_window)
        transform = windows.transform(window, ds.transform)
        yield window, transform

os.makedirs(out_path, exist_ok=True)

tile_numbers = []

with rio.open(os.path.join(in_path, input_filename)) as inds:
    tile_width, tile_height = widthtile, heighttile
    nodata = inds.nodata
    meta = inds.meta.copy()
    for window, transform in get_tiles(inds):
        data = inds.read(window=window)
        if nodata is not None and not (data == nodata).all():
            meta['transform'] = transform
            meta['width'], meta['height'] = window.width, window.height
            tile_number = f"{int(window.col_off)}-{int(window.row_off)}"
            tile_numbers.append(tile_number)
            outpath = os.path.join(out_path, output_filename.format(int(window.col_off), int(window.row_off)))
            with rio.open(outpath, 'w', **meta) as outds:
                outds.write(data)

# Print or store the tile numbers
TILENUMBER = tile_numbers

del in_path, input_filename, tile_numbers
del out_path, output_filename, widthtile, heighttile, tile_width, tile_height
del meta, nodata, window, inds, get_tiles

# Downlaod training data and create training samples

In [4]:
import os
import glob
import subprocess
from concurrent.futures import ThreadPoolExecutor
from osgeo import gdal
import rioxarray
import planetary_computer
from pystac_client import Client
import osmnx as ox
import rasterio
from rasterio.features import rasterize
from rasterio.windows import from_bounds, Window
import numpy as np
import scipy.ndimage
from shapely.geometry import box, Point
from geopandas import GeoDataFrame
import matplotlib.pyplot as plt
from rasterio.plot import show

TILENUMBER = ['75000-35000', '75000-40000', '75000-45000']
CHIP_SIZE = 256  

def delete_non_resampled_files(resampled_files, tif_dir):
    for file in os.listdir(tif_dir):
        if file not in resampled_files and file.endswith('.tif'):
            file_path = os.path.join(tif_dir, file)
            try:
                os.remove(file_path)
                print(f"Deleted: {file_path}")
            except Exception as e:
                print(f"Failed to delete {file_path}: {e}")

def process_dem(tif_path, tif_dir, tile_number):
    tif_data = rioxarray.open_rasterio(tif_path)
    bbox_of_interest = tif_data.rio.bounds()
    catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
    search = catalog.search(collections=["cop-dem-glo-30"], bbox=bbox_of_interest)
    items = list(search.get_items())
    
    def process_item(item, idx):
        signed_asset = planetary_computer.sign(item.assets["data"])
        data = rioxarray.open_rasterio(signed_asset.href).squeeze().drop("band")
        data.rio.write_crs("EPSG:4326", inplace=True)
        output_tif_path = os.path.join(tif_dir, f"output_dataDEM_{idx}.tif")
        data.rio.to_raster(output_tif_path)
    
    with ThreadPoolExecutor(max_workers=4) as executor:
        for i, item in enumerate(items):
            executor.submit(process_item, item, i)

    output_tif = os.path.join(tif_dir, f"outputtile_DEM_{tile_number}.tif")
    merge_command = [
        "python", "C:\\Users\\smdur\\anaconda3\\envs\\globalpcl\\Scripts\\gdal_merge.py",
        "--config", "CHECK_DISK_FREE_SPACE", "FALSE",
        "-o", output_tif,
        "-n", "-9999", "-a_nodata", "-9999"] + glob.glob(os.path.join(tif_dir, "output_dataDEM_*.tif"))

    process_hag = subprocess.run(merge_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if process_hag.returncode != 0:
        print(f"Error in merging DEM: {process_hag.stderr}")
        return None

    src_ds = gdal.Open(output_tif, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataDEM_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None

    os.remove(output_tif)
    for tif in glob.glob(os.path.join(tif_dir, "output_dataDEM_*.tif")):
        try:
            os.remove(tif)
        except Exception as e:
            print(f"Failed to delete {tif}: {e}")

    return output_resampled_path

    del tif_data, bbox_of_interest, catalog, search, items
    del output_tif, merge_command, process_hag
    del src_ds, target_ds, driver, output_resampled_path


def process_lidar(tif_path, tif_dir, tile_number):
    lidar_dir = r"C:\Users\smdur\OneDrive\Desktop\GlobalData\LIDAR2"
    lidar_tifs = glob.glob(os.path.join(lidar_dir, "*.tif"))

    # Get the bounding box of the input tif_path
    with rasterio.open(tif_path) as src:
        bbox = src.bounds
        input_geom = box(bbox.left, bbox.bottom, bbox.right, bbox.top)

    # Find overlapping LIDAR tiles
    overlapping_tifs = []
    for tif in lidar_tifs:
        with rasterio.open(tif) as src:
            lidar_bbox = src.bounds
            lidar_geom = box(lidar_bbox.left, lidar_bbox.bottom, lidar_bbox.right, lidar_bbox.top)
            if input_geom.intersects(lidar_geom):
                overlapping_tifs.append(tif)

    if not overlapping_tifs:
        print(f"No overlapping LIDAR tiles found for {tile_number}")
        return None

    output_tif = os.path.join(tif_dir, f"outputtile_lidar_{tile_number}.tif")
    merge_command = [
        "python", "C:\\Users\\smdur\\anaconda3\\envs\\globalpcl\\Scripts\\gdal_merge.py",
        "--config", "CHECK_DISK_FREE_SPACE", "FALSE",
        "-o", output_tif,
        "-n", "255", "-a_nodata", "255"] + overlapping_tifs

    process_hag = subprocess.run(merge_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if process_hag.returncode != 0:
        print(f"Error in merging LIDAR: {process_hag.stderr}")
        return None

    src_ds = gdal.Open(output_tif, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataLIDAR_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None

    os.remove(output_tif)
    return output_resampled_path

    del lidar_tifs, bbox, input_geom, overlapping_tifs, lidar_bbox, lidar_geom
    del output_tif, merge_command, process_hag
    del src_ds, target_ds, driver, output_resampled_path


def process_rivers(tif_path, tif_dir, tile_number):
    dem_data = rioxarray.open_rasterio(tif_path)
    bbox = dem_data.rio.bounds()
    custom_filter = '["waterway"~"river"]'
    graph = ox.graph_from_bbox(bbox[3], bbox[1], bbox[2], bbox[0], custom_filter=custom_filter, simplify=True, retain_all=True, truncate_by_edge=True)
    gdf = ox.graph_to_gdfs(graph, nodes=False)

    with rasterio.open(tif_path) as src:
        window = from_bounds(*src.bounds, src.transform)
        transform = rasterio.windows.transform(window, src.transform)
        raster = np.zeros((int(window.height), int(window.width)), dtype=np.uint8)
        shapes = ((geom, 1) for geom in gdf['geometry'])
        burned = rasterize(shapes, out=raster, fill=0, transform=transform, all_touched=True)
        distance_grid = scipy.ndimage.distance_transform_edt(burned == 0)
        decay_grid = np.exp(-0.07 * distance_grid)

        clipped_meta = src.meta.copy()
        clipped_meta.update({"driver": "GTiff", "height": int(window.height), "width": int(window.width), "transform": transform, "dtype": rasterio.float32, "count": 1, "compress": 'lzw'})
        output_path = os.path.join(tif_dir, f'exponential_decay_CO_river_{tile_number}.tif')
        with rasterio.open(output_path, 'w', **clipped_meta) as dst:
            dst.write(decay_grid.astype(np.float32), 1)

    src_ds = gdal.Open(output_path, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataRivers_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None
    os.remove(output_path)

    return output_resampled_path

    del dem_data, bbox, custom_filter, graph, gdf
    del window, transform, raster, shapes, burned, distance_grid, decay_grid
    del clipped_meta, output_path
    del src_ds, target_ds, driver, output_resampled_path


def process_roads(tif_path, tif_dir, tile_number):
    extent_data = rioxarray.open_rasterio(tif_path)
    bbox = extent_data.rio.bounds()
    graph = ox.graph_from_bbox(bbox[3], bbox[1], bbox[2], bbox[0], network_type='drive', simplify=True)
    gdf = ox.graph_to_gdfs(graph, nodes=False)

    with rasterio.open(tif_path) as src:
        window = from_bounds(*src.bounds, src.transform)
        transform = rasterio.windows.transform(window, src.transform)
        raster = np.zeros((int(window.height), int(window.width)), dtype=np.uint8)
        shapes = ((geom, 1) for geom in gdf['geometry'])
        burned = rasterize(shapes, out=raster, fill=0, transform=transform, all_touched=True)
        distance_grid = scipy.ndimage.distance_transform_edt(burned == 0)
        decay_grid = np.exp(-0.07 * distance_grid)

        clipped_meta = src.meta.copy()
        clipped_meta.update({"driver": "GTiff", "height": int(window.height), "width": int(window.width), "transform": transform, "dtype": rasterio.float32, "count": 1, "compress": 'lzw'})
        output_path = os.path.join(tif_dir, f'exponential_decay_CO_roads_{tile_number}.tif')
        with rasterio.open(output_path, 'w', **clipped_meta) as dst:
            dst.write(decay_grid.astype(np.float32), 1)

    src_ds = gdal.Open(output_path, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataRoads_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None
    os.remove(output_path)

    return output_resampled_path

    del extent_data, bbox, graph, gdf
    del window, transform, raster, shapes, burned, distance_grid, decay_grid
    del clipped_meta, output_path
    del src_ds, target_ds, driver, output_resampled_path


def generate_random_points(geometry, num_points):
    points = []
    min_x, min_y, max_x, max_y = geometry.bounds
    while len(points) < num_points:
        random_point = Point(np.random.uniform(min_x, max_x), np.random.uniform(min_y, max_y))
        if random_point.within(geometry):
            points.append(random_point)
    return points

    del bounds, crs, img, rect, buffered_rect, random_points
    del gdf_points, gdf_points_wgs84, lat_long


def process_chips(tif_path, tif_dir, lat_long, chip_size=128):
    resampled_lidar_path = os.path.join(tif_dir, f"output_resampled_dataLIDAR_{tile_number}.tif")
    resampled_dem_path = os.path.join(tif_dir, f"output_resampled_dataDEM_{tile_number}.tif")
    resampled_rivers_path = os.path.join(tif_dir, f"output_resampled_dataRivers_{tile_number}.tif")
    resampled_roads_path = os.path.join(tif_dir, f"output_resampled_dataRoads_{tile_number}.tif")

    training_chips_dir = os.path.join(THEFOLDER, "trainingchips")
    os.makedirs(training_chips_dir, exist_ok=True)

    for i, (lat, lon) in enumerate(lat_long):
        try:
            paths = [resampled_lidar_path, resampled_dem_path, resampled_rivers_path, resampled_roads_path, tif_path]
            labels = ['lidar', 'dem', 'rivers', 'roads', 'pcllabels']
            
            for path, label in zip(paths, labels):
                with rasterio.open(path) as src:
                    col, row = src.index(lon, lat)
                    window = Window(col - chip_size // 2, row - chip_size // 2, chip_size, chip_size)
                    chip_data = src.read(1, window=window)
                    
                    out_meta = src.meta.copy()
                    out_meta.update({
                        "driver": "GTiff",
                        "height": chip_size,
                        "width": chip_size,
                        "transform": src.window_transform(window)
                    })

                    chip_output_dir = os.path.join(training_chips_dir, label)
                    os.makedirs(chip_output_dir, exist_ok=True)
                    
                    chip_output_path = os.path.join(chip_output_dir, f"{label.upper()}_Chip_{tile_number}_{i}.tif")

                    if chip_data.shape == (chip_size, chip_size) and np.any(chip_data != src.nodata):
                        with rasterio.open(chip_output_path, "w", **out_meta) as dest:
                            dest.write(chip_data, 1)
                    else:
                        print(f"Skipping {label} chip {i} because it is not properly shaped or is filled with nodata.")
        except Exception as e:
            print(f"An error occurred while processing chip {i}: {e}")
            
    del resampled_lidar_path, resampled_dem_path, resampled_rivers_path, resampled_roads_path
    del training_chips_dir, paths, labels, col, row, window, chip_data, out_meta, chip_output_dir, chip_output_path


if __name__ == "__main__":
    for tile_number in TILENUMBER:
        tif_path = f"{THEFOLDER}\\PCLTILES\\pcltile_{tile_number}.tif"
        tif_dir = f"{THEFOLDER}\\TIFFOUTPUT\\{tile_number}"
        os.makedirs(tif_dir, exist_ok=True)

        resampled_files = [
            process_dem(tif_path, tif_dir, tile_number),
            process_lidar(tif_path, tif_dir, tile_number),
            process_rivers(tif_path, tif_dir, tile_number),
            process_roads(tif_path, tif_dir, tile_number)
        ]

        delete_non_resampled_files([os.path.basename(f) for f in resampled_files], tif_dir)

        # Generate random points within the tile bounds
        with rasterio.open(tif_path) as src:
            bounds = src.bounds
            crs = src.crs
            img = src.read(1)

        rect = box(bounds.left, bounds.bottom, bounds.right, bounds.top)
        buffered_rect = rect.buffer(-0.15)
        random_points = generate_random_points(buffered_rect, 100)
        gdf_points = GeoDataFrame(geometry=random_points, crs=crs).to_crs(crs)
        gdf_points_wgs84 = gdf_points.to_crs(epsg=4326)
        lat_long = gdf_points_wgs84.geometry.apply(lambda geom: (geom.y, geom.x)).tolist()

        process_chips(tif_path, tif_dir, lat_long)

        print(f"Processing for tile {tile_number} completed.")

    print("All processing completed.")




Processing for tile 75000-35000 completed.
Processing for tile 75000-40000 completed.
Processing for tile 75000-45000 completed.
All processing completed.


# LoadChips

In [5]:
# import os
# import rasterio
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# training_chips_dir = os.path.join(THEFOLDER, "trainingchips")

# # Paths to datasets
# featurepath1 = os.path.join(training_chips_dir, "lidar")
# featurepath2 = os.path.join(training_chips_dir, "dem")
# featurepath3 = os.path.join(training_chips_dir, "roads")
# featurepath4 = os.path.join(training_chips_dir, "rivers")
# labelspath = os.path.join(training_chips_dir, "pcllabels")

# # Function to load GeoTIFF images as numpy arrays
# def load_geotiff(path):
#     with rasterio.open(path) as src:
#         return src.read(1)

# # Function to load and print progress
# def load_images(path):
#     files = [f for f in os.listdir(path) if f.endswith('.tif')]
#     images = []
#     for i, f in enumerate(files):
#         images.append(load_geotiff(os.path.join(path, f)))
#         if (i + 1) % 5000 == 0:
#             print(f"Loaded {i + 1} images from {path}")
#     return images

# # Load datasets
# hag_images = load_images(featurepath1)
# dem_images = load_images(featurepath2)
# roads_images = load_images(featurepath3)
# rivers_images = load_images(featurepath4)
# label_images = load_images(labelspath)

# # Convert lists to numpy arrays
# hag_images = np.array(hag_images).astype('float32')
# dem_images = np.array(dem_images).astype('float32')
# roads_images = np.array(roads_images).astype('float32')
# rivers_images = np.array(rivers_images).astype('float32')
# label_images = np.array(label_images).astype('float32')

# # Normalize images independently
# hag_max = hag_images.max()
# dem_max = dem_images.max()
# roads_max = roads_images.max()
# rivers_max = rivers_images.max()

# hag_images /= hag_max
# dem_images /= dem_max
# roads_images /= roads_max
# rivers_images /= rivers_max

# #del labelspath, load_geotiff, load_images

# print(f"HAG max value: {hag_max}")
# print(f"DEM max value: {dem_max}")
# print(f"Roads max value: {roads_max}")
# print(f"Rivers max value: {rivers_max}")


In [6]:
# import multiprocessing
# max_workers = mult#iprocessing.cpu_count()
# max_workers

In [7]:
# import os
# import rasterio
# import numpy as np
# from concurrent.futures import ThreadPoolExecutor
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# training_chips_dir = os.path.join(THEFOLDER, "trainingchips")

# # Paths to datasets
# featurepath1 = os.path.join(training_chips_dir, "lidar")
# featurepath2 = os.path.join(training_chips_dir, "dem")
# featurepath3 = os.path.join(training_chips_dir, "roads")
# featurepath4 = os.path.join(training_chips_dir, "rivers")
# labelspath = os.path.join(training_chips_dir, "pcllabels")

# # Function to load GeoTIFF images as numpy arrays
# def load_geotiff(path):
#     with rasterio.open(path) as src:
#         return src.read(1)

# # Function to load images in parallel with progress printing
# def load_images(path):
#     files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.tif')]
#     images = []

#     def load_and_count(file):
#         image = load_geotiff(file)
#         if (load_and_count.counter + 1) % 5000 == 0:
#             print(f"Loaded {load_and_count.counter + 1} images from {path}")
#         load_and_count.counter += 1
#         return image

#     load_and_count.counter = 0

#     with ThreadPoolExecutor(max_workers=16) as executor:
#         images = list(executor.map(load_and_count, files))

#     return images

# # Load datasets
# hag_images = load_images(featurepath1)
# dem_images = load_images(featurepath2)
# roads_images = load_images(featurepath3)
# rivers_images = load_images(featurepath4)
# label_images = load_images(labelspath)

# # Convert lists to numpy arrays
# hag_images = np.array(hag_images).astype('float32')
# dem_images = np.array(dem_images).astype('float32')
# roads_images = np.array(roads_images).astype('float32')
# rivers_images = np.array(rivers_images).astype('float32')
# label_images = np.array(label_images).astype('float32')

# # Normalize images independently
# hag_max = hag_images.max()
# dem_max = dem_images.max()
# roads_max = roads_images.max()
# rivers_max = rivers_images.max()

# hag_images /= hag_max
# dem_images /= dem_max
# roads_images /= roads_max
# rivers_images /= rivers_max

# del labelspath, load_geotiff, load_images

# print(f"HAG max value: {hag_max}")
# print(f"DEM max value: {dem_max}")
# print(f"Roads max value: {roads_max}")
# print(f"Rivers max value: {rivers_max}")




import os
import sys

# Set the environment variable before importing gdal
os.environ['USE_PATH_FOR_GDAL_PYTHON'] = 'YES'
os.add_dll_directory(os.path.join(os.getenv('CONDA_PREFIX'), 'Library', 'bin'))

import rasterio
import numpy as np
from concurrent.futures import ThreadPoolExecutor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

training_chips_dir = os.path.join(THEFOLDER, "trainingchips")

# Paths to datasets
featurepath1 = os.path.join(training_chips_dir, "lidar")
featurepath2 = os.path.join(training_chips_dir, "dem")
featurepath3 = os.path.join(training_chips_dir, "roads")
featurepath4 = os.path.join(training_chips_dir, "rivers")
labelspath = os.path.join(training_chips_dir, "pcllabels")

# Function to load GeoTIFF images as numpy arrays
def load_geotiff(path):
    with rasterio.open(path) as src:
        return src.read(1)

# Function to load images in parallel with progress printing
def load_images(path):
    files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.tif')]
    num_files = len(files)
    images = []

    def load_and_count(file):
        image = load_geotiff(file)
        if (load_and_count.counter + 1) % 5000 == 0:
            print(f"Loaded {load_and_count.counter + 1} images from {path}")
        load_and_count.counter += 1
        return image

    load_and_count.counter = 0

    max_workers = min(32, os.cpu_count() + 4)  # Default value if not specified

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        images = list(executor.map(load_and_count, files))

    return np.array(images).astype('float32')

# Load datasets
hag_images = load_images(featurepath1)
dem_images = load_images(featurepath2)
roads_images = load_images(featurepath3)
rivers_images = load_images(featurepath4)
label_images = load_images(labelspath)

# Normalize images independently
hag_max = hag_images.max()
dem_max = dem_images.max()
roads_max = roads_images.max()
rivers_max = rivers_images.max()

hag_images /= hag_max
dem_images /= dem_max
roads_images /= roads_max
rivers_images /= rivers_max


print(f"HAG max value: {hag_max}")
print(f"DEM max value: {dem_max}")
print(f"Roads max value: {roads_max}")
print(f"Rivers max value: {rivers_max}")


HAG max value: 33.0
DEM max value: 4183.34423828125
Roads max value: 1.0
Rivers max value: 1.0


In [8]:
print(f"HAG max value: {hag_max}")
print(f"DEM max value: {dem_max}")
print(f"Roads max value: {roads_max}")
print(f"Rivers max value: {rivers_max}")

HAG max value: 33.0
DEM max value: 4183.34423828125
Roads max value: 1.0
Rivers max value: 1.0


In [9]:
hag_images

array([[[0.6060606 , 0.6060606 , 0.57575756, ..., 0.21212122,
         0.33333334, 0.36363637],
        [0.54545456, 0.54545456, 0.54545456, ..., 0.27272728,
         0.33333334, 0.3939394 ],
        [0.5151515 , 0.4848485 , 0.4848485 , ..., 0.36363637,
         0.36363637, 0.3939394 ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ],
        [0.        , 0.        , 0.        , ..., 0.        ,
         0.        , 0.        ]],

       [[0.24242425, 0.3939394 , 0.5151515 , ..., 0.42424244,
         0.42424244, 0.45454547],
        [0.45454547, 0.45454547, 0.5151515 , ..., 0.42424244,
         0.42424244, 0.45454547],
        [0.42424244, 0.4848485 , 0.5151515 , ..., 0.3939394 ,
         0.42424244, 0.42424244],
        ...,
        [0.12121212, 0.09090909, 0.06060606, ..., 0.24242425,
         0.18181819, 0.15151516],
        [0.1

# Train the model

In [11]:
# import gc
# gc.collect()

# # Stack features along the last dimension
# feature_images = np.stack((hag_images, dem_images, roads_images, rivers_images), axis=-1)

# Free up memory by deleting the original arrays
# del hag_images
# del dem_images
# del roads_images
# del rivers_images

# If you want to ensure that the memory is freed immediately
import gc
gc.collect()

CHIP_SIZE=256

# Normalize labels if they range from 0 to 100
label_images /= 100

# Reshape labels for CNN input
label_images = np.expand_dims(label_images, axis=-1)

# Define the CNN model
model = Sequential([
    #Conv2D(16, (3, 3), activation='relu', input_shape=(128, 128, 4)),
    Conv2D(16, (3, 3), activation='relu', input_shape=(CHIP_SIZE, CHIP_SIZE, 4)),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dropout(0.5),
    #Dense(128 * 128, activation='sigmoid'),
    #tf.keras.layers.Reshape((128, 128, 1))
    Dense(CHIP_SIZE * CHIP_SIZE, activation='sigmoid'),
    tf.keras.layers.Reshape((CHIP_SIZE, CHIP_SIZE, 1))
])

# # Define custom weights for each feature
# weights = np.array([1.0, 0.8, 0.5, 0.3])  
# sample_weights = np.dot(feature_images, weights)

lr = 0.0006
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Compile and train the model with sample weights
model.compile(optimizer=optimizer, loss='mse')
model.fit(feature_images, label_images, batch_size=8, epochs=3, validation_split=0.3)#, sample_weight=sample_weights)


ResourceExhaustedError: {{function_node __wrapped__StatelessRandomUniformV2_device_/job:localhost/replica:0/task:0/device:CPU:0}} OOM when allocating tensor with shape[230400,65536] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu [Op:StatelessRandomUniformV2] name: 

In [None]:
#model.save("C:\\Users\\smdur\\OneDrive\\Desktop\\SavedModels\\cnn_model_lr0001_5_15_24")
#model.save("C:\\Users\\smdur\\OneDrive\\Desktop\\SavedModels\\cnn_model_lr0005_5_15_24")


In [None]:
#model = tf.keras.models.load_model(f"C:\\Users\\smdur\\OneDrive\\Desktop\\SavedModels\\cnn_model_lr0005_5_15_24")
#model = tf.keras.models.load_model(f"C:\\Users\\smdur\\OneDrive\\Desktop\\SavedModels\\cnn_model_lr0005_5_15_24")

# INFERENCE

In [None]:
# MAKE DATA
import os
import glob
import subprocess
from concurrent.futures import ThreadPoolExecutor
from osgeo import gdal
import rioxarray
import planetary_computer
from pystac_client import Client
import osmnx as ox
import rasterio
from rasterio.features import rasterize
from rasterio.windows import from_bounds, Window
import numpy as np
import scipy.ndimage
from shapely.geometry import box
from geopandas import GeoDataFrame
import matplotlib.pyplot as plt
from rasterio.plot import show
import rasterio as rio
from itertools import product
from rasterio import windows

TILENUMBER = ['70000-40000']
CHIP_SIZE = 256  

def delete_non_resampled_files(resampled_files, tif_dir):
    for file in os.listdir(tif_dir):
        if file not in resampled_files and file.endswith('.tif'):
            file_path = os.path.join(tif_dir, file)
            try:
                os.remove(file_path)
                print(f"Deleted: {file_path}")
            except Exception as e:
                print(f"Failed to delete {file_path}: {e}")

def process_dem(tif_path, tif_dir, tile_number):
    tif_data = rioxarray.open_rasterio(tif_path)
    bbox_of_interest = tif_data.rio.bounds()
    catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
    search = catalog.search(collections=["cop-dem-glo-30"], bbox=bbox_of_interest)
    items = list(search.get_items())
    
    def process_item(item, idx):
        signed_asset = planetary_computer.sign(item.assets["data"])
        data = rioxarray.open_rasterio(signed_asset.href).squeeze().drop("band")
        data.rio.write_crs("EPSG:4326", inplace=True)
        output_tif_path = os.path.join(tif_dir, f"output_dataDEM_{idx}.tif")
        data.rio.to_raster(output_tif_path)
    
    with ThreadPoolExecutor(max_workers=4) as executor:
        for i, item in enumerate(items):
            executor.submit(process_item, item, i)

    output_tif = os.path.join(tif_dir, f"outputtile_DEM_{tile_number}.tif")
    merge_command = [
        "python", "C:\\Users\\smdur\\anaconda3\\envs\\globalpcl\\Scripts\\gdal_merge.py",
        "--config", "CHECK_DISK_FREE_SPACE", "FALSE",
        "-o", output_tif,
        "-n", "-9999", "-a_nodata", "-9999"] + glob.glob(os.path.join(tif_dir, "output_dataDEM_*.tif"))

    process_hag = subprocess.run(merge_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if process_hag.returncode != 0:
        print(f"Error in merging DEM: {process_hag.stderr}")
        return None

    src_ds = gdal.Open(output_tif, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataDEM_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None

    os.remove(output_tif)
    for tif in glob.glob(os.path.join(tif_dir, "output_dataDEM_*.tif")):
        try:
            os.remove(tif)
        except Exception as e:
            print(f"Failed to delete {tif}: {e}")

    return output_resampled_path
    del tif_data, bbox_of_interest, catalog, search, items
    del output_tif, merge_command, process_hag
    del src_ds, target_ds, driver, output_resampled_path

def process_lidar(tif_path, tif_dir, tile_number):
    lidar_dir = r"C:\Users\smdur\OneDrive\Desktop\GlobalData\LIDAR2"
    lidar_tifs = glob.glob(os.path.join(lidar_dir, "*.tif"))

    # Get the bounding box of the input tif_path
    with rasterio.open(tif_path) as src:
        bbox = src.bounds
        input_geom = box(bbox.left, bbox.bottom, bbox.right, bbox.top)

    # Find overlapping LIDAR tiles
    overlapping_tifs = []
    for tif in lidar_tifs:
        with rasterio.open(tif) as src:
            lidar_bbox = src.bounds
            lidar_geom = box(lidar_bbox.left, lidar_bbox.bottom, lidar_bbox.right, lidar_bbox.top)
            if input_geom.intersects(lidar_geom):
                overlapping_tifs.append(tif)

    if not overlapping_tifs:
        print(f"No overlapping LIDAR tiles found for {tile_number}")
        return None

    output_tif = os.path.join(tif_dir, f"outputtile_lidar_{tile_number}.tif")
    merge_command = [
        "python", "C:\\Users\\smdur\\anaconda3\\envs\\globalpcl\\Scripts\\gdal_merge.py",
        "--config", "CHECK_DISK_FREE_SPACE", "FALSE",
        "-o", output_tif,
        "-n", "255", "-a_nodata", "255"] + overlapping_tifs

    process_hag = subprocess.run(merge_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    if process_hag.returncode != 0:
        print(f"Error in merging LIDAR: {process_hag.stderr}")
        return None

    src_ds = gdal.Open(output_tif, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataLIDAR_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None

    os.remove(output_tif)
    return output_resampled_path
    del lidar_tifs, bbox, input_geom, overlapping_tifs, lidar_bbox, lidar_geom
    del output_tif, merge_command, process_hag
    del src_ds, target_ds, driver, output_resampled_path

def process_rivers(tif_path, tif_dir, tile_number):
    dem_data = rioxarray.open_rasterio(tif_path)
    bbox = dem_data.rio.bounds()
    custom_filter = '["waterway"~"river"]'
    graph = ox.graph_from_bbox(bbox[3], bbox[1], bbox[2], bbox[0], custom_filter=custom_filter, simplify=True, retain_all=True, truncate_by_edge=True)
    gdf = ox.graph_to_gdfs(graph, nodes=False)

    with rasterio.open(tif_path) as src:
        window = from_bounds(*src.bounds, src.transform)
        transform = rasterio.windows.transform(window, src.transform)
        raster = np.zeros((int(window.height), int(window.width)), dtype=np.uint8)
        shapes = ((geom, 1) for geom in gdf['geometry'])
        burned = rasterize(shapes, out=raster, fill=0, transform=transform, all_touched=True)
        distance_grid = scipy.ndimage.distance_transform_edt(burned == 0)
        decay_grid = np.exp(-0.07 * distance_grid)

        clipped_meta = src.meta.copy()
        clipped_meta.update({"driver": "GTiff", "height": int(window.height), "width": int(window.width), "transform": transform, "dtype": rasterio.float32, "count": 1, "compress": 'lzw'})
        output_path = os.path.join(tif_dir, f'exponential_decay_CO_river_{tile_number}.tif')
        with rasterio.open(output_path, 'w', **clipped_meta) as dst:
            dst.write(decay_grid.astype(np.float32), 1)

    src_ds = gdal.Open(output_path, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataRivers_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None
    os.remove(output_path)

    return output_resampled_path
    del dem_data, bbox, custom_filter, graph, gdf
    del window, transform, raster, shapes, burned, distance_grid, decay_grid
    del clipped_meta, output_path
    del src_ds, target_ds, driver, output_resampled_path

def process_roads(tif_path, tif_dir, tile_number):
    extent_data = rioxarray.open_rasterio(tif_path)
    bbox = extent_data.rio.bounds()
    graph = ox.graph_from_bbox(bbox[3], bbox[1], bbox[2], bbox[0], network_type='drive', simplify=True)
    gdf = ox.graph_to_gdfs(graph, nodes=False)

    with rasterio.open(tif_path) as src:
        window = from_bounds(*src.bounds, src.transform)
        transform = rasterio.windows.transform(window, src.transform)
        raster = np.zeros((int(window.height), int(window.width)), dtype=np.uint8)
        shapes = ((geom, 1) for geom in gdf['geometry'])
        burned = rasterize(shapes, out=raster, fill=0, transform=transform, all_touched=True)
        distance_grid = scipy.ndimage.distance_transform_edt(burned == 0)
        decay_grid = np.exp(-0.07 * distance_grid)

        clipped_meta = src.meta.copy()
        clipped_meta.update({"driver": "GTiff", "height": int(window.height), "width": int(window.width), "transform": transform, "dtype": rasterio.float32, "count": 1, "compress": 'lzw'})
        output_path = os.path.join(tif_dir, f'exponential_decay_CO_roads_{tile_number}.tif')
        with rasterio.open(output_path, 'w', **clipped_meta) as dst:
            dst.write(decay_grid.astype(np.float32), 1)

    src_ds = gdal.Open(output_path, gdal.GA_ReadOnly)
    target_ds = gdal.Open(tif_path, gdal.GA_ReadOnly)
    driver = gdal.GetDriverByName('GTiff')
    output_resampled_path = os.path.join(tif_dir, f"output_resampled_dataRoads_{tile_number}.tif")
    out_ds = driver.Create(output_resampled_path, target_ds.RasterXSize, target_ds.RasterYSize, 1, src_ds.GetRasterBand(1).DataType)
    out_ds.SetGeoTransform(target_ds.GetGeoTransform())
    out_ds.SetProjection(target_ds.GetProjection())
    gdal.ReprojectImage(src_ds, out_ds, src_ds.GetProjection(), target_ds.GetProjection(), gdal.GRA_Bilinear)
    src_ds, target_ds, out_ds = None, None, None
    os.remove(output_path)

    return output_resampled_path
    del extent_data, bbox, graph, gdf
    del window, transform, raster, shapes, burned, distance_grid, decay_grid
    del clipped_meta, output_path
    del src_ds, target_ds, driver, output_resampled_path

def get_tiles(ds, width=CHIP_SIZE, height=CHIP_SIZE):
    nols, nrows = ds.meta['width'], ds.meta['height']
    offsets = product(range(0, nols, width), range(0, nrows, height))
    big_window = windows.Window(col_off=0, row_off=0, width=nols, height=nrows)
    for col_off, row_off in offsets:
        window = windows.Window(col_off=col_off, row_off=row_off, width=width, height=height).intersection(big_window)
        transform = windows.transform(window, ds.transform)
        yield window, transform

def process_file(label, input_filepath, output_folder):
    with rio.open(input_filepath) as inds:
        nodata = inds.nodata  # Get the NoData value from the dataset
        meta = inds.meta.copy()
        
        for window, transform in get_tiles(inds):
            if window.width == CHIP_SIZE and window.height == CHIP_SIZE:  # Check if the tile dimensions are as expected
                data = inds.read(window=window)
                if nodata is not None:
                    valid_data_mask = (data != nodata)
                else:
                    valid_data_mask = (data == data)
                
                if valid_data_mask.any():  # Check if there's any valid data within the tile
                    meta['transform'] = transform
                    meta['width'], meta['height'] = window.width, window.height
                    outpath = os.path.join(output_folder, output_filename.format(int(window.col_off), int(window.row_off)))
                    with rio.open(outpath, 'w', **meta) as outds:
                        outds.write(data)
    print(f"Processing for {label} completed.")

if __name__ == "__main__":
    for tile_number in TILENUMBER:
        tif_path = f"{THEFOLDER}\\PCLTILES\\pcltile_{tile_number}.tif"
        tif_dir = f"{THEFOLDER}\\INFERENCETILES2\\{tile_number}"
        os.makedirs(tif_dir, exist_ok=True)

        resampled_files = [
            process_dem(tif_path, tif_dir, tile_number),
            process_lidar(tif_path, tif_dir, tile_number),
            process_rivers(tif_path, tif_dir, tile_number),
            process_roads(tif_path, tif_dir, tile_number)
        ]

        delete_non_resampled_files([os.path.basename(f) for f in resampled_files], tif_dir)

        # Define input files as a dictionary
        input_files = {
            'lidar': f'output_resampled_dataLIDAR_{tile_number}.tif',
            'dem': f'output_resampled_dataDEM_{tile_number}.tif',
            'roads': f'output_resampled_dataRoads_{tile_number}.tif',
            'rivers': f'output_resampled_dataRivers_{tile_number}.tif'
        }
        output_filename = 'tile_{}-{}.tif'

        # Define the base output path
        out_base_path = f"{THEFOLDER}\\INFERENCETILES3"
        os.makedirs(out_base_path, exist_ok=True)

        # Process each file
        for label, filename in input_files.items():
            input_filepath = os.path.join(tif_dir, filename)
            output_folder = os.path.join(out_base_path, label)
            os.makedirs(output_folder, exist_ok=True)
            process_file(label, input_filepath, output_folder)

        print(f"Processing for tile {tile_number} completed.")

    print("All processing completed.")


In [None]:
import os
import re

# Define the directory path
#directory_path = 'C:\\Users\\smdur\\OneDrive\\Desktop\\PCLCONUS\\Input\\inferencetiles\\hag'

# Regular expression to extract the identifier part of the filename 'tile_{identifier}.tif'
pattern = re.compile(r'tile_(\d+-\d+)\.tif')

# List all files in the directory
files = os.listdir(output_folder)

# Use a set to avoid duplicate identifiers
identifiers = set()

# Extract identifiers from filenames
for file in files:
    match = pattern.search(file)
    if match:
        identifiers.add(match.group(1))

# Convert the set to a sorted list
identifier_list = sorted(list(identifiers))
print(len(identifier_list))
print("done")


In [None]:
# hag_max = 30.0
# dem_max = 4379.1279296875
# roads_max = 1.0
# rivers_max = 1.0

In [None]:
import os
import rasterio
import numpy as np
import tensorflow as tf

#tilename = '0-0'
# input_hag_path = f"C:\\Users\\smdur\\OneDrive\\Desktop\\GLOBALPCL\\CNNPCLDEMO\\inferencetiles\\hag\\tile_{tilename}.tif"
# input_dem_path = f"C:\\Users\\smdur\\OneDrive\\Desktop\\GLOBALPCL\\CNNPCLDEMO\\inferencetiles\\dem\\tile_{tilename}.tif"
# input_roads_path = f"C:\\Users\\smdur\\OneDrive\\Desktop\\GLOBALPCL\\CNNPCLDEMO\\inferencetiles\\roads\\tile_{tilename}.tif"
# input_rivers_path = f"C:\\Users\\smdur\\OneDrive\\Desktop\\GLOBALPCL\\CNNPCLDEMO\\inferencetiles\\rivers\\tile_{tilename}.tif"

def load_and_preprocess_image(hag_path, dem_path, roads_path, rivers_path):
    with rasterio.open(hag_path) as src:
        hag_image = src.read(1)
    with rasterio.open(dem_path) as src:
        dem_image = src.read(1)
    with rasterio.open(roads_path) as src:
        roads_image = src.read(1)
    with rasterio.open(rivers_path) as src:
        rivers_image = src.read(1)

    # Normalize and stack the images
    hag_image = np.array(hag_image).astype('float32') / hag_max
    dem_image = np.array(dem_image).astype('float32') / dem_max
    roads_image = np.array(roads_image).astype('float32') / roads_max
    rivers_image = np.array(rivers_image).astype('float32') / rivers_max

    # Stack images along the last dimension
    combined_image = np.stack([hag_image, dem_image, roads_image, rivers_image], axis=-1)

    # Add batch dimension
    combined_image = np.expand_dims(combined_image, axis=0)
    return combined_image



for i in range(len(identifier_list)):
    tilename = identifier_list[i]
    #print(tilename)
    #input_hag_path = f"{out_base_path}\\lidar\\tile_{tilename}.tif"
    #input_dem_path = f"{out_base_path}\\dem\\tile_{tilename}.tif"
    #input_roads_path = f"{out_base_path}\\roads\\tile_{tilename}.tif"
    #input_rivers_path = f"{out_base_path}\\tile_{tilename}.tif"

    input_hag_path = os.path.join(out_base_path, "lidar", f"tile_{tilename}.tif")
    input_dem_path = os.path.join(out_base_path, "dem", f"tile_{tilename}.tif")
    input_roads_path = os.path.join(out_base_path, "roads", f"tile_{tilename}.tif")
    input_rivers_path = os.path.join(out_base_path, "rivers", f"tile_{tilename}.tif")


    input_image = load_and_preprocess_image(input_hag_path, input_dem_path, input_roads_path, input_rivers_path)
    predicted_image = model.predict(input_image)
    predicted_image = np.squeeze(predicted_image)
    
    # Debug print to check if all outputs are the same
    #print("Unique values in predicted output:", np.unique(predicted_image))
    
    # Adjust the scaling factor based on how the labels were scaled during training
    predicted_image *= 100
    
    #output_image_path = f"{THEFOLDER}\\predictions\\predicted_tile_{tilename}.tif"
    predictions_folder = os.path.join(THEFOLDER, "predictions3")
    os.makedirs(predictions_folder, exist_ok=True)
    output_image_path = os.path.join(predictions_folder, f"predicted_tile_{tilename}.tif")

    
    with rasterio.open(input_dem_path) as src: 
        profile = src.profile
    
    with rasterio.open(output_image_path, 'w', **profile) as dst:
        dst.write(predicted_image.astype(rasterio.uint8), 1)

print("Done")


In [None]:
import os
import glob
import subprocess

# Define the base folder and output paths
#THEFOLDER = r"C:\Users\smdur\OneDrive\Desktop\GlobalPCL"
predictions_folder = os.path.join(THEFOLDER, "predictions3")
output_dir = os.path.join(THEFOLDER, "mergedoutput")

# Create necessary directories if they don't exist
os.makedirs(predictions_folder, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

output_base_name = "predMerged_"  # Base name for output files

# Get a list of TIFF files
tifs = glob.glob(os.path.join(predictions_folder, "*.tif"))

# Define chunk size for processing
chunk_size = 300

# Calculate the number of chunks needed
num_chunks = len(tifs) // chunk_size
if len(tifs) % chunk_size != 0:
    num_chunks += 1  # Add one more chunk for the remaining files

# Loop through the TIFF files in chunks
for chunk_id in range(num_chunks):
    start_idx = chunk_id * chunk_size
    end_idx = min((chunk_id + 1) * chunk_size, len(tifs))
    chunk_tifs = tifs[start_idx:end_idx]
    
    output_tif = os.path.join(output_dir, f"{output_base_name}{chunk_id + 1}.tif")

    merge_command_hag = [
        "python",
        "C:\\Users\\smdur\\anaconda3\\envs\\globalpcl\\Scripts\\gdal_merge.py",
        "--config", "CHECK_DISK_FREE_SPACE", "FALSE",
        "-o", output_tif,
        "-n", "-9999",
        "-a_nodata", "-9999",
    ] + chunk_tifs

    # Run the gdal_merge command for the current chunk
    process_hag = subprocess.run(merge_command_hag, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # Check if the command for the current chunk was successful
    if process_hag.returncode != 0:
        # An error occurred, print the error
        print(f"Error occurred while merging TIFF files for chunk {chunk_id + 1}:")
        print(process_hag.stderr)
    else:
        print(f"TIFF files merged successfully for chunk {chunk_id + 1}. Output: {output_tif}")

# Merge all chunks into a final output file
final_output_tif = os.path.join(THEFOLDER, "FINALOUTPUTTILES", f"predMerged_PCL_{TILENUMBER}.tif")
os.makedirs(os.path.dirname(final_output_tif), exist_ok=True)

chunk_tifs = glob.glob(os.path.join(output_dir, "*.tif"))

merge_command_final = [
    "python",
    "C:\\Users\\smdur\\anaconda3\\envs\\globalpcl\\Scripts\\gdal_merge.py",
    "--config", "CHECK_DISK_FREE_SPACE", "FALSE",
    "-o", final_output_tif,
    "-n", "-9999",
    "-a_nodata", "-9999",
] + chunk_tifs

# Run the gdal_merge command for the final merge
process_final = subprocess.run(merge_command_final, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Check if the command for the final merge was successful
if process_final.returncode != 0:
    # An error occurred, print the error
    print("Error occurred while merging final TIFF files:")
    print(process_final.stderr)
else:
    print("Final TIFF files merged successfully.")

# Clean up temporary chunk files
for tif in chunk_tifs:
    try:
        os.remove(tif)
        print(f"Deleted {tif}")
    except Exception as e:
        print(f"Failed to delete {tif}: {e}")

print("Done")

import shutil

if os.path.exists(predictions_folder):
    shutil.rmtree(predictions_folder)
os.makedirs(predictions_folder, exist_ok=True)