Crop the original .tif aerial imagery tiles (12500 × 12500 pixels) into n × n patches (e.g., 1024 × 1024). Patches that overlap the right or bottom edge are zero-padded to the full n × n size.

In [3]:
import os
import rasterio
from rasterio.windows import Window
import numpy as np
from tqdm import tqdm

def save_padded_tile(src, x, y, tile_size, out_path):
    """Write one square tile of ``src`` to ``out_path``, zero-padded at the raster edge.

    The window whose upper-left pixel is column ``x`` / row ``y`` is read from
    ``src``. Where the window would run past the right or bottom edge it is
    clipped, and the missing area stays zero, so every tile written is exactly
    ``tile_size`` x ``tile_size`` pixels.

    The tile is written to a temporary sibling file first and only renamed to
    ``out_path`` once the write has completed. The tiling loop skips tiles
    whose file already exists, so a half-written file left behind by an
    interrupted run must never appear under the final name.

    Args:
        src: Open rasterio dataset to read from.
        x: Column offset of the tile's upper-left pixel.
        y: Row offset of the tile's upper-left pixel.
        tile_size: Edge length of the output tile, in pixels.
        out_path: Destination path of the tile.

    Raises:
        Any exception raised while reading or writing is re-raised after the
        temporary file has been removed.
    """
    # Clip the read window to the raster bounds.
    width = min(tile_size, src.width - x)
    height = min(tile_size, src.height - y)

    window = Window(x, y, width, height)
    data = src.read(window=window)

    # Full-size zero canvas; the data that was read goes into the upper-left corner.
    padded = np.zeros((src.count, tile_size, tile_size), dtype=src.dtypes[0])
    padded[:, :height, :width] = data

    profile = src.profile.copy()
    profile.update({
        "height": tile_size,
        "width": tile_size,
        # window_transform already returns the affine transform anchored at
        # the tile's upper-left corner; no need to rebuild it field by field.
        "transform": src.window_transform(window),
    })

    # The output driver comes from the copied profile, so the temporary
    # suffix does not affect the file format.
    tmp_path = f"{out_path}.part"
    try:
        with rasterio.open(tmp_path, "w", **profile) as dst:
            dst.write(padded)
        os.replace(tmp_path, out_path)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt): never leave a
        # partial file behind, then let the caller see the original error.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

def tile_geotiff_for_prediction(tif_path, out_img_dir, tile_size=1024):
    """Cut the raster at ``tif_path`` into ``tile_size`` x ``tile_size`` tiles.

    Tiles are written to ``out_img_dir`` (created if missing) and named
    ``i_<image name>_tile_<x>_<y>.tif``, where ``x`` / ``y`` are the pixel
    offsets of the tile's upper-left corner. A tile whose file already exists
    is left untouched, so the function can be re-run to continue earlier work.
    """
    os.makedirs(out_img_dir, exist_ok=True)
    stem, _ext = os.path.splitext(os.path.basename(tif_path))

    with rasterio.open(tif_path) as src:
        # Row-major walk over the upper-left corners of all tiles.
        corners = (
            (col, row)
            for row in range(0, src.height, tile_size)
            for col in range(0, src.width, tile_size)
        )
        for col, row in corners:
            target = os.path.join(out_img_dir, f"i_{stem}_tile_{col}_{row}.tif")
            if not os.path.exists(target):
                save_padded_tile(src, col, row, tile_size, target)

def main():
    """Tile the three explicitly listed rasters into 1024 x 1024 patches."""
    patch_size = 1024
    destination = "/shared/data/climateplus2025/CapeTown_Image_2023_3samples_poster_1024size_Nov20/original_cropped_images"

    # Only these three TIFF files are tiled; nothing else in the folder is touched.
    selected_rasters = [
        "/shared/data/climateplus2025/CapeTown_Image_2023_3samples_poster/2023_RGB_8cm_W24A_17.tif",
        "/shared/data/climateplus2025/CapeTown_Image_2023_3samples_poster/2023_RGB_8cm_W25C_16.tif",
        "/shared/data/climateplus2025/CapeTown_Image_2023_3samples_poster/2023_RGB_8cm_W57B_8.tif",
    ]

    os.makedirs(destination, exist_ok=True)

    progress = tqdm(selected_rasters, desc="Tiling only 3 specified images (1024x1024)")
    for raster_path in progress:
        tile_geotiff_for_prediction(raster_path, destination, patch_size)

    print("Finished tiling only the selected 3 images.")

# Entry point: run the tiling job when this cell/script is executed directly.
if __name__ == "__main__":
    main()


Tiling only 3 specified images (1024x1024): 100%|██████████| 3/3 [00:42<00:00, 14.25s/it]

Finished tiling only the selected 3 images.





It is also possible to crop every .tif image found under a folder (subfolders included), as in the next cell.

In [None]:
## Image tiling for prediction (n x n) with padding
## Skips damaged TIFFs and resumes incomplete work
## For the list of corrupted TIFFs, see 'corrupted_tifs.txt'

import os
import rasterio
from rasterio.windows import Window
import numpy as np
from tqdm import tqdm

# Log file for unreadable TIFFs and failed tiles. The path is relative, so it
# resolves against the current working directory; it is opened in append mode,
# so entries accumulate across runs.
CORRUPT_LOG = "corrupted_tifs.txt"

def save_padded_tile(src, x, y, tile_size, out_path):
    """Write one square tile of ``src`` to ``out_path``, zero-padded at the raster edge.

    The window whose upper-left pixel is column ``x`` / row ``y`` is read from
    ``src``. Where the window would run past the right or bottom edge it is
    clipped, and the missing area stays zero, so every tile written is exactly
    ``tile_size`` x ``tile_size`` pixels.

    The tile is written to a temporary sibling file first and only renamed to
    ``out_path`` once the write has completed. The resume logic of the tiling
    loop skips tiles whose file already exists, so a half-written file left
    behind by an interrupted run must never appear under the final name.

    Args:
        src: Open rasterio dataset to read from.
        x: Column offset of the tile's upper-left pixel.
        y: Row offset of the tile's upper-left pixel.
        tile_size: Edge length of the output tile, in pixels.
        out_path: Destination path of the tile.

    Raises:
        Any exception raised while reading or writing is re-raised after the
        temporary file has been removed.
    """
    # Clip the read window to the raster bounds.
    width = min(tile_size, src.width - x)
    height = min(tile_size, src.height - y)

    window = Window(x, y, width, height)
    data = src.read(window=window)

    # Full-size zero canvas; the data that was read goes into the upper-left corner.
    padded = np.zeros((src.count, tile_size, tile_size), dtype=src.dtypes[0])
    padded[:, :height, :width] = data

    profile = src.profile.copy()
    profile.update({
        "height": tile_size,
        "width": tile_size,
        # window_transform already returns the affine transform anchored at
        # the tile's upper-left corner; no need to rebuild it field by field.
        "transform": src.window_transform(window),
    })

    # The output driver comes from the copied profile, so the temporary
    # suffix does not affect the file format.
    tmp_path = f"{out_path}.part"
    try:
        with rasterio.open(tmp_path, "w", **profile) as dst:
            dst.write(padded)
        os.replace(tmp_path, out_path)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt): never leave a
        # partial file behind, then let the caller see the original error.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

def tile_geotiff_for_prediction(tif_path, out_img_dir, tile_size=320):
    """Cut the raster at ``tif_path`` into ``tile_size`` x ``tile_size`` tiles.

    Tiles are written to ``out_img_dir`` (created if missing) and named
    ``i_<image name>_tile_<x>_<y>.tif``, where ``x`` / ``y`` are the pixel
    offsets of the tile's upper-left corner.

    Failure handling:
        * If the raster cannot be opened, the path and error are appended to
          ``CORRUPT_LOG`` and the function returns without tiling.
        * If a single tile fails, any partially written file is removed, the
          error is appended to ``CORRUPT_LOG`` and tiling continues with the
          next tile.

    Resume: a tile whose file already exists is skipped. A failed tile's
    partial file is removed for that reason; otherwise the next run would
    mistake the leftover for a finished tile.

    Args:
        tif_path: Path of the source raster.
        out_img_dir: Directory that receives the tiles.
        tile_size: Edge length of each tile, in pixels.
    """
    os.makedirs(out_img_dir, exist_ok=True)
    img_name = os.path.splitext(os.path.basename(tif_path))[0]

    # Try opening; log and skip the whole file if it cannot be read.
    try:
        src = rasterio.open(tif_path)
    except Exception as e:
        with open(CORRUPT_LOG, "a") as f:
            f.write(f"{tif_path} | ERROR: {str(e)}\n")
        return

    with src:
        for y in range(0, src.height, tile_size):
            for x in range(0, src.width, tile_size):
                tile_name = f"i_{img_name}_tile_{x}_{y}.tif"
                out_path = os.path.join(out_img_dir, tile_name)

                # Resume logic: skip if the tile already exists.
                if os.path.exists(out_path):
                    continue

                try:
                    save_padded_tile(src, x, y, tile_size, out_path)
                except Exception as e:
                    message = str(e)
                    # Drop whatever was written before the failure so the
                    # resume check does not treat it as a completed tile.
                    try:
                        if os.path.exists(out_path):
                            os.remove(out_path)
                    except OSError as cleanup_err:
                        message += f" (partial tile could not be removed: {cleanup_err})"

                    with open(CORRUPT_LOG, "a") as f:
                        f.write(f"{out_path} | TILE_ERROR: {message}\n")
                    continue

def main():
    """Tile every TIFF found under ``tif_dir`` (recursively) into 1024 x 1024 patches.

    Files ending in ``.tif`` or ``.tiff`` (case-insensitive) are collected and
    processed in sorted order, so progress is reproducible between runs.
    Unreadable files and failed tiles are logged to ``CORRUPT_LOG`` by
    ``tile_geotiff_for_prediction``.
    """
    tif_dir = "/data/data/capetown_bc_2025/Data/CapeTown_Image_2022_original"
    out_img_dir = "/data/data/capetown_bc_2025/Data/CapeTown_Image_2022_cropped_1024"
    tile_size = 1024

    # os.walk yields entries in filesystem order; sort for a stable sequence.
    tif_files = sorted(
        os.path.join(root, name)
        for root, _, files in os.walk(tif_dir)
        for name in files
        if name.lower().endswith((".tif", ".tiff"))
    )

    print("Number of TIFF files:", len(tif_files))

    # Tile names are built from the file's base name only. Two inputs sharing
    # a base name (in different subfolders, or a.tif vs a.tiff) map to the
    # same tile paths, and the resume check would silently skip the second.
    paths_by_stem = {}
    for path in tif_files:
        stem = os.path.splitext(os.path.basename(path))[0]
        paths_by_stem.setdefault(stem, []).append(path)
    for stem, paths in paths_by_stem.items():
        if len(paths) > 1:
            print(f"WARNING: {len(paths)} inputs share the name '{stem}'; "
                  f"only the first will be tiled: {paths}")

    for tif_path in tqdm(tif_files, desc="Tiling images for prediction (nxn)"):
        tile_geotiff_for_prediction(tif_path, out_img_dir, tile_size)

    print("Finished tiling. Corrupted files logged to:", CORRUPT_LOG)

# Entry point: run the folder-level tiling job when this cell/script is executed directly.
if __name__ == "__main__":
    main()
