# Reprojection

In [1]:
import os
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

def reproject_raster_to_wgs(input_path, output_path, target_crs="EPSG:4326"):
    """
    Reprojects the input raster to the target CRS (default WGS84) and writes the output.
    
    Parameters:
      input_path (str): Path to the input raster (assumed to be in UTM or any CRS).
      output_path (str): Path where the reprojected raster will be saved.
      target_crs (str): Target coordinate reference system (default "EPSG:4326").
    """
    with rasterio.open(input_path) as src:
        # If the raster is already in the target CRS, simply copy it.
        if src.crs.to_string() == target_crs:
            print("Raster is already in the target CRS. Copying file.")
            with rasterio.open(output_path, 'w', **src.meta) as dst:
                for i in range(1, src.count + 1):
                    dst.write(src.read(i), i)
            return

        # Calculate the transform and dimensions for the new CRS.
        transform, width, height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds
        )
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': target_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # Reproject and write to the output file.
        with rasterio.open(output_path, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=target_crs,
                    resampling=Resampling.nearest
                )
    print(f"Reprojected raster saved to {output_path}")

In [None]:
input_raster = "/home1/choroid/SMATousi/High_Resolution_Tiles/Monroe.tif"  # Replace with your UTM raster file path
output_dir = "/home1/choroid/SMATousi/High_Resolution_Tiles/"              # Replace with your desired output directory
os.makedirs(output_dir, exist_ok=True)

output_raster = os.path.join(output_dir, "Monroe_WGS.tif")
reproject_raster_to_wgs(input_raster, output_raster)

In [3]:
import os
import numpy as np
import rasterio
import rasterio.windows
from tqdm import tqdm

def save_tile(raster, window, output_path):
    width = int(window.width)
    height = int(window.height)
    if width <= 0 or height <= 0:
        print(f"Skipping tile {output_path} because computed window size is {width}x{height}")
        return
    tile = raster.read(window=window)
    transform = raster.window_transform(window)
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=height,
        width=width,
        count=raster.count,
        dtype=raster.dtypes[0],
        crs=raster.crs,
        transform=transform,
    ) as dst:
        dst.write(tile)

def get_window_from_center_geo(center_x, center_y, tile_geo_size, transform):
    """
    Given a geospatial center (center_x, center_y) and a tile size in coordinate units,
    compute the bounds and return a window using the provided transform.
    """
    half_size = tile_geo_size / 2.0
    left = center_x - half_size
    right = center_x + half_size
    bottom = center_y - half_size
    top = center_y + half_size
    window = rasterio.windows.from_bounds(left, bottom, right, top, transform=transform)
    return window

def process_psoitive_files_with_overlap(ground_truth_path, rgb_paths, stream_order_path, output_dir, tile_geo_size, overlap_rate=0.5, tile_number=0):
    """
    Processes positive tiles using a geospatial tile extent (tile_geo_size). For each detected center
    in the ground truth (where pixel value > 0), the geospatial coordinate is computed and a window is
    derived for each source using its own transform. This ensures that the pixel dimensions of the resulting
    tile reflect the source’s resolution.
    """
    os.makedirs(output_dir, exist_ok=True)
    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "dem")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    with rasterio.open(ground_truth_path) as gt_src, rasterio.open(stream_order_path) as stream_src:
        gt_data = gt_src.read(1)
        if gt_data.min() < 0:
            gt_data = -gt_data
        # np.where returns (rows, cols). Here x is column and y is row.
        y_indices, x_indices = np.where(gt_data > 0)
        all_indices = zip(x_indices, y_indices)

        # Open all RGB sources (which may have different resolutions)
        rgb_srcs = [rasterio.open(path) for path in rgb_paths]

        # Track geospatial centers to avoid overlap (distance in coordinate units)
        cropped_centers = []
        overlap_threshold = tile_geo_size * overlap_rate

        for x, y in tqdm(all_indices):
            # Convert the ground truth pixel (x,y) to a geospatial coordinate.
            center_x, center_y = rasterio.transform.xy(gt_src.transform, y, x, offset='center')

            # Skip if this center is too close to an already processed center.
            if any(np.sqrt((prev_x - center_x)**2 + (prev_y - center_y)**2) < overlap_threshold 
                   for prev_x, prev_y in cropped_centers):
                continue
            cropped_centers.append((center_x, center_y))

            # For ground truth and DEM, compute the window from the geospatial center.
            gt_window = get_window_from_center_geo(center_x, center_y, tile_geo_size, gt_src.transform)
            stream_window = get_window_from_center_geo(center_x, center_y, tile_geo_size, stream_src.transform)
            save_tile(gt_src, gt_window, os.path.join(gt_dir, f'ground_truth_tile_{tile_number}.tif'))
            save_tile(stream_src, stream_window, os.path.join(stream_dir, f'dem_tile_{tile_number}.tif'))

            # For each RGB image, compute its corresponding window using its own transform.
            target_src = rgb_srcs[0].transform
            for i, rgb_src in enumerate(rgb_srcs):
                rgb_window = get_window_from_center_geo(center_x, center_y, tile_geo_size, target_src)
                if int(rgb_window.width) <= 0 or int(rgb_window.height) <= 0:
                    print(f"Skipping RGB tile for tile {tile_number} due to invalid window size.")
                    continue
                save_tile(rgb_src, rgb_window, os.path.join(rgb_dir, f'rgb_{i}_tile_{tile_number}.tif'))
            
            tile_number += 1
            break

        for src in rgb_srcs:
            src.close()
    
    return tile_number

def process_files_with_negative_check(ground_truth_path, rgb_paths, stream_order_path, output_dir, tile_geo_size, overlap_rate=0.5, buffer_size=50, tile_number=0):
    """
    Processes negative tiles. For each candidate location (determined by a buffer around positive points),
    the geospatial center is computed and used to derive windows for ground truth, DEM, and RGB images.
    Only tiles that do not contain any positive ground truth values are saved.
    """
    os.makedirs(output_dir, exist_ok=True)
    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "dem")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    with rasterio.open(ground_truth_path) as gt_src, rasterio.open(stream_order_path) as stream_src:
        gt_data = gt_src.read(1)
        if gt_data.min() < 0:
            gt_data = -gt_data
        positive_points = np.argwhere(gt_data > 0)
        cropped_centers = []

        for py, px in tqdm(positive_points):
            # Iterate around the positive point within the buffer.
            for dx in range(-buffer_size, buffer_size + 1, 1):
                for dy in range(-buffer_size, buffer_size + 1, 1):
                    x = px + dx
                    y = py + dy
                    if not (0 <= x < gt_src.width and 0 <= y < gt_src.height):
                        continue
                    center_x, center_y = rasterio.transform.xy(gt_src.transform, y, x, offset='center')
                    if any(np.sqrt((prev_x - center_x)**2 + (prev_y - center_y)**2) < overlap_rate * tile_geo_size 
                           for prev_x, prev_y in cropped_centers):
                        continue

                    # Compute window for ground truth.
                    gt_window = get_window_from_center_geo(center_x, center_y, tile_geo_size, gt_src.transform)
                    cropped_gt = gt_src.read(1, window=gt_window)
                    if np.any(cropped_gt > 0):
                        continue  # Skip if this region contains positive values.

                    cropped_centers.append((center_x, center_y))
                    save_tile(gt_src, gt_window, os.path.join(gt_dir, f'negative_ground_truth_tile_{tile_number}.tif'))
                    save_tile(stream_src, gt_window, os.path.join(stream_dir, f'dem_tile_{tile_number}.tif'))

                    # For each RGB image, compute its corresponding window.
                    for i, rgb_path in enumerate(rgb_paths):
                        with rasterio.open(rgb_path) as rgb_src:
                            rgb_window = get_window_from_center_geo(center_x, center_y, tile_geo_size, rgb_src.transform)
                            if int(rgb_window.width) <= 0 or int(rgb_window.height) <= 0:
                                print(f"Skipping RGB negative tile for tile {tile_number} due to invalid window size.")
                                continue
                            save_tile(rgb_src, rgb_window, os.path.join(rgb_dir, f'rgb_{i}_tile_{tile_number}.tif'))
                    
                    tile_number += 1

        print(f"Total regions saved: {len(cropped_centers)}")
    
    return tile_number


In [4]:
GT_path = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/MO_Downloaded_HUCs/HUC_071100060307-done/data/gt/rasterized_gt.tif'

data_path = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/MO_Downloaded_HUCs/HUC_071100060307-done/data/'
rgb_paths = [os.path.join(data_path,'merged/tile_10__merged.tif'), 
             os.path.join(data_path,'merged/tile_12__merged.tif'), 
             os.path.join(data_path,'merged/tile_14__merged.tif'), 
             os.path.join(data_path,'merged/tile_16__merged.tif'),
             os.path.join(data_path,'merged/tile_18__merged.tif'), 
             os.path.join(data_path,'merged/tile_20__merged.tif'),
             '/home1/choroid/SMATousi/High_Resolution_Tiles/Monroe.tif']

dem_path = os.path.join(data_path,'merged/dem_tile__merged.tif')

pos_output_dir = '/home1/choroid/SMATousi/High_Resolution_Tiles/Tiled_test/'

starting_pos_tile_number = 0

last_pos_tile_number = process_psoitive_files_with_overlap(GT_path, 
                                                           rgb_paths, 
                                                           dem_path, 
                                                           pos_output_dir, 
                                                           crop_size=128, 
                                                           overlap_rate=0.25,
                                                           tile_number=starting_pos_tile_number)

TypeError: process_psoitive_files_with_overlap() got an unexpected keyword argument 'crop_size'

In [None]:
calculate_default_transform

In [None]:
import pyproj

pyproj.datadir.get_data_dir()