In [2]:
import os
import rasterio
from rasterio.warp import transform_bounds
from shapely.geometry import box, Point
import json
from tqdm import tqdm

def get_tif_info(folder_path):
    """
    Build a catalogue of the ``.tif`` rasters found directly inside a folder.

    Args:
        folder_path: Directory to scan (not recursive; uses ``os.listdir``).

    Returns:
        dict keyed by file name. Each value holds:
            "file_path":    folder_path joined with the file name,
            "bounds":       dict with "left", "bottom", "right", "top"
                            taken from the dataset's ``bounds``,
            "center_point": (x, y) midpoint of those bounds,
            "metadata":     the dataset's ``meta`` mapping.
    """
    catalog = {}

    for name in tqdm(os.listdir(folder_path), desc='Reading the Info'):
        # Only entries whose name ends in ".tif" are considered.
        if not name.endswith(".tif"):
            continue

        full_path = os.path.join(folder_path, name)
        with rasterio.open(full_path) as dataset:
            extent = dataset.bounds  # (left, bottom, right, top)
            left, bottom = extent.left, extent.bottom
            right, top = extent.right, extent.top
            meta = dataset.meta  # Raster metadata

        catalog[name] = {
            "file_path": full_path,
            "bounds": {
                "left": left,
                "bottom": bottom,
                "right": right,
                "top": top,
            },
            "center_point": ((left + right) / 2, (bottom + top) / 2),
            "metadata": meta,
        }

    return catalog

def find_tifs_in_tile(tif_data, tile_bounds):
    """
    Return the names of the TIF files whose bounding box intersects a tile.

    Args:
        tif_data: Mapping of file name -> info dict with a "bounds" entry
            holding "left", "bottom", "right" and "top".
        tile_bounds: (min_x, min_y, max_x, max_y) of the query tile.

    Returns:
        List of file names, in the iteration order of ``tif_data``.
    """
    query = box(*tile_bounds)

    def _footprint(info):
        # Rectangle covering one raster, built from its stored bounds.
        extent = info["bounds"]
        return box(extent["left"], extent["bottom"], extent["right"], extent["top"])

    return [
        name
        for name, info in tif_data.items()
        if _footprint(info).intersects(query)
    ]




In [3]:
# Example usage: index every .tif found directly inside folder_path.
# tif_data maps file name -> path, bounds, centre point and raster metadata.
folder_path = "/home1/choroid/SMATousi/High_Res_Data/2023/DaviessHarrison/DaviessHarrisonCountyHUC/"  # Change to your folder path
tif_data = get_tif_info(folder_path)


Reading the Info: 100%|██████████████████████████████████████████████████████████████████████| 118/118 [00:00<00:00, 237.21it/s]


In [5]:
import os
import rasterio
import numpy as np
from rasterio.warp import reproject, Resampling, calculate_default_transform
from rasterio.windows import Window
from shapely.geometry import box
from tqdm import tqdm

# Target CRS used by reproject_to_utm15: EPSG:32615 is WGS 84 / UTM zone 15N.
TARGET_CRS = "EPSG:32615"

def reproject_to_utm15(input_path, output_path, target_crs=None, resampling=None):
    """
    Reprojects a given raster file to UTM Zone 15 and writes the result to disk.

    Args:
        input_path: Path of the raster to read.
        output_path: Path of the raster to create. The parent directory must
            already exist.
        target_crs: Destination CRS. Defaults to the module-level TARGET_CRS,
            which keeps the original behaviour.
        resampling: A ``rasterio.warp.Resampling`` member. Defaults to
            ``Resampling.nearest``, which keeps the original behaviour.
            Callers reprojecting continuous data (e.g. elevation) may prefer
            an interpolating kernel such as ``Resampling.bilinear``.

    Returns:
        None. The reprojected raster is written to ``output_path`` with the
        source profile, updated for the new CRS, transform and size.
    """
    # Defaults are resolved at call time so the signature does not depend on
    # module-level names being defined before this function.
    if target_crs is None:
        target_crs = TARGET_CRS
    if resampling is None:
        resampling = Resampling.nearest

    with rasterio.open(input_path) as src:
        # Grid (transform + pixel dimensions) of the output in the target CRS.
        transform, width, height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds
        )

        profile = src.profile
        profile.update({
            "crs": target_crs,
            "transform": transform,
            "width": width,
            "height": height
        })

        with rasterio.open(output_path, "w", **profile) as dst:
            # rasterio band indexes are 1-based.
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=target_crs,
                    resampling=resampling
                )


def find_tifs_containing_tile(tif_data, tile_bounds):
    """
    List the TIF files whose bounding box intersects a square tile.

    The test is ``intersects``, so rasters that fully contain the tile and
    rasters that only overlap it are both returned.

    Args:
        tif_data: Mapping of file name -> info dict with a "bounds" entry
            holding "left", "bottom", "right" and "top".
        tile_bounds: (min_x, min_y, max_x, max_y) of the tile.

    Returns:
        List of matching file names, in the iteration order of ``tif_data``.
    """
    target = box(*tile_bounds)
    matches = []

    for name in tif_data:
        extent = tif_data[name]["bounds"]
        footprint = box(*(extent[side] for side in ("left", "bottom", "right", "top")))

        if not footprint.intersects(target):
            continue
        matches.append(name)

    return matches

def save_tile(raster, window, output_path):
    """
    Crops a window from an open raster and saves it as a GeoTIFF.

    Args:
        raster: Open rasterio dataset to read from.
        window: ``rasterio.windows.Window`` to crop. It may extend past the
            raster edges (``crop_image`` centres windows on a pixel without
            clamping them).
        output_path: Path of the GeoTIFF to create.

    Returns:
        None. The written file has ``window.height`` x ``window.width``
        pixels, all bands of ``raster``, the dtype of its first band, and the
        raster's CRS and nodata value.
    """
    fits_inside = (
        window.col_off >= 0
        and window.row_off >= 0
        and window.col_off + window.width <= raster.width
        and window.row_off + window.height <= raster.height
    )
    # A window that sticks out of the raster is read in boundless mode so the
    # array always has the window's full shape and matches both the declared
    # height/width and the window transform below. Padding uses rasterio's
    # default fill (per its docs: the dataset nodata value, otherwise 0).
    # Windows fully inside the raster keep the plain read path.
    tile = raster.read(window=window, boundless=not fits_inside)
    transform = raster.window_transform(window)
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=window.height,
        width=window.width,
        count=raster.count,
        dtype=raster.dtypes[0],
        crs=raster.crs,
        transform=transform,
        nodata=raster.nodata,  # keep the source's nodata marker on the tile
    ) as dst:
        dst.write(tile)

def crop_image(src, x, y, crop_size):
    """
    Build a square rasterio window centered at pixel (x, y).

    Args:
        src: Not used by this function; kept in the signature for callers.
        x: Column index of the window centre.
        y: Row index of the window centre.
        crop_size: Side length of the window in pixels.

    Returns:
        ``Window`` whose offsets are the centre minus ``crop_size // 2``.
        The offsets are not clamped to any raster extent.
    """
    half = crop_size // 2
    col_off = x - half
    row_off = y - half
    return Window(col_off, row_off, crop_size, crop_size)

def process_positive_files_with_overlap(ground_truth_path, 
                                        rgb_paths, 
                                        stream_order_path, 
                                        output_dir, 
                                        crop_size=128, 
                                        overlap_rate=0.5,
                                        tile_number=0):
    """
    Processes positive regions with overlap checking.

    For each positive ground-truth pixel that is not too close to an already
    accepted one, a ``crop_size`` x ``crop_size`` tile centred on that pixel
    is written from the stream raster, the ground truth and every RGB raster.

    Args:
        ground_truth_path: Path of the ground-truth raster (band 1 is used).
        rgb_paths: Iterable of RGB raster paths.
        stream_order_path: Path of the raster whose tiles go to the "dem" folder.
        output_dir: Root output folder; "ground_truth", "dem" and "rgb_images"
            sub-folders are created inside it.
        crop_size: Tile side length in pixels.
        overlap_rate: A candidate is skipped when both its x and y distance to
            a previously accepted centre are below ``crop_size * overlap_rate``.
        tile_number: Index used for the first tile written.

    Returns:
        The next unused tile number (``tile_number`` plus tiles written).

    Note:
        All rasters are cropped with the ground truth's pixel coordinates, so
        this assumes they share the same pixel grid -- TODO confirm.
    """
    # Local import so this block does not depend on the notebook's import cell.
    from contextlib import ExitStack

    os.makedirs(output_dir, exist_ok=True)

    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "dem")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    # ExitStack closes every dataset opened below, including the RGB ones,
    # even when an exception interrupts the loop.
    with ExitStack() as stack:
        gt_src = stack.enter_context(rasterio.open(ground_truth_path))
        stream_src = stack.enter_context(rasterio.open(stream_order_path))

        gt_data = gt_src.read(1)
        # If the band contains negative values, flip the sign before the
        # "> 0" positive test.
        if gt_data.min() < 0:
            gt_data = -gt_data
        # np.where on a 2-D array returns (rows, cols), i.e. (y, x).
        y_indices, x_indices = np.where(gt_data > 0)

        rgb_srcs = [stack.enter_context(rasterio.open(path)) for path in rgb_paths]
        cropped_regions = []
        overlap_th = crop_size * overlap_rate
        all_indices = zip(x_indices, y_indices)

        # zip() has no length, so tell tqdm how many candidates there are.
        for x, y in tqdm(all_indices, total=len(x_indices)):
            # Check for overlap with existing cropped regions
            overlap = any(
                abs(prev_x - x) < overlap_th and abs(prev_y - y) < overlap_th
                for prev_x, prev_y in cropped_regions
            )
            if overlap:
                continue  # Skip cropping this region

            cropped_regions.append((x, y))
            window = crop_image(stream_src, x, y, crop_size)
            save_tile(stream_src, window, os.path.join(stream_dir, f'dem_tile_{tile_number}.tif'))

            window = crop_image(gt_src, x, y, crop_size)
            save_tile(gt_src, window, os.path.join(gt_dir, f'ground_truth_tile_{tile_number}.tif'))

            for i, rgb_src in enumerate(rgb_srcs):
                window = crop_image(rgb_src, x, y, crop_size)
                save_tile(rgb_src, window, os.path.join(rgb_dir, f'rgb_{i}_tile_{tile_number}.tif'))

            tile_number += 1

    return tile_number

def process_negative_files(ground_truth_path, 
                           rgb_paths, 
                           stream_order_path, 
                           output_dir, 
                           crop_size=128, 
                           overlap_rate=0.5, 
                           buffer_size=50,
                           tile_number=0):
    """
    Processes negative regions with overlap checking.

    Around every positive ground-truth pixel, candidate centres offset by
    ``range(-buffer_size, buffer_size + 1, crop_size)`` in x and y are tried.
    A candidate is kept when its centre lies inside the raster, its window
    origin is at least ``overlap_rate * crop_size`` (Euclidean) away from every
    previously kept origin, and its window holds no positive pixel.

    Args:
        ground_truth_path: Path of the ground-truth raster (band 1 is used).
        rgb_paths: Accepted for signature parity; not read by this function.
        stream_order_path: Opened alongside the ground truth but not read.
        output_dir: Root output folder; "ground_truth", "dem" and "rgb_images"
            sub-folders are created inside it.
        crop_size: Tile side length in pixels; also the step of the offset range.
        overlap_rate: Fraction of ``crop_size`` used as the minimum spacing.
        buffer_size: Half-extent of the offset range. With the defaults
            (50 and 128) the range yields the single offset -50.
        tile_number: Index used for the first tile written.

    Returns:
        The next unused tile number (``tile_number`` plus tiles written).

    NOTE(review): only ground-truth tiles are written here. The "dem" and
    "rgb_images" folders are created but left empty -- confirm whether the
    matching DEM/RGB negative tiles are meant to be saved as well.
    """
    os.makedirs(output_dir, exist_ok=True)

    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "dem")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    with rasterio.open(ground_truth_path) as gt_src, rasterio.open(stream_order_path) as stream_src:
        gt_data = gt_src.read(1)
        # If the band contains negative values, flip the sign before the
        # "> 0" positive test.
        if gt_data.min() < 0:
            gt_data = -gt_data
        positive_points = np.argwhere(gt_data > 0)
        cropped_regions = []
        min_spacing = overlap_rate * crop_size  # loop-invariant threshold

        # np.argwhere yields (row, col) pairs, i.e. (y, x). Unpacking in that
        # order keeps x compared against the width and used as the column.
        for py, px in tqdm(positive_points):
            for dx in range(-buffer_size, buffer_size + 1, crop_size):
                for dy in range(-buffer_size, buffer_size + 1, crop_size):
                    x, y = px + dx, py + dy
                    if not (0 <= x < gt_src.width and 0 <= y < gt_src.height):
                        continue  

                    window = crop_image(gt_src, x, y, crop_size)
                    too_close = any(
                        np.sqrt((prev_x - window.col_off)**2 + (prev_y - window.row_off)**2) < min_spacing
                        for prev_x, prev_y in cropped_regions
                    )
                    if too_close:
                        continue  

                    # Reject windows that contain any positive pixel.
                    cropped_gt = gt_src.read(1, window=window)
                    if np.any(cropped_gt > 0):
                        continue  

                    cropped_regions.append((window.col_off, window.row_off))
                    save_tile(gt_src, window, os.path.join(gt_dir, f'negative_gt_tile_{tile_number}.tif'))

                    tile_number += 1 

    return tile_number


# Reprojecting to UTM

In [11]:
# Reproject every .tif in source_data_path and write it to dest_data_path
# under the same file name prefixed with "UTM_".
source_data_path = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/MO_Downloaded_HUCs/HUC_071100060307-done/data/merged/'
dest_data_path = '/home1/choroid/SMATousi/High_Res_Data/final_raw_data/HUC_071100060307/NAIP/'
list_of_tiles = os.listdir(source_data_path)

# The writer does not create missing folders, so make sure the destination
# exists before the first file is written.
os.makedirs(dest_data_path, exist_ok=True)

for file in list_of_tiles:
    if file.endswith('.tif'):

        source_file_path = os.path.join(source_data_path,file)
        dest_file_path = os.path.join(dest_data_path,f"UTM_{file}")
        print(f"Doing {file} -- ")
        reproject_to_utm15(source_file_path,dest_file_path)

Doing tile_20__merged.tif -- 
Doing tile_14__merged.tif -- 
Doing tile_16__merged.tif -- 
Doing tile_18__merged.tif -- 
Doing dem_tile__merged.tif -- 
Doing tile_10__merged.tif -- 
Doing tile_12__merged.tif -- 
