In [1]:
import rasterio
from rasterio.windows import Window
import numpy as np
import cv2
import os
from tqdm import tqdm

In [2]:
def crop_and_save(image, x, y, size, save_path, file_name, is_binary=False, binary_threshold = 0.5):
    window = Window(x - size // 2, y - size // 2, size, size)
    cropped_image = image.read(window=window)
    if cropped_image.min() < 0:
        cropped_image = -cropped_image

    # Reshape and convert images
    if cropped_image.ndim == 3 and not is_binary:
        # For RGB or RGBA images
        if cropped_image.shape[0] == 3:  # RGB
            cropped_image = np.moveaxis(cropped_image, 0, -1)  # Rearrange bands to last dimension
        elif cropped_image.shape[0] == 4:  # RGBA
            # Convert RGBA to RGB by discarding the alpha channel
            cropped_image = np.moveaxis(cropped_image, 0, -1)[..., :3]
        else:
            print(cropped_image.shape[0])
            raise ValueError("Unexpected number of bands in image")
    else:
        # For single-band images (binary or grayscale)
        cropped_image = cropped_image.reshape(cropped_image.shape[1], cropped_image.shape[2])
        if is_binary:
            _, cropped_image = cv2.threshold(cropped_image, binary_threshold, 255, cv2.THRESH_BINARY)
        else:
            cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_GRAY2RGB)  # Convert grayscale to RGB

    cv2.imwrite(os.path.join(save_path, file_name), cropped_image)



In [3]:

def process_files(ground_truth_path, rgb_paths, stream_order_path, output_dir, crop_size=128):
    os.makedirs(output_dir, exist_ok=True)

    # Create subdirectories for different types of images
    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "stream_order")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    with rasterio.open(ground_truth_path) as gt_src, rasterio.open(stream_order_path) as stream_src:
        gt_data = gt_src.read(1)
        y_indices, x_indices = np.where(gt_data > 0)

        rgb_srcs = [rasterio.open(path) for path in rgb_paths]

        for x, y in zip(x_indices, y_indices):
            crop_and_save(stream_src, x, y, crop_size, stream_dir, f'KS1_stream_{x}_{y}.png', is_binary=True, binary_threshold=1)
            crop_and_save(gt_src, x, y, crop_size, gt_dir, f'KS1_ground_truth_{x}_{y}.png', is_binary=True, binary_threshold=0.5)
            for i, rgb_src in enumerate(rgb_srcs):
                crop_and_save(rgb_src, x, y, crop_size, rgb_dir, f'KS1_rgb_{i}_{x}_{y}.png')

    for src in rgb_srcs:
        src.close()

In [4]:
def save_tile(raster, window, output_path):
    tile = raster.read(window=window)
    transform = raster.window_transform(window)
    with rasterio.open(
        output_path,
        'w',
        driver='GTiff',
        height=window.height,
        width=window.width,
        count=raster.count,
        dtype=raster.dtypes[0],
        crs=raster.crs,
        transform=transform,
    ) as dst:
        dst.write(tile)

def crop_image(src, x, y, crop_size):
    window = rasterio.windows.Window(x - crop_size // 2, y - crop_size // 2, crop_size, crop_size)
    return window

def process_psoitive_files_with_overlap(ground_truth_path, 
                               rgb_paths, 
                               stream_order_path, 
                               output_dir, 
                               crop_size=128, 
                               overlap_rate=0.5,
                               tile_number = 0):
    
    os.makedirs(output_dir, exist_ok=True)

    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "dem")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    with rasterio.open(ground_truth_path) as gt_src, rasterio.open(stream_order_path) as stream_src:
        gt_data = gt_src.read(1)
        if gt_data.min() < 0:
            gt_data = -gt_data
        y_indices, x_indices = np.where(gt_data > 0)

        rgb_srcs = [rasterio.open(path) for path in rgb_paths]
        cropped_regions = []  # List to keep track of cropped regions
         # Initialize with the tile number
        
        overlap_th = crop_size * overlap_rate
        all_indices = zip(x_indices, y_indices)
        
        for x, y in tqdm(all_indices):
            # Check for overlap with existing cropped regions
            overlap = False
            for (prev_x, prev_y) in cropped_regions:
                if abs(prev_x - x) < overlap_th and abs(prev_y - y) < overlap_th:
                    overlap = True
                    break

            if overlap:
                continue  # Skip cropping this region due to overlap

            cropped_regions.append((x, y))
            # Crop and save as usual
            window = crop_image(stream_src, x, y, crop_size)
            save_tile(stream_src, window, os.path.join(stream_dir, f'dem_tile_{tile_number}.tif'))
            
            window = crop_image(gt_src, x, y, crop_size)
            save_tile(gt_src, window, os.path.join(gt_dir, f'ground_truth_tile_{tile_number}.tif'))

            for i, rgb_src in enumerate(rgb_srcs):
                window = crop_image(rgb_src, x, y, crop_size)
                save_tile(rgb_src, window, os.path.join(rgb_dir, f'rgb_{i}_tile_{tile_number}.tif'))

            tile_number += 1

        for src in rgb_srcs:
            src.close()
    
    return tile_number

def process_files_with_negative_check(ground_truth_path, 
                                      rgb_paths, 
                                      stream_order_path, 
                                      output_dir, 
                                      crop_size=128, 
                                      overlap_rate=0.5, 
                                      buffer_size=50,
                                      tile_number = 0):
    os.makedirs(output_dir, exist_ok=True)

    gt_dir = os.path.join(output_dir, "ground_truth")
    stream_dir = os.path.join(output_dir, "dem")
    rgb_dir = os.path.join(output_dir, "rgb_images")
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(stream_dir, exist_ok=True)
    os.makedirs(rgb_dir, exist_ok=True)

    with rasterio.open(ground_truth_path) as gt_src, rasterio.open(stream_order_path) as stream_src:
        gt_data = gt_src.read(1)
        if gt_data.min() < 0:
            gt_data = -gt_data
        positive_points = np.argwhere(gt_data > 0)
        cropped_regions = []
          # Initialize tile number counter

        for px, py in tqdm(positive_points):
            for dx in range(-buffer_size, buffer_size + 1, crop_size):
                for dy in range(-buffer_size, buffer_size + 1, crop_size):
                    x, y = px + dx, py + dy
                    if not (0 <= x < gt_src.width and 0 <= y < gt_src.height):
                        continue  # Ensure within bounds

                    window = crop_image(gt_src, x, y, crop_size)
                    if any(np.sqrt((prev_x - window.col_off)**2 + (prev_y - window.row_off)**2) < overlap_rate * crop_size for prev_x, prev_y in cropped_regions):
                        continue  # Check for overlap

                    cropped_gt = gt_src.read(1, window=window)
                    if np.any(cropped_gt > 0):
                        continue  # Ensure no ground truth lines are included

                    cropped_regions.append((window.col_off, window.row_off))
                    save_tile(gt_src, window, os.path.join(gt_dir, f'negative_ground_truth_tile_{tile_number}.tif'))
                    save_tile(stream_src, window, os.path.join(stream_dir, f'dem_tile_{tile_number}.tif'))

                    for i, rgb_path in enumerate(rgb_paths):
                        with rasterio.open(rgb_path) as rgb_src:
                            save_tile(rgb_src, window, os.path.join(rgb_dir, f'rgb_{i}_tile_{tile_number}.tif'))

                    tile_number += 1  # Increment tile number after each successful save

        print(f"Total regions saved: {len(cropped_regions)}")
    
    return tile_number

In [5]:
root_paths = ['/home/macula/SMATousi/Gullies/ground_truth/organized_data/HUCs/']

# root_paths = ['/home/macula/SMATousi/Gullies/ground_truth/organized_data/Google_Lines_Paper_Evaluation/downloaded_raw_1/merged/']

# pos_output_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/Google_Lines_Paper_Evaluation/downloaded_raw_1/pos'
# neg_output_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/Google_Lines_Paper_Evaluation/downloaded_raw_1/neg'

pos_output_dir = '/home1/pupil/SMATousi/Pos_Neg_224/pos/'
neg_output_dir = '/home1/pupil/SMATousi/Pos_Neg_224/neg/'

starting_pos_tile_number = 0
starting_neg_tile_number = 0

last_neg_tile_number = 0
last_pos_tile_number = 0

for root_path in root_paths:

    all_hucs = os.listdir(root_path)

    for huc_name in all_hucs:

        if huc_name.endswith("done"):

            print("Starting with HUC: ", huc_name)

            huc_path = os.path.join(root_path, huc_name)
            data_path = os.path.join(huc_path, "data")

            GT_path = os.path.join(data_path, "gt/rasterized_gt.tif")

            rgb_paths = [os.path.join(data_path,'merged/tile_10__merged.tif'), 
                         os.path.join(data_path,'merged/tile_12__merged.tif'), 
                         os.path.join(data_path,'merged/tile_14__merged.tif'), 
                         os.path.join(data_path,'merged/tile_16__merged.tif'),
                         os.path.join(data_path,'merged/tile_18__merged.tif'), 
                         os.path.join(data_path,'merged/tile_20__merged.tif')]

            dem_path = os.path.join(data_path,'merged/dem_tile__merged.tif')

            starting_pos_tile_number = last_neg_tile_number
#             starting_neg_tile_number = last_neg_tile_number

            try:

                last_pos_tile_number = process_psoitive_files_with_overlap(GT_path, 
                                                                   rgb_paths, 
                                                                   dem_path, 
                                                                   pos_output_dir, 
                                                                   crop_size=224, 
                                                                   overlap_rate=0.25,
                                                                   tile_number=starting_pos_tile_number)

                last_neg_tile_number = process_files_with_negative_check(GT_path, 
                                                                  rgb_paths, 
                                                                  dem_path, 
                                                                  neg_output_dir, 
                                                                  crop_size=224, 
                                                                  overlap_rate=0.25,  
                                                                  buffer_size=10,
                                                                  tile_number=last_pos_tile_number)
            except:
                print("Error in HUC: ", huc_name)
                continue

    #     break

Starting with HUC:  HUC_110300170302-done


11479it [01:49, 104.40it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 11479/11479 [01:25<00:00, 133.76it/s]


Total regions saved: 133
Starting with HUC:  HUC_102300031003-done


8402it [01:35, 88.25it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 8402/8402 [01:32<00:00, 91.24it/s]


Total regions saved: 155
Starting with HUC:  HUC_102702060102-done


9067it [01:42, 88.81it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 9067/9067 [01:49<00:00, 83.14it/s]


Total regions saved: 151
Starting with HUC:  HUC_070801030408-done


3575it [00:46, 76.56it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 3575/3575 [00:44<00:00, 81.08it/s]


Total regions saved: 62
Starting with HUC:  HUC_110702020102-done


4478it [00:38, 116.85it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 4478/4478 [00:28<00:00, 156.77it/s]


Total regions saved: 38
Starting with HUC:  HUC_102002030803-done


6497it [01:18, 83.07it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 6497/6497 [01:18<00:00, 82.89it/s]


Total regions saved: 114
Starting with HUC:  HUC_102600080205-done


7606it [01:07, 111.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 7606/7606 [00:46<00:00, 162.88it/s]


Total regions saved: 77
Starting with HUC:  HUC_110300090303-done


9907it [01:42, 97.06it/s] 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 9907/9907 [00:31<00:00, 316.76it/s]


Total regions saved: 44
Starting with HUC:  HUC_110300090307-done


9305it [01:36, 96.58it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 9305/9305 [01:34<00:00, 98.13it/s]


Total regions saved: 137
Starting with HUC:  HUC_102300030509-done


8081it [01:44, 77.08it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 8081/8081 [02:51<00:00, 47.15it/s]


Total regions saved: 163
Starting with HUC:  HUC_071000040910-done


7764it [01:18, 98.83it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 7764/7764 [01:48<00:00, 71.77it/s]


Total regions saved: 137
Starting with HUC:  HUC_070802050807-done


6340it [01:30, 70.28it/s] 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 6340/6340 [00:53<00:00, 119.35it/s]


Total regions saved: 65
Starting with HUC:  HUC_102100070602-done


18865it [02:54, 108.37it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 18865/18865 [03:20<00:00, 94.16it/s]


Total regions saved: 311
Starting with HUC:  HUC_102702050101-done


3760it [00:44, 84.22it/s] 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 3760/3760 [00:37<00:00, 100.40it/s]


Total regions saved: 47
Starting with HUC:  HUC_102600100407-done


6629it [01:04, 103.18it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 6629/6629 [01:10<00:00, 94.37it/s]


Total regions saved: 96
Starting with HUC:  HUC_070801050302-done


4591it [00:58, 77.86it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 4591/4591 [01:03<00:00, 72.05it/s]


Total regions saved: 91
Starting with HUC:  HUC_102200011306-done


14721it [02:00, 121.68it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 14721/14721 [01:55<00:00, 127.42it/s]


Total regions saved: 174
Starting with HUC:  HUC_102901010502-done


7447it [01:46, 70.14it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 7447/7447 [02:32<00:00, 48.82it/s]


Total regions saved: 124
Starting with HUC:  HUC_102702070207-done


2318it [02:04, 18.58it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 2318/2318 [01:37<00:00, 23.77it/s]


Total regions saved: 44
Starting with HUC:  HUC_071100060307-done


6534it [03:33, 30.56it/s] 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 6534/6534 [01:08<00:00, 94.79it/s]

Total regions saved: 68





In [19]:
last_neg_tile_number

241