# Unify rasters to a base raster

In [1]:
import os
from importlib.resources import files
from rasterio.enums import Resampling
from pathlib import Path
from tqdm import tqdm

from beak.utilities.io import save_raster, create_file_folder_list, create_file_list, check_path
from beak.utilities.raster_processing import unify_raster_grids

In [2]:
# Dataset identifiers: data package root, dataset name and target grid name
BASE_PATH = files("beak.data")
BASE_NAME = "LAWLEY22"
BASE_SPATIAL = "EPSG_32615_RES_50_0"

# Both the source and the target rasters live below the dataset's EXPORT folder
export_root = BASE_PATH / BASE_NAME / "EXPORT"

input_folder = export_root / "EPSG_4326_RES_0_015" / "CLIPPED_USC" / "CATEGORICAL"
output_folder = export_root / BASE_SPATIAL / "CATEGORICAL"

# The base raster shares its file name with the target grid identifier
base_raster = BASE_PATH / "BASE_RASTERS" / (BASE_SPATIAL + ".tif")

print(f"Input folder: {input_folder}")
print(f"Output folder: {output_folder}")
print(f"Base raster: {base_raster}")


Input folder: s:\projekte\20230082_darpa_criticalmaas_ta3\bearbeitung\github\beak-ta3\src\beak\data\LAWLEY22-EXPORT\EPSG_4326_RES_0_015\CLIPPED_USC\CATEGORICAL
Output folder: s:\projekte\20230082_darpa_criticalmaas_ta3\bearbeitung\github\beak-ta3\src\beak\data\LAWLEY22-EXPORT\EPSG_32615_RES_50_0\CATEGORICAL
Base raster: s:\projekte\20230082_darpa_criticalmaas_ta3\bearbeitung\github\beak-ta3\src\beak\data\BASE_RASTERS\EPSG_32615_RES_50_0.tif


**Check** the input files and folders first, since there are many binarized categorical rasters.

In [3]:
# List the sub-folders of the input folder, then gather the files that sit
# directly inside each of them (no recursion) into one flat list
folders, _ = create_file_folder_list(input_folder)

file_list = [
  path
  for subfolder in folders
  for path in create_file_list(subfolder, recursive=False)
]

print(f"Found {len(file_list)} files in {len(folders)} folders:")

Found 768 files in 43 folders:


**Run**

In [5]:
import multiprocessing as mp
from gc import collect

# Unify every raster found in the sub-folders of `input_folder` to the grid of
# `base_raster` and store the result under the same sub-folder and file name
# below `output_folder`.

# Create folder list
folders, _ = create_file_folder_list(input_folder)
DRY_RUN = False  # True: run the unification but write nothing to disk
THREADS = mp.cpu_count()  # used as worker count and as batch size

for folder in tqdm(folders, total=len(folders), desc="Processing folders..."):
  target_folder = output_folder / folder.name

  # Skip per output file rather than per folder: a run interrupted half-way
  # through a folder would otherwise leave that folder incomplete forever,
  # because its mere existence made later runs skip it.
  # NOTE(review): a file truncated by an interrupted write still counts as
  # done here; delete it manually to force a re-run.
  pending_files = [
    file
    for file in create_file_list(folder, recursive=False)
    if not os.path.exists(target_folder / file.name)
  ]
  if not pending_files:
    continue

  # Only touch the file system on a real run.
  # NOTE(review): check_path presumably creates the folder if it is missing -
  # confirm against beak.utilities.io.
  if not DRY_RUN:
    check_path(target_folder)

  # Work in batches of THREADS files so only one batch of arrays is held in memory
  for batch_start in range(0, len(pending_files), THREADS):
    batch_files = pending_files[batch_start:batch_start + THREADS]
    batch_results = unify_raster_grids(base_raster, batch_files, resampling_method=Resampling.nearest, same_extent=True, same_shape=True, n_workers=THREADS)

    # Results are read by position, i.e. batch_results[k] belongs to batch_files[k]
    for result_idx, file in enumerate(batch_files):
      unified_raster = batch_results[result_idx][0]
      meta = batch_results[result_idx][1]

      if not DRY_RUN:
        save_raster(target_folder / file.name, array=unified_raster, dtype="int8", metadata=meta)

      del unified_raster, meta

    # Release the batch before the next one is computed
    del batch_results
    collect()


Processing folders...:   0%|          | 0/43 [00:00<?, ?it/s]

Processing folders...: 100%|██████████| 43/43 [05:10<00:00,  7.21s/it]
