In [1]:
import glob
import os
import shutil

import pandas as pd
import rasterio
import numpy as np
from tqdm import tqdm

In [2]:
# Root directory of the project data. Set the DITCHES_BASEPATH environment
# variable to run the notebook on another machine; when it is unset or empty,
# the original workstation path is used.
basepath = os.environ.get("DITCHES_BASEPATH") or "D:/users/holgerv/Ditches"

In [3]:
# Collect the HPMF raster tiles. glob returns matches in arbitrary order, so
# sort them to keep the processing order reproducible between runs.
files = sorted(glob.glob(f"{basepath}/working/deep_learning/data/dem_1m_hpmf/*tif"))

# Use HPMF statistics to assign lower and upper value boundaries

In [4]:
# Read the per-raster HPMF statistics table: one row per raster file with its
# min, max, mean and selected percentiles (see the preview output below).
# NOTE(review): the "Unnamed: 0" column looks like a saved row index - confirm.
stats_df = pd.read_csv(f"{basepath}/working/dem_1m_hpmf_stats.csv", encoding="utf-8")

In [5]:
# Preview the statistics table
stats_df

Unnamed: 0,file,hpmf_min,hpmf_max,hpmf_mean,hpmf_percentile_1,hpmf_percentile_10,hpmf_percentile_90,hpmf_percentile_99
0,D:/users/holgerv/Ditches/working/deep_learning...,-0.76,0.93,0.000417,-0.390000,-0.24,-0.09,-0.09
1,D:/users/holgerv/Ditches/working/deep_learning...,-1.90,1.08,0.000589,-0.440000,-0.25,-0.09,-0.09
2,D:/users/holgerv/Ditches/working/deep_learning...,-1.66,1.62,0.002162,-0.470000,-0.31,-0.10,-0.10
3,D:/users/holgerv/Ditches/working/deep_learning...,-1.50,1.44,0.004625,-0.410000,-0.25,-0.09,-0.09
4,D:/users/holgerv/Ditches/working/deep_learning...,-0.96,2.30,0.006886,-0.410000,-0.21,-0.09,-0.09
...,...,...,...,...,...,...,...,...
2068,D:/users/holgerv/Ditches/working/deep_learning...,-0.52,0.81,0.008336,-0.310000,-0.18,-0.09,-0.09
2069,D:/users/holgerv/Ditches/working/deep_learning...,-0.98,1.09,0.003026,-0.330000,-0.15,-0.08,-0.08
2070,D:/users/holgerv/Ditches/working/deep_learning...,-0.65,0.78,0.002513,-0.290000,-0.16,-0.08,-0.07
2071,D:/users/holgerv/Ditches/working/deep_learning...,-0.58,0.41,0.006919,-0.340000,-0.18,-0.09,-0.09


In [6]:
# Summarise the per-raster minimum HPMF values, adding the 1st, 5th and 10th
# percentiles to see how far the low-end outliers sit from the bulk of the data
stats_df["hpmf_min"].describe(percentiles=[0.01, 0.05, 0.1])

count    2073.000000
mean       -1.237062
std         0.735341
min       -18.440000
1%         -4.028800
5%         -2.280000
10%        -1.808000
50%        -1.090000
max        -0.110000
Name: hpmf_min, dtype: float64

In [7]:
# Summarise the per-raster maximum HPMF values, adding the 90th, 95th and 99th
# percentiles to see how far the high-end outliers sit from the bulk of the data
stats_df["hpmf_max"].describe(percentiles=[0.9, 0.95, 0.99])

count    2073.000000
mean        1.482142
std         1.000186
min         0.090000
50%         1.260000
90%         2.108000
95%         2.814000
99%         5.249200
max        14.690000
Name: hpmf_max, dtype: float64

In [8]:
# Lower HPMF boundary: the 1st percentile of the per-raster minimum values
lower_boundary = stats_df["hpmf_min"].quantile(0.01)

In [9]:
# Upper HPMF boundary: the 99th percentile of the per-raster maximum values
upper_boundary = stats_df["hpmf_max"].quantile(0.99)

In [10]:
# Show both boundaries that will be used for clipping and scaling
display(lower_boundary, upper_boundary)

-4.0288

5.249200000000028

# Normalize HPMF rasters

In [11]:
# Normalize HPMF raster
def normalize_hpmf_raster(in_fp: str, lower_boundary: float, upper_boundary: float, out_fp: str) -> None:
    """Clip band 1 of a raster to fixed boundaries and rescale it to [0, 1].

    Cells equal to the input nodata value become NaN and stay NaN in the
    output. All other cells are clipped to [lower_boundary, upper_boundary]
    and mapped linearly so that lower_boundary -> 0 and upper_boundary -> 1.

    Parameters
    ----------
    in_fp : str
        Path of the raster to read (only band 1 is used).
    lower_boundary : float
        Value mapped to 0; smaller values are clipped to it.
    upper_boundary : float
        Value mapped to 1; larger values are clipped to it.
    out_fp : str
        Path of the single-band raster to write. It reuses the input profile
        except for dtype (forced to floating point), nodata (NaN) and count (1).

    Raises
    ------
    ValueError
        If upper_boundary is not strictly greater than lower_boundary, which
        would otherwise cause a division by zero or an inverted scale.
    """
    value_range = upper_boundary - lower_boundary
    # "not > 0" also rejects NaN boundaries
    if not value_range > 0:
        raise ValueError(
            f"upper_boundary ({upper_boundary}) must be greater than "
            f"lower_boundary ({lower_boundary})"
        )

    with rasterio.open(in_fp) as src:
        array = src.read(1)
        out_profile = src.profile

    # NaN nodata and fractional results need a floating point type; keep the
    # source precision when it is already floating point.
    if not np.issubdtype(array.dtype, np.floating):
        array = array.astype(np.float32)
    out_dtype = array.dtype

    # Replace the input nodata value with NaN. Nothing to do when no nodata
    # value is declared or when it is already NaN.
    in_nodata = out_profile.get("nodata")
    if in_nodata is not None and not np.isnan(in_nodata):
        array = np.where(array == in_nodata, np.nan, array)

    # Cap outliers at both boundaries (NaN cells pass through unchanged),
    # then scale linearly to the 0-1 range.
    array_norm = (np.clip(array, lower_boundary, upper_boundary) - lower_boundary) / value_range

    # Only one band is written, so the profile must describe a one-band raster
    out_profile.update(dtype=out_dtype.name, nodata=np.nan, count=1)

    with rasterio.open(out_fp, "w", **out_profile) as dst:
        dst.write(array_norm.astype(out_dtype, copy=False), 1)

In [12]:
# Output directory: start from an empty folder so tiles left over from an
# earlier run cannot mix with the new results.
out_dirpath = f"{basepath}/working/deep_learning/data/dem_1m_hpmf_norm"
if os.path.exists(out_dirpath):
    shutil.rmtree(out_dirpath)
# makedirs (unlike mkdir) also creates any missing parent folders
os.makedirs(out_dirpath)

In [13]:
%%time

# Loop over HPMF files and normalize them
for file in tqdm(files, position=0, leave=True):
    out_basename = "_".join([os.path.basename(file).split(".")[0], "norm"]) + ".tif"
    out_fp = f"{out_dirpath}/{out_basename}"
    normalize_hpmf_raster(file, lower_boundary, upper_boundary, out_fp)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2073/2073 [1:08:16<00:00,  1.98s/it]

CPU times: total: 1h 7min 54s
Wall time: 1h 8min 16s



