# Convert raster to tiled ML-ready data

ers and tif files are common for geoscience. We need a quick way to tile these and save them to our standard numpy tile dataset.

This notebook defines functions which:
1. convert a tif to a pytorch tensor
1. pads any nan areas with reflection padding 
1. unfolds the tensor in each direction (read torch .fold() / .unfold())
1. stacks those into a "batch" of tiles
1. generates a selection of indices for validation and training data
1. saves each to a seperate train/val folder
1. Generates some QA/QC figures

In [1]:
import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s:%(asctime)s: %(message)s",
)
logger = logging.getLogger(__name__)
logger.info("\n#####\nStarting new log")

import joblib
from pathlib import Path

import colorcet as cc
import matplotlib.pyplot as plt
import numpy as np
import rasterio as rio
import torch
import tifffile
from PIL import Image

class Norm:
    def __init__(self, t_file_path):
        """ Load a pre-fit sklearn Transformer to normalise input array
        """

        self.transformer = joblib.load(t_file_path)
        logger.info(f"Using {t_file_path} to transform your input array")
    
    def transform(self, arr):
        og_shape = arr.shape
        arr = arr.flatten().reshape(-1, 1)
        arr = self.transformer.transform(arr.astype(np.float64))
        return arr.reshape(og_shape)

# def norm(arr, loc=-88.5, scale=132.4):
#     """ Standardise input data (with Laplace distibution) to a Standard Gaussian distribution
#     Australian 80m TMI (continental) fit laplace: loc=-88.5, scale=132.4"""
#     import scipy.stats
#     lv = scipy.stats.laplace(loc=loc, scale=scale)
#     rv = scipy.stats.norm(loc=0.0, scale=1.0) # "Standard" Norm
#     return rv.ppf(lv.cdf(arr))


def img_to_tiles(
    lr_raster_path,
    hr_raster_path,
    lr_out_prefix="lr_tiles",
    hr_out_prefix="hr_tiles",
    scale=4,
    hr_s=256,
    nan_val=-999_999,
    ext="npy",
    norm=False,
    single_output_folder=False,
):

    """
    Specify a LR and HR image pair. This will tile and save them to tensor
    arrays or image tiles ready to load in pytorch.

    Normalisation is hacky and requires further work.

    """

    logging.info(f"Processing {lr_raster_path}, {hr_raster_path}")

    if Path(lr_raster_path).suffix == ".tif":
        lr = tifffile.imread(lr_raster_path)
        hr = tifffile.imread(hr_raster_path)
    elif Path(lr_raster_path).suffix == ".ers":
        lr = np.array(rio.open(lr_raster_path).read(1))
        hr = np.array(rio.open(hr_raster_path).read(1))

    lr[lr == nan_val] = np.nan
    hr[hr == nan_val] = np.nan

    lr_tensor = torch.as_tensor(norm(lr), dtype=torch.float32).unsqueeze(0)
    hr_tensor = torch.as_tensor(norm(hr), dtype=torch.float32).unsqueeze(0)

    # lr_original_extent = lr_tensor.shape
    # hr_original_extent = hr_tensor.shape

    # Expand raster size to a common multiple of lr size (which is a multiple of hr)

    # padded_w = (lr_tensor.shape[0] + (lr_s - (lr_tensor.shape[0] % lr_s)))
    # padded_h = (lr_tensor.shape[1] + (lr_s - (lr_tensor.shape[1] % lr_s)))

    lr_s = int(hr_s / scale)

    lr_tensor = torch.nn.functional.pad(
        lr_tensor,
        (
            0,
            (lr_s - (lr_tensor.shape[1] % lr_s)),
            0,
            (lr_s - (lr_tensor.shape[0] % lr_s)),
        ),
        mode="reflect",
    )
    hr_tensor = torch.nn.functional.pad(
        hr_tensor,
        (
            0,
            (hr_s - (hr_tensor.shape[1] % hr_s)),
            0,
            (hr_s - (hr_tensor.shape[0] % hr_s)),
        ),
        mode="reflect",
    )

    hr_tensor = hr_tensor[0]
    lr_tensor = lr_tensor[0]

    # Math to determine and execute the tiling process
    lr_tiles_per_row = lr_tensor.shape[1] // lr_s
    hr_tiles_per_row = hr_tensor.shape[1] // hr_s
    lr_tiles_per_column = lr_tensor.shape[0] // lr_s
    hr_tiles_per_column = hr_tensor.shape[0] // hr_s

    assert (
        lr_tiles_per_column == hr_tiles_per_column
        and lr_tiles_per_row == hr_tiles_per_row
    ), f"These should be equal"

    lr_patches = lr_tensor.unfold(0, lr_s, lr_s).unfold(1, lr_s, lr_s)
    hr_patches = hr_tensor.unfold(0, hr_s, hr_s).unfold(1, hr_s, hr_s)

    lr_patches = lr_patches.contiguous().view(
        lr_tiles_per_row * lr_tiles_per_column, -1, lr_s, lr_s
    )
    hr_patches = hr_patches.contiguous().view(
        hr_tiles_per_row * hr_tiles_per_column, -1, hr_s, hr_s
    )

    #  Drop tiles that contain mostly/any nan values, convert rest to some value
    allowed_nan_pct = 1  # 0.05 TODO currently allow all NaNs for consistent num tiles
    nan_fill_val = 0 # norm(np.nanmean(lr_patches))  # nan_val
    valid_mask = (
        torch.count_nonzero(lr_patches.isnan(), dim=(2, 3)) / lr_s ** 2
    ) <= allowed_nan_pct

    lr_patches_masked = lr_patches[valid_mask]
    hr_patches_masked = hr_patches[valid_mask]  # HR and LR indices need to match
    logger.info(
        f"Dropped {sum(~valid_mask).item()} mostly NaN tiles (> {allowed_nan_pct*100}% nan)"
    )
    # nan_val = torch.tensor(0)  # np.nanmean(hr_patches_masked))
    # lr_patches_masked[lr_patches_masked == torch.nan] = nan_val
    # hr_patches_masked[hr_patches_masked == torch.nan] = nan_val

    hr_patches = (
        torch.nan_to_num(hr_patches_masked, nan=nan_fill_val).numpy().astype(np.float32)
    )
    lr_patches = (
        torch.nan_to_num(lr_patches_masked, nan=nan_fill_val).numpy().astype(np.float32)
    )
    logger.info(f"Reverted any NaNs in remaining tiles to {nan_fill_val}")

    logger.info("None of these values should show NaN!")
    logger.info(f"{np.min(lr_patches)=}")
    logger.info(f"{np.max(lr_patches)=}")
    logger.info(f"{np.mean(lr_patches)=}")
    logger.info(f"{np.std(lr_patches)=}")
    # print(f"{np.nanmin(hr_patches)=}")
    # print(f"{np.nanmax(hr_patches)=}")

    if single_output_folder:
        single_output_folder = Path(single_output_folder)
        train_dir = single_output_folder
    else:
        train_dir = Path(hr_raster_path).parent / ext

    (train_dir / "hr" / "1-0").mkdir(parents=True, exist_ok=True)
    (train_dir / "lr" / f"{scale}-0").mkdir(parents=True, exist_ok=True)

    hr_file_name = f"hr/1-0/{Path(hr_raster_path).stem}"
    lr_file_name = f"lr/{scale}-0/{Path(lr_raster_path).stem}"

    if "npy" in ext:
        np.save(train_dir / f"{hr_file_name}.{ext}", hr_patches)
        np.save(train_dir / f"{lr_file_name}.{ext}", lr_patches)

    if "tif" in ext:
        for i in range(len(lr_patches)):
            tifffile.imsave(train_dir / f"{hr_file_name}_{i}.{ext}", hr_patches[i])
            tifffile.imsave(train_dir / f"{lr_file_name}_{i}.{ext}", lr_patches[i])

        logger.info(hr_patches[i].shape)
        logger.info(lr_patches[i].shape)

    logger.info(f"Saved {len(lr_patches)} tiles to {train_dir.absolute()}")



INFO:2022-04-15 15:48:18,772: 
#####
Starting new log
INFO:2022-04-15 15:48:35,912: Note: NumExpr detected 20 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:2022-04-15 15:48:35,912: NumExpr defaulting to 8 threads.


In [8]:
survey_search = "P*"
train_dir = Path(r"D:\luke\data_train\qt_norm")
norm = Norm(r"D:\luke\SRvey\utils\AUS_MAGPMAP_v7_ONSHORE_QuantileTransformer.joblib")
for survey_path in Path(r"D:\luke\data_source\surveys_line_spacing").glob(survey_search):
    if not survey_path.is_dir():
        continue
    logger.info(survey_path)

    for scale in [2, 3, 4]:

        root = survey_path
        img_to_tiles(
            hr_raster_path=f"{next(root.glob('*1.ers'))}",
            lr_raster_path=f"{next(root.glob(f'*{scale:d}.ers'))}",
            # hr_raster_path=next(survey_path.glob("*0200.tif")),
            # lr_raster_path=next(survey_path.glob("*0050.tif")),
            ext="tif",
            norm=norm.transform,
            single_output_folder=train_dir,
            nan_val=-999_999,
            scale=scale,  # lr tile res
            hr_s=240,  # hr tile res
        )


INFO:2022-04-15 15:50:20,875: Using D:\luke\SRvey\utils\AUS_MAGPMAP_v7_ONSHORE_QuantileTransformer.joblib to transform your input array
INFO:2022-04-15 15:50:20,876: D:\luke\data_source\surveys_line_spacing\P578
INFO:2022-04-15 15:50:20,877: Processing D:\luke\data_source\surveys_line_spacing\P578\p578_2.ers, D:\luke\data_source\surveys_line_spacing\P578\p578_1.ers
INFO:2022-04-15 15:50:21,916: Dropped 0 mostly NaN tiles (> 100% nan)
INFO:2022-04-15 15:50:21,926: Reverted any NaNs in remaining tiles to 0
INFO:2022-04-15 15:50:21,926: None of these values should show NaN!
INFO:2022-04-15 15:50:21,926: np.min(lr_patches)=-3.131595
INFO:2022-04-15 15:50:21,926: np.max(lr_patches)=3.0966911
INFO:2022-04-15 15:50:21,926: np.mean(lr_patches)=-0.07793587
INFO:2022-04-15 15:50:21,926: np.std(lr_patches)=1.0870522
INFO:2022-04-15 15:50:22,219: (240, 240)
INFO:2022-04-15 15:50:22,219: (120, 120)
INFO:2022-04-15 15:50:22,219: Saved 120 tiles to D:\luke\data_train
INFO:2022-04-15 15:50:22,226: Pro

In [9]:
## ArbSR

# ArbSR requires a specific dataset layout (see the matlab version of this script)
# ArbSR uses a single HR file and n, m scale downsamplings.
# We use a set of pre-gridded files, split into n directories of m scale.
# This script organises these directories as per the expectations of ArbSR.
# ArbSr uses np.arange(1.5, 4.5, 0.5), we have np.arange(2.0, 5.0, 1.0) == [2,3,4]

# val_num = int(np.round(val_pct * len(lr)))  # len(lr_patches)
# val_indices = sorted(rng.choice(len(lr), size=val_num, replace=False))
# logger.info(f"{val_indices=}")

# train_indices = [i for i in range(len(lr)) if i not in val_indices]
# logger.info(f"{train_indices=}")
# logger.info(
#     f"There are {sum(i in train_indices for i in val_indices)} val indices in your train indices :)"
# )


def process_dir(
    tile_dir,
    out_dir_name="output",
    val_pct=0.15,  # 15% Val, 85% Train
):
    """Based on dir name, process as n scale, for scale = "dir_name_n" """

    rng = np.random.default_rng(seed=21)

    tile_dir = Path(tile_dir)
    assert tile_dir.exists(), f"Error, {tile_dir.absolute().as_posix()} not found!"

    num_tiles = len(list((tile_dir / "hr" / "1-0").iterdir()))
    val_num = int(np.round(val_pct * num_tiles))  # len(lr_patches)
    val_indices = sorted(rng.choice(num_tiles, size=val_num, replace=False))
    logger.info(f"{val_indices=}")

    train_indices = [i for i in range(num_tiles) if i not in val_indices]
    print(f"{train_indices=}")
    print(
        f"There are {sum(i in train_indices for i in val_indices)} val indices in your train indices :)"
    )

    for d in (tile_dir / "hr").iterdir():
        # This should only be 1 iteration, but matches lr loop for clarity
        assert d.is_dir(), "Unexpected files found"
        
        hr_files = np.array(sorted(list(d.iterdir())))
        
        out_path_t = tile_dir / out_dir_name / "train" / "HR"
        out_path_v = tile_dir / out_dir_name / "val" / "HR"
        out_path_t.mkdir(exist_ok=True, parents=True)
        out_path_v.mkdir(exist_ok=True, parents=True)
        train_hr = hr_files[train_indices]
        val_hr = hr_files[val_indices]

        for i, f in enumerate(train_hr):
            f.rename(out_path_t / f"{i:04d}.tif")
        for i, f in enumerate(val_hr):
            f.rename(out_path_v / f"{i:04d}.tif")

    for d in (tile_dir / "lr").iterdir():
        assert d.is_dir(), "Unexpected files found"

        scale = float(d.stem.split("_x")[-1].replace("-", "."))
    
        lr_files = np.array(sorted(list(d.iterdir())))
        
        out_path_t = tile_dir / out_dir_name / "train" / f"LR/X{scale:.2f}_X{scale:.2f}"
        out_path_v = tile_dir / out_dir_name / "val" / f"LR/X{scale:.2f}_X{scale:.2f}"
        out_path_t.mkdir(exist_ok=True, parents=True)
        out_path_v.mkdir(exist_ok=True, parents=True)
        train_lr = lr_files[train_indices]
        val_lr = lr_files[val_indices]

        for i, f in enumerate(train_lr):
            f.rename(out_path_t / f"{i:04d}.tif")
        for i, f in enumerate(val_lr):
            f.rename(out_path_v / f"{i:04d}.tif")

    print(
        f"{tile_dir} processed and output to {(tile_dir).absolute()}"
    )


tile_dir = r"D:\luke\data_train\hr240_qt"#train_dir

process_dir(tile_dir, out_dir_name="processed")


INFO:2022-04-15 16:12:58,606: val_indices=[0, 22, 24, 32, 42, 46, 48, 72, 75, 80, 81, 87, 90, 103, 106, 108, 125, 135, 139, 145, 155, 157, 169, 176, 178, 179, 183, 194, 219, 229, 232, 251, 256, 257, 259, 263, 264, 265, 267, 273, 281, 289, 306, 308, 314, 326, 327, 330, 332, 339, 346, 353, 361, 363, 367, 369, 371, 378, 380, 382, 383, 407, 410]


train_indices=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 45, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 76, 77, 78, 79, 82, 83, 84, 85, 86, 88, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 105, 107, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 126, 127, 128, 129, 130, 131, 132, 133, 134, 136, 137, 138, 140, 141, 142, 143, 144, 146, 147, 148, 149, 150, 151, 152, 153, 154, 156, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 170, 171, 172, 173, 174, 175, 177, 180, 181, 182, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 220, 221, 222, 223, 224, 225, 226, 227, 228, 230, 231, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 2

In [None]:
# import logging

# logging.basicConfig(
#     level=logging.INFO,
#     format="%(levelname)s:%(asctime)s: %(message)s",
# )
# logger = logging.getLogger(__name__)
# logger.info("\n#####\nStarting new log")

# from pathlib import Path

# import colorcet as cc
# import matplotlib.pyplot as plt
# import numpy as np
# import rasterio as rio
# import torch
# import tifffile
# from PIL import Image


# def raster_to_tiles(
#     scale_raster_path_dict,
#     lr_out_prefix="lr",
#     hr_out_prefix="hr",
#     hr_tile_dim=240,  # X1 scale factor tile dimension
#     nan_val=-999_999,
#     ext="tif",  # output to n tif files, or numpy nd array
#     norm=False,  # min max as below
#     single_output_folder=False,  # Combine everything to target folder
# ):

#     """
#     So you have line data from a geophysical survey. You've decimated the lines
#     and gridded it at several specific scale factors, e.g. remove 2nd, 3rd, 4th lines
#     You now want to turn this survey into individual tiles for ML. So that each tile
#     covers the same extent, you need to tile them at dimensions relative to their
#     scale factor. The dimensions are therefore a decimal (or fractional) scale smaller
#     than the original 1x scale grid.
#     You may note that 256/3 is not a pleasant number for a discrete count of pixels.
#     So we use 240, which goes to 120, 80, and 60 pixels per dimension for each of
#                 1,                 2,  3, and  4 times scale, etc.

#     An alternative would be to interpolate, but idk how that would affect ArbSR...
#     Its easier in the image world, because it's just bicubic downsample on the fly.

#     Normalisation is hacky and requires further work. It currently uses "nice"
#     statistics from the Australia wide mag map, which is generally representative
#     of TMI values, and constrains most data to [0,1]

#     """

#     if norm:
#         max_ = 1000  # Aus Magmap v7 histogram arbitrary clip/stats
#         min_ = -1000
#         mean_ = 0
#         std_ = 250

#         # norm = lambda i: (i + 4403.574) / 18454.907 # values from training
#         # unnorm = lambda i: (i * 18454.907) - 4403.574

#         norm = lambda i: (i - min_) / (max_ - min_)  # values from training
#         unnorm = lambda i: (i * (max_ - min_)) + min_

#         logger.info(f"Using {max_=}, {min_=}, {mean_=}, {std_=} for min max norm")

#     else:
#         norm = unnorm = lambda i: i  # NULL OP
#         logger.info("Not Normalising ...")

#     for scale, raster_path in scale_raster_path_dict.items():
#         logger.info(f"Processing {raster_path} at scale {scale}")
#         raster_path = Path(raster_path)
#         if raster_path.suffix == ".tif":
#             raster = tifffile.imread(raster_path)
#         elif raster_path.suffix == ".ers":
#             raster = np.array(rio.open(raster_path).read(1))

#         raster[raster == nan_val] = np.nan
#         logger.debug(f"{raster.shape}")
#         r_tensor = torch.as_tensor(norm(raster), dtype=torch.float32).unsqueeze(0)
#         logger.debug(f"{r_tensor.shape}")

#         # scale = float(scale.replace("-", "."))
#         sz = int(hr_tile_dim // scale)
#         assert sz == hr_tile_dim / scale  # TODO handle fractional sizes
#         logger.info(f"Tiling scale factor {scale} at {sz}x{sz}")

#         # lr_original_extent = lr_tensor.shape

#         # Expand raster size to a common multiple of lr size (which is a multiple of hr)

#         # padded_w = (lr_tensor.shape[0] + (lr_s - (lr_tensor.shape[0] % lr_s)))
#         # padded_h = (lr_tensor.shape[1] + (lr_s - (lr_tensor.shape[1] % lr_s)))
#         logger.debug(f"{repr(sz)=}")
#         logger.debug(f"{repr(r_tensor.shape)=}")
#         logger.debug(f"{repr((sz - (r_tensor.shape[0] % sz)))=}")

#         r_tensor = torch.nn.functional.pad(
#             r_tensor,
#             (
#                 0,
#                 (sz - (r_tensor.shape[1] % sz)),
#                 0,
#                 (sz - (r_tensor.shape[0] % sz)),
#             ),
#             mode="reflect",
#         )

#         r_tensor = r_tensor[0]
#         logger.debug(f"{r_tensor.shape}")

#         # Math to determine and execute the tiling process
#         tiles_per_row = r_tensor.shape[1] // sz
#         tiles_per_column = r_tensor.shape[0] // sz

#         r_patches = r_tensor.unfold(0, sz, sz).unfold(1, sz, sz)

#         r_patches = r_patches.contiguous().view(
#             tiles_per_row * tiles_per_column, -1, sz, sz
#         )

#         logger.debug(f"{len(r_patches)=}")
#         logger.debug(f"{r_patches.shape=}")

#         #  Drop tiles that contain mostly/any nan values, convert rest to some value
#         allowed_nan_pct = 0.05
#         nan_fill_val = mean_  # nan_val
#         valid_mask = (
#             torch.count_nonzero(r_patches.isnan(), dim=(2, 3)) / sz ** 2
#         ) <= allowed_nan_pct

#         r_patches_masked = r_patches[valid_mask]
#         logger.debug(
#             f"Dropped {sum(~valid_mask).item()} mostly NaN tiles (> {allowed_nan_pct*100}% NaN)"
#         )

#         r_patches_masked = torch.nan_to_num(r_patches_masked, nan=nan_fill_val)
#         logger.debug(f"Reverted any NaNs in remaining tiles to {nan_fill_val}")

#         # Random split the train/val tiles for this dataset
#         from numpy.random import default_rng

#         # rng = default_rng(seed=21)

#         # val_pct = 0.15  # 15% Val, 85% Train
#         # val_num = int(np.round(val_pct * len(r_patches_masked)))  # len(lr_patches)

#         # val_indices = sorted(
#         #     rng.choice(len(r_patches_masked), size=val_num, replace=False)
#         # )
#         # train_indices = [
#         #     i for i in range(len(r_patches_masked)) if i not in val_indices
#         # ]
#         # logger.info(f"{val_indices=}")
#         # logger.info(f"{train_indices=}")

#         # r_patches_train = r_patches_masked[train_indices].numpy().astype(np.float32)
#         # r_patches_val = r_patches_masked[val_indices].numpy().astype(np.float32)

#         # logger.info(f"{len(r_patches_train)=}")
#         # logger.info(f"{len(r_patches_val)=}")

#         # logger.info("None of these values should show nans!")
#         # logger.info(f"{np.max(r_patches_train)=}, {np.min(r_patches_train)=}")
#         # logger.info(f"{np.mean(r_patches_train)=}, {np.std(r_patches_train)=}")

#         r_patches = r_patches_masked.numpy().astype(np.float32)

#         if single_output_folder:
#             single_output_folder = Path(single_output_folder)
#             train_dir = single_output_folder / "train" / raster_path.stem
#             # val_dir = single_output_folder / "val"
#         else:
#             train_dir = Path(raster_path).parent / ext / "train" / raster_path.stem
#             # val_dir = Path(raster_path).parent / ext / "val"

#         r_file_name = "HR/" if scale == 1 else (f"LR/X{scale:.2f}_X{scale:.2f}/")
#         (train_dir / r_file_name).mkdir(parents=True, exist_ok=True)
#         # (val_dir / r_file_name).mkdir(parents=True, exist_ok=True)

#         # if "npy" in ext:
#         #     np.save(train_dir / f"{r_file_name}.{ext}", r_patches_train)
#         #     np.save(val_dir / f"{r_file_name}.{ext}", r_patches_val)

#         j = len(list((train_dir / r_file_name).iterdir()))
#         # h = len(list((val_dir / r_file_name).iterdir()))

#         if "tif" in ext:
#             for i in range(len(r_patches)):
#                 tifffile.imsave(
#                       / r_file_name / f"{1+i+j:04d}.tif", r_patches[i]
#                 )

#             # for i in range(len(r_patches_val)):
#             #     tifffile.imsave(
#             #         val_dir / r_file_name / f"{1+i+h:04d}.tif", r_patches_val[i]
#             #     )

#             # logger.info(r_patches[i].shape)

#         logger.info(f"\nSaved to {train_dir.parent.absolute()}\n")

#     return r_patches, train_indices, val_indices

# survey_search = "*.tif"

# survey_search = "P*"
# survey_extension = "*.ers"
# scale_raster_path_dict = {}
# single_output_folder = Path("C:/Luke/data/Paper_2/lr64_combined")

# assert (
#     len(list(single_output_folder.iterdir())) == 0
# ), f"dir {single_output_folder} is not empty!"

# for survey_path in Path(r"C:\Luke\data\Paper_2").glob(survey_search):
#     if not survey_path.is_dir():
#         continue
#     root = survey_path

#     logging.info(f"{survey_path.stem=}")
#     scale_raster_path_dict = scale_raster_path_dict | {survey_path.stem: {}}

#     for scale_raster in root.glob(survey_extension):
#         scale = float(scale_raster.stem.split("_")[-1].replace("-", "."))
#         # > Python 3.9 dict merging with pipe operator
#         scale_raster_path_dict[survey_path.stem] = scale_raster_path_dict[
#             survey_path.stem
#         ] | {scale: str(scale_raster.absolute())}

#     r_patches, train_indices, val_indices = raster_to_tiles(
#         scale_raster_path_dict=scale_raster_path_dict[survey_path.stem],
#         ext="tif",
#         norm=True,
#         single_output_folder=single_output_folder,
#         nan_val=-999_999,
#         hr_tile_dim=240,
#     )

# logger.info(scale_raster_path_dict)


In [None]:
def check_tiles(data_path, index=0, ext="np", s=256):
    # if "np" in ext:
    #     lr_tile = np.load(lr_path)[index][0]
    #     hr_tile = np.load(hr_path)[index][0]
    # elif "tif" in ext:
    #     lr_tile = tifffile.imread(f"{lr_path}").squeeze()
    #     hr_tile = tifffile.imread(f"{hr_path}").squeeze()
    data_path = Path(data_path)

    if "tif" in ext:
        lr_tile = tifffile.imread(f"{next(data_path.glob(f'**/lr/{i}.tif'))}").squeeze()
        hr_tile = tifffile.imread(f"{next(data_path.glob(f'**/hr/{i}.tif'))}").squeeze()

    us = np.array(Image.fromarray(lr_tile).resize((s, s)))

    plt.figure(figsize=(20, 10))
    plt.subplot(1, 3, 1)
    plt.imshow(us, vmin=hr_tile.min(), vmax=hr_tile.max())
    plt.colorbar()
    plt.subplot(1, 3, 2)
    plt.imshow(hr_tile)
    plt.colorbar()
    plt.subplot(1, 3, 3)
    plt.imshow(hr_tile - us, cmap=cc.cm.CET_D7, vmin=-0.5, vmax=0.5)
    plt.colorbar()


In [None]:
# def ers_to_tifs(survey_path, out_dir=""):
#     out_dir = Path(out_dir)
#     out_dir.mkdir(parents=True, exist_ok=True)

#     ers_arr = np.array(rio.open(survey_path).read(1))
#     tifffile.imsave(out_dir / f"{survey_path.stem}.tif", ers_arr)
#     print(f'Saved {Path(out_dir / f"{survey_path.stem}.tif").absolute()}')


# survey_search = "*4.ers"
# for survey_path in Path("C:/Luke/PhD/Oasis Montaj/ArbSR").glob(survey_search):
#     print(survey_path)
#     # survey_path
#     ers_to_tifs(survey_path, out_dir="PPDRC")

# # Test no loss of information:
# # np.max(tifffile.imread(next(Path(r"C:\Luke\PhD\paper2\SRvey\utils\PPDRC").glob("*.tif"))) - np.array(rio.open(r"C:\Luke\PhD\Oasis Montaj\ArbSR\p681_1.ers").read(1)))


In [None]:
from pathlib import Path
import matplotlib.pyplot as plt
import tifffile
import numpy as np

ims = []
for im in Path(r"C:\Luke\data\Paper 2\PPDRC\lr32\train").glob("**\*.tif"):
    ims.append(tifffile.imread(im))

ims = np.array(ims)[0]


In [None]:
survey_search = "**/*"

for survey_path in Path(r"C:\Luke\data\Paper 2\PPDRC\lr32\train").glob(survey_search):
    logger.info(tifffile.imread(next(survey_path.glob("**/*0050.tif"))).max())
    logger.info(tifffile.imread(next(survey_path.glob("**/*0050.tif"))).max())
    logger.info(tifffile.imread(next(survey_path.glob("**/*0200.tif"))).min())
    logger.info(tifffile.imread(next(survey_path.glob("**/*0200.tif"))).min())
