# Data and preparation

In [1]:
# Modules
import numpy as np
import rasterio
import sys

sys.path.insert(0, "..")
from eis_toolkit.checks.parameter import check_numeric_value_sign
from eis_toolkit.exceptions import NumericValueSignException, InvalidParameterValueException, InvalidInputDataException
from eis_toolkit.transformations import binarize, logarithmic, winsorize

# Suppress scientific notation when NumPy arrays are printed
np.set_printoptions(suppress=True)


In [2]:
# Raster data
# NOTE(review): all paths below are hard-coded absolute paths; they only resolve
# when the repository is available under /eis_toolkit — confirm for your environment.
image_small = rasterio.open("/eis_toolkit/tests/data/remote/small_raster.tif")                                    # single band raster (rect without holes or no-data)
image_small_nan = rasterio.open("/eis_toolkit/tests/data/local/data/small_raster_nan.tif")                        # small_raster with no-data (-999) below 5
image_small_empty_meta = rasterio.open("/eis_toolkit/tests/data/local/data/small_raster_nan_empty_meta.tif")      # small_raster with no-data (empty) below 5 (exported without specific no-data value)
image_small_mb4 = rasterio.open("/eis_toolkit/tests/data/local/data/multiband.tif")                               # multiband raster with 4 bands

# Large raster data sets, 25 MPx [-9.99, +9.99]
image_25mpx_mb3 = rasterio.open("/eis_toolkit/tests/data/local/data/large_raster_3b.tif")
image_25mpx_mb12 = rasterio.open("/eis_toolkit/tests/data/local/data/large_raster_12b.tif")

# Raster used by the binarize and logarithm cells below
test_image = image_small_mb4

# General issues
## Raster data
- Testing on raster **with** nan-value in meta data
- Testing on raster **without** nan-values in meta data
- Testing **multi-band**-support

## DataFrames
- Core functions will be the same as for raster data, so that only the "branching" needs to be tested

# Binarize
- selection: bands can be **None** or a list of **int**, **float**, **None** values
- thresholds: a list of **int** or **float** values
- nodata: either **None** or a list of **int** and **float**
- method: kind of output of the transformed data, either overwriting the existing ones ("**replace**") or extraction of selected bands ("**extract**")



## Tests

### Method <code>replace</code> and selection <code>None</code> (all bands)

In [3]:
# selection=None (all bands), a single threshold, method "replace".
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=None,
    thresholds=[2],
    nodata=None,
    method="replace",
)


### Method <code>replace</code> and <code>single selection</code>

In [4]:
# Only band 3 is selected, with threshold 5; method "replace".
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[3],
    thresholds=[5],
    nodata=None,
    method="replace",
)


### Method <code>replace</code> and <code>single threshold</code>

In [5]:
# Two selected bands (1 and 3) but only one threshold entry; method "replace".
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[1, 3],
    thresholds=[2],
    nodata=None,
    method="replace",
)


### Method <code>replace</code> and <code>unordered selection</code>

In [6]:
# Bands 1, 3, 4 in ascending order, one threshold entry per selected band.
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[1, 3, 4],
    thresholds=[2, 0, 1],
    nodata=None,
    method="replace",
)


In [7]:
# Bands given out of order (3, 1, 4), with one threshold and one nodata entry each.
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[3, 1, 4],
    thresholds=[5, 2, 0],
    nodata=[1.771, 2.749, -0.766],
    method="replace",
)


### Method <code>replace</code> and <code>multi thresholds</code> and <code>NoData</code>

In [8]:
# All bands, four thresholds; nodata is set for the first entry only, the rest are None.
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=None,
    thresholds=[-5, -2, 2, 5],
    nodata=[2.749, None, None, None],
    method="replace",
)


In [9]:
# All bands, four thresholds, but a single nodata entry (-10).
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=None,
    thresholds=[-5, -2, 2, 5],
    nodata=[-10],
    method="replace",
)


### Method <code>extract</code> and selection <code>None</code> (all bands)

In [10]:
# With the same arguments, method "extract" gives the same result as method "replace".
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=None,
    thresholds=[2],
    nodata=None,
    method="extract",
)


### Method <code>extract</code> and <code>selected bands</code> (Bands 3, 1 if 4 band image)

In [11]:
# Extract bands 3 and 1 (in that order) with a single threshold entry.
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[3, 1],
    thresholds=[5],
    nodata=None,
    method="extract",
)


In [12]:
# Same extraction of bands 3 and 1, now with a single nodata entry (-10).
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[3, 1],
    thresholds=[5],
    nodata=[-10],
    method="extract",
)


### Method <code>extract</code> and <code>multi thresholds</code> (Bands 3, 1, 4 if 4 band image)
- band 3 = index 0, threshold 5
- band 1 = index 1, threshold 1
- band 4 = index 2, threshold 0

In [13]:
# Extract bands 3, 1, 4 with one threshold entry per selected band.
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[3, 1, 4],
    thresholds=[5, 1, 0],
    nodata=None,
    method="extract",
)


### Method <code>extract</code> and <code>multi nodata</code> (Bands 3, 1, 4 if 4 band image)


In [14]:
# Extract bands 3, 1, 4; nodata is given for the first two entries, the third is None.
image, meta, settings = binarize.binarize(
    in_data=test_image,
    selection=[3, 1, 4],
    thresholds=[5, 1, 0],
    nodata=[1.771, 2.749, None],
    method="extract",
)


### Settings

In [15]:
# Display the settings object returned by the most recently executed binarize call
settings

{'band 1': {'band_origin': 3,
  'threshold': 5,
  'nodata_meta': -999.999,
  'nodata_used': 1.771},
 'band 2': {'band_origin': 1,
  'threshold': 1,
  'nodata_meta': -999.999,
  'nodata_used': 2.749},
 'band 3': {'band_origin': 4,
  'threshold': 0,
  'nodata_meta': -999.999,
  'nodata_used': -999.999}}

# Logarithm

### Method <code>replace</code> and selection <code>None</code> (all bands)

In [16]:
# selection=None (all bands), a single base entry (2), method "replace".
image, meta, settings = logarithmic.log_transform(
    in_data=test_image,
    selection=None,
    base=[2],
    nodata=None,
    method="replace",
)


# Winsorize

## Test functions

In [3]:
# Print some stats
def trans_winsorize_basic_stats(image):
  """Print min, max, mean and standard deviation for every band of ``image``.

  The number of bands is taken from ``image.shape[0]`` and each band is read
  as ``image[index]``. Band numbers in the printed output start at 1.
  Nothing is returned; all results go to stdout.
  """
  band_count = image.shape[0]
  print(f"number of bands: {band_count}\n-------------------")
  for index in range(band_count):
    band = image[index]
    number = index + 1
    print(f"min band {number}: {np.min(band)}")
    print(f"max band {number}: {np.max(band)}")
    print(f"mean band {number}: {np.mean(band)}")
    # The trailing newline leaves a blank line between bands.
    print(f"std band {number}: {np.std(band)}\n")
  
# Print some stats when nan values are incorporated. Only for test arrays with np.nan-values
def trans_winsorize_basic_stats_nan(image):
  """Print NaN-aware min, max, mean and standard deviation for every band of ``image``.

  Uses ``np.nanmin``, ``np.nanmax``, ``np.nanmean`` and ``np.nanstd``. The
  number of bands is taken from ``image.shape[0]`` and each band is read as
  ``image[index]``. Band numbers in the printed output start at 1.
  Nothing is returned; all results go to stdout.
  """
  band_count = image.shape[0]
  print(f"number of bands: {band_count}\n-------------------")
  for index in range(band_count):
    band = image[index]
    number = index + 1
    print(f"min band {number}: {np.nanmin(band)}")
    print(f"max band {number}: {np.nanmax(band)}")
    print(f"mean band {number}: {np.nanmean(band)}")
    # The trailing newline leaves a blank line between bands.
    print(f"std band {number}: {np.nanstd(band)}\n")

def test(function=None, raise_error=False):
  """Return ``function`` unchanged when ``raise_error == True``, otherwise ``None``.

  ``function`` is never called here; it is handed back as-is. A call such as
  ``test(some_call(...), raise_error=...)`` therefore evaluates ``some_call``
  before ``test`` runs, whatever ``raise_error`` is.

  When ``function`` is ``None`` or ``raise_error`` does not equal ``True`` the
  result is ``None``, which cannot be tuple-unpacked by the caller.
  """
  if function is None:
    return None
  if raise_error == True:
    return function
  return None

## Exceptions

### Fixed values

In [4]:
# Flag forwarded to test() by the exception cells below; test() hands back its
# first argument only when this flag equals True, otherwise it returns None.
raise_error = False

In [None]:
# Exceptions (limits = None): both limit_min and limit_max are None
image_trans, image_meta = test(
    winsorize.winsorize_absolute_values(
        in_data=image_small,
        limit_min=None,
        limit_max=None,
        replace_lower=3,
        replace_upper=6,
        nan_value=None,
        nan_value_ignore=False,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (limit_min > limit_max): 6 > 3
image_trans, image_meta = test(
    winsorize.winsorize_absolute_values(
        in_data=image_small,
        limit_min=6,
        limit_max=3,
        replace_lower=3,
        replace_upper=6,
        nan_value=None,
        nan_value_ignore=False,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (bands not a list): bands is passed as a plain int
image_trans, image_meta = test(
    winsorize.winsorize_absolute_values(
        in_data=image_small,
        bands=1,
        limit_min=None,
        limit_max=None,
        replace_lower=3,
        replace_upper=6,
        nan_value=None,
        nan_value_ignore=False,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (band number not int): the band list holds a float
image_trans, image_meta = test(
    winsorize.winsorize_absolute_values(
        in_data=image_small,
        bands=[1.6],
        limit_min=None,
        limit_max=None,
        replace_lower=3,
        replace_upper=6,
        nan_value=None,
        nan_value_ignore=False,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (wrong band number): band 8 is requested from the single band raster
image_trans, image_meta = test(
    winsorize.winsorize_absolute_values(
        in_data=image_small,
        bands=[1, 8],
        limit_min=None,
        limit_max=None,
        replace_lower=3,
        replace_upper=6,
        nan_value=None,
        nan_value_ignore=False,
    ),
    raise_error=raise_error,
)

### Percentile values

In [None]:
# Flag forwarded to test() by the percentile exception cells below; test() hands
# back its first argument only when this flag equals True, otherwise it returns None.
raise_error = False

In [None]:
# Exceptions (limits = None): both limit_min and limit_max are None
image_trans, image_dict, image_meta = test(
    winsorize.winsorize_percentile_values(
        in_data=image_small,
        limit_min=None,
        limit_max=None,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (limit_min > limit_max): 50 > 10
image_trans, image_dict, image_meta = test(
    winsorize.winsorize_percentile_values(
        in_data=image_small,
        limit_min=50,
        limit_max=10,
    ),
    raise_error=raise_error,
)

In [9]:
# Exceptions (limits in wrong range: 0-100): limit_min is negative
image_trans, image_dict, image_meta = test(
    winsorize.winsorize_percentile_values(
        in_data=image_small,
        limit_min=-5,
        limit_max=15,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (limits in wrong range: 0-100)
# NOTE(review): 80 and 40 each lie within 0-100; presumably it is their
# combination that is rejected — confirm against winsorize_percentile_values.
image_trans, image_dict, image_meta = test(
    winsorize.winsorize_percentile_values(
        in_data=image_small,
        limit_min=80,
        limit_max=40,
    ),
    raise_error=raise_error,
)

In [None]:
# Exceptions (limits = 0): limit_min is None and limit_max is 0
image_trans, image_dict, image_meta = test(
    winsorize.winsorize_percentile_values(
        in_data=image_small,
        limit_min=None,
        limit_max=0,
    ),
    raise_error=raise_error,
)

## No-Data-Values

1. Ignore the no-data value, so that it is processed like a regular value (and replaced by the given value)
2. Take the no-data value from the input metadata (no-data pixels are not replaced)
3. Overwrite the metadata no-data value with a user-defined value

In [None]:
# Ignore nan-value
# NOTE(review): this cell calls `winsorize_absolute_values_raster` with
# `nan_value_handling`, while the other cells call `winsorize_absolute_values`
# with `nan_value` / `nan_value_ignore` — confirm which API is current.
image_trans, image_meta = winsorize.winsorize_absolute_values_raster(
    in_data=image_small_nan,
    limit_min=4,
    limit_max=5,
    replace_lower=4,
    replace_upper=5,
    nan_value_handling="blob",
)
print(f"no-data value (metadata): ", image_small_nan.nodata, "\n")
# Statistics are reported twice: plain reductions first, NaN-aware reductions second.
trans_winsorize_basic_stats(image_trans)
trans_winsorize_basic_stats_nan(image_trans)

# Works since stats are lower than those with nan_value_ignore=False (-999.0 was set to 4)

In [7]:
# Display the transformed array produced by the most recently executed winsorize call
image_trans

array([[[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ...,  5.,  5.,  5.],
        [nan, nan, nan, ...,  5.,  5.,  5.],
        [nan, nan, nan, ...,  5.,  5.,  5.]]], dtype=float32)

In [6]:
# No-data value is read from the raster metadata (nan_value=None)
image_trans, image_meta = winsorize.winsorize_absolute_values(
    in_data=image_small_nan,
    limit_min=4,
    limit_max=6,
    replace_lower=4,
    replace_upper=5,
    nan_value=None,
)
print(f"no-data valu (metadata): ", image_small_nan.nodata, "\n")
trans_winsorize_basic_stats(image_trans)

# Works, since the function returns an array with nodata-values on nodata-value locations

no-data valu (metadata):  -999.0 

number of bands: 1
-------------------
min band 1: -999.0
max band 1: 5.995999813079834
mean band 1: -471.1231994628906
std band 1: 501.486083984375



In [7]:
# No-data value supplied by the user (8.76), overwriting the existing metadata value
image_trans, image_meta = winsorize.winsorize_absolute_values(
    in_data=image_small_nan,
    limit_min=4,
    limit_max=6,
    replace_lower=4,
    replace_upper=5,
    nan_value=8.76,
)
print(f"no-data value (metadata): ", image_small_nan.nodata, "\n")
trans_winsorize_basic_stats_nan(image_trans)

# Works, since the function returns an array with transformed values on nodata locations from metadata value.
# Checked without overwriting the np.nan value, so that the array contains np.nan instead of 8.76.

no-data value (metadata):  -999.0 

number of bands: 1
-------------------
min band 1: 4.0
max band 1: 8.760000228881836
mean band 1: 4.684638500213623
std band 1: 0.7096104025840759



## Fixed values

In [8]:
# Single-band
image_trans, image_meta = winsorize.winsorize_absolute_values(
    in_data=image_small,
    limit_min=4,
    limit_max=5,
    replace_lower=4,
    replace_upper=5,
)
trans_winsorize_basic_stats(image_trans)

number of bands: 1
-------------------
min band 1: 4.0
max band 1: 5.0
mean band 1: 4.616246118012422
std band 1: 0.46237553972553846



In [9]:
# Multi-band (all)
# Uses the 4-band raster opened as `image_small_mb4` in the data cell. The name
# `image_mb` used here before is not assigned in the data cell, so the cell
# failed with a NameError on a fresh kernel.
image_trans, image_meta = winsorize.winsorize_absolute_values(
    in_data=image_small_mb4,
    limit_min=5,
    limit_max=6,
    replace_lower=3,
    replace_upper=6,
)
trans_winsorize_basic_stats(image_trans)

number of bands: 4
-------------------
min band 1: 3.0
max band 1: 6.0
mean band 1: 3.730537267080745
std band 1: 1.2343139946670947

min band 2: 3.0
max band 2: 6.0
mean band 2: 3.502313664596273
std band 2: 1.1045719208465312

min band 3: 3.0
max band 3: 6.0
mean band 3: 3.398190605590062
std band 3: 0.9736643749205292

min band 4: 3.0
max band 4: 5.67
mean band 4: 3.0420566770186332
std band 4: 0.30286958822395055



In [10]:
# Multi-band (selection)
# Uses the 4-band raster opened as `image_small_mb4` in the data cell. The name
# `image_mb` used here before is not assigned in the data cell, so the cell
# failed with a NameError on a fresh kernel.
image_trans, image_meta = winsorize.winsorize_absolute_values(
    in_data=image_small_mb4,
    bands=[1, 3],
    limit_min=5,
    limit_max=6,
    replace_lower=3,
    replace_upper=6,
)
trans_winsorize_basic_stats(image_trans)

number of bands: 2
-------------------
min band 1: 3.0
max band 1: 6.0
mean band 1: 3.730537267080745
std band 1: 1.2343139946670947

min band 2: 3.0
max band 2: 6.0
mean band 2: 3.398190605590062
std band 2: 0.9736643749205292



## Percentile values

# Linear

# Sigmoid