In [None]:
# Imports 
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import xarray as xr
import folium
import numpy as np
import pandas as pd
import datetime as dt

from eodag import EODataAccessGateway
from eodag import setup_logging

from rasterio.crs import CRS
from rioxarray.merge import merge_arrays

from eotools.shortcut import prepare, configure, deserialize

# Setup Verbose Values:
# 0: no logging and no progress bar
# 1: no logging but progress bars displayed
# 2: log at the INFO level
# 3: log at the DEBUG level (even more information)

setup_logging(verbose=0)

# Image Processing
This Notebook intends to show some examples of image processing.

## Data Retrieval/Acquisition
The first lines of code, which essentially load the data, are not explained here, since notebooks 01 and 02 already cover that.

In [None]:
# Get Credentials from .env file and make dirs from paths.yml
# These functions are just shortcuts from a python script and just help to keep the notebooks short and simple
# (`prepare`, `configure` and `deserialize` are imported from `eotools.shortcut` in the first cell).
secrets, ws_paths = prepare(log=False)
dag = configure(secrets=secrets, paths=ws_paths)
# Restore the search results stored in "search_results.geojson"
# (presumably written by an earlier notebook -- TODO confirm).
deserialized_search_results = deserialize(filepath="search_results.geojson", ws_path=ws_paths, dag=dag, log=True)

# Download multiple Products
# `paths` holds what `dag.download_all` returns; `paths[0]` is later used as a directory root by `get_assets`.
products = deserialized_search_results
paths = dag.download_all(products)

In [None]:
# Set the bounding box for the area of interest inside the tile
# (presumably degrees, since the data is handled in EPSG:4326 -- TODO confirm).
latmin, latmax = 48.1, 48.35
lonmin, lonmax = 16.1, 16.6
# NOTE(review): `extent` is not referenced by any other cell visible in this notebook;
# `common_params` builds its own (lonmin, latmin, lonmax, latmax) tuple from the scalars above.
extent = {'lonmin': lonmin, 'latmin': latmin, 'lonmax': lonmax, 'latmax': latmax}

In [None]:
# Common keyword arguments for all further image loading.
# They are forwarded unchanged to `product.get_data(...)` in `load_single_product`.
common_params = dict(
    crs=CRS.from_epsg(4326),               # the downloaded images are in EPSG:4326, don't reproject them
    resolution=0.0006,                     # but lower their resolution (per the original note, 0.0006 should correspond to roughly 60 m)
    extent=(lonmin,latmin,lonmax,latmax)   # and zoom over/crop the area of interest
)

# Define basic Functions for future operations
def normalized_difference(a, b):
    """Return the normalized difference ``(a - b) / (a + b)``.

    ``b`` is multiplied by ``1.`` in the numerator, so integer inputs are
    promoted to floating point before the division.

    Parameters
    ----------
    a, b : number or array-like supporting ``-``, ``+``, ``*`` and ``/``
        The two quantities (e.g. two bands) to compare.

    Returns
    -------
    Same kind as the inputs : the element-wise normalized difference.
    """
    numerator = a - b * 1.
    denominator = a + b
    return numerator / denominator

def normalize(a):
    """Min-max scale ``a`` so that its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    a : array-like exposing ``.min()`` and ``.max()``
        Values to rescale.

    Returns
    -------
    Same kind as ``a`` : ``(a - min) / (max - min)``, computed element-wise.
    """
    lowest = a.min()
    value_range = a.max() - lowest
    return (a - lowest) / value_range
    

In [None]:
# Get a list of all available Bands (assets)
def get_assets(root:str, res=60):
    """Collect the band identifiers of the JPEG2000 files found below ``root``.

    The directory tree is walked top-down. A file is kept when its name ends
    with ``_{res}m.jp2`` and starts with ``'T'``; the returned identifier is
    the third underscore-separated field of the file name.

    Parameters
    ----------
    root : str
        Directory to search recursively.
    res : int, optional
        Resolution used to build the file-name suffix (defaults to 60).

    Returns
    -------
    list of str
        One entry per matching file, in the order the files were encountered.
    """
    suffix = f"_{res}m.jp2"
    assets = []
    for _dirpath, _dirnames, filenames in os.walk(root, topdown=True):
        for filename in filenames:
            if filename.endswith(suffix) and filename.startswith('T'):
                assets.append(filename.split('_')[2])
    return assets
    
    
# List the band identifiers found for res=10 below the first downloaded path,
# then display them as the cell output.
assets = get_assets(paths[0], res=10)
assets

In [None]:
# Functions for loading data into datasets.

def load_single_product(product, bands:list):
    """Load the requested bands of one product into a single ``xr.Dataset``.

    For every band, ``product.get_data`` is called with the module-level
    ``common_params``, the result is squeezed, and a length-one ``time``
    dimension is added. The time coordinate is the calendar date parsed from
    ``product.properties['startTimeFromAscendingNode']``.

    Parameters
    ----------
    product : object exposing ``get_data(band=..., **kwargs)`` and ``properties``
        The product to read from (presumably an eodag product -- TODO confirm).
    bands : list
        Band names to load; each becomes one data variable of the result.

    Returns
    -------
    xr.Dataset
        One variable per band, each carrying a ``time`` dimension of length 1.
    """
    def _load_band(band_name):
        # Read the band and drop all length-one dimensions.
        band_data = product.get_data(band=band_name, **common_params).squeeze()

        # Only the date part of the acquisition start is kept as time coordinate.
        start_str = product.properties['startTimeFromAscendingNode']
        acquired = dt.datetime.strptime(start_str, '%Y-%m-%dT%H:%M:%S.%f%z')

        band_data = band_data.expand_dims(dim={'time': [acquired.date()]})
        band_data.name = band_name
        return band_data

    return xr.Dataset({band_name: _load_band(band_name) for band_name in bands})

def load_multiple_timestamps(products, bands:list):
    """Load the same bands from several products and merge them into one dataset.

    Each product is loaded with ``load_single_product`` and the resulting
    datasets are combined with ``xr.merge``.

    Parameters
    ----------
    products : iterable
        Products to load, one dataset per product.
    bands : list
        Band names to load from every product.

    Returns
    -------
    xr.Dataset
        The merged dataset returned by ``xr.merge``.
    """
    per_product = [load_single_product(product=item, bands=bands) for item in products]
    return xr.merge(per_product)

In [None]:
# Get Products from same Tile:
# keep the products whose title has 'T33UWP' as its 6th underscore-separated field and
# sort them by the 3rd field of the title (presumably the sensing timestamp -- TODO confirm).
tile_33UWP = sorted([p for p in products if 'T33UWP' == p.properties['title'].split('_')[5]], key=lambda p: p.properties["title"].split("_")[2])


In [None]:
# Loading multiple Bands into a dataset with multiple Timestamps
# (one data variable per band, one `time` entry per product of the tile).
ds = load_multiple_timestamps(products=tile_33UWP, bands=['B04', 'B03', 'B02', 'B08', 'TCI'])
ds

## Image Processing

### Stacking

In order to display an array, the array needs to have either one or three channels. If there is one color channel the image or array will be displayed as a *black and white* (greyscale) image, but any colormap can be chosen. If the array has three colors each "layer" will be interpreted as one of the RGB colors.

In this example a single image from the dataset is chosen.

In [None]:
# Select the single timestamp of the dataset that lies nearest to 2024-05-01.
# NOTE(review): `load_single_product` stores `datetime.date` objects as time coordinate while a
# `datetime.datetime` is passed here -- confirm that the nearest lookup behaves as intended.
single_image = ds.sel(time=dt.datetime(2024, 5, 1), method='nearest')

Then the individual bands are extracted from the dataset and stacked to create an array with shape (x, y, 3). Note that, when plotting, the first array is interpreted as red, the second as green and the third as blue. It might seem natural to stack the bands as `[b02, b03, b04]`, but this leads to wrong color interpretations. The `axis` argument defines where the bands are stacked. If the axis is not set to 2, the output array will have the shape (3, x, y) or (x, 3, y), neither of which will be interpreted properly.

In [None]:
# Extract the three visible bands of the selected timestamp as plain numpy arrays.
b02 = single_image['B02'].values # blue values
b03 = single_image['B03'].values # green values
b04 = single_image['B04'].values # red values

# Stack in red, green, blue order along a new last axis -> shape (..., 3).
rgb_raw = np.stack([b04, b03, b02], axis=2)

Now the image can be plotted using ``matplotlib``. Here the aspect ratio is set to 2, as the longitude (lon) coordinates range from -180 to 180 but the latitude (lat) coordinates just from -90 to 90. Therefore 1° in lon would be similar in length to 2° lat.

In [None]:
# Plot the unprocessed RGB stack; no contrast enhancement has been applied yet.
fig, ax = plt.subplots(figsize=(8,8))
ax.imshow(rgb_raw, aspect=2)  # aspect=2: each pixel is drawn twice as high as wide
ax.set_title("rgb raw")
ax.set_axis_off()

As you can clearly see the contrast of the image is very low, which makes it appear very dark. Therefore it is helpful to have a look at the Histograms of the image, to properly get a higher contrast.

### Histogram

In [None]:
# Function for plotting an Histogram

def histogram(arr, nbins=300, alpha=0.5, figsize=(5,5), title='Histogram'):
    """Plot overlaid intensity histograms of the three channels of an image.

    All three channels share the same bin edges, which span the NaN-ignoring
    minimum and maximum of the whole array.

    Parameters
    ----------
    arr : np.array(rows, cols, bands)
        Image array; channels 0, 1 and 2 are drawn in red, green and blue.
    nbins : int, optional
        Number of histogram bins (defaults to 300).
    alpha : float, optional
        Opacity of the bars, so that overlapping channels stay visible.
    figsize : tuple, optional
        Size of the created figure.
    title : str, optional
        Title of the plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # nbins bins are delimited by nbins + 1 edges; passing only nbins edges
    # would silently draw one bin fewer than requested.
    mbins = np.linspace(np.nanmin(arr), np.nanmax(arr), nbins + 1)

    # Plot
    fig, ax = plt.subplots(figsize=figsize)
    for channel, colour in enumerate(('red', 'green', 'blue')):
        ax.hist(arr[:, :, channel].ravel(), color=colour, bins=mbins, alpha=alpha)

    ax.set_title(title)
    ax.set_xlabel('I')
    ax.set_ylabel('n')
    plt.show()


In [None]:
# Coarse histogram (nbins=6) of the raw RGB stack to inspect its intensity distribution.
histogram(rgb_raw, nbins=6)

### Contrast

To fix this problem we can increase the contrast of the image. To do this we implement the following functions, which will be needed for processing your image. For more details on how these functions work and why we need them to increase contrast and correctly encode the data, take a look at https://www.cg.tuwien.ac.at/courses/EinfVisComp/Skriptum/SS13/EVC-11%20Point%20Operations.pdf for a great overview. In your own Code it might be helpful, if you put those functions in a Python script and import the script into your code.

In [None]:
def auto_clip(I, percentile=0.02, pooled=True):
    """
    Clip an image to the range spanned by a lower and an upper quantile.

    The boundaries are the ``percentile`` and ``1 - percentile`` quantiles of I
    (NaN values are ignored); the actual clipping is delegated to ``clip``.

    Modifies I

    Parameters
    ----------
    I : np.array(rows, cols, bands)
        Image array.
    percentile : float, optional
        Fraction in [0, 1] that is passed to ``np.nanquantile`` and defines the
        clipping boundaries of I in terms of its distribution (defaults to 0.02).
    pooled : bool, optional
        If True (default), one pair of boundaries is computed over all bands.
        Use this to keep the relative intensities of the bands for natural
        looking images.
        If False, the boundaries are computed for each band individually.
        Use this - in conjunction with stretch - to bring the different bands
        into a comparable range, e.g. for false colour images.

    Returns
    -------
    np.array :
        Auto-clipped image data, as returned by ``clip``.

    """
    upper_fraction = 1 - percentile

    if not pooled:
        # One pair of boundaries per band: flatten rows/cols, keep the band axis.
        pixels = I.reshape(-1, I.shape[-1])
        lower_bound = np.nanquantile(pixels, percentile, axis=0)
        upper_bound = np.nanquantile(pixels, upper_fraction, axis=0)
    else:
        # A single pair of boundaries shared by all bands.
        lower_bound = np.nanquantile(I, percentile)
        upper_bound = np.nanquantile(I, upper_fraction)

    return clip(I, lower_bound, upper_bound)

def clip(I, v_min, v_max):
    """
    Performs clipping (German: "Histogrammbegrenzung").
    Sets all values in I that are outside of [v_min, v_max] to the corresponding boundary.
    NaN values are left untouched.

    Modifies I

    Parameters
    ----------
    I : np.array
        Image array with the bands on the last axis.
    v_min : scalar or array
        Lower clipping boundary, either one value for all bands or one value per band.
    v_max : scalar or array
        Upper clipping boundary, either one value for all bands or one value per band.

    Returns
    -------
    np.array :
        Clipped image data (the same object as I).

    """
    lower = np.asarray(v_min)
    upper = np.asarray(v_max)

    # Write through I itself instead of through I.reshape(...): for a
    # non-contiguous I (e.g. a cropped view) reshape returns a copy, and the
    # clipped values would be silently discarded.
    # Per-band boundaries broadcast along the last axis; casting='unsafe'
    # keeps the behaviour of plain item assignment for integer images.
    np.copyto(I, lower, where=I < lower, casting='unsafe')
    np.copyto(I, upper, where=I > upper, casting='unsafe')

    return I
            
def stretch(I, p_min, p_max, pooled=True):
    """
    Performs histogram stretching or normalisation (German: "Spreizung").
    Computes and applies an affine transformation of values in I to the range [p_min, p_max].
    For floating point images to be displayed with pylab.imshow(), p_min=0, p_max=1
    should be chosen.

    Modifies I

    Parameters
    ----------
    I : np.array
        Image array with the bands on the last axis.
    p_min : number
        Lower boundary of the output range.
    p_max : number
        Upper boundary of the output range.
    pooled : bool, optional
        If True (default), the transformation is computed for and applied to
        all bands simultaneously.
        If False, it is computed for and applied to each band separately.

    Returns
    -------
    np.array :
        Normalised image data within the range [p_min, p_max] (the same object as I).

    """
    if pooled:
        q_min = np.nanmin(I)
        q_max = np.nanmax(I)
    else:
        # Flatten rows/cols and keep the band axis. This may be a copy, which
        # is fine because it is only read to obtain the per-band extrema.
        pixels = I.reshape(-1, I.shape[-1])
        q_min = np.nanmin(pixels, axis=0)
        q_max = np.nanmax(pixels, axis=0)

    # Assign through I itself instead of through I.reshape(...): for a
    # non-contiguous I (e.g. a cropped view) reshape returns a copy, and the
    # stretched values would be silently discarded.
    I[...] = (p_max - p_min) * (I - q_min) / (q_max - q_min) + p_min
    return I

First we can use the `auto_clip()` or `clip()` function to set values of intensity which are outside of a boundary to the boundary border: 

In [None]:
# Clip a copy of the raw image at its pooled 5 % / 95 % quantiles; the copy keeps `rgb_raw` unchanged.
rgb_clipped = auto_clip(rgb_raw.copy(), percentile=0.05, pooled = True)

We can create a histogram with the clipped data:

In [None]:
# Same coarse histogram as before, now for the clipped image.
histogram(rgb_clipped, nbins=6)

You can see that the histogram now has clear borders, and all the values below/above these borders have been set to the border values. You can play around with clipping fewer/more values by changing the `percentile` value in the `auto_clip()` function. The `pooled` parameter determines whether the bands get clipped individually (`pooled=False`) or as a group (`pooled=True`).

As the intensity of the values is still too low after clipping we have to stretch the values to spread between 0 and 1. We can do this by using the `stretch()` function. 

In [None]:
# Stretch a copy of the clipped image to the range [0, 1], using one transformation for all bands.
rgb_clipped_stretched = stretch(rgb_clipped.copy(), 0, 1, pooled = True)

In [None]:
# Histogram of the clipped and stretched image; the values should now span [0, 1].
histogram(rgb_clipped_stretched, nbins=6)

In [None]:
# Show the contrast-enhanced image.
# NOTE(review): unlike the raw plot above, no `aspect=2` is passed here -- confirm this is intended.
fig, ax = plt.subplots()
ax.imshow(rgb_clipped_stretched)
ax.set_title("rgb clipped stretched")
ax.set_axis_off()

If the final image doesn't look natural or still seems too dark, try playing around with the percentile value or set the intensity bounds manually with the `clip()` function instead of `auto_clip()`. If an individual band has a higher intensity than the others, try setting `pooled=False` when clipping and stretching.

You can also use `np.log()` on the raw rgb image before clipping and stretching it to increase contrast.

You can see what happens when we play around with the `percentile` parameter in the `auto_clip` function or use the `np.log` function below:

In [None]:
# Log-transform the raw image, then clip at the pooled 5 % / 95 % quantiles and stretch to [0, 1].
# `np.log` returns a new array, so `rgb_raw` itself is not modified by the in-place clip/stretch.
# NOTE(review): zero-valued pixels become -inf under the log -- confirm they are handled as intended.
rgb_clipped_stretched_log = stretch(auto_clip(np.log(rgb_raw.copy()), percentile=0.05),0,1)

### False Color Image

As we want to analyze our picture regarding forested areas, we can make this easier using a false color image. Vegetation reflects light at much higher intensities in the near-infrared (NIR) part of the spectrum. To visualize this, we can replace the red band in the RGB image with the NIR band (band 8).

In [None]:
# Loading the different bands for the false color image
b02 = single_image['B02'].values # blue values
b03 = single_image['B03'].values # green values
b08 = single_image['B08'].values # NIR values

# Stacking the bands into a numpy array that can be interpreted as an RGB image,
# with NIR taking the place of the red channel.
fc_img = np.stack([b08, b03, b02], axis=2)
# Min-max scale the whole stack with one common minimum and maximum.
fc_img = normalize(fc_img)

In [None]:
# Show the normalised false color image before any contrast enhancement.
fig, ax = plt.subplots()
ax.imshow(fc_img)
plt.show()

In [None]:
# Histogram of the false color image; NIR is drawn in red because it occupies channel 0.
histogram(fc_img)

We can see that the NIR values are slightly higher than those of the green and blue bands. So this time we will set the `pooled` parameter to `False` when increasing the contrast. For comparison, you can also see the result when the `pooled` parameter is set to `True`.

In [None]:
# Per-band contrast enhancement: log transform, clip at each band's own quantiles
# (default percentile of `auto_clip`), then stretch each band to [0, 1] separately.
# NOTE(review): `fc_img` was min-max normalised, so its minimum is presumably exactly 0 and
# becomes -inf under the log -- confirm the clipping removes it as intended.
fc_img_clipped_stretched_false = stretch(auto_clip(np.log(fc_img.copy()), pooled = False), 0, 1, pooled = False)

# Pooled variant for comparison: one pair of quantiles (0.5 % / 99.5 %) and one stretch for all bands.
fc_img_clipped_stretched_true = stretch(auto_clip(np.log(fc_img.copy()), percentile=0.005, pooled = True), 0, 1, pooled = True)

In [None]:
# Compare the histograms of the per-band (pooled=False) and the pooled (pooled=True) result.
histogram(fc_img_clipped_stretched_false)
histogram(fc_img_clipped_stretched_true)

## Cloud Filtering

Now we want to generate an image which has no cloud coverage. To do this we need several images of the same area taken at different times. We then take the median value of each pixel over time. With this method the clouds should be gone. There might still be errors if the selected images all have a very high cloud coverage.

In [None]:
# Per-pixel median over the `time` dimension of all loaded timestamps.
cloudfree_img = ds.median(dim="time")

In [None]:
# Pull the blue, green and red bands out of the temporal median computed in the previous cell.
b02 = cloudfree_img['B02'].values
b03 = cloudfree_img['B03'].values
b04 = cloudfree_img['B04'].values

# Stack in red, green, blue order, then log-transform, clip (default quantiles) and stretch to [0, 1].
cloudfree_img_raw = np.stack([b04, b03, b02], axis=2)
# NOTE(review): this rebinds `cloudfree_img` from the median dataset to a numpy array, so this
# cell cannot be re-run without first re-running the median cell.
cloudfree_img = stretch(auto_clip(np.log(cloudfree_img_raw)),0,1)

fig, ax = plt.subplots(figsize=(8,8))
ax.imshow(cloudfree_img)
ax.set_title("Cloudfree image")
ax.set_axis_off()