In [None]:
#Imports
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import xarray as xr
import folium
import numpy as np
import pandas as pd
import datetime as dt

from eodag import EODataAccessGateway
from eodag import setup_logging

from rasterio.crs import CRS
from rioxarray.merge import merge_arrays

import eotools.shortcut as eoshort
import eotools.loading as eoload
import eotools.geometry as eogeom

import geopandas as gpd
from sklearn import svm, tree
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import matplotlib.colors as colors


# eodag logging verbosity levels:
#   0 - no logging and no progress bar
#   1 - no logging but progress bars displayed
#   2 - log at the INFO level
#   3 - log at the DEBUG level (even more information)
EODAG_VERBOSITY = 0  # keep the notebook output quiet

setup_logging(verbose=EODAG_VERBOSITY)


# EODAG - Classify

EODAG (Earth Observation Data Access Gateway) is a command line tool and a Python package for searching and downloading remotely sensed images while offering a unified API for data access regardless of the data provider.

EODAG gives you an easy way to access products from more than 10 providers, with more than 50 different product types (Sentinel 1, Sentinel 2, Sentinel 3, Landsat, etc.) that can be searched and downloaded.

## Step 1
### Configuration
In the configuration step we pass the username and password for the Copernicus Data Space Ecosystem (CDSE) to EODAG and define the path for the downloads.

In [None]:
# Get the credentials from the .env file and create the directories listed in paths.yml.
# The eoshort functions are only shortcuts around a helper script; they keep the
# notebook short and simple.
secrets, workspace = eoshort.read_paths(path='paths.yml')

# Create the gateway and configure it with the credentials and workspace in one step.
dag = eoshort.configure(
    dag=EODataAccessGateway(),
    secrets=secrets,
    paths=workspace,
)

# Restore the previously serialized search results.
deserialized_search_results = eoshort.deserialize(
    filepath="search_results.geojson",
    workspace=workspace,
    dag=dag,
    log=True,
)

In [None]:
# Plot quicklooks of the search results.
# Gives a visual overview of the deserialized search results before one of them
# is picked for download further below.
eoshort.plot_quicklooks(deserialized_search_results)

## Step 3
### Download 
Now either a single product or multiple products from the search will be downloaded. A product that has already been downloaded is not downloaded again, provided it is stored in the configured workspace.

In [None]:
# Download a single product: pick one entry of the search results by position.
PRODUCT_INDEX = 1  # second entry of the deserialized search results
product = deserialized_search_results[PRODUCT_INDEX]
path = dag.download(product)

In [None]:
# Bounding box of the area of interest inside the tile (latitude / longitude).
lonmin, lonmax = 16.1, 16.6
latmin, latmax = 48.1, 48.35
extent = dict(lonmin=lonmin, latmin=latmin, lonmax=lonmax, latmax=latmax)

# Folium map centred on the middle of the bounding box.
map_center = (np.mean([latmin, latmax]), np.mean([lonmin, lonmax]))
fmap = folium.Map(location=map_center, zoom_start=9)

# Area of interest drawn as a red rectangle.
aoi_rectangle = folium.Rectangle(bounds=[[latmin, lonmin], [latmax, lonmax]], color="red")
aoi_rectangle.add_to(fmap)

# Footprints of the search results. SearchResult has a __geo_interface__ that
# folium uses to get its GeoJSON representation. A single product does not work
# here, so always pass a slice (e.g. [2:3] rather than [2]).
footprints = folium.GeoJson(
    data=deserialized_search_results[:],
    tooltip=folium.GeoJsonTooltip(fields=["title"]),
)
footprints.add_to(fmap)

fmap

In [None]:
# Common parameters for all further image processing.
common_params = {
    # The downloaded images are in EPSG:4326 already, so they are not reprojected.
    'crs': CRS.from_epsg(4326),
    # Lower the resolution instead; per the original note, 0.0006 (in CRS units)
    # should correspond to roughly 60 m.
    'resolution': 0.0006,
    # Zoom over / crop to the area of interest: (lonmin, latmin, lonmax, latmax).
    'extent': (lonmin, latmin, lonmax, latmax),
}

## Step 4 
### Post Process

#### Loading Bands as Dataset

The Level-2A processing includes a Scene Classification and an Atmospheric Correction applied to Top-Of-Atmosphere (TOA) Level-1C orthoimage products. Level-2A main output is an orthoimage atmospherically corrected, Surface Reflectance product.

Please be aware that "Surface Reflectance (SR)" is a new term that has been introduced to replace the former one: "Bottom of Atmosphere (BOA) reflectance."

Additional outputs are an Aerosol Optical Thickness (AOT) map, a Water Vapour (WV) map and a Scene Classification (SCL) map together with Quality Indicators (QI) for cloud and snow probabilities at 60 m resolution. Level-2A output image products are resampled and generated with an equal spatial resolution for all bands (10 m, 20 m or 60 m). Standard distributed products contain the envelope of all resolutions in three distinct folders:


- 10 m: containing spectral bands 2, 3, 4, 8, a True Colour Image (TCI), and AOT and WVP maps resampled from 20 m.

- 20 m: containing spectral bands 1 - 7, the bands 8A, 11 and 12, a True Colour Image (TCI), a Scene Classification (SCL) map and an AOT and WVP map. The band B8 is omitted as B8A provides more precise spectral information.

- 60 m: containing all components of the 20 m product resampled to 60 m and additionally the bands 1 and 9, a True Colour Image (TCI), a Scene Classification (SCL) map and an AOT and WVP map. The cirrus band 10 is omitted, as it does not contain surface information.

In [None]:
# Get a list of all available bands (assets) of the downloaded product.
# Requested here: res=10 (presumably the 10 m resolution — TODO confirm against eoload),
# spectral bands only, without the True Colour Image (TCI).
assets = eoload.load_assets(path, res=10, only_spectral=True, include_tci=False)
assets

In [None]:
# Loading multiple bands into one dataset: every asset found above is loaded
# for the downloaded product. The bare `ds` at the end displays the result.
ds = eoload.load_single_product(product=product, bands=assets)
ds

In [None]:
# Select the acquisition closest to 2023-04-22; method='nearest' means the exact
# timestamp of the product does not have to be known.
single_img = ds.sel(time=dt.datetime(2023, 4, 22), method='nearest')

## Classify

### Preparing the data

In [None]:
# GeoJSON files passed below as the feature (forest) and non-feature (non-forest)
# inputs of the classification preprocessing. The paths are relative, so they
# depend on the directory the notebook is run from.
path_forest = '../data/shapefiles/forest.geojson'
path_nonforest = '../data/shapefiles/nonforest.geojson'

In [None]:
# Build the training and test sets from the dataset and the two GeoJSON files.
# NOTE(review): the split ratio, the random seed and the feature (band) order are
# decided inside eogeom.preprocess_data_to_classify and are not visible here —
# confirm them before comparing results between runs.
X_train, X_test, y_train, y_test = eogeom.preprocess_data_to_classify(ds=ds, feature_path=path_forest, nonfeature_path=path_nonforest)

In [None]:
# Naive Bayes: train on the training set, then predict the held-out test set.
nb = GaussianNB()
nb.fit(X_train, y_train)

# fit() returns the estimator itself, so nb_test is only an alias of nb
# (kept because the name already exists in this notebook).
nb_test = nb

nb_predict = nb.predict(X_test)


In [None]:
# Text report of the naive Bayes predictions compared with the test labels.
nb_report = classification_report(y_test, nb_predict)
print(f"NAIVE BAYES: \n {nb_report}")

In [None]:
# Confusion matrix of the naive Bayes predictions compared with the test labels.
nb_confusion = confusion_matrix(y_test, nb_predict)
print("NAIVE BAYES: \n ", nb_confusion, "\n")


### Applying a classifier to an image

In [None]:
# Collect the arrays of the four bands of the selected image and stack them
# along a new third axis, so that every pixel carries one value per band.
# NOTE(review): the band order presumably has to match the feature order used
# for training — confirm against eogeom.preprocess_data_to_classify.
band_names = ['B04', 'B03', 'B02', 'B08']
bands = [single_img[band_name].values for band_name in band_names]

image_data = np.stack(bands, axis=2)

In [None]:
# Flatten the stacked image into a (pixels, bands) table for the classifier.
n_rows, n_cols = single_img.sizes['y'], single_img.sizes['x']
num_of_pixels = n_cols * n_rows
num_of_bands = len(bands)
X_image_data = image_data.reshape(num_of_pixels, num_of_bands)

# Classify every pixel and fold the flat prediction back into the (y, x) image grid.
nb_predict_img = nb.predict(X_image_data).reshape(n_rows, n_cols)

In [None]:
# Two-colour map for the prediction: first entry fully transparent, second entry green.
cmap = colors.ListedColormap([(1, 0, 0, 0), 'g'])

# Show the classified image without axes.
fig, ax = plt.subplots(figsize=(12, 3))
ax.imshow(nb_predict_img, cmap=cmap)
ax.set(title="naive Bayes")
ax.axis('off')
plt.show()

In [None]:
# Add the classification array to the dataset as the new variable 'NB-forest'.
# DataArray.copy(data=...) creates a new array that reuses the dimensions,
# coordinates and attributes of 'B02' but holds the prediction values. Unlike
# assigning 'B02' under a new name and overwriting its .values afterwards, this
# never aliases the original band, so 'B02' cannot be modified by accident.
# The prediction image is 2-D (y, x); wrapping it in a list adds the leading
# axis of length 1 so that the shape matches 'B02'.
# NOTE(review): this assumes ds holds exactly one time step — confirm.
ds['NB-forest'] = ds['B02'].copy(data=np.array([nb_predict_img]))
ds

In [None]:
# Show the naive Bayes forest mask of the acquisition closest to 2023-04-22.
nearest_scene = ds.sel(time=dt.datetime(2023, 4, 22), method='nearest')
nearest_scene['NB-forest'].plot.imshow()

In [None]:
# Load the True Colour Image and keep only the acquisition closest to 2023-04-22.
tci = (
    eoload.load_single_product(product=product, bands=['TCI'])
    .sel(time=dt.datetime(2023, 4, 22), method='nearest')
)

In [None]:
# Plot the forest mask on top of the True Colour Image.
# Colour map of the mask: first entry fully transparent, second entry the colour 'C0'.
cmap = colors.ListedColormap([(1, 0, 0, 0), 'C0'])
forest_mask = ds.sel(time=dt.datetime(2023, 4, 22), method='nearest')['NB-forest']

fig, ax = plt.subplots(figsize=(8, 8))
# Background layer: the True Colour Image.
tci['TCI'].plot.imshow(ax=ax, zorder=0)
# Overlay layer: the forest mask, drawn above the background.
forest_mask.plot.imshow(ax=ax, zorder=1, cmap=cmap, alpha=1)
plt.show()

### NDVI

NDVI and many other indices rely on the normalized difference, which is implemented by the function below.

In [None]:
def normalized_difference(a, b):
    """Return the normalized difference ``(a - b) / (a + b)``.

    Both operands are promoted to floating point before any arithmetic is
    done. Promoting only the numerator is not enough: with integer inputs
    (for example unsigned integer raster bands) the sum ``a + b`` would
    still be computed in the integer type and could silently wrap around.

    Parameters
    ----------
    a, b : scalar or array-like supporting arithmetic
        The two operands, e.g. two bands of the same image. Anything that
        supports ``*``, ``-``, ``+`` and ``/`` with broadcasting works.

    Returns
    -------
    Same kind of object as the inputs, with floating-point values.

    Notes
    -----
    Where ``a + b`` is zero the division is undefined. Array inputs then
    yield ``nan``/``inf`` (NumPy emits a RuntimeWarning); plain Python
    numbers raise ``ZeroDivisionError``.
    """
    # Multiplying by 1.0 converts integers to float and works alike for Python
    # numbers, NumPy arrays and any other object that supports arithmetic.
    a = a * 1.0
    b = b * 1.0
    return (a - b) / (a + b)

To get the NDVI we need to calculate the normalized difference between the near-infrared (B08) and the red (B04) band: $\mathrm{NDVI} = \frac{B08 - B04}{B08 + B04}$.

In [None]:
# Calculating the NDVI and adding it to the dataset
ndvi = normalized_difference(single_img['B08'], single_img['B04'])
single_img['NDVI'] = ndvi

We can plot the result:

In [None]:
# Plot the NDVI stored in the selected image, using the 'RdYlGn' colour map.
single_img['NDVI'].plot.imshow(cmap='RdYlGn')

NDVI does a good job separating vegetation from non-vegetation, but it cannot separate forest from vegetated cropland or grassland.