# Data Prep

Preprocess and unify all data.

Before training an ML model, we have to preprocess our data. In this case, Sentinel-2 Level-2A imagery is used to generate composites by maximum NDVI across a period of two months. The resulting composites are augmented with indices such as NDVI, and all timesteps are reduced into a single raster by deriving statistical parameters such as mean and variance.

The DEM image uploaded beforehand is downsampled to the same resolution as the Sentinel-2 composites by calculating various texture measures.

Then the resulting Sentinel-2 derived raster and DEM derived raster are stacked and a dimensionality reduction is performed. The reduced image can then be used for further processing.

## Define Parameters

In [None]:
# Earth Engine API: import the client library and initialize the session
import ee
ee.Initialize()

# ---- Processing constants ----
# Dataset source (Earth Engine collection ID)
SOURCE = 'COPERNICUS/S2_SR'
# Start date of the time window (inclusive)
START = '2017-07-01'
# End date of the time window (exclusive)
FINISH = '2017-08-31'
# Region of interest, given in EPSG:4326
REGION = ee.Geometry.Rectangle([12.6545, 47.9291, 12.6762, 47.9423])

# ---- Export constants ----
# Name of the exported raster
FILENAME = 'NDVI_composite'
# Name of the export folder
FOLDER = 'Google Earth Engine'
# Pixel size in meters
SCALE = 10
# Coordinate reference system of the exported raster
CRS = 'EPSG:32632'
# Maximum number of pixels allowed when exporting
MAX_PIXELS = 1e7

# ---- Map constants ----
# Visualization parameters for displaying the image on a map
VIS_PARAMS = {
    'bands': ['B4', 'B3', 'B2'],
    'min': 0,
    'max': 2000,
    'gamma': 1,
}
# The map layer is named after the exported raster
LAYER_NAME = FILENAME

# Define quality measure
QUALITY = 'NDVI'  # Quality measure name

def addQuality(image):
    """Return `image` with an additional NDVI band named QUALITY.

    NDVI is the normalized difference (NIR - red) / (NIR + red). For
    Sentinel-2 the NIR band is B8 and the red band is B4 (B5 is a red-edge
    band; the B5/B4 pairing is the Landsat 8 convention).

    Args:
        image: The image to augment; it must provide bands 'B8' and 'B4'.

    Returns:
        The input image with the QUALITY band appended.
    """
    quality_band = image.normalizedDifference(['B8', 'B4']).rename([QUALITY])
    return image.addBands(quality_band)

# Show Sentinel-2 (Level-2A) imagery

In [None]:
from IPython.display import Image

# Define dataset filters (time window and region of interest)
filter_date = ee.Filter.date(START, FINISH)
filter_region = ee.Filter.bounds(REGION)
# NOTE: `filter` shadows the Python builtin; the name is kept because other
# cells may reference it.
filter = ee.Filter.And(filter_date, filter_region)

# Read dataset
dataset = ee.ImageCollection(SOURCE).filter(filter)

# Create max QUALITY pixel composite image
dataset = dataset.map(addQuality)  # Add quality band
image = dataset.qualityMosaic(QUALITY)  # Choose max QUALITY pixels
# Remove QUALITY band. ee.List.remove() expects the element itself; passing a
# list ([QUALITY]) matches no band name and would leave the band in place.
image = image.select(image.bandNames().remove(QUALITY))

# Set all bands to highest common precision, resample and clip to region
image = image.toUint32()
image = image.reproject(crs=CRS, scale=SCALE)
image = image.clip(REGION)

# Build a thumbnail URL for a true-color (B4/B3/B2) preview of the composite
url = image.getThumbUrl({
    'min': 0,
    'max': 1200,
    'bands': ['B4', 'B3', 'B2'],
    'dimensions': 500})
print(url)

# Display the thumbnail of the composite (must stay the last expression so
# the notebook renders it)
print('\nPlease wait while the thumbnail loads, it may take a moment...')
Image(url=url)

In [None]:
# Get the data from the image as a numpy array
import requests
import io
import numpy as np

# Request a download URL for the image in NumPy (.npy) format
url = image.getDownloadUrl({
    'scale': SCALE,
    'region': REGION,
    'format': 'NPY'
    })

response = requests.get(url)
# Fail with a clear HTTP error instead of letting np.load choke on an
# error payload that is not a valid .npy file
response.raise_for_status()
data = np.load(io.BytesIO(response.content))