# Data Prep

Preprocess and unify all data.

Before training an ML model, we have to preprocess our data. In this case, Sentinel-2 Level-2A imagery is used to generate composites by maximum NDVI across a period of two months. The resulting composites are augmented with indices such as NDVI, and all timesteps are reduced into a single raster by deriving statistical parameters such as mean and variance.

The DEM image uploaded beforehand is downsampled to the same resolution as the Sentinel-2 composites by calculating various texture measures.

Then the resulting Sentinel-2 derived raster and DEM derived raster are stacked and a dimensionality reduction is performed. The reduced image can then be used for further processing.

## Define Parameters

In [24]:
# Import Earth Engine API and initialize it
import ee
ee.Initialize()

# --- Processing constants ---
# Earth Engine dataset the imagery is read from
SOURCE = 'COPERNICUS/S2_SR'
# Date (YYYY-MM-DD) the timeseries is centred on
TIMESERIES_MIDDLE = '2019-06-01'
# Total length of the timeseries in days
TIMESERIES_DURATION = 365
# Number of composites the timeseries is split into
NUM_COMPOSITES = 6
# Area of interest as a rectangle given in EPSG:4326 coordinates
REGION = ee.Geometry.Rectangle([12.6545, 47.9291, 12.6762, 47.9423])

# --- Export constants ---
# Name of exported raster
FILENAME = 'NDVI_composite'
# Name of export folder
FOLDER = 'Google Earth Engine'
# Size of pixel in meters
SCALE = 10
# Coordinate reference system of exported raster
CRS = 'EPSG:32632'
# Maximum number of pixels when exporting
MAX_PIXELS = 1e7

# --- Map constants ---
# Band selection and stretch used when visualising a composite
VIS_PARAMS = {
    'bands': ['B4', 'B3', 'B2'],
    'min': 0,
    'max': 2000,
    'gamma': 1,
}
# The map layer is named after the exported raster
LAYER_NAME = FILENAME
# Define quality measure
QUALITY = 'NDVI'  # Name of the band used to rank pixels when compositing


def addQuality(image):
    """Return ``image`` with an additional NDVI band named ``QUALITY``.

    NDVI is the normalized difference of near-infrared and red reflectance,
    (NIR - red) / (NIR + red). On Sentinel-2 these are bands B8 (NIR) and
    B4 (red). B5 is a red-edge band on Sentinel-2 (it is the NIR band only
    on Landsat 8), so pairing B5 with B4 would not yield NDVI.

    Args:
        image: Sentinel-2 image providing the bands ``B8`` and ``B4``.

    Returns:
        The input image with the quality band appended.
    """
    quality_band = image.normalizedDifference(['B8', 'B4']).rename([QUALITY])
    return image.addBands(quality_band)

# Calculate Timeseries Windows for calculating Composites

In [25]:
from datetime import datetime, timedelta

def generate_timewindows(middle_date, num_windows, timeseries_duration=365.242):
    """Split a timeseries centred on ``middle_date`` into equal, adjacent windows.

    The timeseries spans ``timeseries_duration`` days, half before and half
    after ``middle_date``. Window edges are computed with sub-day precision
    and only truncated to whole days when formatted, so the end of one
    window is always the start of the next.

    Args:
        middle_date: Centre of the timeseries as a 'YYYY-MM-DD' string.
        num_windows: Number of windows to split the timeseries into.
        timeseries_duration: Total length of the timeseries in days.

    Returns:
        A list of ``(start, end)`` tuples of 'YYYY-MM-DD' strings, in
        chronological order.
    """
    date_format = '%Y-%m-%d'

    # Locate the first edge and the (constant) distance between edges
    centre = datetime.strptime(middle_date, date_format)
    series_start = centre - timedelta(days=timeseries_duration / 2)
    step = timedelta(days=timeseries_duration / num_windows)

    # num_windows windows are delimited by num_windows + 1 edges
    edges = [series_start + index * step for index in range(num_windows + 1)]
    labels = [edge.strftime(date_format) for edge in edges]

    # Pair every edge with its successor to obtain the (start, end) tuples
    return list(zip(labels[:-1], labels[1:]))

# Split the configured timeseries into NUM_COMPOSITES adjacent (start, end) date windows
timewindows = generate_timewindows(TIMESERIES_MIDDLE, NUM_COMPOSITES, TIMESERIES_DURATION)

# Show Sentinel-2 (Level-2A) imagery

In [29]:
from IPython.display import Image

# Build one max-QUALITY composite per timewindow
composites = []
for start, end in timewindows:
    # Define dataset filters
    filter_date = ee.Filter.date(start, end)  # inclusive start, exclusive end
    filter_region = ee.Filter.bounds(REGION)

    # Read dataset
    dataset = ee.ImageCollection(SOURCE).filter(filter_date).filter(filter_region)

    # Create max NDVI pixel composite image
    dataset = dataset.map(addQuality)  # Add quality band
    composite = dataset.qualityMosaic(QUALITY)  # Choose max QUALITY pixels

    # Remove QUALITY band. ee.List.remove() takes the element itself; passing
    # [QUALITY] would search for a nested list, match nothing and silently
    # keep the QUALITY band in the composite.
    composite = composite.select(composite.bandNames().remove(QUALITY))

    # Set all bands to highest common precision, resample and clip to region
    composite = composite.toUint32()
    composite = composite.reproject(crs=CRS, scale=SCALE)
    composite = composite.clip(REGION)

    composites.append(composite)

# Reduce the timeseries into a single raster holding the per-band variance
# across all composites (bands are renamed to '<band>_variance')
composites = ee.ImageCollection(composites)
composites = composites.reduce(ee.Reducer.variance())
composites = composites.toUint32()
composites = composites.reproject(crs=CRS, scale=SCALE)
print(composites.getInfo())

# TODO: Filter by Quality Indicators (QI) for cloud and snow probabilities at 60 m resolution

# Show image
Image(url=composites.getThumbUrl({
    'min': 0,
    'max': 20000000,
    'bands': ['B4_variance', 'B3_variance', 'B2_variance'],
    'region': REGION,
    'dimensions': 500}))

{'type': 'Image', 'bands': [{'id': 'B1_variance', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 4294967295}, 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 0, 0, -10, 0]}, {'id': 'B2_variance', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 4294967295}, 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 0, 0, -10, 0]}, {'id': 'B3_variance', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 4294967295}, 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 0, 0, -10, 0]}, {'id': 'B4_variance', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 4294967295}, 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 0, 0, -10, 0]}, {'id': 'B5_variance', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 4294967295}, 'crs': 'EPSG:32632', 'crs_transform': [10, 0, 0, 0, -10, 0]}, {'id': 'B6_variance', 'data_type': {'type': 'PixelType', 'precision': 'int', 'min': 0, 'max': 4294967295}, 'crs': 'EPSG:32632',

## Download NumPy Array

In [None]:
# Get the data from the image as a numpy array
import requests
import io
import numpy as np

# Request a download URL for the reduced raster in NumPy (.npy) format
url = composites.getDownloadUrl({
    'scale': SCALE,
    'region': REGION,
    'format': 'NPY'
    })

response = requests.get(url)
# Fail with the HTTP error itself rather than letting np.load choke on an
# error body that is not a valid .npy payload
response.raise_for_status()

# Parse the downloaded bytes in memory, without writing a temporary file
data = np.load(io.BytesIO(response.content))

## Downsample DEM

In [None]:
# Load the DEM asset
dem = ee.Image('projects/leaf-type-mixture/assets/DEM')

# Aggregate the DEM with a mean reducer and reproject it onto the
# CRS / pixel size used for the Sentinel-2 composites
mean_reducer = ee.Reducer.mean()
dem_aggregated = dem.reduceResolution(reducer=mean_reducer, maxPixels=1024)
dem_mean = dem_aggregated.reproject(crs=CRS, scale=SCALE)

# Show image
dem_thumb_params = {
    'min': 0,
    'max': 40,
    'bands': ['b1'],
    'region': REGION,
    'dimensions': 500,
}
Image(url=dem_mean.getThumbUrl(dem_thumb_params))