# Data Processing
Shows the different processes for saving datasets in cloud optimized formats leveraging eo cloud tools

In [1]:
# platform libraries
import openeo
from sentinelhub import (SHConfig, SentinelHubRequest, DataCollection, MimeType, CRS, BBox, bbox_to_dimensions, geometry)

# utility libraries
from datetime import date
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import xarray as xr

In [2]:
config = SHConfig()
config.sh_client_id = %env SH_CLIENT_ID
config.sh_client_secret = %env SH_CLIENT_SECRET

In [3]:
conn = openeo.connect('https://jjdxlu8vwl.execute-api.eu-central-1.amazonaws.com/production')
# conn = openeo.connect('https://openeo-dev.sinergise.com/testing')

In [4]:
conn = conn.authenticate_basic(username=config.sh_client_id, password=config.sh_client_secret)

In [5]:
conn.describe_account()

{'default_plan': 'enterprise-s',
 'info': {'sh_userinfo': {'account': '7c7cd3b4-7996-4abb-a465-b41dac60e107',
   'aid': '7c7cd3b4-7996-4abb-a465-b41dac60e107',
   'aud': 'c7210eb0-591c-4b42-b355-a654a71051b5',
   'd': {'1': {'ra': {'rag': 7}, 't': 14001}},
   'did': 1,
   'email': 'hub+mooc@eox.at',
   'exp': 1695900074,
   'jti': '4a94f976-b9e1-4e9b-9cf9-68850c7bdccd',
   'name': 'hub+mooc@eox.at',
   'org': '3d31397f-770a-4f98-9358-84e918491745',
   'sid': 'ccd025cd-adb9-46f6-81db-bc99a73c5c57',
   'sub': '58a6b718-458b-4493-bfe9-14229b83c310'}},
 'name': 'hub+mooc@eox.at',
 'user_id': '58a6b718-458b-4493-bfe9-14229b83c310'}

#### Load collections

In [13]:
collection      = 'SENTINEL2_L2A_SENTINELHUB'
spatial_extent  = {'west':11.020833,'east':11.366667,'south':46.653599,'north':46.954167,'crs':4326}
#spatial_extent  = {'west':11.0,'east':11.1,'south':46.6,'north':46.7,'crs':4326}
# spatial_extent  = {'west':11.08,'east':11.11,'south':46.77,'north':46.79,'crs':4326}
# spatial_extent  = {'west':11.08,'east':11.20,'south':46.6,'north':46.79,'crs':4326}
# spatial_extent  = {'west':11.18,'east':11.21,'south':46.70,'north':46.72,'crs':4326}\

temporal_extent = ["2018-02-01", "2018-06-30"]
# temporal_extent = ["2018-02-01", "2018-02-15"]

#bands           = ['B03', 'B11', 'CLM'] # ['B02', 'B03', 'B04', 'CLM']
bands           = ['B03', 'B11', "CLM"] # ['B02', 'B03', 'B04', 'CLM']

In [14]:
s2 = conn.load_collection(collection,
                          spatial_extent=spatial_extent, # put json here
                          bands=bands,
                          temporal_extent=temporal_extent)

In [15]:
# s2 = s2.save_result(format="GTiff") #ZARR

#### Create process graph

In [16]:
# mask out cloud areas
cloud_mask= s2.band("CLM")
s2cube_masked = s2.mask(cloud_mask)

In [17]:
# compute ndsi 
green = s2cube_masked.band("B03")
swir = s2cube_masked.band("B11")
ndsi = (green - swir) / (green + swir)

In [19]:
# threshold and select only snow areas
snowmap = ndsi > 0.4

In [20]:
snowmap

In [21]:
snowmap = snowmap.save_result(format="GTiff")

In [23]:
snowmap

In [22]:
# show process graph json
snowmap.print_json()

{
  "process_graph": {
    "loadcollection1": {
      "process_id": "load_collection",
      "arguments": {
        "bands": [
          "B03",
          "B11",
          "CLM"
        ],
        "id": "SENTINEL2_L2A_SENTINELHUB",
        "spatial_extent": {
          "west": 11.020833,
          "east": 11.366667,
          "south": 46.653599,
          "north": 46.954167,
          "crs": 4326
        },
        "temporal_extent": [
          "2018-02-01",
          "2018-06-30"
        ]
      }
    },
    "reducedimension1": {
      "process_id": "reduce_dimension",
      "arguments": {
        "data": {
          "from_node": "loadcollection1"
        },
        "dimension": "bands",
        "reducer": {
          "process_graph": {
            "arrayelement1": {
              "process_id": "array_element",
              "arguments": {
                "data": {
                  "from_parameter": "data"
                },
                "index": 2
              },
              "

#### Data Processing and save
- Synchronous Saving
- Asynchronous Saving

##### Asynchronous Saving
Creating a batch job suitable for processing larger study area or longer time step

In [25]:
# Asynchronous saving -- batch processing 
# snowmap = snowmap.save_result(format="GTiff")
job = snowmap.create_job(title="NDSI timeseries 2022") 

In [26]:
job.start_and_wait()

0:00:00 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': send 'start'
0:00:15 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': queued (progress N/A)
0:00:20 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': queued (progress N/A)
0:00:27 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': queued (progress N/A)
0:00:35 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': queued (progress N/A)
0:00:45 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:00:57 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:01:13 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:01:32 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:01:56 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:02:26 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:03:03 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:03:50 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': running (progress N/A)
0:04:48 Job 'acd0daf4-ab44-45cd-9e6d-fd

Failed to parse API error response: 404 '<!doctype html>\n<html lang=en>\n<title>404 Not Found</title>\n<h1>Not Found</h1>\n<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>\n'


0:10:49 Job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf': error (progress N/A)
Your batch job 'acd0daf4-ab44-45cd-9e6d-fdf24e7f1bdf' failed. Error logs:


OpenEoApiError: [404] unknown: <!doctype html>
<html lang=en>
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>


In [None]:
results = job.get_results()
results

In [None]:
results.download_files("data/snowmap/")

##### Synchronous Saving

In [14]:
conn.list_file_formats()

In [15]:
snowmap.download("data/snowmap_syn.tiff")

OpenEoApiError: [400] ProcessGraphComplexity: The process is too complex for synchronous processing. Please use a batch job instead. estimated size of generated output of 139018080 bytes exceeds maximum supported size of 5000000 bytes.

In [14]:
# #ndsi_mean = ndsi.reduce_dimension(dimension="t", reducer = mean)
# ndsi_max = ndsi.max_time()
# ndsi_max

In [18]:
# ndsi_max.download("ndsi_max.tiff")
# ndsi.download("data/snowmap.tiff")

In [16]:
# OpenEoApiError: [400] ProcessGraphComplexity: The process is too complex for synchronous processing. Please use a batch job instead. estimated size of generated output of 34754520 bytes exceeds maximum supported size of 5000000 bytes.
# OpenEoApiError: [504] unknown: Endpoint request timed out

# Others

In [26]:
# # from gtiff_parser import *
# import gtiff_parser

# # URL containing "default.tif" from a response to "/jobs/{job_id}/results"
# input_tiff = "data/snowmap/9c4626d3-a8fc-4b89-a8cc-59b296cbc646/32TPS_6_2/default.tif"

# # URL containing "userdata.json" from a response to "/jobs/{job_id}/results"
# input_metadata = "data/snowmap/job-results.json"

# # folder where the output files will be put
# output_dir = "."

# # output format
# output_format = "zarr"

# output_file_paths = parse_multitemporal_gtiff_to_format(input_tiff, input_metadata, output_dir, output_format)


In [22]:
# job/f382cd4a-f26c-4fc4-ab80-e16764aef2e0/results

In [23]:
job.job_id

'f382cd4a-f26c-4fc4-ab80-e16764aef2e0'

In [22]:
# import rioxarray as rio
# rio.open_rasterio("data/snowmap.tiff")

In [23]:
# import rioxarray as rio
# pth_res = "ndsi_max.tiff"
# res = rio.open_rasterio(pth_res)
# res

In [1]:
# res.values

In [1]:
# res.squeeze().plot.imshow(cmap="hot_r")

In [15]:
# ndsi_max_fin = ndsi_max.save_result(format="GTiff")
# #snowmap_fin.execute_batch()
# ndsi_max_fin_job = ndsi_max_fin.create_job(title = "ndsi_max")
# ndsi_max_fin_job.start_job()

In [24]:
# ndsi_max_fin_job

In [25]:
# OpenEoApiError: [500] Internal: Server error: Failed to download from:
# https://services.sentinel-hub.com/api/v1/batch/process
# with HTTPError:
# 403 Client Error: Forbidden for url: https://services.sentinel-hub.com/api/v1/batch/process
# Server response: "{"status": 403, "reason": "Forbidden", "message": "You are not authorized to perform this action.", "code": "COMMON_INSUFFICIENT_PERMISSIONS"}"