# SentinelHub Statistical API

## Resources
- SentinelHub Documentation (higher level): https://docs.sentinel-hub.com/api/latest/api/statistical/
- request builder: https://apps.sentinel-hub.com/requests-builder/
- SentinelHub Python documentation (Statistical API): https://sentinelhub-py.readthedocs.io/en/latest/examples/statistical_request.html

## Notes
Both the Catalog API and the Statistical API require a time argument. As a result, requests against BYOC collections whose tiles have no `Sensing time` set return empty data.

## 1. SentinelHub configuration

In [1]:
# Configure plots for inline use in Jupyter Notebook
%matplotlib inline

import datetime as dt
import json
import os

# Utilities
import boto3
import dateutil
import geopandas as gpd
import IPython.display
import matplotlib.pyplot as plt
import pandas as pd
import rasterio
import shapely.geometry
import xarray as xr
import zarr

# Sentinel Hub
from sentinelhub import (
    CRS,
    BBox,
    ByocCollection,
    ByocCollectionAdditionalData,
    ByocCollectionBand,
    ByocTile,
    DataCollection,
    DownloadFailedException,
    Geometry,
    MimeType,
    SentinelHubBYOC,
    SentinelHubCatalog,
    SentinelHubRequest,
    SentinelHubStatistical,
    SHConfig,
    bbox_to_dimensions,
    os_utils,
)

# The following is not a package. It is a file utils.py which should be in the same folder as this notebook.
# from utils import plot_image

# Build the Sentinel Hub configuration from environment variables so that no
# credentials are stored in the notebook. os.environ.get returns None for a
# variable that is not set, so a missing variable is assigned as None here.
config = SHConfig()
config.instance_id = os.environ.get("SH_INSTANCE_ID")
config.sh_client_id = os.environ.get("SH_CLIENT_ID")
config.sh_client_secret = os.environ.get("SH_CLIENT_SECRET")
# NOTE(review): the AWS key pair is read from variables literally named
# "username" and "password" - confirm these are the names your environment sets.
config.aws_access_key_id = os.environ.get("username")
config.aws_secret_access_key = os.environ.get("password")

## helper function
def stats_to_df(stats_data):
    """Transform a Statistical API response into a pandas.DataFrame.

    Every aggregation interval becomes one row with ``interval_from`` and
    ``interval_to`` (``datetime.date`` objects) plus one column per statistic,
    named ``<output>_<band>_<stat>``. A ``percentiles`` statistic is expanded
    into one column per percentile: ``<output>_<band>_percentiles_<p>``.

    An interval is left out of the result when
    - it has no ``outputs`` key (e.g. the service reported an ``error`` for
      that interval instead of statistics), or
    - any of its bands contains only no-data samples
      (``sampleCount == noDataCount``).

    :param stats_data: decoded JSON response; must contain a ``data`` list.
    :return: pandas.DataFrame with one row per valid interval (empty if none).
    """
    df_data = []

    for single_data in stats_data["data"]:
        outputs = single_data.get("outputs")
        if outputs is None:
            # Nothing to tabulate for this interval.
            continue

        interval = single_data["interval"]
        # pd.to_datetime parses the ISO 8601 timestamps; the notebook never
        # imports sentinelhub's parse_time, which the helper used to call.
        df_entry = {
            "interval_from": pd.to_datetime(interval["from"]).date(),
            "interval_to": pd.to_datetime(interval["to"]).date(),
        }
        is_valid_entry = True

        for output_name, output_data in outputs.items():
            for band_name, band_values in output_data["bands"].items():
                band_stats = band_values["stats"]
                if band_stats["sampleCount"] == band_stats["noDataCount"]:
                    is_valid_entry = False
                    break

                for stat_name, value in band_stats.items():
                    col_name = f"{output_name}_{band_name}_{stat_name}"
                    if stat_name == "percentiles":
                        for perc, perc_val in value.items():
                            df_entry[f"{col_name}_{perc}"] = perc_val
                    else:
                        df_entry[col_name] = value

            if not is_valid_entry:
                # The row is discarded anyway; skip the remaining outputs.
                break

        if is_valid_entry:
            df_data.append(df_entry)

    return pd.DataFrame(df_data)


## 2. Define AOI and visualize it

### Define bounding box

In [2]:
# Candidate AOI corners in degrees. NOTE(review): these four values are not
# used by this cell - the bounding box below is hard-coded to other coordinates.
x1 = 6  # degree
y1 = 49.5  # degree
x2 = 6.3  # degree
y2 = 49.7  # degree

# bbox_lux = x1, y1, x2, y2
# NOTE(review): the `lux` names suggest Luxembourg, but these WGS84 coordinates
# lie around 11.3-12.5 E / 62.9-63.4 N - confirm which AOI is intended.
bbox_lux = BBox(bbox=[11.335796, 62.870072, 12.541001, 63.444063], crs=CRS.WGS84)

resolution = 60  # metres, as reported in the message printed below
# bbox_lux is already a BBox in WGS84, so it is reused directly instead of
# being wrapped in a second BBox(...) call.
lux_bbox = bbox_lux
lux_size = bbox_to_dimensions(lux_bbox, resolution=resolution)
print(f"Image shape at {resolution} m resolution: {lux_size} pixels")

# Render the AOI outline; unpacking the BBox supplies the four corner values
# to shapely.geometry.box.
IPython.display.GeoJSON(shapely.geometry.box(*bbox_lux).__geo_interface__)

Image shape at 60 m resolution: (962, 1111) pixels


<IPython.display.GeoJSON object>

## 3. Prepare collections
### 3.1 BYOC collection
- Collection name: UrbanAtlas2018_10m_raster
- CollectionId: 78545a11-1e57-4ed0-a3ce-68c2e305216b

In [3]:
# Identify the Urban Atlas 2018 BYOC collection and register it with
# sentinelhub under a readable name; the collection is flagged as timeless.
collection_name_UA = "UrbanAtlas2018_10m_raster"
collection_id_UA = "78545a11-1e57-4ed0-a3ce-68c2e305216b"
data_collection_UA = DataCollection.define_byoc(
    collection_id_UA,
    name=collection_name_UA,
    is_timeless=True,
)
# Display the resulting collection definition
data_collection_UA

<DataCollection.UrbanAtlas2018_10m_raster: DataCollectionDefinition(
  api_id: byoc-78545a11-1e57-4ed0-a3ce-68c2e305216b
  catalog_id: byoc-78545a11-1e57-4ed0-a3ce-68c2e305216b
  wfs_id: byoc-78545a11-1e57-4ed0-a3ce-68c2e305216b
  collection_type: BYOC
  collection_id: 78545a11-1e57-4ed0-a3ce-68c2e305216b
  is_timeless: True
  has_cloud_coverage: False
)>

In [4]:
# List the band names of the collection: fetch the collection record by its id
# through the BYOC client and display its "additionalData" entry.
byoc = SentinelHubBYOC(config=config)
collection = byoc.get_collection(collection_id_UA)
collection["additionalData"]

{'bands': {'B1': {'bitDepth': 16,
   'source': 'B1',
   'bandIndex': 1,
   'sampleFormat': 'UINT'}},
 'maxMetersPerPixel': 6400.0,
 'extent': {'type': 'Polygon',
  'coordinates': [[[-49.396508783, 26.176870268],
    [-49.396508783, 70.57381085],
    [67.130764957, 70.57381085],
    [67.130764957, 26.176870268],
    [-49.396508783, 26.176870268]]]},
 'fromSensingTime': '2018-01-01T00:00:00Z',
 'toSensingTime': '2018-01-01T00:00:00Z',
 'hasSensingTimes': 'YES'}

In [5]:
# CollectionId copied from the FAIRiCube catalog: https://catalog.fairicube.eu/
collection_name_popdens = "Population_density"
collection_id_popdens = "b468089b-2627-4787-b984-89c10434f6c6"
# Register the BYOC collection under that name (no extra flags are passed)
data_collection_popdens = DataCollection.define_byoc(
    collection_id_popdens,
    name=collection_name_popdens,
)
# Display the resulting collection definition
data_collection_popdens

<DataCollection.Population_density: DataCollectionDefinition(
  api_id: byoc-b468089b-2627-4787-b984-89c10434f6c6
  catalog_id: byoc-b468089b-2627-4787-b984-89c10434f6c6
  wfs_id: byoc-b468089b-2627-4787-b984-89c10434f6c6
  collection_type: BYOC
  collection_id: b468089b-2627-4787-b984-89c10434f6c6
  is_timeless: False
  has_cloud_coverage: False
)>

In [6]:
# Simple evalscript to check that a single Statistical API request works.
# It reads band B1 plus dataMask and returns them as the outputs "UA" and
# "dataMask".
evalscript_test = """

//VERSION=3
function setup() {
  return {
    input: [{
    bands: [
    "B1",
    "dataMask"
    ]
    }],
    output: [{ 
    id: "UA",
        bands: 1,
        sampleType: "UINT16" // raster format will be UINT16
        },
        {
        id: "dataMask",
        bands: 1}]
    
  };
}

function evaluatePixel(sample) {
  return {
    UA: [sample.B1],
    dataMask: [sample.dataMask]
    };
}
"""

# Daily ("P1D") aggregation over 2018-01-01 .. 2018-05-01 on the AOI, at the
# pixel dimensions computed for it (lux_size).
request = SentinelHubStatistical(
    aggregation=SentinelHubStatistical.aggregation(
        evalscript=evalscript_test,
        time_interval=("2018-01-01", "2018-05-01"),
        aggregation_interval="P1D",
        size=lux_size,
    ),
    # Use the handle returned by DataCollection.define_byoc rather than
    # repeating the collection name as an attribute of DataCollection.
    input_data=[SentinelHubStatistical.input_data(data_collection_UA)],
    bbox=bbox_lux,
    config=config,
)

# get_data() returns a list; its first element is kept and displayed
data = request.get_data()[0]
data

{'data': [{'interval': {'from': '2018-01-01T00:00:00Z',
    'to': '2018-01-02T00:00:00Z'},
   'outputs': {'UA': {'bands': {'B0': {'stats': {'min': 8617.0,
        'max': 53249.0,
        'mean': 31694.024006638025,
        'stDev': 3296.151831826919,
        'sampleCount': 1068782,
        'noDataCount': 962728}}}}}}],
 'status': 'OK'}

### Query catalog

In [7]:
# Search the catalog for items of the Urban Atlas collection that match the
# AOI and the given time range. SentinelHubCatalog is imported with the other
# sentinelhub names in the notebook's import cell.
catalog = SentinelHubCatalog(config=config)
search_iterator = catalog.search(
    data_collection_UA,
    geometry=bbox_lux,
    time=("2018-01-01", "2018-07-06"),
)

# Materialise the iterator so the matches can be inspected
results = list(search_iterator)
results

[{'stac_version': '1.0.0',
  'stac_extensions': ['https://stac-extensions.github.io/projection/v1.0.0/schema.json'],
  'id': 'b260234b-7b92-4c30-b92b-69841bd5f406',
  'type': 'Feature',
  'geometry': {'type': 'Polygon',
   'crs': {'type': 'name',
    'properties': {'name': 'urn:ogc:def:crs:OGC::CRS84'}},
   'coordinates': [[[9.101425344029629, 61.53030960907723],
     [27.464329746195915, 60.46806507295995],
     [34.15332651701756, 69.03487939129583],
     [8.725506175707775, 70.5697570204103],
     [9.101425344029629, 61.53030960907723]]]},
  'bbox': [8.725506175707775,
   60.46806507295995,
   34.15332651701756,
   70.5697570204103],
  'properties': {'datetime': '2018-01-01T00:00:00Z',
   'proj:epsg': 3035,
   'proj:bbox': [4273020.0, 4270230.0, 5273020.0, 5270230.0],
   'proj:geometry': {'type': 'Polygon',
    'crs': {'type': 'name',
     'properties': {'name': 'urn:ogc:def:crs:EPSG::3035'}},
    'coordinates': [[[4273019.999997908, 4270229.999845818],
      [5273020.000042262, 427