# SentinelHub Statistical API

## Resources
- SentinelHub Documentation (higher level): https://docs.sentinel-hub.com/api/latest/api/statistical/
- request builder: https://apps.sentinel-hub.com/requests-builder/
- SentinelHub Python documentation, Statistical API <https://sentinelhub-py.readthedocs.io/en/latest/examples/statistical_request.html>

## Notes
Catalog API and Statistical API require a time argument. When using BYOC data, make sure that `fromSensingTime` and `toSensingTime` are not null; otherwise the Statistical API returns empty data.

## 1. SentinelHub configuration

In [1]:
# Configure plots for inline use in Jupyter Notebook
%matplotlib inline

import datetime as dt

# Utilities
import boto3
import dateutil
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import os
import rasterio
# Various utilities
import json
import xarray as xr
import shapely.geometry
import IPython.display
import zarr

# Sentinel Hub
from sentinelhub import (
    CRS,
    BBox,
    ByocCollection,
    ByocCollectionAdditionalData,
    ByocCollectionBand,
    ByocTile,
    DataCollection,
    DownloadFailedException,
    MimeType,
    SentinelHubBYOC,
    SentinelHubRequest,
    SentinelHubStatistical,
    SHConfig,
    bbox_to_dimensions,
    os_utils,
)

# The following is not a package. It is a file utils.py which should be in the same folder as this notebook.
# from utils import plot_image

# Start from the locally saved Sentinel Hub profile, then override individual
# settings from environment variables so no credential is hardcoded here.
config = SHConfig()

# Maps each SHConfig attribute to the environment variable that supplies it.
_CONFIG_ENV_VARS = {
    "instance_id": "SH_INSTANCE_ID",
    "sh_client_id": "SH_CLIENT_ID",
    "sh_client_secret": "SH_CLIENT_SECRET",
    # NOTE(review): the AWS keys are read from variables literally named
    # "username" / "password" — confirm this matches the deployment environment.
    "aws_access_key_id": "username",
    "aws_secret_access_key": "password",
}

for _setting, _env_var in _CONFIG_ENV_VARS.items():
    _env_value = os.environ.get(_env_var)
    # os.environ.get returns None for an unset variable; skip it so that a
    # value already present in the saved profile is not overwritten with None.
    if _env_value is not None:
        setattr(config, _setting, _env_value)

## helper function
def _flatten_band_stats(outputs):
    """Flatten the ``outputs`` mapping of one interval into ``{column: value}``.

    Returns ``None`` as soon as a band is found whose samples are all no-data,
    which tells the caller to drop the whole interval.
    """
    columns = {}
    for output_name, output_data in outputs.items():
        for band_name, band_values in output_data["bands"].items():
            band_stats = band_values["stats"]
            if band_stats["sampleCount"] == band_stats["noDataCount"]:
                return None

            for stat_name, value in band_stats.items():
                col_name = f"{output_name}_{band_name}_{stat_name}"
                if stat_name == "percentiles":
                    # Percentiles arrive as a nested mapping; give each its own column.
                    for perc, perc_val in value.items():
                        columns[f"{col_name}_{perc}"] = perc_val
                else:
                    columns[col_name] = value
    return columns


def stats_to_df(stats_data):
    """Transform a Statistical API response into a pandas.DataFrame.

    Every element of ``stats_data["data"]`` (one aggregation interval) becomes
    one row. The columns are ``interval_from`` and ``interval_to`` (as
    ``datetime.date``) followed by one ``{output}_{band}_{stat}`` column per
    statistic; percentiles are expanded to ``{output}_{band}_percentiles_{p}``.

    Intervals in which any band contains only no-data samples are dropped, and
    so are intervals that carry no ``"outputs"`` key.

    Parameters
    ----------
    stats_data : dict
        Decoded Statistical API response containing a ``"data"`` list.

    Returns
    -------
    pandas.DataFrame
        One row per valid interval; empty if there is none.
    """
    records = []

    for interval_stats in stats_data["data"]:
        # NOTE(review): the service presumably reports a failed interval with an
        # "error" entry instead of "outputs" — confirm against the API reference.
        outputs = interval_stats.get("outputs")
        if outputs is None:
            continue

        stat_columns = _flatten_band_stats(outputs)
        if stat_columns is None:
            continue

        interval = interval_stats["interval"]
        # pd.to_datetime parses the ISO-8601 timestamps; the previous code called
        # `parse_time`, a name that is neither imported nor defined in this notebook.
        record = {
            "interval_from": pd.to_datetime(interval["from"]).date(),
            "interval_to": pd.to_datetime(interval["to"]).date(),
        }
        record.update(stat_columns)
        records.append(record)

    return pd.DataFrame(records)


## 2. Define AOI and visualize it

### Define bounding box

In [2]:
# Area of interest as a WGS84 bounding box.
# NOTE(review): coordinates are presumably (min_lon, min_lat, max_lon, max_lat) — confirm.
bbox = BBox(bbox=[11.335796, 62.870072, 12.541001, 63.444063], crs=CRS.WGS84)

# Pixel size (printed below as metres) used to derive the request dimensions.
# `bbox` is already a BBox instance, so it is passed on directly instead of
# being wrapped in a second BBox(...) call.
resolution = 60
size = bbox_to_dimensions(bbox, resolution=resolution)
print(f"Image shape at {resolution} m resolution: {size} pixels")

# Display the AOI outline as GeoJSON in the notebook.
IPython.display.GeoJSON(shapely.geometry.box(*bbox).__geo_interface__)

Image shape at 60 m resolution: (962, 1111) pixels


<IPython.display.GeoJSON object>

## 3. Prepare collections
### 3.1 BYOC collection
- Collection name: UrbanAtlas2018_10m_raster
- CollectionId: 78545a11-1e57-4ed0-a3ce-68c2e305216b

In [7]:
# Register the Urban Atlas BYOC collection with sentinelhub under a readable name
collection_name_UA = "UrbanAtlas2018_10m_raster"
collection_id_UA = "78545a11-1e57-4ed0-a3ce-68c2e305216b"
data_collection_UA = DataCollection.define_byoc(
    collection_id_UA,
    name=collection_name_UA,
    is_timeless=True,
)
data_collection_UA

<DataCollection.UrbanAtlas2018_10m_raster: DataCollectionDefinition(
  api_id: byoc-78545a11-1e57-4ed0-a3ce-68c2e305216b
  catalog_id: byoc-78545a11-1e57-4ed0-a3ce-68c2e305216b
  wfs_id: byoc-78545a11-1e57-4ed0-a3ce-68c2e305216b
  collection_type: BYOC
  collection_id: 78545a11-1e57-4ed0-a3ce-68c2e305216b
  is_timeless: True
  has_cloud_coverage: False
)>

Check that `fromSensingTime` and `toSensingTime` are not null

In [4]:
# Fetch the BYOC collection metadata and display its `additionalData` section,
# which lists the bands together with `fromSensingTime` / `toSensingTime`
byoc = SentinelHubBYOC(config=config)
collection = byoc.get_collection(collection_id_UA)
collection["additionalData"]

{'bands': {'B1': {'bitDepth': 16,
   'source': 'B1',
   'bandIndex': 1,
   'sampleFormat': 'UINT'}},
 'maxMetersPerPixel': 6400.0,
 'extent': {'type': 'Polygon',
  'coordinates': [[[-49.396508783, 26.176870268],
    [-49.396508783, 70.57381085],
    [67.130764957, 70.57381085],
    [67.130764957, 26.176870268],
    [-49.396508783, 26.176870268]]]},
 'fromSensingTime': '2018-01-01T00:00:00Z',
 'toSensingTime': '2018-01-01T00:00:00Z',
 'hasSensingTimes': 'YES'}

In [5]:
# The CollectionId below comes from the FAIRiCube catalog https://catalog.fairicube.eu/
collection_name_popdens = "Population_density"
collection_id_popdens = "b468089b-2627-4787-b984-89c10434f6c6"

# Register the BYOC collection and display the resulting definition
data_collection_popdens = DataCollection.define_byoc(
    collection_id_popdens,
    name=collection_name_popdens,
)
data_collection_popdens

<DataCollection.Population_density: DataCollectionDefinition(
  api_id: byoc-b468089b-2627-4787-b984-89c10434f6c6
  catalog_id: byoc-b468089b-2627-4787-b984-89c10434f6c6
  wfs_id: byoc-b468089b-2627-4787-b984-89c10434f6c6
  collection_type: BYOC
  collection_id: b468089b-2627-4787-b984-89c10434f6c6
  is_timeless: False
  has_cloud_coverage: False
)>

## 4. Compute statistics

In [12]:
# Evalscript: pass band B1 through as output "UA" and expose the dataMask
# as a second output alongside it.
evalscript = """

//VERSION=3
function setup() {
  return {
    input: [{
    bands: [
    "B1",
    "dataMask"
    ]
    }],
    output: [{ 
        id: "UA",
        bands: 1,
        sampleType: "UINT16" // raster format will be UINT16
        },
        {
        id: "dataMask",
        bands: 1}]
    
  };
}

function evaluatePixel(sample) {
  return {
    UA: [sample.B1],
    dataMask: [sample.dataMask]
    };
}
"""

# Daily aggregation over a window that contains the collection's sensing time.
# The collection is referenced through `data_collection_UA`, the variable that
# holds the BYOC definition, in the same way the histogram request below
# references `data_collection_popdens`.
request = SentinelHubStatistical(
    aggregation=SentinelHubStatistical.aggregation(
        evalscript=evalscript,
        time_interval=("2018-01-01", "2018-05-01"),
        aggregation_interval="P1D",
        size=size,
    ),
    input_data=[SentinelHubStatistical.input_data(data_collection_UA)],
    bbox=bbox,
    config=config,
)

# get_data() returns a list; the first element is the response for this request
data = request.get_data()[0]
data

{'data': [{'interval': {'from': '2020-05-01T00:00:00Z',
    'to': '2020-05-02T00:00:00Z'},
   'outputs': {'populationDensity': {'bands': {'B0': {'stats': {'min': 0.0,
        'max': 27.0,
        'mean': 2.2661188156239094,
        'stDev': 3.91440397395676,
        'sampleCount': 1068782,
        'noDataCount': 0}}}}}}],
 'status': 'OK'}

### Compute histograms
For other example uses of Statistical API see <https://docs.sentinel-hub.com/api/latest/api/statistical/examples/> 

In [14]:
# Histogram settings applied to the "default" output and band selectors.
# Values are given as JSON numbers rather than strings, matching the
# documented Statistical API request examples.
calculations = {
    "default": {
        "histograms": {
            "default": {
                "binWidth": 10,
                # "lowEdge": 0,
                # "highEdge": 101  # histogram interval is [lowEdge, highEdge), i.e. highEdge is excluded
            }
        }
    }
}

# Evalscript: pass the populationDensity band through unchanged and expose
# the dataMask as a second output alongside it.
evalscript = """

//VERSION=3
function setup() {
  return {
    input: [{
    bands: [
    "populationDensity",
    "dataMask"
    ]
    }],
    output: [{ 
        id: "populationDensity",
        bands: 1,
        sampleType: "UINT16" // raster format will be UINT16
        },
        {
        id: "dataMask",
        bands: 1}]
    
  };
}

function evaluatePixel(sample) {
  return {
    populationDensity: [sample.populationDensity],
    dataMask: [sample.dataMask]
    };
}
"""

# Daily aggregation over 2020 with the histogram calculation attached
request = SentinelHubStatistical(
    aggregation=SentinelHubStatistical.aggregation(
        evalscript=evalscript,
        time_interval=("2020-01-01", "2020-12-01"),
        aggregation_interval="P1D",
        size=size,
    ),
    input_data=[SentinelHubStatistical.input_data(data_collection_popdens)],
    bbox=bbox,
    calculations=calculations,
    config=config,
)

# get_data() returns a list; the first element is the response for this request
data = request.get_data()[0]
data

{'data': [{'interval': {'from': '2020-05-01T00:00:00Z',
    'to': '2020-05-02T00:00:00Z'},
   'outputs': {'populationDensity': {'bands': {'B0': {'stats': {'min': 0.0,
        'max': 27.0,
        'mean': 2.2661188156239094,
        'stDev': 3.91440397395676,
        'sampleCount': 1068782,
        'noDataCount': 0},
       'histogram': {'bins': [{'lowEdge': 0, 'highEdge': 10, 'count': 1042261},
         {'lowEdge': 10, 'highEdge': 20, 'count': 1171},
         {'lowEdge': 20, 'highEdge': 30, 'count': 25350}],
        'overflowCount': 0,
        'underflowCount': 0}}}}}}],
 'status': 'OK'}