## Data Access demo

## Data Access v1

[OWSLib](https://geopython.github.io/OWSLib) is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models. In this demo we’ll work with the CSW, WMS and WCS interfaces.

In [None]:
from owslib.csw import CatalogueServiceWeb
from owslib.wms import WebMapService
from owslib.wcs import WebCoverageService
import lxml.etree
import requests
from tifffile import imread
from io import BytesIO, StringIO

### Data Discovery

The user has already discovered the dataset of interest using the OWSLib CSW client.

In [None]:
base_domain = "develop.eoepca.org"
workspace_prefix = "ws"
system_endpoint = f'https://resource-catalogue.{base_domain}/csw'

In [None]:
csw = CatalogueServiceWeb(system_endpoint,timeout=30)

In [None]:
scene_id="S2B_MSIL2A_20190910T095029_N0500_R079_T33TXN_20230430T083712.SAFE"

In [None]:
csw.getrecordbyid(id=[scene_id])

In [None]:
links = csw.records[scene_id].references

In [None]:
# Walk the references of the catalogue record and remember the URL of every
# link whose scheme mentions WMS. If several links match, the last one wins
# because wms_endpoint is overwritten on each match.
for link in links:
    scheme = link['scheme']
    if not scheme or 'WMS' not in scheme:
        continue
    wms_endpoint = link['url']
    print(wms_endpoint)

### Data Visualization

After discovering the dataset, the user can identify the WMS link and use the OWSLib WMS client to visualize the dataset.

In [None]:
wms = WebMapService(wms_endpoint, version='1.3.0')

The list of layers available to the WMS service:

In [None]:
list(wms.contents)

Along with some WMS layer metadata:

In [None]:
wms[scene_id].title

In [None]:
wms[scene_id].boundingBoxWGS84

In [None]:
[op.name for op in wms.operations]

In [None]:
wms[scene_id].styles

In [None]:
wms.getOperationByName('GetMap').formatOptions

The user can visualize the result of a WMS GetMap request with matplotlib.

In [None]:
%matplotlib inline
import os, sys
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

def getMap(wms,layerName,bbox,filename,style=None):
    """Request a GetMap image for a single layer and save it to ``filename``.

    Parameters
    ----------
    wms
        Client object exposing ``getmap(...)``; in this notebook the OWSLib
        ``WebMapService`` instance.
    layerName
        Name of the one layer to render.
    bbox
        Bounding box, forwarded unchanged as the ``bbox`` argument of
        ``getmap``.
    filename
        Path of the output file. It is opened in ``'wb'`` mode, so an
        existing file is overwritten.
    style
        Optional style name. When ``None`` no ``styles`` list is sent.

    Returns
    -------
    None
        The image bytes are written to ``filename``.
    """
    img = wms.getmap(
        layers=[layerName],
        styles=[style] if style is not None else None,
        size=(600, 300),
        srs='EPSG:4326',
        bbox=bbox,
        format='image/png',
        transparent=True,
    )

    # The context manager closes the file even when reading the response or
    # writing to disk raises.
    with open(filename, 'wb') as tmpfile:
        tmpfile.write(img.read())

In [None]:
getMap(wms,f"{scene_id}__TRUE_COLOR",wms[scene_id].boundingBoxWGS84, 'rgb.png')
image1=mpimg.imread('rgb.png')
fig = plt.figure(figsize=(12,7))
img1=plt.imshow(image1,extent=wms[scene_id].boundingBoxWGS84,aspect='auto')
plt.show()

False color composite using Near Infrared, Red, Green Bands

In [None]:
getMap(wms,f"{scene_id}__FALSE_COLOR",wms[scene_id].boundingBoxWGS84, 'nirrg.png')
image1=mpimg.imread('nirrg.png')
fig = plt.figure(figsize=(12,7))
img1=plt.imshow(image1,extent=wms[scene_id].boundingBoxWGS84,aspect='auto')
plt.show()

In [None]:
getMap(wms,f"{scene_id}__NDVI",wms[scene_id].boundingBoxWGS84, 'ndvi.png', style='summer')
image1=mpimg.imread('ndvi.png')
fig = plt.figure(figsize=(12,7))
img1=plt.imshow(image1,extent=wms[scene_id].boundingBoxWGS84,aspect='auto')
plt.show()

In [None]:
getMap(wms,f"{scene_id}__TRUE_COLOR",wms[scene_id].boundingBoxWGS84, 'rgb.png')
image1=mpimg.imread('rgb.png')
fig = plt.figure(figsize=(12,7))
img1=plt.imshow(image1,extent=wms[scene_id].boundingBoxWGS84,aspect='auto')
plt.show()

Alternatively, the Folium library is available to create a Leaflet map with the WMS layer

In [None]:
import os
import folium

print(folium.__version__)

import folium.plugins.timestamped_wmstilelayer

In [None]:
centre_lat=wms[scene_id].boundingBoxWGS84[1]+(wms[scene_id].boundingBoxWGS84[3]-wms[scene_id].boundingBoxWGS84[1])/2
centre_long=wms[scene_id].boundingBoxWGS84[0]+(wms[scene_id].boundingBoxWGS84[2]-wms[scene_id].boundingBoxWGS84[0])/2
m = folium.Map(location=[centre_lat, centre_long], zoom_start=7, tiles=None)

folium.raster_layers.WmsTileLayer(
    url="https://a.tiles.maps.eox.at",
    layers='terrain-light_3857',
    name='terrain-light',
    fmt='image/jpeg',
).add_to(m)


folium.raster_layers.WmsTileLayer(
    url=wms_endpoint.partition("?")[0],
    layers=f"{scene_id}__TRUE_COLOR",
    name=f"{scene_id}__TRUE_COLOR",
    fmt='image/png',
    transparent=True,
    overlay=True,
    control=True,
).add_to(m)

folium.raster_layers.WmsTileLayer(
    url=wms_endpoint.partition("?")[0],
    layers=f"{scene_id}__outlines",
    name=f"{scene_id}__outlines",
    fmt='image/png',
    transparent=True,
    overlay=True,
    control=True,
).add_to(m)

folium.LayerControl().add_to(m)

m

## Showing contents of a whole collection for a given timespan.

In [None]:
m = folium.Map(location=[centre_lat, centre_long], zoom_start=6, tiles=None)

folium.raster_layers.WmsTileLayer(
    url="https://a.tiles.maps.eox.at",
    layers='terrain-light_3857',
    name='terrain-light',
    fmt='image/jpeg',
).add_to(m)

folium.raster_layers.WmsTileLayer(
    url=wms_endpoint.partition("?")[0],
    layers='S2L2A__outlines',
    name='Outlines',
    fmt='image/png',
    transparent=True,
    overlay=True,
    control=True,
    time="2019-09-10T00:00:00Z/2019-09-11T00:00:00Z",
).add_to(m)


folium.LayerControl().add_to(m)

m

### Data Download

After discovering and visualizing the dataset, the user can identify the WCS link from the catalogue record and use the OWSLib WCS client to download the dataset

In [None]:
# Find the first WCS reference of the catalogue record, fetch the XML document
# behind it and collect the coverage identifiers it lists.
tree = None
for link in links:
    scheme = link['scheme']
    if scheme and 'WCS' in scheme:
        print(link['url'])
        # Service endpoint without the query string, reused for GetCoverage
        wcs_endpoint = link['url'].split('?')[0]
        wcs_id = link['url'].split('eoid=')[1]
        wcs_response = requests.get(link['url'], timeout=30)
        # Stop on an HTTP error instead of handing an error page to the XML parser
        wcs_response.raise_for_status()
        tree = lxml.etree.fromstring(wcs_response.content)
        break

# Without this check a missing WCS link surfaces as an AttributeError on None
if tree is None:
    raise RuntimeError("No WCS link found among the catalogue record references")

coverage_ids = tree.xpath('wcs:CoverageDescriptions/wcs:CoverageDescription/@gml:id', namespaces=tree.nsmap)
coverage_ids

In [None]:
getcoverage_request = wcs_endpoint + '?service=WCS&version=2.0.1&request=GetCoverage&coverageid=' + coverage_ids[1] + '&scaleSize=x(100),y(100)&format=image/tiff'
response = requests.get(getcoverage_request)
response.raise_for_status()

content = response.content
img1 = plt.imshow(imread(BytesIO(content)),extent=[23.4,24.5,37.8,38.8],aspect='auto')
plt.show()

## Data Access v2

In [None]:
base_domain = "develop.eoepca.org"

## Data Discovery (STAC)

In [None]:
import json
import requests
from folium import Map, TileLayer
from pystac_client import Client

#### Define a STAC query and validate its returns

In [None]:
# See STAC API docs at https://eoapi.develop.eoepca.org/stac/api.html
STAC_API_URL = f"https://eoapi.{base_domain}/stac"
RASTER_API_URL = f"https://eoapi.{base_domain}/raster"
COLLECTION_ID = "sentinel-2-iceland"

In [None]:
STAC_API_URL

In [None]:
# Search against the STAC API
catalog = Client.open(STAC_API_URL)
catalog.id

In [None]:
# Select a collection
collections_filter = {
    "op": "=",
    "args": [{"property": "collection"}, COLLECTION_ID],
}

In [None]:
# Define your area of interest
AOI = {
  "type": "Polygon",
  "coordinates": [
    [
      [-21.470015412404706, 63.55594801099713],
      [-20.336567910645556, 63.55594801099713],
      [-20.336567910645556, 64.17209253282897],
      [-21.470015412404706, 64.17209253282897],
      [-21.470015412404706, 63.55594801099713]
    ]
  ]
}
spatial_filter = {"op": "s_intersects", "args": [{"property": "geometry"}, AOI]}

In [None]:
# Define your time interval of interest
TIME_INTERVAL = ["2023-07-01T00:00:00Z", "2023-07-31T23:59:59Z"]
temporal_filter = {
    "op": "t_intersects",
    "args": [{"property": "datetime"}, {"interval": TIME_INTERVAL}],
}

In [None]:
# Additional filters can be applied for other search criteria like <= maximum eo:cloud_cover in item properties
cloud_filter = {"op": "<=", "args": [{"property": "eo:cloud_cover"}, 100]}

In [None]:
# Define your search with CQL2 syntax
SEARCH_BODY = {
    "filter-lang": "cql2-json",
    "limit": 20,
    "sortby": [{"direction": "desc", "field": "properties.datetime"}],
    "context": "on",  # add context for a summary of matched results
    "filter": {
        "op": "and",
        "args": [collections_filter, spatial_filter, temporal_filter, cloud_filter],
    },
}

In [None]:
# Note this search body can also be used for a stac item search

response = requests.post(
    f"{STAC_API_URL}/search",
    json=SEARCH_BODY
)
response.raise_for_status()
stac_items = response.json()

# Check how many items were matched in search
print("returned {numberReturned} out of {numberMatched} matching items".format(**stac_items))

## Visualize a Single Item With Dynamic Tiles (XYZ)

In [None]:
first_item = stac_items["features"][0]

In [None]:
first_item["id"]

In [None]:
collection_id = COLLECTION_ID
item_id = first_item["id"]
response = requests.get(
    f"{RASTER_API_URL}/collections/{collection_id}/items/{item_id}/tilejson.json",
    params={
        # Info to add to the tilejson response
        "minzoom": 9,
        "maxzoom": 12,
        "assets": ["red", "green", "blue"],
        "color_formula": "Gamma RGB 5 Saturation 0.8 Sigmoidal RGB 20 0.35",
        "nodata": 0,
    },
)
response.raise_for_status()
tilejson = response.json()

In [None]:
tilejson

In [None]:
map_center = tilejson["center"][:2][::-1]

In [None]:
# Set up a map located w/in event bounds
m = Map(
    tiles="OpenStreetMap",
    location=map_center,
    zoom_start=tilejson["minzoom"],
    min_zoom=0,
    max_zoom=20
)

# Add the formatted map layer
map_layer = TileLayer(
    tiles=tilejson["tiles"][0],
    attr="Contains modified Copernicus Sentinel-2 data",
)
map_layer.add_to(m)
m

### Visualize a Spatio-Temporal Mosaic With Dynamic Tiles (XYZ)

In [None]:
# Register a mosaic for your STAC query
response = requests.post(
    f"{RASTER_API_URL}/searches/register",
    json=SEARCH_BODY,
)
response.raise_for_status()
mosaic = response.json()
print(json.dumps(mosaic, indent=2))

In [None]:
# Get base url for tiler from the register mosaic request
tiles_href = next(
    link["href"] for link in mosaic["links"] if link["rel"] == "tilejson"
)
# Set the tileMatrixSetId
tiles_href = tiles_href.format(**{"tileMatrixSetId": "WebMercatorQuad"})

In [None]:
response = requests.get(
    tiles_href,
    params={
        # Info to add to the tilejson response
        "minzoom": 9,
        "maxzoom": 12,
        "assets": ["red", "green", "blue"],
        "color_formula": "Gamma RGB 5 Saturation 0.8 Sigmoidal RGB 20 0.35",
        "nodata": 0,
    },
)
response.raise_for_status()
tilejson = response.json()

In [None]:
tilejson

In [None]:
# Set up a map located w/in event bounds
m = Map(
    tiles="OpenStreetMap",
    location=map_center,
    zoom_start=tilejson["minzoom"],
    min_zoom=0,
    max_zoom=20
)

# Add the formatted map layer
map_layer = TileLayer(
    tiles=tilejson["tiles"][0],
    attr="Contains modified Copernicus Sentinel-2 data",
)
map_layer.add_to(m)
m

## Access a multidimensional dataset (NetCDF, Zarr)

In [None]:
import json

import requests
from folium import Map, TileLayer

In [None]:
base_domain = "develop.eoepca.org"

In [None]:
MULTIDIM_API_URL = f"https://eoapi.{base_domain}/multidim/md"
CEDA_EOCIS_NETCDF = "https://dap.ceda.ac.uk/neodc/eocis/data/global_and_regional/sea_surface_temperature/CDR_v3/Analysis/L4/v3.0.1/2024/06/22/20240622120000-ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_ICDR3.0-v02.0-fv01.0.nc"

In [None]:
response = requests.get(f"{MULTIDIM_API_URL}/variables", params={"url": CEDA_EOCIS_NETCDF})
response.raise_for_status()
print(response.json())

In [None]:
CEDA_EOCIS_VARIABLE = "sea_ice_fraction"
assert CEDA_EOCIS_VARIABLE in response.json()

In [None]:
response = requests.get(
    f"{MULTIDIM_API_URL}/info",
    params={
        "url": CEDA_EOCIS_NETCDF,
        "variable": CEDA_EOCIS_VARIABLE,
        "decode_times": True,
        "show_times": True,
    }
)
response.raise_for_status()
print(json.dumps(response.json(), indent=4))

In [None]:
# Ask the multidimensional tiler for a TileJSON document describing the
# selected variable of the NetCDF file.
response = requests.get(
    f"{MULTIDIM_API_URL}/WebMercatorQuad/tilejson.json",
    params={
        "url": CEDA_EOCIS_NETCDF,
        "variable": CEDA_EOCIS_VARIABLE,
        "rescale": "0,1",
        "colormap_name": "blues_r",
    },
)
# Stop on an HTTP error rather than failing later on a missing "bounds" key
response.raise_for_status()
tilejson = response.json()
print(tilejson)

bounds = tilejson["bounds"]

# The dataset is large and NetCDF files have no overviews, so the layer is
# pinned to the highest zoom level reported by the tiler.
maxzoom = tilejson["maxzoom"]

# Centre the map on the middle of the reported bounds (lat, lon order)
m = Map(
    location=((bounds[1] + bounds[3]) / 2,(bounds[0] + bounds[2]) / 2),
    zoom_start=maxzoom
)

TileLayer(
    tiles=tilejson["tiles"][0],
    opacity=1,
    attr="EOCIS",
    min_native_zoom=maxzoom,
    max_native_zoom=maxzoom,
).add_to(m)

m

## (WIP) Protected STAC API Transaction Extension methods (POST to /collections etc.) require authorization

WIP, see https://github.com/EOEPCA/resource-discovery/issues/106

In [None]:
base_domain = "apx.develop.eoepca.org"

PROTECTED_STAC_API_URL = f"https://eoapi.{base_domain}/stac"

In [None]:
import requests

In [None]:
response = requests.get(f"{PROTECTED_STAC_API_URL}/collections")
response.raise_for_status()
assert "collections" in response.json()

In [None]:
response = requests.post(f"{PROTECTED_STAC_API_URL}/collections")
assert response.status_code == 401  # 401 Unauthorized

## Data Visualization with Maps (OGC API Maps)

In [None]:
# Fetch the collection document and show its "renders" entry.
collection = "sentinel-2-l2a-render"
response = requests.get(f"{PROTECTED_STAC_API_URL}/collections/{collection}")
# Stop on an HTTP error instead of a KeyError on the error payload
response.raise_for_status()
response.json()["renders"]

In [None]:
MAPS_API_URL = f"https://maps.develop.eoepca.org"
from PIL import Image
from IPython.display import display
import io
params = {
    "bbox": "-22,64,-21.7,64.3",
    "datetime": "2023-10-31T00:00:00Z/2023-10-31T23:59:59Z",
    "width": 256,
    "height": 256,
}
response = requests.get(f"{MAPS_API_URL}/collections/{collection}/map", params=params)
response.raise_for_status()
img = Image.open(io.BytesIO(response.content))
display(img)

In [None]:
collection = "sentinel-2-l2a-render"
style = "ndvi"
params = {
    "bbox": "-22,64,-21.7,64.3",
    "datetime": "2023-10-31T00:00:00Z/2023-10-31T23:59:59Z",
    "width": 256,
    "height": 256,
}
response = requests.get(f"{MAPS_API_URL}/collections/{collection}/styles/{style}/map", params=params)
response.raise_for_status()
img = Image.open(io.BytesIO(response.content))
display(img)

In [None]:
MAPS_API_URL = f"https://maps.{base_domain}"
from PIL import Image
from IPython.display import display
params = {
    "bbox": "-24.36,63.49,-21.28,65.69",
    "datetime": "2023-10-31T00:00:00Z/2023-11-08T23:59:59Z",
    "width": 256,
    "height": 256,
}
response = requests.get(f"{MAPS_API_URL}/collections/{collection}/map", params=params)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
display(img)

## Managing private and public STAC collections

In [1]:
import requests

realm = "eoepca"
base_domain = "apx.develop.eoepca.org"
keycloak_endpoint = f"https://iam-auth.{base_domain}"
stac_endpoint = f"https://eoapi.{base_domain}/stac"
token_endpoint = f"{keycloak_endpoint}/realms/{realm}/protocol/openid-connect/token"
print(token_endpoint)

https://iam-auth.apx.develop.eoepca.org/realms/eoepca/protocol/openid-connect/token


In [2]:
for collection in requests.get(f"{stac_endpoint}/collections").json()['collections']:
    print(collection['id'])

landsat-8-l1
noaa-emergency-response
s5p-bp-cloud-fraction-2023-aug-dec
s5p-cloud-fraction-2023-aug-dec
sentinel-2-iceland
sentinel-2-l2a-render
ws-eric.naip
ws-eric.test


### 1) Access

In [3]:
COLLECTIONS = [
    ("noaa-emergency-response", "public"),
    ("landsat-8-l1", "public"),
    ("ws-eric.naip", "private"),
    ("does-not-exist", "not_found"),
]

ENDPOINTS = [
    "/",
    "/api",
    "/collections",
]

for col_id, visibility in COLLECTIONS:
    ENDPOINTS += [
        f"/collections/{col_id}",
        f"/collections/{col_id}/items",
    ]

headers = {
    "Accept": "application/json"
}

def classify_expected(endpoint: str):
    """Return the visibility class expected for a STAC endpoint path.

    The result is one of:

    * ``"not_found"`` when the path contains ``does-not-exist``;
    * ``"private"`` when the collection name following ``/collections/``
      contains a dot;
    * ``"public"`` in every other case, including paths that do not address
      a specific collection.
    """
    if "does-not-exist" in endpoint:
        return "not_found"

    _, marker, remainder = endpoint.partition("/collections/")
    if not marker:
        # No specific collection is addressed
        return "public"

    # The collection name is the path segment right after "/collections/"
    collection_name = remainder.partition("/")[0]
    if "." in collection_name:
        return "private"
    return "public"

print(f"\nTesting STAC GET endpoints on {stac_endpoint}\n")

# HTTP status expected for each visibility class; private collections answer
# with a redirect to the login page.
EXPECTED_STATUS = {"not_found": 404, "private": 302, "public": 200}

for path in ENDPOINTS:
    url = stac_endpoint + path
    expected = classify_expected(path)
    expected_code = EXPECTED_STATUS.get(expected, 200)

    # Only the network call is guarded: a connection problem is reported as an
    # ERROR row, while programming errors are no longer hidden.
    try:
        response = requests.get(url, headers=headers, timeout=5, allow_redirects=False)
    except requests.RequestException as e:
        print(f"{expected.upper():10} | GET {path:<40} -> ERROR: {e}")
        continue

    status = response.status_code
    verdict = "PASS" if status == expected_code else f"FAIL ({status})"
    print(f"{expected.upper():10} | GET {path:<40} -> {status} {verdict}")



Testing STAC GET endpoints on https://eoapi.apx.develop.eoepca.org/stac

PUBLIC     | GET /                                        -> 200 PASS
PUBLIC     | GET /api                                     -> 200 PASS
PUBLIC     | GET /collections                             -> 200 PASS
PUBLIC     | GET /collections/noaa-emergency-response     -> 200 PASS
PUBLIC     | GET /collections/noaa-emergency-response/items -> 200 PASS
PUBLIC     | GET /collections/landsat-8-l1                -> 200 PASS
PUBLIC     | GET /collections/landsat-8-l1/items          -> 200 PASS
PRIVATE    | GET /collections/ws-eric.naip                -> 302 PASS
PRIVATE    | GET /collections/ws-eric.naip/items          -> 302 PASS
NOT_FOUND  | GET /collections/does-not-exist              -> 404 PASS
NOT_FOUND  | GET /collections/does-not-exist/items        -> 404 PASS


In [4]:
def iam_token(username, password, timeout=5):
    """Request an access token from ``token_endpoint`` with the password grant.

    Parameters
    ----------
    username, password
        Credentials sent in the form body.
    timeout
        Seconds passed to ``requests.post`` so that an unresponsive server
        cannot block the notebook indefinitely.

    Returns
    -------
    str or None
        The ``access_token`` field of the JSON response when the request
        succeeds. Otherwise the response object is printed and ``None`` is
        returned, so callers should check the result before building an
        ``Authorization`` header.
    """
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    # The client id and secret are the hard-coded "demo" values
    data = {
        "scope": "roles",
        "grant_type": "password",
        "username": username,
        "password": password,
        "client_id": "demo",
        "client_secret": "demo"
    }
    response = requests.post(token_endpoint, headers=headers, data=data, timeout=timeout)
    if response.ok:
        return response.json()["access_token"]
    else:
        print(response)
        return None

In [5]:
token_eric = iam_token("eric", "changeme")
response = requests.get(
    stac_endpoint + "/collections/ws-eric.naip",
    headers={"Authorization": f"Bearer {token_eric}"},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 200 else f"FAIL ({status})")

PASS


In [6]:
token_alice = iam_token("alice", "changeme")
response = requests.get(
    stac_endpoint + "/collections/ws-eric.naip",
    headers={"Authorization": f"Bearer {token_alice}"},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 403 else f"FAIL ({status})")

PASS


### 2) Management

In [7]:
test_collection = {
    "id": "test",
    "description": "Test",
    "title": "Test Collection",
    "links": [
        {
            "rel": "self",
            "href": f"{stac_endpoint}/collections/test",
            "type": "application/json"
        },
        {
            "rel": "parent",
            "href": f"{stac_endpoint}/collections",
            "type": "application/json"
        }
    ],
    "extent": {
        "spatial": {
            "bbox": [[-180.0, -90.0, 180.0, 90.0]]
        },
        "temporal": {
            "interval": [["2020-01-01T00:00:00Z", None]]
        }
    },
    "type": "Collection",
    "stac_version": "1.0.0",
    "license": "MIT"
}

In [8]:
token_admin = iam_token("example-admin", "changeme")

In [9]:
# eric can't create public collections
response = requests.post(f"{stac_endpoint}/collections", 
    json=test_collection, 
    headers={"Authorization": f"Bearer {token_eric}"})
status = response.status_code
print("PASS" if status == 403 else f"FAIL ({status})")

PASS


In [10]:
# admin can create public collections (given the Keycloak stac_editor role)
response = requests.post(f"{stac_endpoint}/collections", 
    json=test_collection, 
    headers={"Authorization": f"Bearer {token_admin}"})
status = response.status_code
print("PASS" if status == 201 else f"FAIL ({status})")

PASS


In [11]:
# eric can access public collections
response = requests.get(
    stac_endpoint + "/collections/test",
    headers={"Authorization": f"Bearer {token_eric}"},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 200 else f"FAIL ({status})")

PASS


In [12]:
# anyone (no auth!) can access public collections
response = requests.get(
    stac_endpoint + "/collections/test",
    headers={},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 200 else f"FAIL ({status})")

PASS


In [13]:
# cleanup: delete the listed collections using the admin token
collections_to_delete = ["test"]
for collection in collections_to_delete:
    response = requests.delete(
        f"{stac_endpoint}/collections/{collection}",
        headers={"Authorization": f"Bearer {token_admin}"},
    )
    # Status codes up to 204 are reported as success, anything higher as an error
    if response.status_code <= 204:
        print(f"({response.status_code}) Collection {collection} deleted successfully.")
    else:
        print(f"Error deleting collection {collection}: {response.status_code} - {response.text}")

(200) Collection test deleted successfully.


In [14]:
# but only eric can access his private collection
response = requests.get(
    stac_endpoint + "/collections/ws-eric.naip",
    headers={"Authorization": f"Bearer {token_eric}"},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 200 else f"FAIL ({status})")

response = requests.get(
    stac_endpoint + "/collections/ws-eric.naip/items",
    headers={"Authorization": f"Bearer {token_eric}"},
    timeout=5,
    allow_redirects=False
)
print("first item:" + response.json()["features"][0]["id"])
status = response.status_code
print("PASS" if status == 200 else f"FAIL ({status})")

PASS
first item:ne_m_4110264_sw_13_060_20220827
PASS


In [15]:
# anyone (no auth!) is denied access to private collections and redirected to login
response = requests.get(
    stac_endpoint + "/collections/ws-eric.naip",
    headers={},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 302 else f"FAIL ({status})")

PASS


In [16]:
private_collection = {
    "id": "ws-eric.test",
    "description": "Private test collection for Eric's workspace",
    "title": "Eric's Test Collection",
    "links": [
        {
            "rel": "self",
            "href": f"{stac_endpoint}/collections/ws-eric.test",
            "type": "application/json"
        },
        {
            "rel": "parent",
            "href": f"{stac_endpoint}/collections",
            "type": "application/json"
        }
    ],
    "extent": {
        "spatial": {
            "bbox": [[-180.0, -90.0, 180.0, 90.0]]
        },
        "temporal": {
            "interval": [["2020-01-01T00:00:00Z", None]]
        }
    },
    "type": "Collection",
    "stac_version": "1.0.0",
    "license": "MIT"
}

In [17]:
# eric can't create private collections (even with the ws-eric. prefix)
response = requests.post(f"{stac_endpoint}/collections", 
    json=private_collection, 
    headers={"Authorization": f"Bearer {token_eric}"})
status = response.status_code
print("PASS" if status == 403 else f"FAIL ({status})")

PASS


In [20]:
# only admin with role stac_editor can create private collections (i.e. with the ws-eric. prefix)
response = requests.post(f"{stac_endpoint}/collections", 
    json=private_collection, 
    headers={"Authorization": f"Bearer {token_admin}"})
status = response.status_code
print("PASS" if status == 201 else f"FAIL ({status})")

PASS


In [21]:
# but only eric can access it; others are redirected to login for eric's collections
response = requests.get(
    stac_endpoint + "/collections/ws-eric.test",
    headers={"Authorization": f"Bearer {token_eric}"},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 200 else f"FAIL ({status})")

response = requests.get(
    stac_endpoint + "/collections/ws-eric.test",
    headers={},
    timeout=5,
    allow_redirects=False
)
status = response.status_code
print("PASS" if status == 302 else f"FAIL ({status})")

PASS
PASS


### 3) curate STAC items

In [22]:
# only eric (and others of ws-eric team) can curate private collections (i.e. with ws-eric. prefix)

In [23]:
from shapely import wkb
import numpy as np
import pandas as pd
import stac_geoparquet

In [24]:
df = pd.read_parquet('../data/catalog.v1.parquet')
df.head()

Unnamed: 0,stac_version,stac_extensions,id,providers,proj:epsg,links,assets,collection,datetime,proj:shape,naip:state,proj:transform,proj:centroid,gsd,naip:year,proj:bbox,bbox,geometry
0,1.1.0,[https://stac-extensions.github.io/eo/v1.0.0/s...,ne_m_4110264_sw_13_060_20220827,"[{'name': 'USDA Farm Service Agency', 'roles':...",26913,[{'href': 'https://planetarycomputer.microsoft...,"{'image': {'eo:bands': [{'common_name': 'red',...",naip,2022-08-27 16:00:00+00:00,"[12953, 10217]",ne,"[0.6, 0.0, 741276.0, 0.0, -0.6, 4550210.4, 0.0...","{'lat': 41.0315, 'lon': -102.09348}",0.6,2022,"[741276.0, 4542438.600000001, 747406.2, 455021...","{'xmax': -102.055505, 'xmin': -102.131414, 'ym...",b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...
1,1.1.0,[https://stac-extensions.github.io/eo/v1.0.0/s...,ne_m_4110263_sw_13_060_20220820,"[{'name': 'USDA Farm Service Agency', 'roles':...",26913,[{'href': 'https://planetarycomputer.microsoft...,"{'image': {'eo:bands': [{'common_name': 'red',...",naip,2022-08-20 16:00:00+00:00,"[12940, 10200]",ne,"[0.6, 0.0, 730771.2, 0.0, -0.6, 4549863.6, 0.0...","{'lat': 41.0315, 'lon': -102.21848}",0.6,2022,"[730771.2, 4542099.6, 736891.2, 4549863.6]","{'xmax': -102.180634, 'xmin': -102.256294, 'ym...",b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...
2,1.1.0,[https://stac-extensions.github.io/eo/v1.0.0/s...,ne_m_4110263_se_13_060_20220820,"[{'name': 'USDA Farm Service Agency', 'roles':...",26913,[{'href': 'https://planetarycomputer.microsoft...,"{'image': {'eo:bands': [{'common_name': 'red',...",naip,2022-08-20 16:00:00+00:00,"[12941, 10068]",ne,"[0.6, 0.0, 736065.6, 0.0, -0.6, 4550033.4, 0.0...","{'lat': 41.0315, 'lon': -102.15598}",0.6,2022,"[736065.6, 4542268.800000001, 742106.4, 455003...","{'xmax': -102.118573, 'xmin': -102.193354, 'ym...",b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...
3,1.1.0,[https://stac-extensions.github.io/eo/v1.0.0/s...,ne_m_4110262_se_13_060_20220820,"[{'name': 'USDA Farm Service Agency', 'roles':...",26913,[{'href': 'https://planetarycomputer.microsoft...,"{'image': {'eo:bands': [{'common_name': 'red',...",naip,2022-08-20 16:00:00+00:00,"[12928, 10051]",ne,"[0.6, 0.0, 725561.4, 0.0, -0.6, 4549694.4, 0.0...","{'lat': 41.0315, 'lon': -102.28098}",0.6,2022,"[725561.4, 4541937.600000001, 731592.0, 454969...","{'xmax': -102.243696, 'xmin': -102.318229, 'ym...",b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00...
4,1.1.0,[https://stac-extensions.github.io/eo/v1.0.0/s...,ne_m_4110361_sw_13_060_20220818,"[{'name': 'USDA Farm Service Agency', 'roles':...",26913,[{'href': 'https://planetarycomputer.microsoft...,"{'image': {'eo:bands': [{'common_name': 'red',...",naip,2022-08-18 16:00:00+00:00,"[12811, 10031]",ne,"[0.6, 0.0, 625734.6, 0.0, -0.6, 4547227.2, 0.0...","{'lat': 41.0315, 'lon': -103.46848}",0.6,2022,"[625734.6, 4539540.600000001, 631753.2, 454722...","{'xmax': -103.431874, 'xmin': -103.50505, 'yma...","b""\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00..."


In [27]:
def convert_ndarray(obj):
    """Recursively turn ``obj`` into plain Python containers and scalars.

    Conversions applied:

    * ``numpy.ndarray`` becomes a list, and the elements are converted too,
      so arrays nested inside an object array are handled;
    * NumPy scalars (``numpy.generic``) become the matching Python scalar;
    * ``pandas.Timestamp`` becomes its ISO 8601 string;
    * ``dict`` values and ``list`` items are converted recursively.

    Any other object is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        # tolist() leaves the elements of object arrays as they are, so the
        # resulting list is walked again to reach nested arrays.
        return convert_ndarray(obj.tolist())
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, pd.Timestamp):
        return obj.isoformat()
    if isinstance(obj, dict):
        return {k: convert_ndarray(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_ndarray(i) for i in obj]
    return obj

# Collection that receives the items. The same value is used for the item
# payload, the request URL and the log line so the three cannot drift apart.
target_collection = 'ws-eric.test'

# NOTE(review): the notebook text says eric curates his private collections,
# yet the items are posted with the admin token - confirm which identity is
# intended here.
for row in df.to_dict(orient="records"):
    # Convert geometry from WKB bytes to shapely geometry
    if isinstance(row.get("geometry"), bytes):
        row["geometry"] = wkb.loads(row["geometry"])
    dict_item = stac_geoparquet.to_dict(row)
    dict_item['collection'] = target_collection
    # for link in dict_item["links"]:
    #     link["href"] = link["href"].replace("https://planetarycomputer.microsoft.com/api/stac/v1", f"{STAC_API_URL}")

    # Convert all numpy arrays to lists for JSON serialization
    dict_item = convert_ndarray(dict_item)

    # Replace the xmin/ymin/xmax/ymax mapping by a flat list in that order
    dict_item['bbox'] = [
        dict_item['bbox']['xmin'],
        dict_item['bbox']['ymin'],
        dict_item['bbox']['xmax'],
        dict_item['bbox']['ymax'],
        ]

    dict_item['type'] = "Feature"
    # Turn the relative thumbnail href into an absolute URL
    dict_item['assets']['thumbnail']['href'] = dict_item['assets']['thumbnail']['href'].replace("./", "https://naipeuwest.blob.core.windows.net/naip/")

    print(dict_item)
    response = requests.post(
        f"{stac_endpoint}/collections/{target_collection}/items",
        json=dict_item,
        headers={"Authorization": f"Bearer {token_admin}"}
    )
    # The status code is printed because the request result is not checked here
    print(f"({response.status_code}) {dict_item['id']} added to collection {target_collection}")
    print(response.json())

{'stac_version': '1.1.0', 'stac_extensions': ['https://stac-extensions.github.io/eo/v1.0.0/schema.json', 'https://stac-extensions.github.io/projection/v1.0.0/schema.json'], 'id': 'ne_m_4110264_sw_13_060_20220827', 'links': [{'href': 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip', 'rel': 'collection', 'title': None, 'type': 'application/json'}, {'href': 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip', 'rel': 'parent', 'title': None, 'type': 'application/json'}, {'href': 'https://planetarycomputer.microsoft.com/api/stac/v1', 'rel': 'root', 'title': 'Microsoft Planetary Computer STAC API', 'type': 'application/json'}, {'href': 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/naip/items/ne_m_4110264_sw_13_060_20220827', 'rel': 'self', 'title': None, 'type': 'application/geo+json'}, {'href': 'https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection=naip&item=ne_m_4110264_sw_13_060_20220827', 'rel': 'preview', 'tit

In [28]:
# now check in https://eoapi.apx.develop.eoepca.org/manager/collections and see Eric's Test Collection

In [29]:
# cleanup: delete the listed collections using the admin token
collections_to_delete = ["ws-eric.test"]
for collection in collections_to_delete:
    response = requests.delete(
        f"{stac_endpoint}/collections/{collection}",
        headers={"Authorization": f"Bearer {token_admin}"},
    )
    # Status codes up to 204 are reported as success, anything higher as an error
    if response.status_code <= 204:
        print(f"({response.status_code}) Collection {collection} deleted successfully.")
    else:
        print(f"Error deleting collection {collection}: {response.status_code} - {response.text}")

(200) Collection ws-eric.test deleted successfully.
