# Create STAC JSON files for Sentinel-2 L2A data

In [1]:
import os
from pathlib import Path
import json
import rasterio
from datetime import datetime

## Extract metadata from a GeoTIFF file

In [2]:
def get_image_metadata(tiff_path):
    """Read georeferencing metadata from a GeoTIFF for use in a STAC item.

    STAC requires ``bbox`` (and the geometry derived from it) to be expressed
    in WGS84 longitude/latitude, so the raster bounds are reprojected to
    EPSG:4326 whenever the file declares a different CRS. The bounds in the
    raster's own CRS are kept under ``proj:bbox``.

    Args:
        tiff_path: Path (str or ``pathlib.Path``) to a raster readable by
            ``rasterio.open``.

    Returns:
        dict with the keys:
            ``bbox``           -- [west, south, east, north]; WGS84 when the
                                  file has a CRS, otherwise the raw bounds.
            ``proj:epsg``      -- EPSG code of the raster CRS, or ``None`` if
                                  the file has no CRS / no matching EPSG code.
            ``proj:bbox``      -- bounds in the raster's native CRS.
            ``proj:transform`` -- the dataset's affine transform as a list.
            ``proj:shape``     -- ``list(src.shape)``.
            ``proj:centroid``  -- {"lat", "lon"} midpoint of ``bbox``.
    """
    # Local import: the notebook's import cell only brings in `rasterio` itself.
    from rasterio.warp import transform_bounds

    with rasterio.open(tiff_path) as src:
        native_bbox = list(src.bounds)
        transform = list(src.transform)
        shape = list(src.shape)
        crs = src.crs

        # A file without georeferencing has crs == None; don't crash on it.
        epsg = crs.to_epsg() if crs else None

        if crs and epsg != 4326:
            # Reproject the bounds so bbox/centroid are real lon/lat values.
            bbox = list(transform_bounds(crs, "EPSG:4326", *native_bbox))
        else:
            # Already geographic, or CRS unknown (nothing to reproject from).
            bbox = list(native_bbox)

    centroid = {
        "lat": (bbox[1] + bbox[3]) / 2,
        "lon": (bbox[0] + bbox[2]) / 2
    }

    return {
        "bbox": bbox,
        "proj:epsg": epsg,
        "proj:bbox": native_bbox,
        "proj:transform": transform,
        "proj:shape": shape,
        "proj:centroid": centroid
    }

## Create a STAC item JSON for each Sentinel-2 scene

In [3]:
def create_sentinel2_item_json(scene_id, scene_dir, base_url):
    """Build a STAC Item dictionary for one Sentinel-2 scene directory.

    Every ``*.tif`` file directly inside ``scene_dir`` becomes one asset keyed
    by its file stem. Spatial metadata is read from the first file in sorted
    order via ``get_image_metadata``.

    Args:
        scene_id: Scene identifier. The third ``_``-separated token must be a
            ``YYYYMMDDTHHMMSS`` timestamp
            (e.g. ``S2A_MSIL2A_20240531T080611_...``).
        scene_dir: Directory (str or ``pathlib.Path``) holding the scene's
            GeoTIFF files.
        base_url: Prefix used to build asset and collection hrefs.

    Returns:
        The item as a ``dict``, or ``None`` (after printing a message) when
        the directory has no TIFF files or the scene id has no parsable
        timestamp.
    """
    scene_dir = Path(scene_dir)

    # Sort so the metadata source file and the asset order are deterministic;
    # glob() makes no ordering guarantee.
    index_files = sorted(scene_dir.glob("*.tif"))
    if not index_files:
        print(f"No TIFF files found for {scene_id}")
        return None

    # Validate the scene id before opening any raster: it is the cheaper check.
    try:
        datetime_str = scene_id.split('_')[2]
        scene_datetime = datetime.strptime(datetime_str, "%Y%m%dT%H%M%S")
    except (IndexError, ValueError):
        print(f"Error parsing datetime for {scene_id}")
        return None

    metadata = get_image_metadata(index_files[0])
    west, south, east, north = metadata["bbox"][:4]

    # Only projection-extension fields belong in `properties`; `bbox` is a
    # top-level Item field and must not be duplicated there.
    proj_properties = {
        key: value for key, value in metadata.items() if key.startswith("proj:")
    }

    assets = {
        index_file.stem: {
            "href": f"{base_url}/{scene_id}/{index_file.name}",
            "type": "image/tiff; application=geotiff; profile=cloud-optimized",
            "title": f"Sentinel-2 Band {index_file.stem}",
            "description": f"Sentinel-2 L2A band {index_file.stem}.",
            "roles": ["data"]
        } for index_file in index_files
    }

    collection_href = f"{base_url}/collection.json"

    item = {
        "type": "Feature",
        "stac_version": "1.0.0",
        "stac_extensions": [
            "https://stac-extensions.github.io/raster/v1.1.0/schema.json",
            "https://stac-extensions.github.io/projection/v1.1.0/schema.json"
        ],
        "id": scene_id,
        "collection": "sentinel-2-l2a",
        "bbox": metadata["bbox"],
        "geometry": {
            "type": "Polygon",
            # Exterior ring wound counter-clockwise (RFC 7946 right-hand rule),
            # closed by repeating the first position.
            "coordinates": [[
                [west, south],
                [east, south],
                [east, north],
                [west, north],
                [west, south]
            ]]
        },
        "properties": {
            "datetime": scene_datetime.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "platform": "sentinel-2",
            "instruments": ["msi"],
            "sentinel:product_type": "l2a",
            **proj_properties
        },
        "links": [
            {"rel": rel, "href": collection_href, "type": "application/json"}
            for rel in ("root", "parent", "collection")
        ],
        "assets": assets
    }

    return item

## Create the STAC collection.json for Sentinel-2

In [4]:
def create_sentinel2_collection_json(output_dir, items, base_url):
    """Write ``collection.json`` describing the given Sentinel-2 items.

    The spatial extent is the union of the items' ``bbox`` values. The
    temporal extent is a fixed interval (2016-01-01 to 2025-12-31), not
    derived from the items.

    Args:
        output_dir: Directory (str or ``pathlib.Path``) in which
            ``collection.json`` is written.
        items: Iterable of item dicts, each with ``"id"`` and ``"bbox"``
            (``[minx, miny, maxx, maxy]``). Falsy entries (e.g. ``None``)
            are ignored.
        base_url: Prefix used to build the root and item link hrefs.

    Raises:
        ValueError: If ``items`` contains no usable item, since a collection
            extent cannot be computed from nothing.
    """
    # Materialise once: `items` is needed for both the extent and the links,
    # and may be a one-shot iterator.
    valid_items = [item for item in items if item]
    if not valid_items:
        raise ValueError(
            "Cannot create collection.json: no valid STAC items were provided"
        )

    output_dir = Path(output_dir)

    all_bboxes = [item["bbox"] for item in valid_items]
    overall_bbox = [
        min(b[0] for b in all_bboxes),
        min(b[1] for b in all_bboxes),
        max(b[2] for b in all_bboxes),
        max(b[3] for b in all_bboxes)
    ]

    root_link = {
        "rel": "root",
        "href": f"{base_url}/collection.json",
        "type": "application/json"
    }
    item_links = [{
        "rel": "item",
        "href": f"{base_url}/{item['id']}/{item['id']}.json",
        "type": "application/json"
    } for item in valid_items]

    collection = {
        "type": "Collection",
        "id": "sentinel-2-l2a",
        "stac_version": "1.0.0",
        "description": "This Sentinel-2 L2A (maximum cloud coverage: 30) dataset is cropped to the extent of Virunga Volcanoes Massif. The [Sentinel-2](https://sentinel.esa.int/web/sentinel/missions/sentinel-2) program provides global imagery in thirteen spectral bands at 10m-60m resolution and a revisit time of approximately five days. This dataset represents the global Sentinel-2 archive, from 2016 to the present, processed to L2A (bottom-of-atmosphere) using [Sen2Cor](https://step.esa.int/main/snap-supported-plugins/sen2cor/) and converted to [cloud-optimized GeoTIFF](https://www.cogeo.org/) format.",
        "extent": {
            "spatial": {"bbox": [overall_bbox]},
            "temporal": {"interval": [["2016-01-01T00:00:00Z", "2025-12-31T23:59:59Z"]]}
        },
        "license": "proprietary",
        "keywords": ["sentinel-2", "l2a", "remote-sensing"],
        "providers": [{
            "name": "ESA",
            "roles": ["producer"],
            "url": "https://sentinel.esa.int"
        }],
        "summaries": {
            "gsd": [10, 20, 60],
            "platform": ["Sentinel-2A", "Sentinel-2B"],
            "instruments": ["msi"],
            "constellation": ["sentinel-2"],
            "view:off_nadir": [0]
        },
        "links": [root_link] + item_links
    }

    # Explicit encoding so the output does not depend on the platform default.
    with open(output_dir / "collection.json", 'w', encoding="utf-8") as f:
        json.dump(collection, f, indent=2)

    print("Created collection.json")

In [7]:
def main(base_url="G:/Semester4/Innolab/eoAPI/data/Sentinel-2-L2A"):
    """Generate a STAC item JSON per scene directory, then ``collection.json``.

    Each sub-directory of the data directory is treated as one scene whose
    name is the scene id. The item is written to
    ``<scene_dir>/<scene_id>.json`` and the collection to
    ``<data_dir>/collection.json``.

    Args:
        base_url: Location of the Sentinel-2 L2A data. It is used as the href
            prefix inside the generated JSON and, with any ``file:///``
            prefix stripped, as the local directory to scan. Defaults to the
            original author's local path; pass your own to run elsewhere.
    """
    # NOTE(review): stripping "file:///" yields a drive-letter path on Windows
    # but a relative path on POSIX -- confirm if file URLs are ever passed there.
    indices_dir = Path(base_url.replace("file:///", ""))
    if not indices_dir.is_dir():
        print(f"Data directory not found: {indices_dir}")
        return

    # Sorted so items are processed (and listed in the collection) in a stable
    # order; iterdir() makes no ordering guarantee.
    scene_dirs = sorted(d for d in indices_dir.iterdir() if d.is_dir())
    print(f"Found {len(scene_dirs)} scenes")

    items = []
    for scene_dir in scene_dirs:
        scene_id = scene_dir.name
        item = create_sentinel2_item_json(scene_id, scene_dir, base_url)
        if item:
            items.append(item)
            item_file = scene_dir / f"{scene_id}.json"
            with open(item_file, 'w', encoding="utf-8") as f:
                json.dump(item, f, indent=2)
            print(f"Created item for {scene_id}")

    # A collection extent cannot be computed without at least one item.
    if not items:
        print("No items were created; skipping collection.json")
        return

    create_sentinel2_collection_json(indices_dir, items, base_url)

In [8]:
# Entry point: run the full item/collection generation when this cell or
# script is executed directly (not when the code is imported as a module).
if __name__ == "__main__":
    main()

Found 26 scenes
Created item for S2A_MSIL2A_20240531T080611_R078_T35MQU_20240531T150446
Created item for S2A_MSIL2A_20240610T080611_R078_T35MRU_20240610T142004
Created item for S2A_MSIL2A_20240620T080611_R078_T35MQU_20240620T135853
Created item for S2A_MSIL2A_20240620T080611_R078_T35MRU_20240620T135115
Created item for S2A_MSIL2A_20240630T080611_R078_T35MQU_20240701T185708
Created item for S2A_MSIL2A_20240630T080611_R078_T35MRU_20240701T190110
Created item for S2A_MSIL2A_20240630T080611_R078_T35MRU_20240701T190926
Created item for S2A_MSIL2A_20240819T080611_R078_T35MQU_20240819T130750
Created item for S2B_MSIL2A_20240127T081119_R078_T35MRU_20240127T120827
Created item for S2B_MSIL2A_20240307T080759_R078_T35MRU_20240307T123828
Created item for S2B_MSIL2A_20240317T080649_R078_T35MQU_20240317T121450
Created item for S2B_MSIL2A_20240317T080649_R078_T35MRU_20240317T114656
Created item for S2B_MSIL2A_20240605T080609_R078_T35MQU_20240605T114949
Created item for S2B_MSIL2A_20240615T080609_R078