In [1]:
import geopandas as gpd

# Load the region-of-interest geometries from the local file.
roi = gpd.read_file("roi.geojson")
# Bounding box over all ROI geometries; reused below as the `bbox` of the STAC searches.
roi_bbox = roi.total_bounds  # [minx, miny, maxx, maxy]
# NOTE(review): the STAC searches presumably expect lon/lat degrees — confirm the ROI's CRS.
roi_bbox


array([75. , 28. , 77.8, 31.5])

In [2]:
pip install pystac_client

Note: you may need to restart the kernel to use updated packages.




In [3]:
from pystac_client import Client
import datetime

# STAC API endpoint that the Sentinel-2 searches in this notebook are run against.
catalog = Client.open("https://earth-search.aws.element84.com/v1")

# Date range for Date 1 (Feb 2023)
date1_range = "2023-02-05/2023-02-25"

# Sentinel-2 L2A items intersecting the ROI bounding box within the date range,
# restricted to items whose `eo:cloud_cover` property is below 30.
search_s2_d1 = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=roi_bbox.tolist(),
    datetime=date1_range,
    query={"eo:cloud_cover": {"lt": 30}}
)

# `ItemSearch.get_items()` is deprecated in pystac-client; `items()` is the
# supported iterator over the matching items.
items_s2_d1 = list(search_s2_d1.items())
len(items_s2_d1)



107

In [4]:
# Preview the first five Feb results: item ID followed by its cloud-cover value.
for item in items_s2_d1[:5]:
    print(f"{item.id} {item.properties['eo:cloud_cover']}")


S2A_43RGM_20230225_0_L2A 0.001347
S2B_43REL_20230223_0_L2A 0.001108
S2B_43RFL_20230223_0_L2A 0.000226
S2B_43RGL_20230223_0_L2A 5e-05
S2B_43RFM_20230223_0_L2A 0.000571


In [5]:
# Date range for Date 2 (Apr 2023)
date2_range = "2023-04-05/2023-04-25"

# Same search as for Date 1 (collection, ROI bbox, cloud-cover filter),
# only the date window differs.
search_s2_d2 = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=roi_bbox.tolist(),
    datetime=date2_range,
    query={"eo:cloud_cover": {"lt": 30}}
)

# `ItemSearch.get_items()` is deprecated in pystac-client; `items()` is the
# supported iterator over the matching items.
items_s2_d2 = list(search_s2_d2.items())

# Preview the first five results: item ID and its cloud-cover value.
for item in items_s2_d2[:5]:
    print(item.id, item.properties["eo:cloud_cover"])


S2B_43RFL_20230424_0_L2A 6.387484
S2B_43RFM_20230424_0_L2A 12.483489
S2B_43REN_20230424_0_L2A 3.825745
S2B_43REP_20230424_0_L2A 29.695684
S2A_43RDL_20230422_0_L2A 0.047943


Only the required spectral bands (Red and NIR) and the Scene Classification Layer (SCL) were downloaded, to reduce storage use and preprocessing time.

In [6]:
import os
import requests
from tqdm import tqdm
from pystac_client import Client


In [7]:
def download_file(url, out_path, timeout=60):
    """Stream ``url`` to ``out_path`` while showing a tqdm progress bar.

    The body is first written to ``<out_path>.part`` and only renamed to
    ``out_path`` once the transfer has finished, so an interrupted or failed
    download never leaves a truncated file at ``out_path`` (which a later
    "file already exists" check would mistake for a complete download).

    Args:
        url: HTTP(S) URL of the file to fetch.
        out_path: Destination path of the downloaded file.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block the call forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    tmp_path = f"{out_path}.part"

    try:
        # The context manager releases the connection even if an error occurs.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # A missing content-length header yields None, which makes tqdm
            # show a running byte count instead of a bar with total 0.
            total = int(response.headers.get("content-length", 0)) or None
            with open(tmp_path, "wb") as f, tqdm(
                desc=os.path.basename(out_path),
                total=total,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
                        bar.update(len(chunk))

        # Publish the finished file under its final name in one step.
        os.replace(tmp_path, out_path)
    except BaseException:
        # Also covers KeyboardInterrupt (interrupting the cell): drop the
        # partial file, then let the original error propagate.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

In [8]:
# Re-open the STAC API client on the same endpoint used by the search cells above
# (rebinds `catalog`; presumably so the download section can run on its own).
catalog = Client.open("https://earth-search.aws.element84.com/v1")

In [9]:
# STAC item IDs of the two scenes to download, one per acquisition window.
# NOTE(review): the IDs carry different tile codes (43RGL vs 43RDL); if the two
# dates are meant to be compared over the same area, confirm this is intended.
scene_date1 = "S2B_43RGL_20230223_0_L2A"  # Feb (early season)
scene_date2 = "S2A_43RDL_20230422_0_L2A"  # Apr (peak season)

# Label -> scene ID; the label is used as the output sub-directory name.
scenes = dict(date1=scene_date1, date2=scene_date2)


In [10]:
# STAC asset key -> local file name used when the asset is saved to disk.
# NOTE(review): on Earth Search v1 the `red`/`nir`/`scl` asset keys may resolve to
# Cloud-Optimized GeoTIFFs rather than JPEG2000 files — confirm that the `.jp2`
# extension matches what is actually downloaded.
bands = dict([
    ("red", "B04.jp2"),   # Red band (10m)
    ("nir", "B08.jp2"),   # NIR band (10m)
    ("scl", "SCL.jp2"),   # Scene Classification Layer (20m)
])


In [11]:
# Root directory for the downloaded scenes; created if it does not exist yet.
# NOTE(review): "sentinal2" looks like a misspelling of "sentinel2" — left unchanged
# because other cells or files on disk may rely on this exact path.
base_dir = "data/sentinal2"
os.makedirs(base_dir, exist_ok=True)


In [13]:
# Download the selected bands of every chosen scene into <base_dir>/<label>/.
for label, scene_id in scenes.items():
    print(f"\nProcessing {scene_id}")

    # Look the scene up by its exact STAC item ID.
    search = catalog.search(
        collections=["sentinel-2-l2a"],
        ids=[scene_id]
    )

    # `items()` replaces the deprecated `get_items()`. Taking the first result
    # with a default turns "ID matched nothing" into an explicit error instead
    # of a bare IndexError from `[0]`.
    item = next(search.items(), None)
    if item is None:
        raise LookupError(
            f"Scene {scene_id!r} was not found in collection 'sentinel-2-l2a'"
        )

    out_dir = os.path.join(base_dir, label)
    os.makedirs(out_dir, exist_ok=True)

    for asset_key, filename in bands.items():
        asset = item.assets.get(asset_key)

        # A scene lacking one of the requested assets is reported and skipped.
        if asset is None:
            print(f"❌ {asset_key} not found — skipping")
            continue

        out_path = os.path.join(out_dir, filename)

        # Files already on disk are not downloaded again, so the cell can be re-run.
        if os.path.exists(out_path):
            print(f"✔ {filename} already exists — skipping")
            continue

        print(f"⬇ Downloading {asset_key} → {filename}")
        download_file(asset.href, out_path)



Processing S2B_43RGL_20230223_0_L2A
⬇ Downloading red → B04.jp2


B04.jp2: 100%|██████████████████████████████████████████████████████████████████████| 215M/215M [01:43<00:00, 2.17MB/s]


⬇ Downloading nir → B08.jp2


B08.jp2: 100%|██████████████████████████████████████████████████████████████████████| 228M/228M [02:03<00:00, 1.93MB/s]


⬇ Downloading scl → SCL.jp2


SCL.jp2: 100%|████████████████████████████████████████████████████████████████████| 3.20M/3.20M [00:02<00:00, 1.25MB/s]



Processing S2A_43RDL_20230422_0_L2A
⬇ Downloading red → B04.jp2


B04.jp2: 100%|██████████████████████████████████████████████████████████████████████| 228M/228M [01:20<00:00, 2.98MB/s]


⬇ Downloading nir → B08.jp2


B08.jp2: 100%|██████████████████████████████████████████████████████████████████████| 199M/199M [01:10<00:00, 2.95MB/s]


⬇ Downloading scl → SCL.jp2


SCL.jp2: 100%|███████████████████████████████████████████████████████████████████████| 349k/349k [00:01<00:00, 241kB/s]
