In [None]:
##Import libraries
import earthaccess 
import geopandas as gdp
import os
from pathlib import Path
from time import sleep

In [None]:
## Provide login credentials; an Earthdata account can be set up on urs.earthdata.nasa.gov
earthaccess.login()

In [None]:
## Data is downloaded per tile, i.e. once for every polygon in a shapefile.
# Point this at the shapefile that holds the tiles
shapefile = "path/file.shp"

# Important! Every polygon/tile is expected to carry an ID attribute called "Name".
# The "#query and download" cell reads that ID to name each output folder;
# swap in a different attribute there if your shapefile uses another one.

# Read the polygons and reproject them to EPSG:4326 in a single chained step,
# so the tile bounds taken in the download cell are expressed in that CRS.
gdf = gdp.read_file(shapefile).to_crs(epsg=4326)
print(gdf.crs)

# Report how many distinct geometries the file contains
unique_shapes = gdf.geometry.nunique()
print(f"Number of unique shapes: {unique_shapes}")

In [None]:
# De-duplicate the tiles: keep one row per distinct geometry, work on an
# independent copy, and renumber the rows from 0.
gdf_unique = gdf.drop_duplicates(subset="geometry").copy().reset_index(drop=True)

# Optional: zero-width buffer as a quick fix for minor geometry issues
gdf_unique["geometry"] = gdf_unique.geometry.buffer(0)

In [None]:
# Configuration for the query/download cell: output folder, collection name, filters.
# Root output folder; the download cell writes one sub-folder per tile inside it.
BASE_OUT   = Path("path/folder")

# Collection short name passed to the data search
SHORT_NAME = "HLSS30"

# Time range to search, as a (start, end) pair of timestamps
# NOTE(review): the range ends on 30 Dec 2020, not 31 Dec - confirm this is intended.
TEMPORAL   = ("2017-01-01T00:00:00", "2020-12-30T23:59:00")

# Tolerable cloud cover as a (min, max) pair in percent, i.e. 0-50 %
CLOUD_COV  = (0, 50)

# Band keywords to keep: after a tile is downloaded, every file whose name
# contains none of these substrings is deleted.
KEEP_KEYWORDS = ("B02", "B04", "B8A", "Fmask")

In [None]:
# Query and download: one search + download per tile in `gdf_unique`.
# Files for each tile land in BASE_OUT/<Name>. A tile whose folder already
# holds files is skipped, so the cell can be re-run to resume an interrupted job.
total_tiles = len(gdf_unique)
failed_tiles = []  # names of tiles whose search / download / clean-up raised

# enumerate() keeps the progress counter right even if the index is not 0..n-1
for position, (_, row) in enumerate(gdf_unique.iterrows(), start=1):
    name = str(row["Name"]).replace(" ", "_").replace("/", "-")  # folder name
    outdir = BASE_OUT / name
    progress = f"[{position}/{total_tiles}]"

    # --- Skip if folder already exists and is not empty ---
    if outdir.exists() and any(outdir.iterdir()):
        print(f"{progress} {name}: already exists, skipping.")
        continue

    try:
        # Search with the tile's bounding box: (minx, miny, maxx, maxy).
        # Computed inside the try so one bad geometry does not abort the whole run.
        bbox = tuple(row.geometry.bounds)

        results = earthaccess.search_data(
            short_name=SHORT_NAME,
            bounding_box=bbox,
            temporal=TEMPORAL,
            cloud_cover=CLOUD_COV,
        )

        os.makedirs(outdir, exist_ok=True)

        print(f"{progress} {name}: bbox={bbox}, hits={len(results)}")
        if results:
            earthaccess.download(results, str(outdir))

            # --- Clean downloaded files ---
            # If you want to keep all bands, remove this clean-up section
            # (keep the sleep below).
            # list() snapshots the directory before anything is deleted, and
            # is_file() avoids calling unlink() on a sub-directory.
            for f in list(outdir.iterdir()):
                # Delete if none of the keywords appear in the filename
                if f.is_file() and not any(k in f.name for k in KEEP_KEYWORDS):
                    f.unlink()
            print(f"  → Cleaned: kept only {KEEP_KEYWORDS}")

        # Short pause between tiles
        sleep(0.5)

    except Exception as e:
        # Best effort: report the failure and carry on with the next tile.
        failed_tiles.append(name)
        print(f"Error on {name}: {e}")
        if outdir.exists() and any(outdir.iterdir()):
            # A non-empty folder is treated as "done" by the skip check above.
            print(
                f"  Warning: partial files left in {outdir}; "
                "delete this folder before re-running, otherwise the tile will be skipped."
            )

if failed_tiles:
    print(f"Finished with {len(failed_tiles)} failed tile(s): {failed_tiles}")