In [None]:
# Install required libraries
!pip install pystac-client rio-tiler boto3 python-dateutil geopandas shapely rasterio

# Imports
import os
import json
import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
import geopandas as gpd
import boto3
import rasterio
from shapely.geometry import shape
from pystac_client import Client
from rio_tiler.io import STACReader

# Colab-only: mount Google Drive at /content/drive. The AOI and output paths
# used further down in this notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# 1) Load the AOI (area of interest) from Drive
AOI_PATH = '/content/drive/Shareddrives/Sunbird AI/Projects/GIZ Mini-grid Identification/Phase II/Data/administrative areas/UGA Lamwo district.gpkg'   # ← update if needed

# Load the AOI layer with geopandas.
gdf = gpd.read_file(AOI_PATH)
if gdf.empty:
    raise ValueError(f"No features found in AOI file: {AOI_PATH}")

# The geometry is later used as a STAC `intersects` filter and as a lon/lat
# bounding box, both of which are expressed in WGS84. Reproject explicitly so
# the notebook does not silently depend on the CRS the file was saved in.
# (A layer without any CRS is left untouched, as before.)
if gdf.crs is not None:
    gdf = gdf.to_crs(epsg=4326)

# Only the first feature of the layer is used as the AOI.
geometry = gdf.iloc[0]['geometry']

# Show a compact summary instead of dumping the full WKT (thousands of
# vertices) into the cell output.
print(f"AOI: {geometry.geom_type}, bounds (minx, miny, maxx, maxy) = {geometry.bounds}")

MULTIPOLYGON (((33.329295171589 3.75417023573826, 33.329662479234 3.72340178089165, 33.3125614766473 3.70575760684994, 33.3182324601876 3.68956169338361, 33.3028979425049 3.67030626941849, 33.2937432150377 3.64271632008894, 33.2939892914418 3.6240163074893, 33.3074077295729 3.57518576414918, 33.2896836734715 3.57141899509674, 33.2758682922742 3.56091074369521, 33.2678491853448 3.52985500325344, 33.2708353847752 3.51602389584785, 33.238030146681 3.51322844909065, 33.1840129781047 3.5219322148051, 33.1486630814069 3.50704718389953, 33.1332699702114 3.50987218416526, 33.1223056665544 3.49952389713805, 33.1121029599852 3.49715783998949, 33.0452854957029 3.52005567966441, 33.0296140729577 3.5101179074551, 33.0183591346619 3.48844149238095, 33.0038064234659 3.47949183570515, 32.9751583639278 3.48155286676052, 32.9650295869098 3.45665228200391, 32.9506351921046 3.44445222048598, 32.9505472874624 3.43600227574596, 32.9400765155276 3.43817296445668, 32.9168275482393 3.42707216019263, 32.9159851

In [None]:
"""
bands accepted by Galileo

blue: Blue (band 2) - 10m
green: Green (band 3) - 10m
red: Red (band 4) - 10m
rededge1: Red edge 1 (band 5) - 20m
rededge2: Red edge 2 (band 6) - 20m
rededge3: Red edge 3 (band 7) - 20m
nir: NIR 1 (band 8) - 10m
nir08: NIR 2 (band 8A) - 20m
swir16: SWIR 1 (band 11) - 20m
swir22: SWIR 2 (band 12) - 20m
"""

In [None]:
# 2) Open the AWS-hosted STAC catalog for Sentinel-2 L2A COGs
catalog = Client.open("https://earth-search.aws.element84.com/v1")

# Bands to composite
BANDS = ["blue","green","red","rededge1","rededge2","rededge3","nir","nir08","swir16","swir22"]

# OUTPUT OPTION A: save to Drive folder
DRIVE_OUTDIR = '/content/drive/Shareddrives/Sunbird AI/Projects/suntrace/suntrace-multimodal/data/lamwo_sentinel_composites'
os.makedirs(DRIVE_OUTDIR, exist_ok=True)

# OUTPUT OPTION B (disabled): upload to S3.
# Kept as comments rather than a bare string literal. Do not paste real keys
# into the notebook: let boto3 pick credentials up from the environment.
# S3_BUCKET = 'your-s3-bucket-name'          # ← your bucket
# S3_PREFIX = 'sentinel_composites/'         # ← optional key prefix
# s3 = boto3.client('s3', region_name='YOUR_REGION')   # e.g. 'us-west-2'

# Both the AOI bounds and the output grid are expressed in WGS84 lon/lat.
WGS84 = rasterio.crs.CRS.from_epsg(4326)

# The AOI bounding box (minx, miny, maxx, maxy) does not change between
# scenes, so compute it once instead of once per scene.
aoi_bounds = shape(geometry).bounds

# 3) Loop over the last 12 calendar months (i == 0 is the current month)
today = datetime.date.today()
for i in range(12):
    # compute start/end of month
    start = today.replace(day=1) - relativedelta(months=i)
    end = (start + relativedelta(months=1)) - datetime.timedelta(days=1)
    date_range = f"{start.isoformat()}/{end.isoformat()}"
    print(f"\n▶ Building composite for {date_range}")

    # search for low-cloud scenes over the AOI
    search = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=geometry,
        datetime=date_range,
        query={"eo:cloud_cover": {"lt": 20}}
    )
    items = list(search.items())
    if not items:
        print("   → No scenes found; skipping.")
        continue

    # Read each scene's pixels over the AOI bounding box.
    scenes = []
    grid_img = None   # first scene read; defines the output pixel grid
    for item in items:
        # Use the item's self_href (URL) to initialize STACReader
        with STACReader(item.self_href) as reader:
            if grid_img is None:
                # First scene: let rio-tiler choose the output size.
                # NOTE(review): this reads the whole AOI at the source
                # resolution, which may need a lot of memory for a full
                # district — confirm this is acceptable.
                img = reader.part(
                    aoi_bounds, dst_crs=WGS84, bounds_crs=WGS84, assets=BANDS
                )
                grid_img = img
            else:
                # Later scenes: force the same height/width so that every
                # array shares one grid and np.stack cannot fail on scenes
                # coming from a different source projection.
                img = reader.part(
                    aoi_bounds, dst_crs=WGS84, bounds_crs=WGS84, assets=BANDS,
                    height=grid_img.height, width=grid_img.width,
                )

        # assumes img.mask is (H, W) with 0 == nodata, as documented for
        # rio-tiler's ImageData — TODO confirm for the installed version.
        valid = np.asarray(img.mask) != 0
        # Replace nodata pixels with NaN (broadcast over the band axis) so a
        # scene that only partly covers the AOI does not drag the median
        # towards its fill value.
        scenes.append(np.where(valid, np.asarray(img.data, dtype=np.float32), np.nan))

    # median-composite (N_scenes × N_bands × H × W) → (N_bands × H × W),
    # ignoring NaNs. Pixels with no valid observation in any scene come out
    # as NaN (numpy emits an "All-NaN slice" warning) and are written as 0.
    stack = np.stack(scenes, axis=0)
    composite = np.nan_to_num(np.nanmedian(stack, axis=0), nan=0).astype(np.uint16)
    del stack, scenes   # release the per-scene arrays before the next month

    # Build the GeoTIFF profile from the georeferencing of the pixels that
    # were actually read, rather than from the (already closed) reader.
    profile = {
        "driver": "GTiff",
        "height": grid_img.height,
        "width": grid_img.width,
        "count": composite.shape[0],
        "dtype": "uint16",
        "crs": grid_img.crs,
        "transform": grid_img.transform,
        "nodata": 0,
        "compress": "deflate",
    }

    out_fname = f"{start.year}_{start.month:02d}.tif"

    # A) write to Drive
    drive_path = os.path.join(DRIVE_OUTDIR, out_fname)
    with rasterio.open(drive_path, 'w', **profile) as dst:
        dst.write(composite)
        if composite.shape[0] == len(BANDS):
            # Label each band with its asset name.
            dst.descriptions = tuple(BANDS)
    print(f"   ✓ saved to Drive at {drive_path}")

    # B) upload to S3 (disabled; see OUTPUT OPTION B above)
    # s3.upload_file(drive_path, S3_BUCKET, f"{S3_PREFIX}{out_fname}")
    # print(f"   ✓ uploaded to s3://{S3_BUCKET}/{S3_PREFIX}{out_fname}")


▶ Building composite for 2025-05-01/2025-05-31




In [None]:
# -----------------------------------------------------------------------------
# 1) EARTH ENGINE AUTH
#    (no Drive mount happens in this cell)
# -----------------------------------------------------------------------------
import ee, os, io, json

# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library against a specific Earth Engine project.
ee.Initialize(project='ee-isekalala')

# -----------------------------------------------------------------------------
# 2) IMPORTS FOR LOADING THE AOI AND MAKING THE 1 KM GRID
#    (the AOI load and grid construction themselves are in a later cell)
# -----------------------------------------------------------------------------
import geopandas as gpd
from shapely.geometry import box, mapping
import numpy as np

In [None]:
def ee_bytes_to_img(pixel_data):
    """Turn a structured NumPy array into a plain array with a channel axis.

    Every named field of ``pixel_data`` becomes one slice along a new last
    axis, in the order the fields appear in ``pixel_data.dtype.names``.

    Args:
        pixel_data: NumPy array with a structured dtype (named fields).

    Returns:
        Array of shape ``pixel_data.shape + (number_of_fields,)``.
    """
    channels = []
    for field_name in pixel_data.dtype.names:
        channels.append(pixel_data[field_name])
    return np.stack(channels, axis=-1)

def geom_to_ee_fmt(geometry):
    """Convert a Polygon/MultiPolygon into an ``ee.Geometry.MultiPolygon``.

    Args:
        geometry: Any object exposing ``__geo_interface__`` (GeoJSON-like
            mapping with ``"type"`` and ``"coordinates"`` keys).

    Returns:
        The result of ``ee.Geometry.MultiPolygon`` built from the geometry's
        coordinates. A single Polygon is wrapped in a one-element list first.

    Raises:
        ValueError: If the geometry type is neither Polygon nor MultiPolygon.
    """
    geojson = geometry.__geo_interface__
    geom_type = geojson["type"]

    # Reject unsupported types up front.
    if geom_type != "MultiPolygon" and geom_type != "Polygon":
        raise ValueError("Geometry type is not Polygon or MultiPolygon.")

    coordinates = geojson["coordinates"]
    if geom_type == "Polygon":
        # Promote a lone polygon to a multipolygon with one member.
        coordinates = [coordinates]
    return ee.Geometry.MultiPolygon(coordinates)

In [None]:
# 1) Load your AOI from Drive
AOI_PATH = '/content/drive/Shareddrives/Sunbird AI/Projects/GIZ Mini-grid Identification/Phase II/Data/administrative areas/UGA Lamwo district.gpkg'   # ← update if needed

# Read the AOI file once; `geometry` is the first feature in the file's own CRS.
gdf = gpd.read_file(AOI_PATH)
geometry = gdf.iloc[0]['geometry']

# WGS84 copy of the AOI (reuses the frame read above instead of re-reading the file).
aoi = gdf.to_crs(epsg=4326)

# Compute the centroid in a planar CRS (Web Mercator, EPSG:3857); doing it
# directly on lon/lat coordinates makes geopandas warn about geographic CRSs.
aoi_merc = aoi.to_crs(epsg=3857)
cent_pt = aoi_merc.geometry.centroid.iloc[0]

# Reproject that point back to lon/lat so the UTM zone can be derived from it.
cent_wgs = gpd.GeoSeries([cent_pt], crs=3857).to_crs(epsg=4326).iloc[0]

cent_lon, cent_lat = cent_wgs.x, cent_wgs.y
# UTM zones are 6 degrees wide, numbered 1..60 starting at 180°W.
# The min() keeps lon == 180 from producing a non-existent zone 61.
zone  = min(int((cent_lon + 180) // 6) + 1, 60)
north = cent_lat >= 0
# EPSG 326xx = WGS84 / UTM north, 327xx = WGS84 / UTM south.
utm_crs = f"EPSG:{32600+zone if north else 32700+zone}"
aoi_utm = aoi.to_crs(utm_crs)

# Build a 1 km × 1 km grid clipped to the AOI (UTM coordinates are in metres).
CELL_SIZE_M = 1000

# Dissolve the AOI once. This is loop-invariant, and recomputing it for every
# candidate cell makes the grid construction needlessly slow.
aoi_union = aoi_utm.union_all()

minx, miny, maxx, maxy = aoi_utm.total_bounds
cells = []
x = minx
while x < maxx:
    y = miny
    while y < maxy:
        cell = box(x, y, x + CELL_SIZE_M, y + CELL_SIZE_M)
        inter = cell.intersection(aoi_union)
        # Keep only cells that actually overlap the AOI; edge cells are
        # stored already clipped to the AOI outline.
        if not inter.is_empty:
            cells.append(inter)
        y += CELL_SIZE_M
    x += CELL_SIZE_M

# back to lat/lon
grid = gpd.GeoDataFrame(geometry=cells, crs=utm_crs).to_crs(epsg=4326)

# -----------------------------------------------------------------------------
# 3) DEFINE MONTHLY COMPOSITE FUNCTION
# -----------------------------------------------------------------------------
from dateutil.relativedelta import relativedelta
import datetime

# Sentinel-2 band ids selected from every image (10 bands per month).
S2_BANDS = ["B2","B3","B4","B5","B6","B7","B8","B8A","B11","B12"]

def monthly_composites(region, year):
    """Build one median Sentinel-2 composite per calendar month of ``year``.

    Args:
        region: Earth Engine geometry used with ``filterBounds``.
        year: Calendar year passed to ``ee.Date.fromYMD``.

    Returns:
        A Python list of 12 ``ee.Image`` objects, January first. The bands of
        month ``m`` are renamed ``<band>_m<MM>`` (e.g. ``B2_m01``) so the
        images can be concatenated without name clashes.
    """
    def _make_month(m):
        """Return the median composite for month ``m`` (1-12) of ``year``."""
        start = ee.Date.fromYMD(year, m, 1)
        end = start.advance(1, "month")
        coll = (
            ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
              .filterBounds(region)
              .filterDate(start, end)
              # keep only scenes whose cloudy-pixel percentage is below 20
              .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
              .select(S2_BANDS)
        )
        # NOTE(review): if no scene passes the filters for a month, the median
        # image presumably has no bands, so the concatenated stack would be
        # short by one month — confirm and handle in the caller.
        img = coll.median()
        # Suffix every band name with the zero-padded month number.
        mm = ee.Number(m).format("%02d")
        names = img.bandNames().map(lambda b: ee.String(b).cat("_m").cat(mm))
        return img.rename(names)

    return [_make_month(m) for m in range(1, 13)]


In [None]:
# Grid cells as a list of (index label, geometry) tuples, in row order.
grid_list = list(zip(grid.index, grid.geometry))

In [None]:
# -----------------------------------------------------------------------------
# 4) TILE EXPORT + THREADPOOL
# -----------------------------------------------------------------------------
import requests
from concurrent.futures import ThreadPoolExecutor

OUT_DIR = '/content/drive/Shareddrives/Sunbird AI/Projects/suntrace/suntrace-multimodal/data/lamwo_sentinel_composites'   # ← update
# Make sure the target folder exists even if no other cell created it.
os.makedirs(OUT_DIR, exist_ok=True)

YEAR = 2024
MAX_WORKERS = 10
REQUEST_TIMEOUT_S = 300   # upper bound for a single tile download, in seconds

def process_tile(idx, geom):
    """Download the 12 monthly composites for one grid cell and save them.

    The composites for ``YEAR`` are concatenated into one multi-band image,
    downloaded as an NPY payload at 10 m scale, rearranged to
    ``(months, height, width, bands)`` and written to
    ``OUT_DIR/tile_<idx>_<YEAR>.npy``.

    Args:
        idx: Integer tile number; used in the output file name.
        geom: Polygon or MultiPolygon of the tile (lon/lat).

    Raises:
        ValueError: If the stacked image does not contain exactly
            ``months × len(S2_BANDS)`` bands, or the downloaded array
            disagrees with that count.
        requests.HTTPError: If the download request returns an error status.
    """
    ee_geom = geom_to_ee_fmt(geom)
    months = monthly_composites(ee_geom, YEAR)
    stacked = ee.Image.cat(months)
    bands = stacked.bandNames().getInfo()

    # Every month must contribute the full band set; otherwise the reshape
    # below would silently assign pixels to the wrong month/band.
    n_bands = len(S2_BANDS)
    n_months = len(months)
    if len(bands) != n_months * n_bands:
        raise ValueError(
            f"tile {idx}: expected {n_months * n_bands} bands "
            f"({n_months} months × {n_bands}), got {len(bands)}"
        )

    url = stacked.getDownloadUrl({
        "bands": bands,
        "region": ee_geom,
        "scale": 10,
        "format": "NPY"
    })
    resp = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    resp.raise_for_status()

    # The payload is a structured numeric array, so pickle support is not
    # needed; leaving it off avoids unpickling bytes fetched over the network.
    arr = np.load(io.BytesIO(resp.content))
    img_full = ee_bytes_to_img(arr)

    # (H, W, months*bands) → (months, H, W, bands). Channels are ordered
    # month-major because the monthly images were concatenated in month order.
    H, W, Ctot = img_full.shape
    if Ctot != n_months * n_bands:
        raise ValueError(
            f"tile {idx}: downloaded array has {Ctot} channels, "
            f"expected {n_months * n_bands}"
        )
    out = img_full.reshape(H, W, n_months, n_bands).transpose(2, 0, 1, 3)

    fn = f"tile_{idx:04d}_{YEAR}.npy"
    np.save(os.path.join(OUT_DIR, fn), out)
    print(f"[{idx}] saved {fn}")

# Fan the tiles out over a thread pool. Failures are collected per tile so
# that one bad tile does not hide the outcome of all the others.
failed = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    futures = {
        ex.submit(process_tile, i, geom): i
        for i, geom in enumerate(grid.geometry)
    }
    # Wait for every tile, in submission order.
    for fut, tile_idx in futures.items():
        try:
            fut.result()
        except Exception as err:   # KeyboardInterrupt still propagates
            failed.append((tile_idx, err))
            print(f"[{tile_idx}] FAILED: {err!r}")

if failed:
    # Surface the failures as an error once everything has been attempted.
    raise RuntimeError(
        f"{len(failed)} of {len(futures)} tiles failed; "
        f"first failing tile indices: {[i for i, _ in failed[:20]]}"
    )

print("All tiles done.")



KeyboardInterrupt: 



In [None]:
# Save the grid as a GeoJSON file using geopandas.

# Output path for the GeoJSON: `grid.geojson` inside DRIVE_OUTDIR
# (DRIVE_OUTDIR is defined in the STAC compositing cell).
geojson_path = os.path.join(DRIVE_OUTDIR, 'grid.geojson')

# Write the grid GeoDataFrame with the GeoJSON driver.
grid.to_file(geojson_path, driver='GeoJSON')

print(f"Grid saved as GeoJSON to {geojson_path}")