In [None]:
# --- IMPORTS ---
import numpy as np
import stackstac
import pystac_client
import planetary_computer
import dask.array as da
from dask.distributed import Client, LocalCluster
from urllib3.util.retry import Retry
from dask.diagnostics import ProgressBar
import xarray as xr
import matplotlib.pyplot as plt
import ipyleaflet
import geopandas as gpd
import fiona
import pandas as pd


# --- CONFIGURATION ---
# Output pixel size, in meters.
spatial_resolution = 10

# Assets to load for every scene.
bands_to_load = [
    'B02',  # Blue
    'B03',  # Green
    'B04',  # Red
    'SCL',  # Scene Classification
]

# True -> run Dask on this machine; False -> use a Coiled cluster instead.
local_cluster = True

In [None]:

# --- DASK CLUSTER SETUP ---
# Create the Dask cluster and client used by the cells below.
if local_cluster:
    # Run on this machine with LocalCluster's default settings.
    cluster = LocalCluster()
    client = Client(cluster)
else:
    # Coiled is an optional dependency, so it is imported only when a remote cluster is requested.
    import coiled
    cluster = coiled.Cluster(name="Timelapse", shutdown_on_close=True)
    # Adaptive scaling is bounded with `minimum`/`maximum`.
    # NOTE(review): the original passed `n_workers=1`, which is not one of adapt()'s
    # scaling bounds - confirm against the installed Coiled version.
    cluster.adapt(minimum=1, maximum=8)
    client = cluster.get_client()


In [None]:

# --- MAP FOR BOUNDING BOX SELECTION ---
# Interactive map widget; its visible extent is read back later as the area of interest.
m = ipyleaflet.Map(zoom=12, scroll_wheel_zoom=True)

# Fixed 500 x 500 px widget.
m.layout.width = "500px"
m.layout.height = "500px"

In [None]:
# Overlay the features from a KML file on the map. This is handy when making timelapses of
# specific locations; the file read here is "points_gaza.kml".

# Register the LIBKML driver with fiona in read/write mode.
fiona.drvsupport.supported_drivers["LIBKML"] = "rw"
gdf = gpd.read_file("points_gaza.kml")

# When the file holds at least one feature, centre the map on the centroid of the first
# geometry (ipyleaflet expects the centre as (lat, lon), i.e. (y, x)).
if not gdf.empty:
    first_centroid = gdf.geometry.iloc[0].centroid
    m.center = (first_centroid.y, first_centroid.x)

# Add the features as a map layer and render the widget.
points = ipyleaflet.GeoData(geo_dataframe=gdf)
m.add(points)
display(m)

In [None]:

# Pan/zoom the map above to the area of interest BEFORE running this cell.
# The bounding box is the map's current visible extent as (west, south, east, north).
bounding_box = (m.west, m.south, m.east, m.north)

# A zero-width or zero-height box means the map bounds were never populated
# (presumably because the widget has not been rendered/synced yet - e.g. during
# "Run All"). Fail here with a clear message rather than in the search/stack step.
if bounding_box[0] == bounding_box[2] or bounding_box[1] == bounding_box[3]:
    raise RuntimeError(
        f"Map bounds are degenerate {bounding_box}: display the map, navigate to the "
        "area of interest, then re-run this cell."
    )

In [None]:
# Open the Planetary Computer STAC catalogue; `sign_inplace` is applied as the modifier
# to what the client returns.
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

# Find every Sentinel-2 L2A item intersecting the selected area during 2024.
search = catalog.search(
    datetime="2024-01-01/2024-12-31",
    bbox=bounding_box,
    collections=["sentinel-2-l2a"],
)
items = search.item_collection()
print(f"Found {len(items)} items in the selected area and time range.")

# Build a lazy array from the items: requested bands only, reprojected to EPSG:3857 and
# clipped to the bounding box.
# NOTE(review): chunksize is presumably ordered (time, band, y, x), with -1 meaning the
# whole axis in one chunk - confirm against the stackstac documentation.
data = stackstac.stack(
    items,
    assets=bands_to_load,
    resolution=spatial_resolution,
    epsg=3857,
    bounds_latlon=bounding_box,
    chunksize=(-1, 1, 256, 256),
)

# Summarise the size of the (still lazy) array.
dask_array = data.data
total_bytes = dask_array.nbytes
chunk_count = dask_array.npartitions
size_report = [
    "Array size information:",
    f"Shape: {data.shape}",
    f"Size in bytes: {total_bytes}",
    f"Size in GB: {total_bytes / 1e9:.2f} GB",
    f"Number of chunks: {chunk_count}",
    f"Chunksize: {(total_bytes / chunk_count) / 1e6:.2f} MB",
]
print("\n".join(size_report))

# Inspect the dataset before using it. Keep the size low (sub GB) if processing on a
# laptop or over a limited network connection.
data

In [None]:
# Sentinel-2 scene classification (SCL) band, used to find unusable pixels.
scl = data.sel(band='SCL')

# SCL values treated as cloudy/undesirable (see the Sentinel-2 SCL documentation for the legend).
cloudy_classes = [3, 8, 9, 10, 11]

# Boolean mask of cloudy pixels, computed eagerly.
cloud_mask = scl.isin(cloudy_classes).compute()

# Invert it to mark the good pixels. The 'band' coordinate is dropped so that the mask
# applies across all bands.
clear_mask = ~cloud_mask.drop_vars('band')

# Mask cloudy pixels in every band; `drop=True` trims coordinates that end up fully masked.
filtered_data = data.where(clear_mask, drop=True)

# Monthly composite: per-pixel median (0.5 quantile) of the RGB bands within each calendar month.
rgb_bands = filtered_data.sel(band=['B04', 'B03', 'B02'])
monthly_median = rgb_bands.groupby('time.month').quantile(q=0.5)

# Placeholder timestamps on the 15th of each month of the year of interest, so the
# composite has a real datetime axis for GeoGIF.
year = 2024
month_dates = [
    pd.Timestamp(year=year, month=int(month_number), day=15)
    for month_number in monthly_median['month'].values
]

# Attach the timestamps along 'month', make 'time' the dimension, then discard 'month'.
filled_data = monthly_median.assign_coords(time=("month", month_dates))
filled_data = filled_data.swap_dims({"month": "time"})
filled_data = filled_data.drop_vars("month")


In [None]:

# Import geogif for creating animated GIFs from geospatial data
import geogif

# Build the animation at 4 frames per second from the monthly composites.
# The result gets its own name so it does not shadow the imported `geogif` module.
# NOTE(review): `date_format=None` presumably turns off the date label - confirm in the geogif docs.
timelapse_gif = geogif.dgif(filled_data, fps=4, date_format=None)

# Compute the result; as the last expression of the cell it is displayed as the output.
timelapse_gif.compute()

In [None]:
# Optionally - this cell exports each frame (timestamp) of the dataset as a .tif file to a new folder.

import calendar
import os
from datetime import datetime
import rioxarray  # imported so the `.rio` accessor used below is available

# Timestamped folder name, e.g. "monthly_composites_2024-07-03_22-35-00", so that
# successive exports do not overwrite each other.
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_folder = f"monthly_composites_{current_datetime}"

# Create the output directory (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# `filled_data` is indexed by 'time' (one timestamp per monthly composite); the original
# 'month' dimension was swapped out and dropped, so iterate over 'time' instead.
for frame_time in filled_data.time.values:
    # Select the composite for the current timestamp
    monthly_slice = filled_data.sel(time=frame_time)

    # Recover the calendar month from the timestamp for a descriptive filename
    month_num = pd.Timestamp(frame_time).month
    month_name = calendar.month_name[month_num]

    # Define the output filename inside the new folder
    output_filename = os.path.join(output_folder, f"monthly_composite_{month_name}.tif")

    # The data were stacked with epsg=3857; attach that CRS explicitly so the GeoTIFF is
    # georeferenced even if the CRS metadata was lost while compositing.
    monthly_slice = monthly_slice.rio.write_crs("EPSG:3857")

    # Export the slice to a GeoTIFF file
    # The 'compress' argument is optional but recommended for smaller file sizes
    monthly_slice.rio.to_raster(output_filename, compress='LZW')

    print(f"Exported {output_filename}")