# Import Packages

In [17]:
import pystac_client
import stackstac
import matplotlib.pyplot as plt
import geopandas as gpd
import requests
import numpy as np
import json
import os
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import sys
from tqdm import tqdm
import time
import rioxarray as rio
import leafmap


# Install pystac module using pip

In [None]:
!{sys.executable} -m pip install pystac

In [14]:
hdd_path = r"D:\Geospatial_Pessoal"
aoi_path = fr"{hdd_path}\STAC_py\pedrogao_grande_area_ardida_2017.geojson"

# Load the GeoJSON file
with open(aoi_path) as f:
    geojson_aoi = json.load(f)

# Extract all coordinates from the polygon

coordinates = geojson_aoi['features'][0]['geometry']['coordinates'][0]  # Assuming the first feature is the AOI

# Define the bounds from the GeoJSON for stacking

lon_min = min(coord[0] for coord in coordinates)
lat_min = min(coord[1] for coord in coordinates)
lon_max = max(coord[0] for coord in coordinates)
lat_max = max(coord[1] for coord in coordinates)

# Flatten the list of coordinates if you have a polygon (list of lists)
points = [(coord[1], coord[0]) for coord in coordinates]  # (lat, lon) tuples


In [15]:
start_date = "2017-01-01" 
end_date = "2017-07-31"
time_range = f"{start_date}/{end_date}"

# Search Sentinel 2 level 2 Images with cloud filtering 

In [18]:
# Set up the Sentinel STAC API URL
sentinel_search_url = "https://earth-search.aws.element84.com/v1"
sentinel_stac_client = pystac_client.Client.open(sentinel_search_url)

items = sentinel_stac_client.search(
    intersects=geojson_aoi['features'][0]['geometry'],  # Use the geometry directly
    collections=["sentinel-2-l2a"],
    datetime=time_range,
    query={"eo:cloud_cover": {"lte": 10}}  # Filter by cloud coverage. lte = less than or equal to
).item_collection()

print("Number of items found:", len(items))

Number of items found: 19


In [19]:
sentinel_stack = stackstac.stack(items, assets=["red", "nir", "scl"],
                          gdal_env=stackstac.DEFAULT_GDAL_ENV.updated(
                               {'GDAL_HTTP_MAX_RETRY': 3,
                                'GDAL_HTTP_RETRY_DELAY': 5,
                               }),
                          epsg=4326, chunksize=(1, 1, 50, 50)).to_dataset(
       dim='band')

# This length number represents the number  of assets (bands) that are to be extracted
# len(sentinel_stack)

sentinel_stack


Unnamed: 0,Array,Chunk
Bytes,32.85 GiB,19.53 kiB
Shape,"(19, 11060, 20981)","(1, 50, 50)"
Dask graph,1771560 chunks in 4 graph layers,1771560 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 32.85 GiB 19.53 kiB Shape (19, 11060, 20981) (1, 50, 50) Dask graph 1771560 chunks in 4 graph layers Data type float64 numpy.ndarray",20981  11060  19,

Unnamed: 0,Array,Chunk
Bytes,32.85 GiB,19.53 kiB
Shape,"(19, 11060, 20981)","(1, 50, 50)"
Dask graph,1771560 chunks in 4 graph layers,1771560 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,32.85 GiB,19.53 kiB
Shape,"(19, 11060, 20981)","(1, 50, 50)"
Dask graph,1771560 chunks in 4 graph layers,1771560 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 32.85 GiB 19.53 kiB Shape (19, 11060, 20981) (1, 50, 50) Dask graph 1771560 chunks in 4 graph layers Data type float64 numpy.ndarray",20981  11060  19,

Unnamed: 0,Array,Chunk
Bytes,32.85 GiB,19.53 kiB
Shape,"(19, 11060, 20981)","(1, 50, 50)"
Dask graph,1771560 chunks in 4 graph layers,1771560 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,32.85 GiB,19.53 kiB
Shape,"(19, 11060, 20981)","(1, 50, 50)"
Dask graph,1771560 chunks in 4 graph layers,1771560 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 32.85 GiB 19.53 kiB Shape (19, 11060, 20981) (1, 50, 50) Dask graph 1771560 chunks in 4 graph layers Data type float64 numpy.ndarray",20981  11060  19,

Unnamed: 0,Array,Chunk
Bytes,32.85 GiB,19.53 kiB
Shape,"(19, 11060, 20981)","(1, 50, 50)"
Dask graph,1771560 chunks in 4 graph layers,1771560 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


# Download dataset if required
* Sentinel 2 imagery is downloaded by asset, meaning as separate tiffs for red and nir bands

In [57]:
def download_asset(asset_url, local_path):
    try:
        response = requests.get(asset_url, stream=True)
        response.raise_for_status()  # Raises an error for bad responses
        
        # Get total file size for progress tracking
        total_size = int(response.headers.get('content-length', 0))
        
        # Create a tqdm progress bar
        with open(local_path, 'wb') as f:
            with tqdm(total=total_size, unit='B', unit_scale=True, desc=local_path, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} | {rate_fmt}]') as bar:
                downloaded_size = 0
                start_time = time.time()  # Start time for speed calculation
                
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
                    downloaded_size += len(chunk)
                    bar.update(len(chunk))
                    
                    # Calculate elapsed time and download speed
                    elapsed_time = time.time() - start_time
                    if elapsed_time > 0:
                        speed = downloaded_size / elapsed_time  # bytes per second
                        bar.set_postfix(speed=f"{speed / 1024:.2f} kB/s")  # Display speed in kB/s
        
        print(f"\nDownloaded: {local_path}")
    except requests.RequestException as e:
        print(f"Failed to download: {asset_url}, error: {str(e)}")
    except Exception as e:
        print(f"An error occurred while downloading {asset_url}: {str(e)}")

In [None]:
# Define the local directory for saving assets

output_directory = fr"{hdd_path}\STAC_py\output" 

assets_to_download = ["red", "nir", "scl"]  # Changed from B04 to red, B08 to nir

# Iterate over filtered_stack items and download assets

for item in items: 
    # Print the item ID and available assets for debugging
    print(f"Processing item: {item.id}")
    print("Available assets:", item.assets.keys())  # Print available asset names

    for asset_name in assets_to_download:
        if asset_name in item.assets:
            asset_url = item.assets[asset_name].href
            print(f"Downloading {asset_name} from {asset_url}")  # Print the URL being downloaded
            local_filename = os.path.join(output_directory, f"{item.id}_{asset_name}.tif")
            download_asset(asset_url, local_filename)
        else:
            print(f"Asset {asset_name} not found in item {item.id}")

print("All downloads completed.")

# Calculate NDVI

In [20]:
sentinel_stack['ndvi'] = xr.where(
    (sentinel_stack['nir'] + sentinel_stack['red']) != 0,
    (sentinel_stack['nir'] - sentinel_stack['red']) / (sentinel_stack['nir'] + sentinel_stack['red']),
    np.nan
)

# Remove attributes that are not time,y or x

sentinel_stack = sentinel_stack.drop_vars([c for c in sentinel_stack.coords if not (c in ['time', 'y', 'x'])])

In [21]:
aoi = gpd.read_file(fr"{hdd_path}/STAC_py/pedrogao_grande_area_ardida_2017.geojson")

# Set the CRS for the dataset

crs = "EPSG:4326"
crs_number = crs[5:]
sentinel_stack = sentinel_stack.rio.write_crs(fr"{crs}", inplace=True)

In [None]:
# Iterate over the time dimension and export each NDVI layer

for i in range(sentinel_stack.sizes['time']):
    # Extract the timestamp and format it as YYYYMMDD
    time_str = str(sentinel_stack['time'].isel(time=i).dt.strftime('%Y%m%d').values)
    ndvi_slice = sentinel_stack['ndvi'].isel(time=i)
    ndvi_slice = ndvi_slice.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
    ndvi_clipped = ndvi_slice.rio.clip(aoi.geometry, aoi.crs, drop=True)
    
    output_path = f"{hdd_path}/STAC_py/ndvi_output/teste/NDVI_{time_str}_{crs_number}.tiff"
    ndvi_clipped.rio.to_raster(output_path)

    print(f"Exported clipped NDVI time slice for {time_str} to {output_path}")

In [8]:
# Load NDVI data for the first point (as an example)

first_point = points[0]  # Change to the desired point if needed
y, x = first_point
sentinel_point = sentinel_stack.interp(x=x, y=y, method="nearest")


In [37]:
# Create a list to hold the interpolated NDVI values and their corresponding coordinates
interpolated_ndvi_data = []

# Loop through each point and interpolate the NDVI values
for point in points:
    y, x = point
    sentinel_point = sentinel_stack.interp(x=x, y=y, method="nearest")
    
    # Load the selected data subset for the current sentinel_point
    sentinel_point.load()  # Load the data for the current point if necessary
    
    # Assuming sentinel_point is a DataArray or similar structure that can be converted to a DataFrame
    sentinel_df = sentinel_point.to_dataframe()
    
    # Append the NDVI values along with their coordinates
    for index, row in sentinel_df.iterrows():
        interpolated_ndvi_data.append({
            'latitude': y,
            'longitude': x,
            'ndvi': row['ndvi']  # Assuming the NDVI value is in a column named 'ndvi'
        })


In [39]:
# Step 1: Transform the collected data into a DataFrame
sentinel_table = pd.DataFrame(interpolated_ndvi_data)
sentinel_table

Unnamed: 0,latitude,longitude,ndvi
0,39.856864,-8.925612,0.721045
1,39.856864,-8.925612,0.761252
2,39.856864,-8.925612,0.772682
3,39.856864,-8.925612,0.768158
4,39.856864,-8.925612,0.760684
...,...,...,...
90,39.856864,-8.925612,0.651313
91,39.856864,-8.925612,0.626194
92,39.856864,-8.925612,0.676083
93,39.856864,-8.925612,0.600000


# Check for x and y bounds for the first point

In [None]:
print(sentinel_stack.rio.crs)


# Check the bounds of the sentinel_stack
x_min, x_max = sentinel_stack.x.min().values, sentinel_stack.x.max().values
y_min, y_max = sentinel_stack.y.min().values, sentinel_stack.y.max().values

print(f"x bounds: {x_min} to {x_max}")
print(f"y bounds: {y_min} to {y_max}")

# Check if the point is within the bounds
if x_min <= x <= x_max and y_min <= y <= y_max:
    print("Point is within the bounds of the dataset.")
else:
    print("Point is outside the dataset bounds.")

In [None]:
# Load the selected data subset

sentinel_point.load()

In [28]:
# Transform the Dataset to a DataFrame
sentinel_table = sentinel_point.to_dataframe()
sentinel_table

# Step 2: Filter NDVI values that are out of the valid range (-1 to 1)
sentinel_table = sentinel_table[(sentinel_table['ndvi'] >= -1) & (sentinel_table['ndvi'] <= 1)]

# Step 3: Reset index if needed (optional, depending on how you want to handle the index)
sentinel_table.reset_index(inplace=True)

sentinel_table

Unnamed: 0,time,red,nir,scl,ndvi,spatial_ref,x,y
0,2017-01-05 11:27:37.242,0.0283,0.1746,4.0,0.721045,0,-8.925612,39.856864
1,2017-01-05 11:27:37.242,0.0244,0.18,4.0,0.761252,0,-8.925612,39.856864
2,2017-01-08 11:37:54.889,0.0228,0.1778,4.0,0.772682,0,-8.925612,39.856864
3,2017-01-08 11:37:54.889,0.0241,0.1838,4.0,0.768158,0,-8.925612,39.856864
4,2017-01-15 11:24:12.457,0.0252,0.1854,4.0,0.760684,0,-8.925612,39.856864
5,2017-01-15 11:24:12.457,0.025,0.189,4.0,0.766355,0,-8.925612,39.856864
6,2017-01-18 11:36:59.635,0.0224,0.181,4.0,0.779744,0,-8.925612,39.856864
7,2017-01-18 11:36:59.635,0.0223,0.1866,4.0,0.786501,0,-8.925612,39.856864
8,2017-01-25 11:23:33.458,0.0458,0.1916,4.0,0.614153,0,-8.925612,39.856864
9,2017-03-09 11:33:11.457,0.0285,0.1676,4.0,0.709332,0,-8.925612,39.856864


In [None]:
print(sentinel_table.head())


In [None]:
# Plot NDVI through time

plt.figure(figsize=(13, 6))
sentinel_table['ndvi'].plot(label='Unfiltered', marker='o', linestyle='-', markersize=2)

# Set title with coordinates

plt.title(f"NDVI Time Series for ({y:.6f}, {x:.6f})")  # Display coordinates with 6 decimal places
plt.xlabel('Date')
plt.ylabel('NDVI')
plt.legend()
plt.grid()

# Format x-axis date labels using the 'time' column for the ticks

plt.xticks(ticks=range(len(sentinel_table)), labels=sentinel_table['time'].dt.strftime('%Y-%m-%d'), rotation=45)

plt.show()

# Webmap integration with leafmap
* Dinamically see the NDVI timeseries in the area of interest

In [9]:
from ipyleaflet import Map, GeoJSON
from ipywidgets import Output, FloatText
import matplotlib.pyplot as plt
import json
import ipywidgets as widgets
import leafmap


In [10]:
# Load the GeoJSON file (if it's a file on disk)
with open(aoi_path) as f:
    geojson_aoi = json.load(f)

# Extract coordinates for centering the map
coordinates = geojson_aoi['features'][0]['geometry']['coordinates'][0]  # Assuming it's a polygon
lon_min = min(coord[0] for coord in coordinates)
lat_min = min(coord[1] for coord in coordinates)
lon_max = max(coord[0] for coord in coordinates)
lat_max = max(coord[1] for coord in coordinates)

# Calculate the center of the AOI
center_lat = (lat_min + lat_max) / 2
center_lon = (lon_min + lon_max) / 2

# Initialize the map centered on the AOI
m = leafmap.Map(center=[center_lat, center_lon], zoom=12)

# Add GeoJSON layer using the correct method
m.add_geojson(geojson_aoi)

# Create Float widgets to hold the clicked coordinates
lat_widget = FloatText(description='Latitude:')
lon_widget = FloatText(description='Longitude:')
output = Output()

# Display the widgets
display(lat_widget, lon_widget, output)


FloatText(value=0.0, description='Latitude:')

FloatText(value=0.0, description='Longitude:')

Output()

Event data received: {'event': 'interaction', 'type': 'mouseover', 'coordinates': [39.79455658747956, -8.85326385498047]}
Map clicked at latitude: -8.85326385498047, longitude: 39.79455658747956
Event data received: {'event': 'interaction', 'type': 'mousemove', 'coordinates': [39.79455658747956, -8.85326385498047]}
Map clicked at latitude: -8.85326385498047, longitude: 39.79455658747956
Event data received: {'event': 'interaction', 'type': 'mousemove', 'coordinates': [39.79482037706643, -8.85326385498047]}
Map clicked at latitude: -8.85326385498047, longitude: 39.79482037706643
Event data received: {'event': 'interaction', 'type': 'mousemove', 'coordinates': [39.795611739756964, -8.853607177734377]}
Map clicked at latitude: -8.853607177734377, longitude: 39.795611739756964
Event data received: {'event': 'interaction', 'type': 'mousemove', 'coordinates': [39.796139309825534, -8.85429382324219]}
Map clicked at latitude: -8.85429382324219, longitude: 39.796139309825534
Event data received

In [11]:
# Function to plot the NDVI time series for clicked location
def plot_ndvi_timeseries(lat, lon):
    with output:
        output.clear_output()  # Clear previous output
        try:
            # Define a tolerance level for coordinate matching
            tolerance = 0.0001  # Adjust this value based on your data's precision

            # Filter sentinel_table based on the clicked coordinates
            filtered_data = sentinel_table[
                (sentinel_table['y'].between(lat - tolerance, lat + tolerance)) & 
                (sentinel_table['x'].between(lon - tolerance, lon + tolerance))
            ]

            if not filtered_data.empty:
                # Ensure 'time' and 'ndvi' columns exist in filtered_data
                if 'time' in filtered_data.columns and 'ndvi' in filtered_data.columns:
                    # Plot NDVI through time
                    plt.figure(figsize=(13, 6))
                    plt.plot(filtered_data['time'], filtered_data['ndvi'], label='NDVI', marker='o', linestyle='-', markersize=2)

                    # Set title with coordinates
                    plt.title(f"NDVI Time Series for ({lat:.6f}, {lon:.6f})")  # Display coordinates with 6 decimal places
                    plt.xlabel('Date')
                    plt.ylabel('NDVI')
                    plt.legend()
                    plt.grid()

                    # Format x-axis date labels
                    plt.xticks(rotation=45)  # Rotate the x-axis labels for better readability
                    plt.tight_layout()  # Adjust layout for better fit
                    plt.show()
                else:
                    print("Filtered data does not contain required 'time' and 'ndvi' columns.")
            else:
                print(f"No NDVI data found for the clicked location: ({lat:.6f}, {lon:.6f})")

        except Exception as e:
            print(f"Error extracting NDVI time series: {e}")


In [12]:
# Function to handle clicks on the map and update the NDVI plot
def on_map_click(**kwargs):  # Accept the event data
    print("Event data received:", kwargs)  # Debugging output to see the entire kwargs

    # Extract latitude and longitude from the coordinates list
    coordinates = kwargs.get('coordinates', [None, None])
    lon = coordinates[0]  # Longitude from coordinates
    lat = coordinates[1]  # Latitude from coordinates

    print(f"Map clicked at latitude: {lat}, longitude: {lon}")  # Print for debugging

    if lat is not None and lon is not None:  # Ensure lat/lon are present
        # Update the validation logic to allow Southern Hemisphere and Eastern Hemisphere
        if lat < -90 or lat > 90 or lon < -180 or lon > 180:  # General validation for lat/lon ranges
            print("Invalid coordinates received. Latitude must be in range [-90, 90] and Longitude in range [-180, 180].")
            return
        
        # Update the widgets with clicked coordinates
        lat_widget.value = lat
        lon_widget.value = lon
        # Plot NDVI time series
        plot_ndvi_timeseries(lat, lon)
    else:
        print("Clicked location data is missing.")

In [13]:
# Attach the click event handler to the map
m.on_interaction(on_map_click)

# Display the map
m

Map(center=[39.840576545010705, -8.935851806631181], controls=(ZoomControl(options=['position', 'zoom_in_text'…