# 20250903 debug ndvi - Solved

Notebook used to debug NDVI, since after the fixes made to temperature (available_datasets()), running using sentinel satellite is not working.

__Identified issue:__ Was using sentinel with band 'nir08' (common name used in landsat) instead of 'nir' (common name used in sentinel).

## __Import libraries__

In [1]:
from pathlib import Path
current_path = Path().resolve()
for parent in current_path.parents:
    if parent.name == "accesibilidad-urbana":
        project_root = parent
        break
print(project_root)

/home/observatorio/Documents/repos/accesibilidad-urbana


In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

## __From Script 19: Config notebook__

In [3]:
city = 'Culiacan'

In [4]:
band_name_dict = {'nir08':[False], #If GSD(resolution) of band is different, set True.
                  'red':[False], #If GSD(resolution) of band is different, set True.
                  'eq':['(nir08-red)/(nir08+red)']}
sat_query = {"eo:cloud_cover": {"lt": 10}}
index_analysis = 'ndvi'
tmp_dir = str(module_path)+f'/data/processed/tmp_{index_analysis}/'
res = [8,11]
freq = 'MS'
start_date = '2018-01-01'
end_date = '2023-12-31'
satellite = "sentinel-2-l2a"
save = True  # True
del_data = False # True

print(tmp_dir)

/home/observatorio/Documents/repos/accesibilidad-urbana/notebooks/data/processed/tmp_ndvi/


## __From Script 19: Main function__

### __Main function__ - Create hex_city

In [5]:
###############################
### Create city area of interest with biggest hexs
big_res = min(res)
schema_hex = 'hexgrid'
table_hex = f'hexgrid_{big_res}_city_2020'

# Download hexagons with type=urban
type = 'urban'
query = f"SELECT hex_id_{big_res},geometry FROM {schema_hex}.{table_hex} WHERE \"city\" = '{city}\' AND \"type\" = '{type}\'"
hex_urban = aup.gdf_from_query(query, geometry_col='geometry')

# Download hexagons with type=rural within 500m buffer
poly = hex_urban.to_crs("EPSG:6372").buffer(500).reset_index()
poly = poly.to_crs("EPSG:4326")
poly_wkt = poly.dissolve().geometry.to_wkt()[0]
type = 'rural'
query = f"SELECT hex_id_{big_res},geometry FROM {schema_hex}.{table_hex} WHERE \"city\" = '{city}\' AND \"type\" = '{type}\' AND (ST_Intersects(geometry, \'SRID=4326;{poly_wkt}\'))"
hex_rural = aup.gdf_from_query(query, geometry_col='geometry')

# Concatenate urban and rural hex
hex_city = pd.concat([hex_urban, hex_rural])

# Show
print(f'Downloaded {len(hex_city)} hexagon features')
print(hex_city.shape)
print(hex_city.crs)
hex_city.head(2)

Downloaded 490 hexagon features
(490, 2)
EPSG:4326


Unnamed: 0,hex_id_8,geometry
0,88480285a1fffff,"POLYGON ((-107.35366 24.75914, -107.34904 24.7..."
1,884802b86dfffff,"POLYGON ((-107.36458 24.83744, -107.35997 24.8..."


### __a - Try whole download_raster_from_pc() function__

In [6]:
df_len = aup.download_raster_from_pc(hex_city, index_analysis, city, freq,
                                     start_date, end_date, tmp_dir, band_name_dict, 
                                     query=sat_query, satellite=satellite,
                                     compute_unavailable_dates=True)

KeyError: 'nir08 band not found'

### __b - download_raster_from_pc() Step by step debug__

In [7]:
# Rename variables for argument compatibility inside download_raster_from_pc function
gdf = hex_city.copy()
query = sat_query.copy()
projection_crs = "EPSG:6372"
compute_unavailable_dates = True

In [8]:
# Create area of interest coordinates from hexagons to download raster data
print('Extracting bounding coordinates from hexagons')
# Create buffer around hexagons
poly = gdf.to_crs(projection_crs).buffer(500)
poly = poly.to_crs("EPSG:4326")
poly = gpd.GeoDataFrame(geometry=poly).dissolve().geometry
# Extract coordinates from polygon as DataFrame
coord_val = poly.bounds
# Get coordinates for bounding box
n = coord_val.maxy.max()
s = coord_val.miny.min()
e = coord_val.maxx.max()
w = coord_val.minx.min()

# Set the coordinates for the area of interest
area_of_interest = {
    "type": "Polygon",
    "coordinates": [
        [
            [e, s],
            [w, s],
            [w, n],
            [e, n],
            [e, s],
        ]
    ],
}
area_of_interest

Extracting bounding coordinates from hexagons


{'type': 'Polygon',
 'coordinates': [[[np.float64(-107.1869514428522),
    np.float64(24.290122506007737)],
   [np.float64(-107.66177810594223), np.float64(24.290122506007737)],
   [np.float64(-107.66177810594223), np.float64(24.9447728471052)],
   [np.float64(-107.1869514428522), np.float64(24.9447728471052)],
   [np.float64(-107.1869514428522), np.float64(24.290122506007737)]]]}

In [9]:
# Create time of interest (Creates a list for all to-be-analysed-months with structure [start_day/end_day,(...)])
print('Defining time of interest')
time_of_interest = aup.create_time_of_interest(start_date, end_date, freq=freq)
time_of_interest

Defining time of interest


['2018-01-01/2018-01-31',
 '2018-02-01/2018-02-28',
 '2018-03-01/2018-03-31',
 '2018-04-01/2018-04-30',
 '2018-05-01/2018-05-31',
 '2018-06-01/2018-06-30',
 '2018-07-01/2018-07-31',
 '2018-08-01/2018-08-31',
 '2018-09-01/2018-09-30',
 '2018-10-01/2018-10-31',
 '2018-11-01/2018-11-30',
 '2018-12-01/2018-12-31',
 '2019-01-01/2019-01-31',
 '2019-02-01/2019-02-28',
 '2019-03-01/2019-03-31',
 '2019-04-01/2019-04-30',
 '2019-05-01/2019-05-31',
 '2019-06-01/2019-06-30',
 '2019-07-01/2019-07-31',
 '2019-08-01/2019-08-31',
 '2019-09-01/2019-09-30',
 '2019-10-01/2019-10-31',
 '2019-11-01/2019-11-30',
 '2019-12-01/2019-12-31',
 '2020-01-01/2020-01-31',
 '2020-02-01/2020-02-29',
 '2020-03-01/2020-03-31',
 '2020-04-01/2020-04-30',
 '2020-05-01/2020-05-31',
 '2020-06-01/2020-06-30',
 '2020-07-01/2020-07-31',
 '2020-08-01/2020-08-31',
 '2020-09-01/2020-09-30',
 '2020-10-01/2020-10-31',
 '2020-11-01/2020-11-30',
 '2020-12-01/2020-12-31',
 '2021-01-01/2021-01-31',
 '2021-02-01/2021-02-28',
 '2021-03-01

In [10]:
# Gather items for time and area of interest (Creates of list of available image items)
print('Gathering items for time and area of interest')
items = aup.gather_items(time_of_interest, area_of_interest, query=query, satellite=satellite)
print(f'Fetched {len(items)} items')
items

Gathering items for time and area of interest
Fetched 750 items


[<Item id=S2B_MSIL2A_20180129T174559_R098_T13RBH_20201014T095806>,
 <Item id=S2B_MSIL2A_20180129T174559_R098_T13QBG_20201014T095800>,
 <Item id=S2B_MSIL2A_20180129T174559_R098_T12RZN_20201025T221117>,
 <Item id=S2B_MSIL2A_20180129T174559_R098_T12QZM_20201014T095801>,
 <Item id=S2B_MSIL2A_20180119T174639_R098_T13RBH_20201014T070601>,
 <Item id=S2B_MSIL2A_20180119T174639_R098_T12RZN_20201025T212605>,
 <Item id=S2A_MSIL2A_20180114T174701_R098_T13RBH_20201014T054114>,
 <Item id=S2A_MSIL2A_20180114T174701_R098_T13QBG_20201014T054107>,
 <Item id=S2A_MSIL2A_20180114T174701_R098_T12RZN_20201014T054111>,
 <Item id=S2A_MSIL2A_20180114T174701_R098_T12QZM_20201014T054105>,
 <Item id=S2B_MSIL2A_20180330T173909_R098_T13RBH_20201013T165307>,
 <Item id=S2B_MSIL2A_20180330T173909_R098_T13QBG_20201013T165306>,
 <Item id=S2B_MSIL2A_20180330T173909_R098_T12RZN_20201013T165302>,
 <Item id=S2B_MSIL2A_20180330T173909_R098_T12QZM_20201013T165301>,
 <Item id=S2B_MSIL2A_20180320T174019_R098_T13RBH_20201013T1154

In [12]:
# Count available tiles for area of interest (Creates a list of available tiles, inside create_raster_by_month() logs available tiles per date vs total of area of interest)
aoi_tiles = []
for i in items:
    # Retrieve current tile
    if satellite == "sentinel-2-l2a":
        tile = i.properties['s2:mgrs_tile']
    elif satellite == "landsat-c2-l2":
        tile = i.properties['landsat:wrs_path']+i.properties['landsat:wrs_row']
    # Append if first find
    if tile not in aoi_tiles:
        aoi_tiles.append(tile)
print(f'Area of interest composed of {len(aoi_tiles)} tile: {aoi_tiles}.')

Area of interest composed of 4 tile: ['13RBH', '13QBG', '12RZN', '12QZM'].


In [13]:
print('Checking available tiles for area of interest')
# df_clouds, date_list = arrange_items(items, satellite=satellite)
date_list = aup.available_datasets(items, satellite, query)
# log(f"{len(date_list)} dates available with avg {round(df_clouds['avg_cloud'].mean(),2)}% clouds.")
date_list

Checking available tiles for area of interest


[datetime.date(2023, 1, 8),
 datetime.date(2023, 1, 13),
 datetime.date(2022, 3, 19),
 datetime.date(2022, 2, 7),
 datetime.date(2022, 2, 22),
 datetime.date(2022, 12, 24),
 datetime.date(2023, 12, 29),
 datetime.date(2023, 12, 9),
 datetime.date(2023, 12, 14),
 datetime.date(2022, 11, 29),
 datetime.date(2023, 12, 4),
 datetime.date(2022, 10, 20),
 datetime.date(2022, 10, 10),
 datetime.date(2023, 4, 28),
 datetime.date(2023, 1, 18),
 datetime.date(2022, 2, 27),
 datetime.date(2023, 2, 27),
 datetime.date(2022, 4, 28),
 datetime.date(2023, 3, 4),
 datetime.date(2022, 5, 28),
 datetime.date(2022, 4, 13),
 datetime.date(2023, 4, 8),
 datetime.date(2023, 4, 23),
 datetime.date(2022, 12, 14),
 datetime.date(2023, 3, 19),
 datetime.date(2023, 5, 13),
 datetime.date(2022, 5, 3),
 datetime.date(2021, 12, 4),
 datetime.date(2023, 6, 27),
 datetime.date(2018, 12, 10),
 datetime.date(2019, 3, 10),
 datetime.date(2022, 12, 19),
 datetime.date(2022, 3, 9),
 datetime.date(2023, 6, 12),
 datetime.d

In [14]:
# Create dictionary from links (assets_hrefs is a dict. of dates and links with structure {available_date:{band_n:[link]}})
band_name_list = list(band_name_dict.keys())[:-1]
assets_hrefs = aup.link_dict(band_name_list, items, date_list)
print('Created dictionary from items')
assets_hrefs

KeyError: 'nir08 band not found'

#### __b-01 - link_dict() Step by step debug__

In [16]:
import planetary_computer as pc

In [18]:
assets_hrefs = {}

for i in items:
    # only takes into account dates that are in the date list
    if i.datetime.date() not in date_list:
        continue
    # if date already in dictionary, append link to list
    if i.datetime.date() in list(assets_hrefs.keys()):
        for b in band_name_list:
            assets_hrefs[i.datetime.date()][b].append(pc.sign(aup.find_asset_by_band_common_name(i,b).href))
    # if date not in dictionary, create new dictionary entry
    else:
        assets_hrefs[i.datetime.date()] = {}
        for b in band_name_list:
            assets_hrefs[i.datetime.date()].update({b:[]})
            assets_hrefs[i.datetime.date()][b].append(pc.sign(aup.find_asset_by_band_common_name(i,b).href))

KeyError: 'nir08 band not found'

##### __b-02 - find_asset_by_band_common_name() Step by step debug__

In [26]:
from pystac.extensions.eo import EOExtension as eo

In [19]:
i

In [34]:
common_name = 'nir'
for asset in i.assets.values():
        asset_bands = eo.ext(asset).bands
        if asset_bands and asset_bands[0].common_name == common_name:
            print(asset)

<Asset href=https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/13/R/BH/2018/01/29/S2B_MSIL2A_20180129T174559_N0212_R098_T13RBH_20201014T095806.SAFE/GRANULE/L2A_T13RBH_A004700_20180129T174553/IMG_DATA/R10m/T13RBH_20180129T174559_B08_10m.tif>
