This notebook can be used to add the average and entropy of the spectral bands (and of NDVI), calculated over a window around each point, as new features.

It **does not** require points to have an associated polygon. 

It **does** require all points in the CSV to have the same CRS.

It creates a small window around each point in the NAIP scene and calculates the "window" features only in that small region.

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# csv with the points for which to add spectral window features
csv_name = 'one_vegetation_point.csv'
# folder containing the csv
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/model_2k/twok_dataset/'

# os.path.join gives the same path whether or not root ends with a separator
# (alternative location for the csv: os.path.join(os.getcwd(), 'temp', csv_name))
fp = os.path.join(root, csv_name)
all_pts = pd.read_csv(fp)

# radius of the disk(s) (in pixels) over which entropy is calculated
entropy_r = [1,2,3,4,5,6,7,8]

# -------------------------------------------
# name of column containing itemid of the NAIP scene containing the point
itemid_col = 'pl_scene'
# name of column with the crs of the points (all points must share the same crs)
crs_col = 'pts_crs'

# column names for longitude and latitude
lon_label = 'lon' # lon = x
lat_label = 'lat' # lat = y

# whether to write the resulting dataframe to a csv
save = True

# ***************************************************
# ***************************************************

In [3]:
# display the points read from the csv
all_pts

Unnamed: 0,lon,lat,pts_crs,pl_scene,iceplant
0,-119.863543,34.413876,EPSG:4326,ca_m_3411934_sw_11_060_20200521,0


In [4]:
# temporary folder for aux rasters
# makedirs also creates the parent 'temp' folder if it is missing and,
# with exist_ok=True, does nothing when the folder is already there
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

# ===================================================
itemids = list(all_pts[itemid_col].unique()) # itemids with points
N = len(itemids)  # counter to finish
# crs of dataframe: all points are required to share one crs, so it is read
# from the first row (by position, so it does not depend on the index labels)
crs = CRS.from_string(all_pts[crs_col].iloc[0])

# ===================================================
# length of side of the square window over which average/max/min are calculated.
box_sides = [r*2 +1 for r in entropy_r]

# ===================================================
# prefixes for the R,G,B,NIR bands (raster bands 1 to 4)
band_names = ['r_', 'g_', 'b_', 'nir_']
# suffixes of the auxiliary rasters: the code below expects sr.avg_raster and
# sr.entropy_raster to write <rast_name><tag>.tif into folder_path
#tags = ['_maxs','_mins','_avgs', '_entrs']
tags = ['_avgs', '_entrs']

# ===================================================
sampled_pts_by_scene = [] # sampled pts from each scene are collected here
t0 = time.time() # initial time tracker (not reported anywhere in this cell)
print('REMAINING: ', N, 'scenes', end="\r")

# ===================================================
for itemid in itemids:
    # ---------------------------------------
    # open raster reader for NAIP scene
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

    # double check there are points in that scene
    if len(pts_scene) != 0:
        # create geodataframe with pts in scene
        pts_scene_df = sr.geodataframe_from_csv(df = pts_scene, lon_label=lon_label, lat_label=lat_label, crs=crs)
        # convert pts to crs of NAIP scene
        pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

        samples = []
        for pt in pts_col:
            # single-point geodataframe used to sample every auxiliary raster of this point
            pt_df = gpd.GeoDataFrame({'geometry':[pt]}, crs=pts_col.crs)
            pt_samples = []
            for ent_r, box_s in zip(entropy_r, box_sides):
                # this creates a box centered at point with side length=entropy_r*2 meters
                #     current pts coordinates are in the NAIP scene's crs, which is in meters
                #     entropy_r is in pixels and each pixel has a side of ~0.5m in the NAIP scene
                #     so pt.buffer(entropy_r) is a disk with radius entropy_r meters,
                #     this disk is inscribed in a square with side length entropy_r*2 meters,
                #     which translates into a square of side length entropy_r*4 pixels
                #     this square is big enough to have a window of side length entropy_r*2 + 1 pixels
                #      around the central pt
                reduce_box = box(*(pt.buffer(ent_r).bounds))
                # clip NAIP scene to box
                rast = naip_rast_r.rio.clip_box(*reduce_box.bounds)

                # paths of the auxiliary rasters for this window size and
                # the names of the columns their sampled values will get
                window_fps = []
                window_cols = []

                # save auxiliary average and entropy rasters for R,G,B,NIR bands of clipped scene
                for band_name, band_n in zip(band_names, range(1,5)):
                    rast_name = band_name + itemid + '_pt'
                    #sr.max_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=folp)
                    #sr.min_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=folp)
                    sr.avg_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=folp)
                    sr.entropy_raster(raster = rast, band=band_n, rast_name=rast_name, n=ent_r, folder_path=folp)

                    for tag in tags:
                        window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                        # replacing the 's' with the window side gives names such as 'r_avg3', 'r_entr3'
                        window_cols.append(band_name.replace('_','')+tag.replace('s',str(box_s)))

                # ------------------------------
                # make auxiliary NDVI of clipped scene
                ndvi = sr.ndvi_xarray(rast)

                # make auxiliary NDVI average and entropy
                rast_name = 'ndvi_' + itemid + '_pt'

                sr.avg_raster(rast_data=ndvi,
                              crs=rast.rio.crs,
                              transf=rast.rio.transform(),
                              rast_name=rast_name,
                              n=box_s,
                              folder_path=folp)

                # adjusting to entropy input types: rescale and cast to uint8
                # (assumes ndvi lies in [-1, 1], which maps to [0, 200] -- TODO confirm)
                ndvi_scaled = ndvi*100 + 100
                sr.entropy_raster(rast_data=ndvi_scaled.astype('uint8'),
                                  crs=rast.rio.crs,
                                  transf=rast.rio.transform(),
                                  rast_name=rast_name,
                                  n=ent_r,
                                  folder_path=folp)

                for tag in tags:
                    window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                    window_cols.append('ndvi'+tag.replace('s',str(box_s)))

                # ---------------------------------------
                # sample raster values for this point, then delete the auxiliary raster
                for aux_fp, col_name in zip(window_fps, window_cols):
                    # the with-block closes the file handle before the file is removed
                    with rasterio.open(aux_fp) as rast_r:
                        sample = sr.sample_raster_from_pts(pt_df.geometry, rast_r, [col_name])
                    pt_samples.append(sample)
                    os.remove(aux_fp)
            # one row per point, one column per (band, statistic, window size)
            samples.append(pd.concat(pt_samples, axis=1))

        # ---------------------------------------
        # Add all derived spectral data to pts dataframe
        new_features = pd.concat(samples)
        pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

        # -----------------------------
        # collect all points from the scene
        sampled_pts_by_scene.append(pts)

    # ---------------------------------------
    # release the reader of this NAIP scene before moving on to the next one
    naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

# the progress line ends with '\r' and is longer than the final message, so blank
# it out first; otherwise its last character shows through ('FINISHED PROCESSINGs')
print(' ' * 40, end="\r")
print('FINISHED PROCESSING')

# ---------------------------------------
# create data frame with all points
sampled_pts = pd.concat(sampled_pts_by_scene).sort_index()
if 'geometry' in sampled_pts.columns:
    sampled_pts = sampled_pts.drop(['geometry'],axis=1)

FINISHED PROCESSINGs


In [5]:
# when saving is enabled, write the points with their new window features
# to the root folder, prefixing the input file name with 'spectral_windows_'
if save:
    fp = os.path.join(root, f'spectral_windows_{csv_name}')
    sampled_pts.to_csv(fp, index=False)

In [6]:
# column names of the final dataframe: the original csv columns plus the new window features
sampled_pts.columns

Index(['lon', 'lat', 'pts_crs', 'pl_scene', 'iceplant', 'r_avg3', 'r_entr3',
       'g_avg3', 'g_entr3', 'b_avg3', 'b_entr3', 'nir_avg3', 'nir_entr3',
       'ndvi_avg3', 'ndvi_entr3', 'r_avg5', 'r_entr5', 'g_avg5', 'g_entr5',
       'b_avg5', 'b_entr5', 'nir_avg5', 'nir_entr5', 'ndvi_avg5', 'ndvi_entr5',
       'r_avg7', 'r_entr7', 'g_avg7', 'g_entr7', 'b_avg7', 'b_entr7',
       'nir_avg7', 'nir_entr7', 'ndvi_avg7', 'ndvi_entr7', 'r_avg9', 'r_entr9',
       'g_avg9', 'g_entr9', 'b_avg9', 'b_entr9', 'nir_avg9', 'nir_entr9',
       'ndvi_avg9', 'ndvi_entr9', 'r_avg11', 'r_entr11', 'g_avg11', 'g_entr11',
       'b_avg11', 'b_entr11', 'nir_avg11', 'nir_entr11', 'ndvi_avg11',
       'ndvi_entr11', 'r_avg13', 'r_entr13', 'g_avg13', 'g_entr13', 'b_avg13',
       'b_entr13', 'nir_avg13', 'nir_entr13', 'ndvi_avg13', 'ndvi_entr13',
       'r_avg15', 'r_entr15', 'g_avg15', 'g_entr15', 'b_avg15', 'b_entr15',
       'nir_avg15', 'nir_entr15', 'ndvi_avg15', 'ndvi_entr15', 'r_avg17',
       'r_entr

In [7]:
# display the points together with their sampled window features
sampled_pts

Unnamed: 0,lon,lat,pts_crs,pl_scene,iceplant,r_avg3,r_entr3,g_avg3,g_entr3,b_avg3,...,r_avg17,r_entr17,g_avg17,g_entr17,b_avg17,b_entr17,nir_avg17,nir_entr17,ndvi_avg17,ndvi_entr17
0,-119.863543,34.413876,EPSG:4326,ca_m_3411934_sw_11_060_20200521,0,87.888885,1.921928,113.888885,1.521928,82.111115,...,92.740486,5.8134,114.463669,5.191099,84.273354,4.851666,188.57785,3.349487,0.346021,4.986747


In [None]:
# summary statistics of the resulting dataframe
sampled_pts.describe()