This notebook can be used to add average and entropy of spectral bands over a window (and also min/max with small modifications).

It **does not** require points to have an associated polygon. 

It **does** require all points in the csv to have the same CRS.

It creates a small window around each point in the NAIP scene and calculates the "window" features only in that small region.

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [2]:
# ===================================================
# NOTEBOOK VARIABLES
# Everything a user may want to tune before running the notebook.
# ===================================================

# NOTE(review): `itemids` is reassigned from `all_pts` at the top of the
# processing cell, so this table looks unused -- confirm before removing.
itemids = pd.read_csv(sr.path_to_aoi_itemids_csv())

# ---- input points ---------------------------------
# csv with the points that will receive the spectral window features
csv_name = 'spectral_window7_model_feb14_train.csv'
# folder holding the csv; it must end with '/' because the input path
# below is built by plain string concatenation
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/model_feb14/'
fp = f'{root}{csv_name}'
# alternative location used previously: os.path.join(os.getcwd(), 'temp', csv_name)
all_pts = pd.read_csv(fp)

# ---- window sizes ---------------------------------
# radius (in pixels) of the disk over which entropy is calculated
entropy_r = 5

# side length of the square window over which average/max/min are calculated
box_side = 2 * entropy_r + 1

# ---- column names ---------------------------------
# name of the column containing the itemid of the NAIP scene that contains the point
itemid_col = 'naip_id'
# name of the column with the crs of all points
crs_col = 'pts_crs'

# ---- output ---------------------------------------
# when True, the save cell writes the resulting table to a csv
save = True

# ===================================================
# ===================================================

In [3]:
# ===================================================
# Compute window features (max / min / avg / entropy) of the R, G, B, NIR
# bands and of NDVI around every point in `all_pts`.
#
# For each NAIP scene that contains points:
#   1. open the scene and reproject the scene's points to the scene crs,
#   2. for every point, clip the scene to a small box around the point,
#   3. write auxiliary window rasters for that clip into `folp`,
#   4. sample each auxiliary raster at the point and delete the raster.
# The result is `sampled_pts`: the input rows plus one column per feature.
# ===================================================

# temporary folder for aux rasters
# (makedirs also creates the parent 'temp' folder and is a no-op if it exists)
folp = os.path.join(os.getcwd(), 'temp', 'aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

# ===================================================
itemids = list(all_pts[itemid_col].unique())  # itemids of the scenes with points
N = len(itemids)                              # scenes left to process
# crs shared by all points; .iloc reads the first row whatever the index labels are
crs = CRS.from_string(all_pts[crs_col].iloc[0])

# band prefixes (in raster band order 1..4) and the suffixes of the auxiliary
# raster files; both are the same for every point, so they are built once
band_names = ['r_', 'g_', 'b_', 'nir_']
tags = ['_maxs', '_mins', '_avgs', '_entrs']

# ===================================================
scene_frames = []  # one dataframe of sampled points per scene
t0 = time.time()   # initial time tracker (read by the timing cell)
print('REMAINING: ', N, 'scenes', end="\r")

# ===================================================
for itemid in itemids:
    # points that fall in this scene (uses the configured itemid column)
    pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

    # double check there are points in that scene before opening it
    if len(pts_scene) != 0:
        # ---------------------------------------
        # open raster reader for NAIP scene; the context manager closes the
        # remote reader once all points of the scene have been processed
        item = sr.get_item_from_id(itemid)
        href = pc.sign(item.assets["image"].href)
        with rioxr.open_rasterio(href) as naip_rast_r:
            # create geodataframe with pts in scene
            pts_scene_df = sr.geodataframe_from_csv(df=pts_scene, lon_label='x', lat_label='y', crs=crs)
            # convert pts to crs of NAIP scene
            pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

            samples = []  # one single-row dataframe of features per point
            for pt in pts_col:
                # Bounding box of a disk of radius `entropy_r` (in crs units)
                # centered at the point. Per the original author's note, the
                # scene crs is in meters and pixels are ~0.5 m wide, so this
                # box is large enough to hold a window of entropy_r*2 + 1
                # pixels around the point -- TODO confirm for other imagery.
                minx, miny, maxx, maxy = pt.buffer(entropy_r).bounds
                # clip NAIP scene to box
                rast = naip_rast_r.rio.clip_box(minx, miny, maxx, maxy)

                window_fps = []   # paths of the auxiliary rasters for this point
                window_cols = []  # output column name for each auxiliary raster

                # ------------------------------
                # auxiliary max/min/avg/entropy rasters for the R,G,B,NIR bands
                for band_n, band_name in enumerate(band_names, start=1):
                    rast_name = band_name + itemid + '_pt'
                    sr.max_raster(raster=rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)
                    sr.min_raster(raster=rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)
                    sr.avg_raster(raster=rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)
                    sr.entropy_raster(raster=rast, band=band_n, rast_name=rast_name, n=entropy_r, folder_path=folp)

                    for tag in tags:
                        window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                        # e.g. 'r_' + '_maxs' -> 'r_max11' when box_side is 11
                        window_cols.append(band_name.replace('_', '') + tag.replace('s', str(box_side)))

                # ------------------------------
                # auxiliary max/min/avg/entropy rasters for NDVI of the clip
                ndvi = sr.ndvi_xarray(rast)
                rast_name = 'ndvi_' + itemid + '_pt'

                sr.max_min_avg_rasters(rast_data=ndvi,
                                       crs=rast.rio.crs,
                                       transf=rast.rio.transform(),
                                       rast_name=rast_name,
                                       n=box_side,
                                       folder_path=folp)

                # adjusting to entropy input types: rescale before the uint8 cast
                ndvi_scaled = ndvi * 100 + 100
                sr.entropy_raster(rast_data=ndvi_scaled.astype('uint8'),
                                  crs=rast.rio.crs,
                                  transf=rast.rio.transform(),
                                  rast_name=rast_name,
                                  n=entropy_r,
                                  folder_path=folp)

                for tag in tags:
                    window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                    window_cols.append('ndvi' + tag.replace('s', str(box_side)))

                # ---------------------------------------
                # sample every auxiliary raster at this point
                pt_df = gpd.GeoDataFrame({'geometry': [pt]}, crs=pts_col.crs)
                pt_samples = []
                for window_fp, col_name in zip(window_fps, window_cols):
                    # close the dataset before deleting its file
                    with rasterio.open(window_fp) as rast_r:
                        sample = sr.sample_raster_from_pts(pt_df.geometry, rast_r, [col_name])
                    pt_samples.append(sample)
                    os.remove(window_fp)
                samples.append(pd.concat(pt_samples, axis=1))

        # ---------------------------------------
        # Add all derived spectral data to pts dataframe
        # (rows are re-labelled with the index of the scene's points so they align)
        new_features = pd.concat(samples)
        pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

        # -----------------------------
        # collect the points of this scene
        scene_frames.append(pts)

    # ---------------------------------------
    # processing message
    N = N - 1
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')

# ---------------------------------------
# create data frame with all points, in the row order of the input csv,
# without the 'geometry' column(s) that come with the sampled features
sampled_pts = pd.concat(scene_frames).sort_index().drop(columns=['geometry'])

FINISHED PROCESSINGs


In [4]:
# minutes elapsed since `t0` was set at the start of the processing cell
elapsed_minutes = (time.time() - t0) / 60
print(elapsed_minutes)

20.304919429620107


In [5]:
# list the columns of the final table: the columns of the input csv
# followed by the window features added by the processing cell
sampled_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'g', 'b', 'nir', 'ndvi',
       'year', 'month', 'day_in_year', 'iceplant', 'r_max7', 'r_min7',
       'r_avg7', 'r_entr7', 'g_max7', 'g_min7', 'g_avg7', 'g_entr7', 'b_max7',
       'b_min7', 'b_avg7', 'b_entr7', 'nir_max7', 'nir_min7', 'nir_avg7',
       'nir_entr7', 'ndvi_max7', 'ndvi_min7', 'ndvi_avg7', 'ndvi_entr7',
       'r_max11', 'r_min11', 'r_avg11', 'r_entr11', 'g_max11', 'g_min11',
       'g_avg11', 'g_entr11', 'b_max11', 'b_min11', 'b_avg11', 'b_entr11',
       'nir_max11', 'nir_min11', 'nir_avg11', 'nir_entr11', 'ndvi_max11',
       'ndvi_min11', 'ndvi_avg11', 'ndvi_entr11'],
      dtype='object')

In [6]:
# Write the augmented points to `root` (only when `save` is set).
# The output name is the input csv name prefixed with the window size used here.
# NOTE(review): `csv_name` already starts with 'spectral_window7_', so the
# resulting name carries two 'spectral_window' prefixes -- confirm this is intended.
if save:
    out_name = 'spectral_window' + str(box_side) + '_' + csv_name
    fp = os.path.join(root, out_name)
    sampled_pts.to_csv(fp, index=False)

In [7]:
# preview the final table of points with their window features
sampled_pts

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,...,b_avg11,b_entr11,nir_max11,nir_min11,nir_avg11,nir_entr11,ndvi_max11,ndvi_min11,ndvi_avg11,ndvi_entr11
0,-119.851632,34.411630,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,83,76,68,185,0.380597,...,75.669418,3.322213,193,155,182.223145,3.953461,0.477912,-0.015873,0.322314,4.093091
1,-119.843182,34.413305,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,77,105,70,182,0.405405,...,77.652893,3.512372,200,141,180.504135,4.334669,0.460076,0.050360,0.347107,3.851964
2,-119.865369,34.415011,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,107,102,84,155,0.183206,...,97.628098,4.688116,179,143,162.619827,4.484050,0.220472,0.017442,0.107438,3.990436
3,-119.845463,34.414732,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,81,108,78,185,0.390977,...,115.305786,4.801737,192,137,177.884293,4.461393,0.533040,-0.068120,0.239669,4.097358
4,-119.851264,34.415973,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,64,94,74,182,0.479675,...,98.123970,5.188193,184,151,167.305786,4.124887,0.479675,-0.055556,0.181818,4.667045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2468,-120.451944,34.456799,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,53,55,47,131,0.423913,...,56.024792,3.920970,165,72,131.107437,5.467885,0.482927,0.024911,0.355372,4.204444
2469,-120.438128,34.458685,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,92,87,74,84,-0.045455,...,83.669418,4.666726,121,54,86.429749,5.302330,0.039106,-0.226131,-0.082645,4.141958
2470,-120.484881,34.497110,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,97,92,78,110,0.062802,...,77.256195,4.469514,134,81,114.008263,4.732998,0.218274,-0.064000,0.057851,4.224118
2471,-120.485111,34.493451,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,109,87,68,140,0.124498,...,68.181816,4.266148,157,102,134.041321,4.330135,0.261261,-0.033175,0.132231,4.515595
