This notebook can be used to add the maximum, minimum, average and entropy of the spectral bands (and NDVI) over a window around each point.

It **does not** require points to have an associated polygon. 

It **does** require all points in the csv to have the same crs.

It creates a small window around each point in the NAIP scene and calculates the "window" features only in that small region.

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# NOTE: the list of NAIP itemids is not loaded here; the processing cell
# derives it from the `itemid_col` column of `all_pts`.

# csv with the points for which to add spectral window features
csv_name = 'new_iceplant_pts.csv'
# folder containing the input csv; the output csv is written here as well
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/snow_model/'
fp = os.path.join(root, csv_name)
#fp = os.path.join(os.getcwd(),'temp',csv_name)
all_pts = pd.read_csv(fp)

# radius of the disk (in pixels) over which entropy is calculated
entropy_r = 6

# length of side of the square window over which average/max/min are calculated.
# 2*radius + 1 so the window is centered on a pixel and spans the entropy disk
box_side = entropy_r*2 +1

# -------------------------------------------
# name of column containing itemid of the NAIP scene containing the point
itemid_col = 'naip_id'
# name of column with the crs of all points
crs_col = 'pts_crs'

# whether to write the resulting dataframe to a csv at the end of the notebook
save = True

# ***************************************************
# ***************************************************

In [3]:
# Adds window features (max/min/avg over a box_side x box_side window and
# entropy over a disk of radius entropy_r) of the R, G, B, NIR bands and NDVI
# to every point in all_pts. Points are processed scene by scene; for each
# point the NAIP scene is clipped to a small box around it, the auxiliary
# rasters are written to a temporary folder, sampled at the point and deleted.
# Result: `sampled_pts`, a dataframe with the original columns plus the new ones.

# temporary folder for aux rasters
# makedirs + exist_ok also creates the intermediate 'temp' folder when it is
# missing and does nothing when the folder already exists
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

# ===================================================
itemids = list(all_pts[itemid_col].unique()) # itemids with points
N = len(itemids)  # counter to finish
# crs of dataframe: all points share the same crs, so it is read from the first
# row (.iloc is positional, so it does not depend on the index labels)
crs = CRS.from_string(all_pts[crs_col].iloc[0])

# prefixes of the spectral bands, listed in raster band order (bands 1 to 4)
band_names = ['r_', 'g_', 'b_', 'nir_']
# suffixes of the auxiliary raster files read back from the temporary folder
# (presumably the names the sr.*_raster helpers write -- verify against sample_rasters)
tags = ['_maxs','_mins','_avgs', '_entrs']

# ===================================================
scene_pts = [] # sampled pts from each scene are collected here
t0 = time.time() # initial time tracker
print('REMAINING: ', N, 'scenes', end="\r")

# ===================================================
for itemid in itemids:
    # ---------------------------------------
    # open raster reader for NAIP scene
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    # points located in this scene (column name comes from the notebook variables)
    pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

    # double check there are points in that scene
    if len(pts_scene) != 0:
        # create geodataframe with pts in scene
        pts_scene_df = sr.geodataframe_from_csv(df = pts_scene, lon_label='x', lat_label='y', crs=crs)
        # convert pts to crs of NAIP scene
        pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

        samples = []  # one single-row dataframe of window features per point
        for pt in pts_col:
            # this creates a box centered at point with side length=entropy_r*2 meters
            #     current pts coordinates are in the NAIP scene's crs, which is in meters
            #     entropy_r is in pixels and each pixel has a side of ~0.5m in the NAIP scene
            #     so pt.buffer(entropy_r) is a disk with radius entropy_r meters,
            #     this disk is inscribed in a square with side length entropy_r*2 meters,
            #     which translates into a square of side length entropy_r*4 pixels
            #     this square is big enough to have a window of side length entropy_r*2 + 1 pixels
            #      around the central pt
            reduce_box = box(*(pt.buffer(entropy_r).bounds))
            # clip NAIP scene to box
            rast = naip_rast_r.rio.clip_box(*reduce_box.bounds)

            window_fps = []   # paths of the auxiliary rasters for this point
            window_cols = []  # name of the output column for each auxiliary raster

            # ------------------------------
            # save auxiliary max/min/avg and entropy rasters for R,G,B,NIR bands of clipped scene
            for band_n, band_name in enumerate(band_names, start=1):
                rast_name = band_name + itemid + '_pt'
                sr.max_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)
                sr.min_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)
                sr.avg_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)
                sr.entropy_raster(raster = rast, band=band_n, rast_name=rast_name, n=entropy_r, folder_path=folp)

                for tag in tags:
                    window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                    # column name: band + tag with its trailing 's' swapped for the window size, e.g. 'r_max13'
                    window_cols.append(band_name.replace('_','') + tag[:-1] + str(box_side))

            # ------------------------------
            # make auxiliary NDVI of clipped scene
            ndvi = sr.ndvi_xarray(rast)

            # make auxiliary NDVI max/min/avg rasters
            rast_name = 'ndvi_' + itemid + '_pt'

            sr.max_min_avg_rasters(rast_data=ndvi,
                              crs=rast.rio.crs,
                              transf=rast.rio.transform(),
                              rast_name=rast_name,
                              n=box_side,
                              folder_path=folp)

            # adjusting to entropy input types:
            # rescale NDVI so that it can be cast to uint8 for the entropy calculation
            ndvi = ndvi*100 +100
            sr.entropy_raster(rast_data=ndvi.astype('uint8'),
                              crs=rast.rio.crs,
                              transf=rast.rio.transform(),
                              rast_name=rast_name,
                              n=entropy_r,
                              folder_path=folp)

            for tag in tags:
                window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                window_cols.append('ndvi' + tag[:-1] + str(box_side))

            # ---------------------------------------
            # sample every auxiliary raster at this point
            # the single-point geodataframe is the same for all rasters, so build it once
            pt_df = gpd.GeoDataFrame({'geometry':[pt]}, crs=pts_col.crs)
            pt_samples = []
            for aux_fp, col_name in zip(window_fps, window_cols):
                # the context manager closes the reader before the file is deleted
                with rasterio.open(aux_fp) as rast_r:
                    sample = sr.sample_raster_from_pts(pt_df.geometry, rast_r, [col_name])
                pt_samples.append(sample)
                os.remove(aux_fp)
            samples.append(pd.concat(pt_samples, axis=1))

        # ---------------------------------------
        # Add all derived spectral data to pts dataframe
        # samples are in the same order as pts_col, so its index realigns them with pts_scene
        new_features = pd.concat(samples)
        pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

        # -----------------------------
        # collect the points of this scene
        scene_pts.append(pts)

    # ---------------------------------------
    # release the reader of this scene before moving on to the next one
    naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')

# ---------------------------------------
# create data frame with all points, back in the row order of the input csv
sampled_pts = pd.concat(scene_pts).sort_index()
# the geometry column is not needed in the output table
# (presumably added by the sr helpers -- verify against sample_rasters)
sampled_pts = sampled_pts.drop(['geometry'],axis=1)

FINISHED PROCESSINGs


In [4]:
# minutes elapsed since the time tracker t0 was set in the processing cell
elapsed_minutes = (time.time() - t0) / 60
print(elapsed_minutes)

4.196343326568604


In [5]:
# list the columns of the result, to check that the window features were appended
sampled_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'g', 'b', 'nir', 'ndvi',
       'year', 'month', 'day_in_year', 'iceplant', 'aux', 'r_max13', 'r_min13',
       'r_avg13', 'r_entr13', 'g_max13', 'g_min13', 'g_avg13', 'g_entr13',
       'b_max13', 'b_min13', 'b_avg13', 'b_entr13', 'nir_max13', 'nir_min13',
       'nir_avg13', 'nir_entr13', 'ndvi_max13', 'ndvi_min13', 'ndvi_avg13',
       'ndvi_entr13'],
      dtype='object')

In [6]:
# write the points with their new window features next to the input csv;
# the window size is part of the file name, e.g. spectral_window13_<csv_name>
if save:
    out_name = 'spectral_window' + str(box_side) + '_' + csv_name
    fp = os.path.join(root, out_name)
    sampled_pts.to_csv(fp, index=False)

In [7]:
# display the resulting dataframe (original columns plus the window features)
sampled_pts

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,...,b_avg13,b_entr13,nir_max13,nir_min13,nir_avg13,nir_entr13,ndvi_max13,ndvi_min13,ndvi_avg13,ndvi_entr13
0,-119.844222,34.405148,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,97,89,79,150,0.214575,...,92.668640,4.921211,171,125,151.130173,5.076384,0.341772,-0.034483,0.177515,4.698968
1,-119.844344,34.405159,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,92,94,82,143,0.217021,...,85.266273,3.859697,169,131,148.325439,4.551615,0.495327,-0.032258,0.218935,4.007368
2,-119.845189,34.405578,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,82,93,69,160,0.322314,...,71.798813,3.595210,177,112,156.786987,5.164546,0.480519,0.160622,0.337278,4.329010
3,-119.844864,34.405433,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,79,85,75,136,0.265116,...,70.520714,3.139149,174,130,157.443787,4.855917,0.392000,0.061489,0.295858,3.864215
4,-119.844879,34.405519,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,64,87,67,179,0.473251,...,75.497040,3.739329,180,95,158.195267,4.975759,0.477178,0.017921,0.319527,4.457259
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,-119.997625,34.459761,EPSG:4326,capitan,ca_m_3412040_ne_10_060_20200522,109,110,96,139,0.120968,...,99.366867,4.490379,168,62,133.437866,5.278843,0.270588,-0.354167,0.059172,4.649326
513,-119.997251,34.459701,EPSG:4326,capitan,ca_m_3412040_ne_10_060_20200522,106,113,89,164,0.214815,...,99.183434,4.442990,179,60,145.254440,5.079838,0.267399,-0.371728,0.076923,4.297278
514,-119.997230,34.459707,EPSG:4326,capitan,ca_m_3412040_ne_10_060_20200522,111,116,88,162,0.186813,...,106.278107,4.820007,180,60,133.337280,5.622832,0.267399,-0.371728,0.011834,5.055483
515,-119.997625,34.459725,EPSG:4326,capitan,ca_m_3412040_ne_10_060_20200522,124,116,94,140,0.060606,...,92.863907,3.237170,173,111,149.136093,5.103704,0.179688,0.055118,0.106509,3.191484


In [8]:
# summary statistics of the result, as a sanity check on the new features
sampled_pts.describe()

Unnamed: 0,x,y,r,g,b,nir,ndvi,year,month,day_in_year,...,b_avg13,b_entr13,nir_max13,nir_min13,nir_avg13,nir_entr13,ndvi_max13,ndvi_min13,ndvi_avg13,ndvi_entr13
count,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,...,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0
mean,-119.966776,34.441281,97.561325,104.039391,84.774396,159.558639,0.248302,2020.0,5.128917,144.623993,...,90.682083,4.357884,177.428827,116.03402,155.150208,4.693472,0.386541,0.012937,0.207805,4.245382
std,0.310649,0.032233,25.852058,19.999289,16.195854,22.783566,0.132975,0.0,0.335258,5.552622,...,17.037661,0.75441,14.143453,34.554418,18.145344,0.67624,0.133043,0.133594,0.122284,0.614632
min,-120.495227,34.385653,30.0,44.0,40.0,37.0,-0.045455,2020.0,5.0,142.0,...,46.005917,1.801241,111.0,14.0,68.78698,1.911348,0.014749,-0.542857,-0.047337,1.570297
25%,-120.213509,34.412462,79.0,91.0,72.0,149.0,0.140449,2020.0,5.0,142.0,...,76.78698,3.904427,170.0,97.0,145.177521,4.274586,0.281481,-0.066246,0.106509,3.843649
50%,-120.010529,34.458368,99.0,102.0,83.0,164.0,0.230126,2020.0,5.0,143.0,...,89.74556,4.439958,179.0,123.0,156.976334,4.733791,0.396985,-0.003257,0.189349,4.313352
75%,-119.815755,34.471001,117.0,119.0,95.0,176.0,0.344,2020.0,5.0,143.0,...,102.66272,4.881694,186.0,141.0,168.349106,5.153347,0.488,0.080247,0.289941,4.688486
max,-119.496454,34.499585,175.0,170.0,157.0,203.0,0.676768,2020.0,6.0,159.0,...,157.278107,6.061113,231.0,200.0,202.538467,6.244785,0.781818,0.474359,0.639053,5.665509
