This notebook can be used to add average and entropy of spectral bands over a window.

It **does not** require points to have an associated polygon. 

It **does** require all points in the csv to have the same CRS.

It creates a small window around each point in the NAIP scene and calculates the "window" features only in that small region.

In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# NOTE: the AOI item-id csv used to be loaded into `itemids` here, but that value was
# never read: the processing cell rebuilds `itemids` from the points' own itemid column.
# The dead file read was removed so a missing AOI csv cannot break this notebook.

# csv with the points for which to add spectral window features
csv_name = 'twok_test.csv'
root = '/home/jovyan/msai4earth-esa/iceplant_detection/models/model_2k/twok_dataset_for_training_experiments/'
fp = os.path.join(root, csv_name)
#fp = os.path.join(os.getcwd(),'temp',csv_name)
all_pts = pd.read_csv(fp)

# radius of the disk(s) (in pixels) over which entropy is calculated;
# one set of window features is produced per radius in this list
#entropy_r = [8,7,6,5,4,3,2,1]
entropy_r = [2,1]

# -------------------------------------------
# name of column containing itemid of the NAIP scene containing the point
itemid_col = 'naip_id'
# name of column with the crs of the points (all points must share the same crs)
crs_col = 'pts_crs'

# set to True to write the resulting dataframe to a csv in `root`
save = False

# ***************************************************
# ***************************************************

In [3]:
# keep only the first 10 rows (by position) for a quick trial run
all_pts = all_pts.iloc[:10]

In [4]:
# temporary folder for the auxiliary (per-point) window rasters
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
# makedirs also creates the intermediate 'temp' folder (os.mkdir fails if it is missing)
os.makedirs(folp, exist_ok=True)

# ===================================================
itemids = list(all_pts[itemid_col].unique()) # itemids with points
N = len(itemids)  # number of scenes left to process
# crs of dataframe: read from the first row by position, so it does not
# depend on the index containing the label 0
crs = CRS.from_string(all_pts[crs_col].iloc[0])

# ===================================================
# length of side of the square window over which the average is calculated.
box_sides = [r*2 +1 for r in entropy_r]

# prefixes of the auxiliary rasters, in NAIP band order (bands 1..4)
band_names = ['r_', 'g_', 'b_', 'nir_']
# suffixes of the auxiliary raster files; the trailing 's' is replaced by the
# window side length to build the output column name (e.g. '_avgs' -> '_avg5')
#tags = ['_maxs','_mins','_avgs', '_entrs']
tags = ['_avgs', '_entrs']

# ===================================================
sampled_pts = [] # sampled pts from each scene are collected here
t0 = time.time() # initial time tracker
print('REMAINING: ', N, 'scenes', end="\r")

# ===================================================
for itemid in itemids:
    # ---------------------------------------
    # open raster reader for NAIP scene
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    # points located in this scene (uses the configured itemid column)
    pts_scene = all_pts.loc[all_pts[itemid_col] == itemid]

    # double check there are points in that scene
    if len(pts_scene) != 0:
        # create geodataframe with pts in scene
        pts_scene_df = sr.geodataframe_from_csv(df = pts_scene, lon_label='x', lat_label='y', crs=crs)
        # convert pts to crs of NAIP scene
        pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

        samples = []  # one single-row frame of window features per point
        for pt in pts_col:
            pt_samples = []
            # single-point geodataframe used to sample every auxiliary raster of this point
            pt_df = gpd.GeoDataFrame({'geometry':[pt]}, crs=pts_col.crs)

            for ent_r, box_s in zip(entropy_r, box_sides):
                # this creates a box centered at point with side length=entropy_r*2 meters
                #     current pts coordinates are in the NAIP scene's crs, which is in meters
                #     entropy_r is in pixels and each pixel has a side of ~0.5m in the NAIP scene
                #     so pt.buffer(entropy_r) is a disk with radius entropy_r meters,
                #     this disk is inscribed in a square with side length entropy_r*2 meters,
                #     which translates into a square of side length entropy_r*4 pixels
                #     this square is big enough to have a window of side length entropy_r*2 + 1 pixels
                #      around the central pt
                reduce_box = box(*(pt.buffer(ent_r).bounds))
                # clip NAIP scene to box
                rast = naip_rast_r.rio.clip_box(*reduce_box.bounds)

                window_fps = []   # paths of the auxiliary rasters written for this window size
                window_cols = []  # output column name for each auxiliary raster

                # ------------------------------
                # save auxiliary average and entropy rasters for R,G,B,NIR bands of clipped scene
                for band_n, band_name in enumerate(band_names, start=1):
                    rast_name = band_name + itemid + '_pt'
                    #sr.max_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=folp)
                    #sr.min_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=folp)
                    sr.avg_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_s, folder_path=folp)
                    sr.entropy_raster(raster = rast, band=band_n, rast_name=rast_name, n=ent_r, folder_path=folp)

                    for tag in tags:
                        window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                        window_cols.append(band_name.replace('_','')+tag.replace('s',str(box_s)))

                # ------------------------------
                # make auxiliary NDVI of clipped scene
                ndvi = sr.ndvi_xarray(rast)
                rast_name = 'ndvi_' + itemid + '_pt'

                # NOTE(review): in the saved output every ndvi_avg3 value is a multiple of 1/9,
                # which suggests NDVI is averaged as integers somewhere -- confirm the dtype
                # handling in sr.ndvi_xarray / sr.avg_raster.
                sr.avg_raster(rast_data=ndvi,
                              crs=rast.rio.crs,
                              transf=rast.rio.transform(),
                              rast_name=rast_name,
                              n=box_s,
                              folder_path=folp)

                # adjusting to entropy input types: rescale NDVI before casting to uint8
                ndvi_scaled = ndvi*100 +100
                sr.entropy_raster(rast_data=ndvi_scaled.astype('uint8'),
                                  crs=rast.rio.crs,
                                  transf=rast.rio.transform(),
                                  rast_name=rast_name,
                                  n=ent_r,
                                  folder_path=folp)

                for tag in tags:
                    window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                    window_cols.append('ndvi'+tag.replace('s',str(box_s)))

                # ---------------------------------------
                # sample each auxiliary raster at the point, then delete the raster;
                # the reader is closed (with-block) before the file is removed
                for window_fp, col_name in zip(window_fps, window_cols):
                    with rasterio.open(window_fp) as rast_r:
                        sample = sr.sample_raster_from_pts(pt_df.geometry, rast_r, [col_name])
                    pt_samples.append(sample)
                    os.remove(window_fp)

            # all window features of this point side by side
            samples.append(pd.concat(pt_samples, axis=1))

        # ---------------------------------------
        # Add all derived spectral data to pts dataframe
        new_features = pd.concat(samples)
        pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)

        # -----------------------------
        # collect all points in the scene
        sampled_pts.append(pts)

    # release the handle to the NAIP scene before moving on to the next one
    naip_rast_r.close()

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')

FINISHED PROCESSINGs


In [5]:
# ---------------------------------------
# stack the per-scene dataframes into one, put the rows back in index order
# and discard the geometry column
sampled_pts = (
    pd.concat(sampled_pts)
    .sort_index()
    .drop(columns=['geometry'])
)

In [6]:
# minutes elapsed since t0 was set at the start of the processing cell
elapsed_min = (time.time() - t0) / 60
print(elapsed_min)

0.13314466476440429


In [7]:
# list the columns: the original csv columns followed by the new window features
sampled_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'g', 'b', 'nir', 'ndvi',
       'year', 'month', 'day_in_year', 'iceplant', 'r_avg5', 'r_entr5',
       'g_avg5', 'g_entr5', 'b_avg5', 'b_entr5', 'nir_avg5', 'nir_entr5',
       'ndvi_avg5', 'ndvi_entr5', 'r_avg3', 'r_entr3', 'g_avg3', 'g_entr3',
       'b_avg3', 'b_entr3', 'nir_avg3', 'nir_entr3', 'ndvi_avg3',
       'ndvi_entr3'],
      dtype='object')

In [8]:
# write the points with their window features next to the input csv
if save:
    # `box_side` was never defined (only the list `box_sides` is), so the file name
    # is built from every window side length, e.g. 'spectral_window5_3_<csv_name>'
    window_tag = '_'.join(str(side) for side in box_sides)
    fp = os.path.join(root, 'spectral_window' + window_tag + '_' + csv_name)
    sampled_pts.to_csv(fp, index=False)

In [9]:
# display the resulting dataframe (original columns + window features)
sampled_pts

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,g,b,nir,ndvi,...,r_avg3,r_entr3,g_avg3,g_entr3,b_avg3,b_entr3,nir_avg3,nir_entr3,ndvi_avg3,ndvi_entr3
0,-120.489551,34.499965,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,119,111,82,140,0.081081,...,114.222221,2.321928,106.333336,1.921928,81.666664,1.921928,135.888885,1.921928,0.0,1.921928
1,-120.46534,34.46992,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,90,85,77,100,0.052632,...,92.888885,1.921928,90.222221,1.521928,77.888885,2.321928,103.0,1.921928,0.0,2.321928
2,-120.446698,34.455653,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,111,99,79,161,0.183824,...,118.444443,1.921928,107.333336,2.321928,85.555557,1.921928,155.888885,1.921928,0.111111,1.521928
3,-120.438111,34.454353,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,53,63,59,101,0.311688,...,55.666668,1.921928,65.666664,2.321928,57.444443,2.321928,106.111115,1.921928,0.222222,2.321928
4,-120.444426,34.452852,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,34,58,43,167,0.661692,...,39.444443,2.321928,59.444443,2.321928,46.333332,1.921928,166.444443,2.321928,0.555556,1.921928
5,-120.469982,34.467804,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,105,103,84,139,0.139344,...,103.333336,1.921928,100.111115,1.921928,81.777779,2.321928,135.666672,2.321928,0.111111,1.921928
6,-120.473386,34.474912,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,77,88,70,129,0.252427,...,77.333336,2.321928,85.666664,2.321928,69.666664,2.321928,124.111115,1.921928,0.222222,2.321928
7,-119.834396,34.423423,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,117,127,94,180,0.212121,...,118.777779,2.321928,126.888885,1.921928,94.888885,1.921928,179.888885,1.370951,0.111111,1.370951
8,-119.832574,34.417197,EPSG:4326,campus_lagoon,ca_m_3411934_sw_11_060_20200521,49,68,66,113,0.395062,...,56.888889,2.321928,73.333336,1.921928,69.0,1.921928,113.222221,1.921928,0.222222,2.321928
9,-120.495187,34.497435,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,79,84,70,103,0.131868,...,76.111115,2.321928,78.888885,2.321928,68.555557,1.921928,98.777779,2.321928,0.111111,2.321928


In [10]:
# summary statistics of the numeric columns, as a sanity check on the new features
sampled_pts.describe()

Unnamed: 0,x,y,r,g,b,nir,ndvi,year,month,day_in_year,...,r_avg3,r_entr3,g_avg3,g_entr3,b_avg3,b_entr3,nir_avg3,nir_entr3,ndvi_avg3,ndvi_entr3
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,...,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,-120.338965,34.461351,83.4,88.6,72.4,133.3,0.242174,2020.0,5.8,155.6,...,85.311111,2.161928,89.388878,2.081928,73.277771,2.081928,131.899994,1.98683,0.166667,2.02683
std,0.267055,0.02715,30.324907,21.96563,14.354248,29.177807,0.180503,0.0,0.421637,7.167829,...,28.625093,0.206559,21.008802,0.279682,14.189221,0.206559,28.104372,0.287026,0.159302,0.358009
min,-120.495187,34.417197,34.0,58.0,43.0,100.0,0.052632,2020.0,5.0,142.0,...,39.444443,1.921928,59.444443,1.521928,46.333332,1.921928,98.777779,1.370951,0.0,1.370951
25%,-120.472535,34.453227,59.0,72.0,67.0,105.5,0.133737,2020.0,6.0,159.0,...,61.694446,1.921928,74.722223,1.921928,68.666668,1.921928,107.888891,1.921928,0.111111,1.921928
50%,-120.456019,34.461729,84.5,86.5,73.5,134.0,0.197972,2020.0,6.0,159.0,...,85.111111,2.321928,87.944443,2.121928,73.777775,1.921928,129.888893,1.921928,0.111111,2.121928
75%,-120.43969,34.473664,109.5,102.0,81.25,155.75,0.296873,2020.0,6.0,159.0,...,111.5,2.321928,104.777781,2.321928,81.75,2.321928,150.888885,2.221928,0.222222,2.321928
max,-119.832574,34.499965,119.0,127.0,94.0,180.0,0.661692,2020.0,6.0,159.0,...,118.777779,2.321928,126.888885,2.321928,94.888885,2.321928,179.888885,2.321928,0.555556,2.321928
