In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [4]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

itemids = pd.read_csv(sr.path_to_aoi_itemids_csv())

# csv with the points for which to add spectral window features
csv_name = 'glcm_false_positives_model2k.csv'
fp = '/home/jovyan/msai4earth-esa/iceplant_detection/processing_results/model_2k/'+ csv_name
#fp = os.path.join(os.getcwd(),'temp',csv_name)
all_pts = pd.read_csv(fp)

entropy_r = 5
box_side = entropy_r *2 +1

# ***************************************************
# ***************************************************

In [6]:
all_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'r_max', 'r_min', 'r_diff',
       'r_avg', 'r_entr', 'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
       'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr', 'nir', 'nir_max',
       'nir_min', 'nir_diff', 'nir_avg', 'nir_entr', 'ndvi', 'ndvi_max',
       'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr', 'year', 'month',
       'day_in_year', 'r_contE', 'r_corrE', 'g_contE', 'g_corrE', 'b_contE',
       'b_corrE', 'nir_contE', 'nir_corrE', 'r_contN', 'r_corrN', 'g_contN',
       'g_corrN', 'b_contN', 'b_corrN', 'nir_contN', 'nir_corrN', 'r_entr5',
       'g_entr5', 'b_entr5', 'nir_entr5', 'ndvi_entr5', 'iceplant'],
      dtype='object')

In [7]:
# temporary folder for aux rasters
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
if os.path.exists(folp) == False:
    os.mkdir(folp)

In [8]:
t0 = time.time() # initial time tracker

sampled_pts = [] # sampled pts from each scene are collected here

N = len(itemids)  # counter to finish

crss = all_pts.pts_crs.unique()

for i in range(len(itemids)):
    # ---------------------------------------
    # open raster reader for NAIP scene
    itemid = itemids.itemid[i]
    item = sr.get_item_from_id(itemid)    
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href) 

    all_pts_scene = all_pts.loc[all_pts['naip_id'] == itemid]
    if len(all_pts_scene) !=0:
        for crs_str in crss:
            pts_scene = all_pts_scene[all_pts_scene.pts_crs == crs_str]  
    
            if len(pts_scene) !=0:
                crs = CRS.from_string(crs_str)
                pts_scene_df = sr.geodataframe_from_csv(df = pts_scene, lon_label='x', lat_label='y', crs=crs)
                pts_col = pts_scene_df.to_crs(naip_rast_r.rio.crs).geometry

                samples = []
                for pt in pts_col:
                    # this creates a box centered at point with side length=entropy_r*2 meters
                    reduce_box = box(*(pt.buffer(entropy_r).bounds))            
                    # clip NAIP scene
                    rast = naip_rast_r.rio.clip_box(*reduce_box.bounds)

                    # save auxiliary entropy rasters for R,G,B,NIR bands of clipped scene
                    band_names = ['r_', 'g_', 'b_', 'nir_']
#                    tags = ['_entrs']
                    tags = ['_avgs']
                    window_fps = []
                    window_cols = []

                    for band_name, band_n in zip(band_names,range(1,5)):
                        rast_name = band_name + itemid + '_pt'
                        #sr.entropy_raster(raster = rast, band=band_n, rast_name=rast_name, n=entropy_r, folder_path=folp)
                        sr.avg_raster(raster = rast, band=band_n, rast_name=rast_name, n=box_side, folder_path=folp)

                        for tag in tags:
                            window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))        
                            window_cols.append( band_name.replace('_','')+tag.replace('s',str(entropy_r)))

                    # ------------------------------
#                     # make auxiliary NDVI of clipped scene
#                     ndvi = sr.ndvi_xarray(rast)

#                     # make auxiliary NDVI entropy
#                     band_names.append('ndvi_')
#                     rast_name = 'ndvi_' + itemid + '_pt'

#                     # adjusting to entropy input types
#                     ndvi = ndvi*100 +100
#                     sr.entropy_raster(rast_data=ndvi.astype('uint8'), 
#                                       crs=rast.rio.crs, 
#                                       transf=rast.rio.transform(), 
#                                       rast_name=rast_name, 
#                                       n=entropy_r, 
#                                       folder_path=folp)

#                     for tag in tags:
#                         window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))        
#                         window_cols.append( 'ndvi'+tag.replace('s',str(entropy_r)))

                    # ---------------------------------------
                    # sample raster values for points in this scene
                    pt_samples = []
                    for fp, col_name in zip(window_fps, window_cols):
                        rast_r = rasterio.open(fp)
                        pt_df = gpd.GeoDataFrame({'geometry':[pt]}, crs=pts_col.crs)
                        sample = sr.sample_raster_from_pts(pt_df.geometry, rast_r, [col_name])    
                        pt_samples.append(sample)
                        os.remove(fp)
                    samples.append(pd.concat(pt_samples, axis=1))
                # ---------------------------------------
                # Add all derived spectral data to pts dataframe
                new_features = pd.concat(samples)
                pts = pd.concat([pts_scene, new_features.set_index(pts_col.index)], axis=1)                

                # -----------------------------
                # collect all points from each polygon in the scene
                sampled_pts.append(pts)

    # ---------------------------------------
    # processing message
    N = N-1                
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')       
     
# ---------------------------------------
# create data frame with all points
sampled_pts= pd.concat(sampled_pts).sort_index()

FINISHED PROCESSINGss


In [9]:
sampled_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'r_max', 'r_min', 'r_diff',
       'r_avg', 'r_entr', 'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
       'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr', 'nir', 'nir_max',
       'nir_min', 'nir_diff', 'nir_avg', 'nir_entr', 'ndvi', 'ndvi_max',
       'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr', 'year', 'month',
       'day_in_year', 'r_contE', 'r_corrE', 'g_contE', 'g_corrE', 'b_contE',
       'b_corrE', 'nir_contE', 'nir_corrE', 'r_contN', 'r_corrN', 'g_contN',
       'g_corrN', 'b_contN', 'b_corrN', 'nir_contN', 'nir_corrN', 'r_entr5',
       'g_entr5', 'b_entr5', 'nir_entr5', 'ndvi_entr5', 'iceplant', 'geometry',
       'r_avg5', 'g_avg5', 'b_avg5', 'nir_avg5'],
      dtype='object')

In [10]:
sampled_pts = sampled_pts[['x', 'y', 'pts_crs', #  point location
             'aoi', 'naip_id', #'polygon_id',  # sampling info
             'r', 'r_max', 'r_min', 'r_diff', 'r_avg', 'r_entr', # spectral
             'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
             'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr',
             'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg', 'nir_entr',
             'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr',   
             'year', 'month', 'day_in_year', # date
            'r_entr5','g_entr5', 'b_entr5', 'nir_entr5', 'ndvi_entr5',  
             'r_avg5','g_avg5', 'b_avg5', 'nir_avg5',
            'r_contN', 'r_contE', 'r_corrN', 'r_corrE',
            'g_contN', 'g_contE', 'g_corrN', 'g_corrE',                            
            'b_contN', 'b_contE', 'g_corrN', 'b_corrE',                            
            'nir_contN', 'nir_contE', 'nir_corrN', 'nir_corrE',                                                       
             'iceplant'
             ]] 


In [11]:
sampled_pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'r', 'r_max', 'r_min', 'r_diff',
       'r_avg', 'r_entr', 'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
       'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr', 'nir', 'nir_max',
       'nir_min', 'nir_diff', 'nir_avg', 'nir_entr', 'ndvi', 'ndvi_max',
       'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr', 'year', 'month',
       'day_in_year', 'r_entr5', 'g_entr5', 'b_entr5', 'nir_entr5',
       'ndvi_entr5', 'r_avg5', 'g_avg5', 'b_avg5', 'nir_avg5', 'r_contN',
       'r_contE', 'r_corrN', 'r_corrE', 'g_contN', 'g_contE', 'g_corrN',
       'g_corrE', 'b_contN', 'b_contE', 'g_corrN', 'b_corrE', 'nir_contN',
       'nir_contE', 'nir_corrN', 'nir_corrE', 'iceplant'],
      dtype='object')

In [12]:
fp = os.path.join(os.getcwd(),'temp', 'spectral_window_'+csv_name)
sampled_pts.to_csv(fp, index=False)