In [1]:
import os
import time
import pandas as pd
import numpy as np

import geopandas as gpd
import rioxarray as rioxr
import rasterio

import sample_rasters as sr
from rasterio.crs import CRS

from shapely.geometry import box

import planetary_computer as pc

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# table of NAIP scenes to process; the loop below reads its
# itemid, aoi_name and year columns
itemids = pd.read_csv(sr.path_to_aoi_itemids_csv())

# csv with the points for which to add spectral window features
# (csv_name is also used to name the output file, so the input path is
#  derived from it to keep the two from drifting apart)
csv_name = 'model3070_train_2020.csv'
csv_dir = '/home/jovyan/msai4earth-esa/iceplant_detection/processing_results/model_3070'
fp = os.path.join(csv_dir, csv_name)
#fp = os.path.join(os.getcwd(),'temp',csv_name)
all_pts = pd.read_csv(fp)

# passed as n to sr.entropy_raster; also scales the buffer around each polygon
entropy_r = 5

# ***************************************************
# ***************************************************

In [3]:
# temporary folder for aux rasters
# makedirs also creates the parent 'temp' folder when it is missing and
# does nothing if the folder already exists, so the cell is safe to re-run
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

In [4]:
t0 = time.time() # initial time tracker

sampled_pts = [] # sampled pts from each poly are collected here

N = len(itemids)  # counter to finish

# prefixes of the four NAIP bands, in raster band order (bands 1 to 4)
naip_band_names = ['r_', 'g_', 'b_', 'nir_']

# suffixes of the auxiliary rasters that get sampled
# (the '_maxs', '_mins', '_avgs' window rasters are currently disabled)
tags = ['_entrs']

for i in range(len(itemids)):
    # ---------------------------------------
    # open raster reader for NAIP scene
    itemid = itemids.itemid[i]
    item = sr.get_item_from_id(itemid)
    href = pc.sign(item.assets["image"].href)
    naip_rast_r = rioxr.open_rasterio(href)

    # ---------------------------------------
    # find polygons for that NAIP scene
    poly_fp = sr.path_to_polygons(itemids.iloc[i].aoi_name, itemids.iloc[i].year)
    polys = gpd.read_file(poly_fp)

    # iterate through polygons in scene
    for j in list(polys.id):
        # ---------------------------------------
        # find points in current polygon
        pts_poly = all_pts.loc[ (all_pts['naip_id'] == itemid) & (all_pts['polygon_id'] == j)]
        if len(pts_poly) == 0:
            continue  # nothing to sample in this polygon

        crs = CRS.from_string(pts_poly.pts_crs.iloc[0])
        pts_poly_df = sr.geodataframe_from_csv(df = pts_poly, lon_label='x', lat_label='y', crs=crs)
        pts_col = pts_poly_df.to_crs(naip_rast_r.rio.crs).geometry

        # ---------------------------------------
        # create enlarged bounding box

        # (this is ugly, but unfortunately the index and the polygon.id do not match in some files)
        poly_index = polys.index[polys['id']==j].tolist()[0]

        poly = polys.geometry[poly_index]
        reduce = gpd.GeoDataFrame({'geometry':[box(*poly.bounds)]}, crs=polys.crs)
        reduce = reduce.to_crs(naip_rast_r.rio.crs)
        poly = reduce.geometry[0]  # poly in scene's crs

        # enlarge the box by twice the entropy radius
        # (presumably so windows around points near the polygon edge stay
        #  inside the clipped raster; confirm)
        reduce_box = box(*(poly.buffer(entropy_r*2).bounds))

        # ---------------------------------------
        # clip NAIP scene
        rast = naip_rast_r.rio.clip_box(*reduce_box.bounds)

        # ---------------------------------------
        # save auxiliary entropy rasters for R,G,B,NIR bands of clipped scene
        window_fps = []   # paths of the aux rasters written for this polygon
        window_cols = []  # column name to use for the samples of each raster

        for band_name, band_n in zip(naip_band_names, range(1,5)):
            rast_name = band_name + itemid + '_poly_'+str(j)
          #  sr.max_min_avg_rasters(raster = rast, band=band_n, rast_name=rast_name, n=3, folder_path=folp)
            sr.entropy_raster(raster = rast, band=band_n, rast_name=rast_name, n=entropy_r, folder_path=folp)

            for tag in tags:
                window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
                window_cols.append( band_name.replace('_','')+tag.replace('s',''))

        # ------------------------------
        # make auxiliary NDVI of clipped scene
        ndvi = sr.ndvi_xarray(rast)

        # make auxiliary NDVI entropy
        rast_name = 'ndvi_' + itemid + '_poly_'+str(j)

        #sr.max_min_avg_rasters(rast_data=ndvi, crs=rast.rio.crs, transf=rast.rio.transform(), rast_name=rast_name, n=3, folder_path=folp)

        # adjusting to entropy input types: rescale, then cast to uint8
        ndvi = ndvi*100 +100
        sr.entropy_raster(rast_data=ndvi.astype('uint8'),
                          crs=rast.rio.crs,
                          transf=rast.rio.transform(),
                          rast_name=rast_name,
                          n=entropy_r,
                          folder_path=folp)

        for tag in tags:
            window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))
            window_cols.append( 'ndvi'+tag.replace('s',''))

        # ---------------------------------------
        # sample raster values for points in current polygon
        # (each reader is closed before its file is deleted below; this
        #  assumes sr.sample_raster_from_pts returns materialized data, which
        #  pd.concat below needs anyway)
        samples = []
        for window_fp, col_name in zip(window_fps, window_cols):
            with rasterio.open(window_fp) as rast_r:
                sample = sr.sample_raster_from_pts(pts_col, rast_r, [col_name])
            samples.append(sample)

        # ---------------------------------------
        # Add all derived spectral data to pts dataframe
        new_features = pd.concat(samples, axis = 1)
        pts = pd.concat([pts_poly, new_features.set_index(pts_poly_df.index)], axis=1)

        # -----------------------------
        # collect all points from each polygon in the scene
        sampled_pts.append(pts)

        # ---------------------------------------
        # delete aux entropy rasters
        for window_fp in window_fps:
            os.remove(window_fp)

    # ---------------------------------------
    # processing message
    N = N-1
    print('REMAINING: ', N, 'scenes', end="\r")

print('FINISHED PROCESSING')


# ---------------------------------------
# create data frame with all points
if len(sampled_pts) == 0:
    raise ValueError('no points in all_pts matched any (naip_id, polygon_id) pair of the listed scenes')
sampled_pts= pd.concat(sampled_pts).sort_index()

# ---------------------------------------
# create max-min difference columns
# NOTE(review): only entropy rasters are sampled above, so the '<band>max' and
# '<band>min' columns must already be present in the input csv; confirm
band_names = naip_band_names + ['ndvi_']
for band in band_names:
    col_name = band + 'diff'
    sampled_pts[col_name] = sampled_pts[band +'max'] - sampled_pts[band +'min']

print(time.time()-t0)

FINISHED PROCESSINGss
15.030651807785034


In [None]:
# inspect the combined dataframe built by the sampling loop above
sampled_pts

In [None]:
# drop the 'Unnamed: 0' column if it is there
# (presumably a saved index column from the input csv; confirm)
# errors='ignore' keeps the cell safe to re-run and to use on a csv without it
all_pts = all_pts.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Copy the newly sampled entropy values into all_pts under '<band>_entr5'.
# Each '<band>_entr' label is selected as a two-dimensional frame here and
# position 1 (the second column) is taken; presumably position 0 is the
# entropy column that was already in the input csv (confirm).
entr5_sources = {
    'r_entr5': 'r_entr',
    'g_entr5': 'g_entr',
    'b_entr5': 'b_entr',
    'nir_entr5': 'nir_entr',
    'ndvi_entr5': 'ndvi_entr',
}
for new_col, source_col in entr5_sources.items():
    all_pts[new_col] = sampled_pts[source_col].iloc[:, 1]

In [None]:
# list the columns currently in all_pts, to check the new '<band>_entr5' columns
all_pts.columns

In [6]:
# ---------------------------------------
# keep only the columns of interest, in a fixed order
location_cols = ['x', 'y', 'pts_crs']               # point location
sampling_cols = ['aoi', 'naip_id', 'polygon_id']    # sampling info
spectral_cols = [                                   # spectral
    'r', 'r_max', 'r_min', 'r_diff', 'r_avg', 'r_entr',
    'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
    'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr',
    'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg', 'nir_entr',
    'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr',
]
date_cols = ['year', 'month', 'day_in_year']        # date
# lidar columns are currently left out:
#   'lidar', 'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar'
entr5_cols = ['r_entr5', 'g_entr5', 'b_entr5', 'nir_entr5', 'ndvi_entr5']
label_cols = ['iceplant']

all_pts = all_pts[
    location_cols
    + sampling_cols
    + spectral_cols
    + date_cols
    + entr5_cols
    + label_cols
]

# lidar renaming, disabled together with the lidar columns:
# sampled_pts = sampled_pts.rename(columns={'max_lidar':'lidar_max',
#                            'min_lidar':'lidar_min',
#                            'min_max_diff':'lidar_diff',
#                            'avg_lidar':'lidar_avg'})

In [8]:
# ------------------------------
## Write the points with the added spectral window features to
## <cwd>/temp/spectral_window_<csv_name>, without the dataframe index
out_dir = os.path.join(os.getcwd(), 'temp')
out_name = 'spectral_window_' + csv_name
fp = os.path.join(out_dir, out_name)
all_pts.to_csv(fp, index=False)
