In [1]:
import os
import time
import pandas as pd
import numpy as np
import geopandas as gpd
import planetary_computer as pc
import rioxarray as rioxr

import rasterio as rio
from rasterio.crs import CRS

from shapely.geometry import box

from skimpy import clean_columns

#import raster_to_features as rf
import sample_rasters as sr

In [2]:
# Reference year for the analysis. In the code visible in this notebook it is only
# consumed by the (currently commented-out) canopy-height raster call.
year = 2020

In [3]:
# ---------------------------------------------------
# Read the validation points CSV into a GeoDataFrame (lon/lat columns, EPSG:4326)
# and pass the result through skimpy's clean_columns.
fp = os.path.join(os.getcwd(), 'ceo_vegetation_training_set_2020.csv')
raw_validation_pts = sr.geodataframe_from_csv(
    fp=fp,
    lon_label='lon',
    lat_label='lat',
    crs=CRS.from_epsg(4326),
)
validation_pts = clean_columns(raw_validation_pts)

# ---------------------------------------------------
# create auxiliary canopy height rasters: min, max, and avg (currently disabled)
#lidar_fps = rf.create_aux_canopyheight_rasters(year)

In [4]:
# Keep the columns from 'pl_scene' through 'geometry', then retain only the
# high-confidence points whose vegetation type is 'non-iceplant'.
pts = validation_pts.loc[:, 'pl_scene':'geometry']
keep_mask = (pts.confidence == 'High confidence') & (pts.type_of_vegetation == 'non-iceplant')
pts = pts[keep_mask]

In [5]:
# Map each NAIP scene id to the short name of its area of interest.
aois = {
    'ca_m_3412037_nw_10_060_20200607': 'point_conception',
    'ca_m_3412039_nw_10_060_20200522': 'gaviota',
    'ca_m_3412040_ne_10_060_20200522': 'capitan',
    'ca_m_3411934_sw_11_060_20200521': 'campus_lagoon',
    'ca_m_3411936_se_11_060_20200521': 'carpinteria',
}

# Scene ids in processing order (a dict iterates in insertion order).
scene_ids = list(aois)

# ---------------------------------------------------
# temporary folder for aux rasters
# os.makedirs also creates the parent 'temp' folder when it is missing (os.mkdir
# would raise FileNotFoundError) and is a no-op when the folder already exists.
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
os.makedirs(folp, exist_ok=True)


In [6]:
# ---------------------------------------------------
# Sample, scene by scene, the spectral value at every point plus window statistics
# (max, min, avg, entropy) of each band and of NDVI around the point.
# Result: sampled_points, a list with one GeoDataFrame per scene that has points.

# Loop-invariant column names, built once instead of once per scene.
spectral_names = ['r', 'g', 'b', 'nir']    # raster bands 1-4, in this order
band_names = spectral_names + ['ndvi']     # kept at module level: a later cell reads band_names
tags = ['_max', '_min', '_avg', '_entr']
window_cols = [band+tag for band in band_names for tag in tags]

sampled_points = []
for itemid in scene_ids:

    to_sample = pts[pts.pl_scene == itemid].geometry
    if to_sample.empty:
        # Nothing to sample in this scene; pd.concat below would raise on an empty list.
        continue

    # # ---------------------------------------------------
    # # (disabled) sample canopy height at point, and max, min and avg canopy height around point
    # canopy_h_samples = []
    # for i, tag in zip(range(4),['', '_max', '_min', '_avg']):
    #     aux_rast_r = rio.open(lidar_fps[i])
    #     canopy_h_samples.append(sr.sample_raster_from_pts(to_sample, aux_rast_r, ['lidar'+tag]))
    # ---------------------------------------------------
    # sample spectral bands from NAIP
    item = sr.get_item_from_id(itemid)
    scene_rast_r = sr.get_raster_from_item(item)

    spectral_bands = sr.sample_raster_from_pts(to_sample, scene_rast_r, spectral_names).set_index(to_sample.index)

    # ---------------------------------------------------
    # sample max, min, avg and entr from NAIP
    # points are reprojected to the scene CRS so the clip box matches the raster
    to_sample_match = to_sample.to_crs(scene_rast_r.crs)
    scene_rast = rioxr.open_rasterio(pc.sign(item.assets["image"].href))

    window_features = []
    for pt_pos in range(len(to_sample_match)):
        # double brackets keep a one-row GeoSeries (not a bare geometry)
        pt = to_sample_match.iloc[[pt_pos]]

        # clip scene to the bounding box of a 6-unit buffer around the point
        # (units are those of the scene CRS; presumably meters, confirm)
        reduce_box = box(*(pt.iloc[0].buffer(6).bounds))
        rast = scene_rast.rio.clip_box(*reduce_box.bounds)

        # save auxiliary rasters for R,G,B,NIR: max,min,avg,entr
        # (distinct loop variable: the original reused `i` and shadowed the point index)
        for band_pos, band in enumerate(spectral_names):
            sr.max_min_avg_rasters(raster=rast, band=band_pos+1, rast_name=band, n=3, folder_path=folp)
            sr.entropy_raster(raster=rast, band=band_pos+1, rast_name=band, n=3, folder_path=folp)
        # ------------------------------
        # make auxiliary NDVI of clipped scene
        ndvi = sr.ndvi_xarray(rast)

        # save auxiliary NDVI rasters: max,min,avg
        sr.max_min_avg_rasters(rast_data=ndvi,
                               crs=rast.rio.crs,
                               transf=rast.rio.transform(),
                               rast_name=band_names[4],
                               n=3,
                               folder_path=folp)

        # adjust ndvi to entropy input types: rescale and cast to uint8
        ndvi_scaled = ndvi*100 +100
        sr.entropy_raster(rast_data=ndvi_scaled.astype('uint8'),
                          crs=rast.rio.crs, transf=rast.rio.transform(),
                          rast_name=band_names[4],
                          n=3,
                          folder_path=folp)
        # ---------------------------------------
        # sample raster values for current point
        point_samples = []
        for col_name in window_cols:
            aux_fp = os.path.join(folp, col_name+'s.tif')
            # close the dataset before deleting the file: avoids leaking one handle
            # per column per point, and deleting an open file fails on Windows
            with rio.open(aux_fp) as aux_rast_r:
                sample = sr.sample_raster_from_pts(pt, aux_rast_r, [col_name])
            os.remove(aux_fp)
            point_samples.append(sample)

        # ---------------------------------------
        # one row holding all window-derived features of this point
        window_features.append(pd.concat(point_samples, axis = 1))
    # ---------------------------------------------------
    # concatenate sampled data, re-aligned on the original point index
    window_df = pd.concat(window_features).set_index(to_sample.index)
    #lidar_bands = pd.concat(canopy_h_samples, axis=1).set_index(to_sample.index)
    scene_df = pd.concat([to_sample, spectral_bands, window_df], axis=1)
    # ---------------------------------------------------
    # add date and naipid information
    # NOTE(review): pts was filtered to type_of_vegetation == 'non-iceplant' in an
    # earlier cell, yet every row is labelled iceplant = 1 here. Confirm whether 0 was intended.
    scene_info = {'year' : item.datetime.year,
                  'month' : item.datetime.month,
                  'day_in_year' : sr.day_in_year(item.datetime.day, item.datetime.month, item.datetime.year),
                  'naip_id' : itemid,
                  'aoi' : aois[itemid],
                  'iceplant' : 1}
    scene_df = scene_df.assign(**scene_info)
    # ---------------------------------------------------
    sampled_points.append(gpd.GeoDataFrame(scene_df))
    

In [8]:
# Assemble the per-scene samples into one table, ordered by the original point index.
samples = pd.concat(sampled_points).sort_index()

# ---------------------------------------
# create max-min difference columns
# The bands are listed explicitly instead of reusing a variable leaked from the
# sampling loop of a previous cell. NDVI window columns are not exported below,
# so no ndvi_diff is computed.
export_bands = ['r', 'g', 'b', 'nir']
diff_cols = {band + '_diff' : samples[band + '_max'] - samples[band + '_min']
             for band in export_bands}

# point coordinates and the CRS they are expressed in
samples = samples.assign(x = samples.geometry.x,
                         y = samples.geometry.y,
                         pts_crs = CRS.from_epsg(4326).to_string(),
                         **diff_cols)

# per band: raw value, window max/min, their difference, window avg and entropy
band_suffixes = ['', '_max', '_min', '_diff', '_avg', '_entr']
feature_cols = [band + suffix for band in export_bands for suffix in band_suffixes]

samples = samples[['x', 'y', 'pts_crs', 'aoi', 'naip_id']
                  + feature_cols
                  + ['year', 'month', 'day_in_year', 'iceplant']]
samples

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,r_max,r_min,r_diff,r_avg,...,nir,nir_max,nir_min,nir_diff,nir_avg,nir_entr,year,month,day_in_year,iceplant
0,-120.484881,34.497110,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,97,118,89,29,102.666664,...,110,127,100,27,115.111115,4.073330,2020,6,159,1
8,-120.465340,34.469920,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,90,99,90,9,92.888885,...,100,113,96,17,103.000000,4.349192,2020,6,159,1
16,-120.499539,34.494117,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,59,135,50,85,81.666664,...,111,147,103,44,120.666664,4.487123,2020,6,159,1
18,-120.471406,34.473210,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,96,118,93,25,100.111115,...,130,142,108,34,128.111115,4.461092,2020,6,159,1
19,-120.446698,34.455653,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,111,134,104,30,118.444443,...,161,167,135,32,155.888885,4.418157,2020,6,159,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,-119.565819,34.418144,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,85,92,82,10,85.222221,...,189,196,185,11,191.777771,3.486449,2020,5,142,1
495,-119.501487,34.393351,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,60,73,56,17,64.111115,...,192,199,190,9,194.444443,3.719376,2020,5,142,1
496,-119.562062,34.422923,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,127,128,89,39,112.222221,...,141,167,137,30,149.666672,4.556088,2020,5,142,1
497,-119.550608,34.414384,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,43,77,37,40,50.111111,...,92,167,84,83,123.333336,4.789015,2020,5,142,1


In [9]:
# Write the assembled sample table to the current working directory.
out_fp = os.path.join(os.getcwd(), 'vegetation_samples.csv')
samples.to_csv(out_fp)