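This notebook adds features sampled from NAIP images (R, G, B and NIR values, windowed max/min/average/entropy statistics, NDVI, and the image acquisition date) to the points in the CSV exported from Collect Earth, and saves the result as a new CSV.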

In [1]:
import os
import time
import pandas as pd
import numpy as np
import geopandas as gpd
import planetary_computer as pc
import rioxarray as rioxr

import rasterio as rio
from rasterio.crs import CRS

from shapely.geometry import box

from skimpy import clean_columns

#import raster_to_features as rf
import sample_rasters as sr

In [2]:
# ---------------------------------------------------
# Run configuration
# `iceplant` is the class label of this run: 1 keeps the points marked as iceplant,
# 0 keeps the points marked as non-iceplant; the value is also written to the output.
# NOTE(review): `year` is not referenced by the cells visible in this notebook.
year, iceplant = 2020, 0

# base name (without extension) of the csv written by the last cell
csv_name = 'extra_vegetation_samples_2023'

# columns of the Collect Earth table used below
itemid_col, confidence_col, iceplant_col = (
    'pl_scene',                     # NAIP scene id of each point
    'confidence_high_confidence',   # 100 when the label was given with high confidence
    'type_of_vegetation_iceplant',  # 100 when the point was labelled as iceplant
)

In [3]:
# ---------------------------------------------------
# Load the Collect Earth export as a GeoDataFrame of points.
# The points are built from the center_lon / center_lat columns in EPSG:4326;
# `crs` is reused further down when the output table is assembled.
crs = CRS.from_epsg(4326)
fp = os.path.join(os.getcwd(), 'ceo-extra_vegetation_training_set_2020-plot-data-2023-01-17.csv' )

raw_validation_pts = sr.geodataframe_from_csv(fp = fp,
                                              lon_label = 'center_lon',
                                              lat_label = 'center_lat',
                                              crs = crs)

# column names are passed through skimpy's clean_columns; the column names in the
# configuration cell are given in that cleaned form
validation_pts = clean_columns(raw_validation_pts)
validation_pts.head()

Unnamed: 0,plotid,center_lon,center_lat,shape,size_m,sample_points,email,flagged,flagged_reason,collection_time,analysis_duration,common_securewatch_date,total_securewatch_dates,pl_scene,type_of_vegetation_non_iceplant,type_of_vegetation_iceplant,confidence_high_confidence,confidence_low_confidence,geometry
0,0,-120.484881,34.49711,square,40.0,1,galaz-garcia@nceas.ucsb.edu,False,,2023-01-09 21:13,32.0 secs,,0,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.48488 34.49711)
1,1,-120.46546,34.452987,square,40.0,1,galaz-garcia@nceas.ucsb.edu,False,,2023-01-09 21:13,13.7 secs,,0,ca_m_3412037_nw_10_060_20200607,0.0,100.0,100.0,0.0,POINT (-120.46546 34.45299)
2,2,-120.470438,34.450327,square,40.0,1,galaz-garcia@nceas.ucsb.edu,False,,2023-01-17 22:39,7.6 secs,,0,ca_m_3412037_nw_10_060_20200607,0.0,100.0,0.0,100.0,POINT (-120.47044 34.45033)
3,3,-120.489551,34.499965,square,40.0,1,galaz-garcia@nceas.ucsb.edu,False,,2023-01-09 21:19,318.9 secs,,0,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.48955 34.49996)
4,4,-120.468204,34.464625,square,40.0,1,galaz-garcia@nceas.ucsb.edu,False,,2023-01-09 21:19,16.6 secs,,0,ca_m_3412037_nw_10_060_20200607,0.0,100.0,100.0,0.0,POINT (-120.46820 34.46462)


In [4]:
# keep only the columns from 'pl_scene' through 'geometry'
labelled_pts = validation_pts.loc[:, 'pl_scene':'geometry']

# rows to keep: high-confidence labels ...
keep = labelled_pts[confidence_col].eq(100)
# ... restricted to the class selected by `iceplant` (any other value applies no class filter)
if iceplant == 1:
    keep &= labelled_pts[iceplant_col].eq(100)
elif iceplant == 0:
    keep &= labelled_pts[iceplant_col].eq(0)

pts = labelled_pts[keep]
pts

Unnamed: 0,pl_scene,type_of_vegetation_non_iceplant,type_of_vegetation_iceplant,confidence_high_confidence,confidence_low_confidence,geometry
0,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.48488 34.49711)
3,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.48955 34.49996)
8,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.46534 34.46992)
16,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.49954 34.49412)
18,ca_m_3412037_nw_10_060_20200607,100.0,0.0,100.0,0.0,POINT (-120.47141 34.47321)
...,...,...,...,...,...,...
1095,ca_m_3411936_se_11_060_20200521,100.0,0.0,100.0,0.0,POINT (-119.54621 34.41000)
1096,ca_m_3411936_se_11_060_20200521,100.0,0.0,100.0,0.0,POINT (-119.53359 34.39955)
1097,ca_m_3411936_se_11_060_20200521,100.0,0.0,100.0,0.0,POINT (-119.55706 34.42189)
1098,ca_m_3411936_se_11_060_20200521,100.0,0.0,100.0,0.0,POINT (-119.53540 34.40230)


In [5]:
# Alternative input (disabled). Uncommenting this cell replaces `pts` with the points read
# from temp/capitan_iceplant_pts.csv (x / y columns, EPSG:26910) and overrides
# `crs`, `iceplant`, `itemid_col` and `csv_name` for the rest of the notebook.
# fp = os.path.join(os.getcwd(), 'temp','capitan_iceplant_pts.csv')
# crs = CRS.from_epsg(26910)
# pts = sr.geodataframe_from_csv(fp = fp, 
#                          lon_label = 'x', 
#                          lat_label = 'y', 
#                          crs = crs)
# iceplant = 1
# itemid_col = 'naip_id'
# csv_name = 'cap_ice_pts'
# pts = pts.drop(['Unnamed: 0'],axis=1)

In [6]:
# ---------------------------------------------------
# NAIP scene id -> name of the area of interest it covers
aois = {
    'ca_m_3412037_nw_10_060_20200607' : 'point_conception',
    'ca_m_3412039_nw_10_060_20200522' : 'gaviota',
    'ca_m_3412040_ne_10_060_20200522' : 'capitan',
    'ca_m_3411934_sw_11_060_20200521' : 'campus_lagoon',
    'ca_m_3411936_se_11_060_20200521' : 'carpinteria',
}

# scenes are processed in this order (dicts keep insertion order)
scene_ids = list(aois)

# ---------------------------------------------------
# temporary folder for aux rasters
# makedirs also creates the parent 'temp' folder when it is missing (os.mkdir would
# raise FileNotFoundError in that case) and is a no-op when the folder already exists
folp = os.path.join(os.getcwd(), 'temp', 'aux_naip_rasters')
os.makedirs(folp, exist_ok=True)


In [7]:
# ---------------------------------------------------
# Sample NAIP features for the points of every scene.
# For each scene that has points this collects one GeoDataFrame with:
#   - the point geometry,
#   - the r, g, b, nir values sampled at the point,
#   - <band>_max/_min/_avg/_entr values for r, g, b, nir and ndvi, sampled from auxiliary
#     rasters computed on a small clip of the scene around the point,
#   - date, scene id, aoi and class label columns.
# The per-scene frames are collected in `sampled_points`.

# names of the bands sampled directly from the scene (raster bands 1..4)
spectral_band_names = ['r', 'g', 'b', 'nir']
# bands for which window features are computed; `band_names` is kept as a notebook-level
# name because the next cell iterates over it
band_names = spectral_band_names + ['ndvi']
tags = ['_max', '_min', '_avg', '_entr']
window_cols = [band+tag for band in band_names for tag in tags]

sampled_points = []
for itemid in scene_ids:

    to_sample = pts[pts[itemid_col] == itemid].geometry
    if len(to_sample) == 0:
        # no points fall in this scene
        continue

    # ---------------------------------------------------
    # sample spectral bands from NAIP
    item = sr.get_item_from_id(itemid)
    scene_rast_r = sr.get_raster_from_item(item)
    spectral_bands = sr.sample_raster_from_pts(to_sample,
                                               scene_rast_r,
                                               spectral_band_names).set_index(to_sample.index)

    # ---------------------------------------------------
    # sample max, min, avg and entrs from NAIP
    # points are reprojected to the scene's crs so the buffer below is in raster units
    to_sample_match = to_sample.to_crs(scene_rast_r.crs)
    scene_rast = rioxr.open_rasterio(pc.sign(item.assets["image"].href))

    window_rows = []   # one single-row frame of window features per point
    for pt_pos in range(len(to_sample_match)):
        pt = to_sample_match.iloc[[pt_pos]]

        # clip scene to box around point (bounding box of a buffer of 6 crs units)
        reduce_box = box(*(pt.iloc[0].buffer(6).bounds))
        rast = scene_rast.rio.clip_box(*reduce_box.bounds)

        # save auxiliary rasters for R,G,B,NIR: max,min,avg,entr
        # NOTE(review): n=3 is presumably the window size used by the helpers - confirm in sample_rasters
        for band_pos, band in enumerate(spectral_band_names, start=1):
            sr.max_min_avg_rasters(raster=rast, band=band_pos, rast_name=band, n=3, folder_path=folp)
            sr.entropy_raster(raster=rast, band=band_pos, rast_name=band, n=3, folder_path=folp)

        # ------------------------------
        # make auxiliary NDVI of clipped scene
        ndvi = sr.ndvi_xarray(rast)

        # save auxiliary NDVI rasters: max,min,avg
        sr.max_min_avg_rasters(rast_data=ndvi,
                               crs=rast.rio.crs,
                               transf=rast.rio.transform(),
                               rast_name='ndvi',
                               n=3,
                               folder_path=folp)

        # adjust ndvi to entropy input types:
        # rescale before casting to uint8 (values in [-1, 1] map to [0, 200])
        ndvi_scaled = ndvi*100 +100
        sr.entropy_raster(rast_data=ndvi_scaled.astype('uint8'),
                          crs=rast.rio.crs,
                          transf=rast.rio.transform(),
                          rast_name='ndvi',
                          n=3,
                          folder_path=folp)

        # ---------------------------------------
        # sample raster values for current point
        # the auxiliary rasters are read back from <folp>/<band><tag>s.tif and deleted after use
        pt_samples = []
        for col_name in window_cols:
            aux_fp = os.path.join(folp, col_name+'s.tif')
            # close the dataset before deleting the file: avoids leaking one handle per
            # raster and lets the removal succeed where open files cannot be deleted
            with rio.open(aux_fp) as aux_rast_r:
                pt_samples.append(sr.sample_raster_from_pts(pt, aux_rast_r, [col_name]))
            os.remove(aux_fp)

        # ---------------------------------------
        # one row with all window features of this point
        window_rows.append(pd.concat(pt_samples, axis = 1))

    # ---------------------------------------------------
    # concatenate sampled data
    window_features = pd.concat(window_rows).set_index(to_sample.index)
    df = pd.concat([to_sample, spectral_bands, window_features], axis=1)

    # ---------------------------------------------------
    # add date and naipid information
    kwargs = {'year' : item.datetime.year,
              'month' : item.datetime.month,
              'day_in_year' : sr.day_in_year(item.datetime.day, item.datetime.month, item.datetime.year),
              'naip_id' : itemid,
              'aoi' : aois[itemid],
              'iceplant' : iceplant}
    df = df.assign(**kwargs)

    # ---------------------------------------------------
    sampled_points.append(gpd.GeoDataFrame(df))


In [8]:
# ---------------------------------------
# Assemble the final feature table from the per-scene frames in `sampled_points`.
samples = pd.concat(sampled_points).sort_index()

# bands that have <band>_max / <band>_min window columns; listed here explicitly so this
# cell does not depend on a variable left over from the sampling loop
window_bands = ['r', 'g', 'b', 'nir', 'ndvi']

# max-min difference columns
diff_cols = {band + '_diff' : samples[band + '_max'] - samples[band + '_min']
             for band in window_bands}

# ndvi at the point itself, computed from the sampled r and nir values
# cast to int16 first so nir - r can be negative
# (presumably the sampled bands are unsigned 8-bit - TODO confirm)
red = samples.r.astype('int16')
nir = samples.nir.astype('int16')

samples = samples.assign(**diff_cols,
                         x = lambda df : df.geometry.x,   # point coordinates, in `crs`
                         y = lambda df : df.geometry.y,
                         pts_crs = crs.to_string(),
                         ndvi = (nir - red)/(nir + red))

# keep and order the output columns (geometry is replaced by x, y, pts_crs)
samples = samples[['x', 'y', 'pts_crs',
                    'aoi','naip_id',
                    'r', 'r_max', 'r_min', 'r_diff', 'r_avg', 'r_entr', # spectral
                    'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
                    'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr',
                    'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg', 'nir_entr',
                    'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr',
                    'year', 'month', 'day_in_year',
                    'iceplant']]
samples

Unnamed: 0,x,y,pts_crs,aoi,naip_id,r,r_max,r_min,r_diff,r_avg,...,ndvi,ndvi_max,ndvi_min,ndvi_diff,ndvi_avg,ndvi_entr,year,month,day_in_year,iceplant
0,-120.484881,34.497110,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,97,118,89,29,102.666664,...,0.062802,0.097674,0.016667,0.081008,0.000000,3.741963,2020,6,159,0
3,-120.489551,34.499965,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,119,131,102,29,114.222221,...,0.081081,0.224335,0.004651,0.219683,0.000000,3.948859,2020,6,159,0
8,-120.465340,34.469920,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,90,99,90,9,92.888885,...,0.052632,0.097087,0.005025,0.092062,0.000000,3.253521,2020,6,159,0
16,-120.499539,34.494117,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,59,135,50,85,81.666664,...,0.305882,0.354839,0.042553,0.312286,0.111111,4.116265,2020,6,159,0
18,-120.471406,34.473210,EPSG:4326,point_conception,ca_m_3412037_nw_10_060_20200607,96,118,93,25,100.111115,...,0.150442,0.166667,0.074627,0.092040,0.111111,4.004364,2020,6,159,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,-119.546213,34.410002,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,128,133,69,64,100.666664,...,0.149502,0.298893,0.149502,0.149391,0.222222,4.090234,2020,5,142,0
1096,-119.533589,34.399554,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,89,106,89,17,96.000000,...,0.219298,0.239669,-0.004739,0.244409,0.111111,3.715932,2020,5,142,0
1097,-119.557057,34.421892,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,102,114,101,13,106.000000,...,0.157025,0.157025,0.090909,0.066116,0.111111,3.302139,2020,5,142,0
1098,-119.535401,34.402302,EPSG:4326,carpinteria,ca_m_3411936_se_11_060_20200521,86,101,71,30,87.555557,...,0.070270,0.222222,-0.036585,0.258808,0.000000,4.017825,2020,5,142,0


In [9]:
# write the feature table to <csv_name>.csv in the current working directory,
# without the row index
out_fp = os.path.join(os.getcwd(), csv_name + '.csv')
samples.to_csv(out_fp, index=False)