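This notebook adds spectral features sampled from NAIP images to a set of points, read either from a csv (such as the output csv from Collect Earth) or from a shapefile, and saves the result as a csv.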

In [1]:
import os
import time
import pandas as pd
import numpy as np
import geopandas as gpd
import planetary_computer as pc
import rioxarray as rioxr

import rasterio as rio
from rasterio.crs import CRS

from shapely.geometry import box

#from skimpy import clean_columns

#import raster_to_features as rf
import sample_rasters as sr

In [2]:
# ===========================================
# Run settings
# ===========================================
# year this run refers to
year = 2020
# value written to the 'iceplant' column of every sampled point;
# for CEO samples it also selects which points are kept (1 or 0)
iceplant = 0

# ===========================================
# Input points
# ===========================================
# file path to csv or shp with points to add spectral info
# (alternative inputs kept for reference)
#fp = os.path.join(os.getcwd(), 'ceo-extra_vegetation_training_set_2020-plot-data-2023-01-17.csv' )
#fp = '/home/jovyan/msai4earth-esa/iceplant_detection/processing_results/model_2k/false_positives_model2k_shp/false_positives_model2k.shp'
fp = '/home/jovyan/msai4earth-esa/iceplant_detection/validation_results_spectral_2020/validation_results_spectral_2020.shp'

# format of the file at `fp`: csv or shapefile
csv, shp = False, True

# --- only used when the file is a csv ---
# coordinate reference system of the csv coordinates
crs = CRS.from_epsg(4326)
# True when the csv is a CEO output
CEO = False
# (CEO samples) name of column with confidence level
confidence_col = 'confidence_high_confidence'

# ===========================================
# Column names in the points table
# ===========================================
# column holding the itemid of the NAIP scene containing the point, if there is one
itemid_col = 'naip_id'
# column indicating whether the point is iceplant (1) or other vegetation (0)
iceplant_col = 'iceplant'

# ===========================================
# Output
# ===========================================
# name (without extension) of the output csv with the added spectral features
csv_name = 'validation_pts2020_3070model'

In [3]:
# open validation points as geodataframe
if not (csv or shp):
    # fail early with a clear message instead of a NameError on `validation_pts` below
    raise ValueError("Set either `csv` or `shp` to True so the points file can be read.")

if csv:
    # NOTE(review): `clean_columns` is provided by skimpy, whose import is
    # commented out in the imports cell; re-enable it before using csv input.
    validation_pts = clean_columns(sr.geodataframe_from_csv(fp = fp, 
                                                            lon_label = 'center_lon', 
                                                            lat_label = 'center_lat', 
                                                            crs = crs))
if shp:
    validation_pts = gpd.read_file(fp)

# preview the first rows
validation_pts.head(3)

Unnamed: 0,r,g,b,nir,lidar,max_lidar,min_lidar,avg_lidar,year,month,day_in_yea,naip_id,pl_which_r,email,analysis_d,low_confid,map_class,ref_class,geometry
0,114,135,144,36,-9999,-9999,-9999,-2717.222168,2020,6,159,ca_m_3412037_ne_10_060_20200607,0,galaz-garcia@nceas.ucsb.edu,9.3 secs,0.0,3,3,POINT (-120.37323 34.45406)
1,91,98,81,154,2,4,0,2.111111,2020,5,142,ca_m_3411936_sw_11_060_20200521,2,galaz-garcia@nceas.ucsb.edu,5.9 secs,0.0,1,0,POINT (-119.58537 34.42428)
2,109,114,104,131,1,3,0,1.111111,2020,5,143,ca_m_3412039_ne_10_060_20200522,0,galaz-garcia@nceas.ucsb.edu,222.9 secs,0.0,0,0,POINT (-120.16952 34.47180)
3,103,119,109,159,2,3,1,1.777778,2020,5,143,ca_m_3412040_ne_10_060_20200522,0,galaz-garcia@nceas.ucsb.edu,155.6 secs,0.0,1,0,POINT (-120.00898 34.46674)
4,123,130,107,155,0,2,0,0.555556,2020,5,143,ca_m_3411933_nw_11_060_20200522,2,galaz-garcia@nceas.ucsb.edu,155.7 secs,100.0,0,0,POINT (-119.97703 34.45705)


In [4]:
# Points to sample. Non-CEO inputs are used unchanged; CEO outputs are
# trimmed to the 'pl_scene'..'geometry' columns and filtered by confidence
# and by the requested iceplant label.
if CEO:
    ceo_cols = validation_pts.loc[:, 'pl_scene':'geometry']
    # keep only points whose confidence column equals 100
    confident = ceo_cols[ceo_cols[confidence_col] == 100]

    # value expected in the iceplant column for the requested label
    if iceplant == 1:
        label_value = 100
    elif iceplant == 0:
        label_value = 0
    else:
        label_value = None   # any other setting: no label filtering

    if label_value is None:
        pts = confident
    else:
        pts = confident[confident[iceplant_col] == label_value]
else:
    pts = validation_pts

In [6]:
# fp = os.path.join(os.getcwd(), 'temp','capitan_iceplant_pts.csv')
# crs = CRS.from_epsg(26910)
# pts = sr.geodataframe_from_csv(fp = fp, 
#                          lon_label = 'x', 
#                          lat_label = 'y', 
#                          crs = crs)
# iceplant = 1
# itemid_col = 'naip_id'
# csv_name = 'cap_ice_pts'
# pts = pts.drop(['Unnamed: 0'],axis=1)

In [7]:
# NAIP scene item id -> name of the area of interest it covers
aois = {
    'ca_m_3412037_nw_10_060_20200607': 'point_conception',
    'ca_m_3412039_nw_10_060_20200522': 'gaviota',
    'ca_m_3412040_ne_10_060_20200522': 'capitan',
    'ca_m_3411934_sw_11_060_20200521': 'campus_lagoon',
    'ca_m_3411936_se_11_060_20200521': 'carpinteria',
}

# scene item ids, in the same order as the areas listed above
scene_ids = list(aois)

In [9]:
# Item ids of the NAIP scenes listed in the coastal scenes csv,
# restricted to the `year` set in the configuration cell.
coastal_scenes = pd.read_csv('/home/jovyan/msai4earth-esa/iceplant_detection/separating_naip_flights/coastal_scenes_ids.csv')
scene_ids = coastal_scenes.loc[coastal_scenes.year == year, 'itemid'].reset_index(drop=True)

# ---------------------------------------------------
# temporary folder for aux rasters
folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
# makedirs also creates the parent 'temp' folder and is a no-op when the folder exists
os.makedirs(folp, exist_ok=True)

In [10]:
# ---------------------------------------------------
# For every NAIP scene that contains points, sample:
#   * the r, g, b, nir values at each point, and
#   * max / min / avg / entropy window features (n=3) for r, g, b, nir and ndvi,
# then collect one GeoDataFrame per scene in `sampled_points`.

# names of the bands sampled directly from the scene
spectral_band_names = ['r', 'g', 'b', 'nir']
# all bands that get window features; kept as a notebook-level name
# because later cells may read it
band_names = spectral_band_names + ['ndvi']
tags = ['_max', '_min', '_avg', '_entr']
window_cols = [band+tag for band in band_names for tag in tags]

sampled_points = []
for itemid in scene_ids:

    to_sample = pts[pts[itemid_col] == itemid].geometry
    if len(to_sample) == 0:
        # no points fall in this scene
        continue

    # ---------------------------------------------------
    # sample spectral bands from NAIP
    item = sr.get_item_from_id(itemid)
    scene_rast_r = sr.get_raster_from_item(item)

    spectral_bands = sr.sample_raster_from_pts(to_sample, scene_rast_r, spectral_band_names).set_index(to_sample.index)

    # ---------------------------------------------------
    # sample max, min, avg and entr from NAIP
    # points must be in the scene's CRS before clipping around them
    to_sample_match = to_sample.to_crs(scene_rast_r.crs)
    scene_rast = rioxr.open_rasterio(pc.sign(item.assets["image"].href))

    window_features = []
    for pt_pos in range(len(to_sample_match)):
        # double brackets keep `pt` as a one-row GeoSeries
        pt = to_sample_match.iloc[[pt_pos]]

        # clip scene to the bounding box of a buffer of 6 (scene CRS units) around the point
        rast = scene_rast.rio.clip_box(*pt.iloc[0].buffer(6).bounds)

        # save auxiliary rasters for R,G,B,NIR: max,min,avg,entr
        # (band numbers passed to the helpers start at 1)
        for band_num, band_name in enumerate(spectral_band_names, start=1):
            sr.max_min_avg_rasters(raster=rast, band=band_num, rast_name=band_name, n=3, folder_path=folp)
            sr.entropy_raster(raster=rast, band=band_num, rast_name=band_name, n=3, folder_path=folp)

        # ------------------------------
        # make auxiliary NDVI of clipped scene
        ndvi = sr.ndvi_xarray(rast)

        # save auxiliary NDVI rasters: max,min,avg
        sr.max_min_avg_rasters(rast_data=ndvi,
                               crs=rast.rio.crs,
                               transf=rast.rio.transform(),
                               rast_name='ndvi',
                               n=3,
                               folder_path=folp)

        # adjust ndvi to entropy input types: rescale, then cast to uint8
        ndvi_scaled = ndvi*100 + 100
        sr.entropy_raster(rast_data=ndvi_scaled.astype('uint8'),
                          crs=rast.rio.crs,
                          transf=rast.rio.transform(),
                          rast_name='ndvi',
                          n=3,
                          folder_path=folp)

        # ---------------------------------------
        # sample raster values for current point
        pt_samples = []
        for col_name in window_cols:
            # `aux_fp` (not `fp`) so the input file path from the config cell is not overwritten
            aux_fp = os.path.join(folp, col_name+'s.tif')
            # close the auxiliary raster before deleting its file
            with rio.open(aux_fp) as aux_rast_r:
                sample = sr.sample_raster_from_pts(pt, aux_rast_r, [col_name])
            os.remove(aux_fp)
            pt_samples.append(sample)

        # ---------------------------------------
        # one row with all window features of the current point
        window_features.append(pd.concat(pt_samples, axis = 1))

    # ---------------------------------------------------
    # concatenate sampled data, re-using the index of the original points
    window_features = pd.concat(window_features).set_index(to_sample.index)
    df = pd.concat([to_sample, spectral_bands, window_features], axis=1)

    # ---------------------------------------------------
    # add date and naipid information
    kwargs = {'year' : item.datetime.year,
              'month' : item.datetime.month,
              'day_in_year' : sr.day_in_year(item.datetime.day, item.datetime.month, item.datetime.year),
              'naip_id' : itemid,
           #   'aoi' : aois[itemid],
              'iceplant' : iceplant}
    df = df.assign(**kwargs)

    # ---------------------------------------------------
    sampled_points.append(gpd.GeoDataFrame(df))


ca_m_3412037_nw_10_060_20200607
ca_m_3412037_ne_10_060_20200607
ca_m_3412029_sw_10_060_20200607
ca_m_3412003_ne_10_060_20200607
ca_m_3412038_nw_10_060_20200523
ca_m_3412040_nw_10_060_20200522
ca_m_3412040_ne_10_060_20200522
ca_m_3412039_nw_10_060_20200522
ca_m_3412039_ne_10_060_20200522
ca_m_3412038_ne_10_060_20200522
ca_m_3411933_nw_11_060_20200522
ca_m_3411933_ne_11_060_20200522
ca_m_3411937_sw_11_060_20200521
ca_m_3411936_sw_11_060_20200521
ca_m_3411936_se_11_060_20200521
ca_m_3411935_sw_11_060_20200521
ca_m_3411935_se_11_060_20200521
ca_m_3411934_sw_11_060_20200521
ca_m_3411934_se_11_060_20200521
ca_m_3411933_sw_11_060_20200521
ca_m_3411933_se_11_060_20200521


In [11]:
# Assemble the per-scene results into a single table of features.
if not sampled_points:
    # pd.concat would raise a less informative ValueError on an empty list
    raise ValueError("No points were sampled: no point matched any of the NAIP scene ids.")

samples = pd.concat(sampled_points).sort_index()

# ---------------------------------------
# create max-min difference columns
# (defined here so this cell does not depend on a variable left over from the sampling loop)
band_names = ['r', 'g', 'b', 'nir', 'ndvi']
for band in band_names:
    samples[band + '_diff'] = samples[band + '_max'] - samples[band + '_min']

# ---------------------------------------
# x, y are read from the points' geometry, so label them with the points' own CRS;
# the `crs` from the config cell is only meaningful for csv input and is used as a fallback.
points_crs = getattr(pts, 'crs', None)
if points_crs is None:
    points_crs = crs

# int16 leaves room for the sum and the (possibly negative) difference of the two bands
nir_int = samples['nir'].astype('int16')
red_int = samples['r'].astype('int16')

samples = samples.assign(x = lambda df : df.geometry.x,
                         y = lambda df : df.geometry.y,
                         pts_crs = points_crs.to_string(),
                         ndvi = (nir_int - red_int)/(nir_int + red_int))

# ---------------------------------------
# final column order: location, scene id, per-band features, date, label
feature_suffixes = ['', '_max', '_min', '_diff', '_avg', '_entr']
feature_cols = [band + suffix for band in band_names for suffix in feature_suffixes]

samples = samples[['x', 'y', 'pts_crs',
                   # 'aoi',
                   'naip_id']
                  + feature_cols
                  + ['year', 'month', 'day_in_year',
                     'iceplant']]
samples

Unnamed: 0,x,y,pts_crs,naip_id,r,r_max,r_min,r_diff,r_avg,r_entr,...,ndvi,ndvi_max,ndvi_min,ndvi_diff,ndvi_avg,ndvi_entr,year,month,day_in_year,iceplant
0,-120.373228,34.454063,EPSG:4326,ca_m_3412037_ne_10_060_20200607,114,145,83,62,111.444443,4.789015,...,-0.520000,-0.455696,-0.548387,0.092691,-0.444444,3.305583,2020,6,159,0
1,-119.585374,34.424282,EPSG:4326,ca_m_3411936_sw_11_060_20200521,91,97,77,20,87.333336,4.280226,...,0.257143,0.366255,0.224490,0.141765,0.222222,4.047299,2020,5,142,0
2,-120.169518,34.471802,EPSG:4326,ca_m_3412039_ne_10_060_20200522,109,112,107,5,109.444443,3.659537,...,0.091667,0.154150,0.075000,0.079150,0.000000,3.607475,2020,5,143,0
3,-120.008978,34.466743,EPSG:4326,ca_m_3412040_ne_10_060_20200522,103,116,101,15,108.888885,3.840403,...,0.213740,0.221374,0.171642,0.049732,0.111111,3.326727,2020,5,143,0
4,-119.977029,34.457047,EPSG:4326,ca_m_3411933_nw_11_060_20200522,123,142,116,26,129.444443,4.349192,...,0.115108,0.150183,0.003509,0.146674,0.000000,3.948859,2020,5,143,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,-120.346122,34.460730,EPSG:4326,ca_m_3412038_nw_10_060_20200523,23,51,19,32,29.222221,4.086791,...,0.148148,0.311828,0.037037,0.274791,0.111111,4.444188,2020,5,144,0
461,-120.142532,34.477972,EPSG:4326,ca_m_3412039_ne_10_060_20200522,191,191,184,7,188.666672,3.646967,...,-0.024129,-0.013774,-0.029650,0.015875,0.000000,1.453073,2020,5,143,0
462,-119.850584,34.408292,EPSG:4326,ca_m_3411934_sw_11_060_20200521,68,72,52,20,64.111115,4.254196,...,0.361502,0.458937,0.186047,0.272891,0.222222,4.017825,2020,5,142,0
463,-120.273931,34.475020,EPSG:4326,ca_m_3412038_ne_10_060_20200522,61,82,61,21,66.555557,4.280226,...,0.024000,0.073529,-0.086957,0.160486,0.000000,4.021268,2020,5,143,0


In [13]:
# Attach the map and reference classes of the original points (aligned on the index)
# and drop the 'iceplant' column.
# errors='ignore' keeps the cell re-runnable once 'iceplant' has already been dropped.
samples = (
    samples
    .assign(LSWE_class = pts.map_class,
            ref_class = pts.ref_class)
    .drop(columns=['iceplant'], errors='ignore')
)

In [20]:
# write the table of sampled features to <cwd>/temp/<csv_name>.csv, without the index column
out_fp = os.path.join(os.getcwd(), 'temp', csv_name + '.csv')
samples.to_csv(out_fp, index=False)