In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import geopandas as gpd

import data_sampling_workflow.sample_rasters as sr

from skimpy import clean_columns

from rasterio.crs import CRS

import rasterio

In [2]:
# Year being processed. Later cells use it to build the validation CSV file name,
# to filter the NAIP footprints, to pick the canopy height raster, and to name
# the output folder and shapefile.
year = 2020

In [3]:
# convert reference class columns in df from CollectEarth 
# into a single ref_class column with values from 0 to 3
def ref_class_column(df):
    """Collapse the CollectEarth category columns into one class code per row.

    A row gets the code of the first category column (in the order below)
    whose value equals 100:

        0 : category_non_iceplant_vegetation
        1 : category_iceplant
        2 : category_low_ndvi_impervious_surface
        3 : category_water

    Rows where none of the four columns equals 100 get the sentinel 100, so
    the result always has exactly one entry per row and can be assigned
    straight back to the frame as a new column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four ``category_*`` columns listed above.

    Returns
    -------
    list of int
        One class code per row of ``df``, in row order.
    """
    # Order matters: np.select keeps the first matching condition, which
    # mirrors an if/elif chain over the four category columns.
    category_columns = [
        'category_non_iceplant_vegetation',
        'category_iceplant',
        'category_low_ndvi_impervious_surface',
        'category_water',
    ]
    conditions = [np.asarray(df[col] == 100, dtype=bool) for col in category_columns]
    codes = list(range(len(category_columns)))

    # default=100 flags rows without a fully assigned category instead of
    # failing on them.
    return np.select(conditions, codes, default=100).tolist()

In [4]:
# ---------------------------------------------------
# load validation points
fp = os.path.join(os.getcwd(), 'map_validation', 'validation_results_'+str(year)+'.csv' )
validation_pts = clean_columns(sr.geodataframe_from_csv(fp, 'center_lon', 'center_lat', CRS.from_epsg(4326)))

# clean data frame: derive ref_class from the category columns first, because
# those columns are dropped right after
validation_pts['ref_class'] = ref_class_column(validation_pts)
validation_pts = validation_pts.drop(columns=[
    'center_lon', 'center_lat', 'shape', 'size_m', 'sample_points',
    'flagged', 'flagged_reason',
    'total_securewatch_dates', 'common_securewatch_date',
    'validation_finished_yes_high_confidence',
    'collection_time',
    'category_low_ndvi_impervious_surface', 'category_non_iceplant_vegetation',
    'category_iceplant', 'category_water', 'plotid'])
validation_pts = validation_pts.rename(columns={
    'pl_class': 'map_class',
    'validation_finished_no_low_confidence': 'low_confidence'})

# ---------------------------------------------------
# load NAIP scenes' footprints and keep only the scenes for `year`
footprints = gpd.read_file(os.path.join(os.getcwd(), 
                                        'separating_naip_flights',
                                        'naip_scenes_footprints',
                                        'naip_scenes_footprints.shp'))
# the year column is compared as a string, hence str(year);
# chained reset_index returns a new frame instead of mutating the filtered one
footprints = footprints[footprints.year == str(year)].reset_index(drop=True)

# ---------------------------------------------------
# Open canopy height raster and create auxiliary min, max, and avg rasters
# NOTE: lidar_rast_r is left open on purpose, the sampling cell reads from it
lidar_rast_r = rasterio.open(sr.path_to_lidar(year))
rast_name = 'SB_canopy_height_'+str(year) # give a name to read raster

sr.min_raster(rast_reader = lidar_rast_r, rast_name = rast_name, n=3)
sr.max_raster(rast_reader = lidar_rast_r, rast_name = rast_name, n=3)
sr.avg_raster(rast_reader = lidar_rast_r, rast_name = rast_name, n=3)

# file paths to auxiliary canopy height rasters, in the order max, min, avg
# NOTE(review): presumably the sr.*_raster helpers write to ./temp with these
# suffixes -- confirm against sample_rasters
lidar_fps = [os.path.join(os.getcwd(), 'temp', rast_name+tag+'.tif')
             for tag in ['_maxs', '_mins', '_avgs']]

# ---------------------------------------------------
# ---------------------------------------------------
# make dictionary with points that need to be sampled from each scene
# samples maps scene id -> list of validation_pts index labels; each point is
# assigned to the first scene whose footprint contains it
# NOTE(review): contains() is a plain geometric test with no CRS check, so the
# footprints are assumed to share the CRS of validation_pts -- confirm
unchecked = list(validation_pts.index)
samples = {key: [] for key in footprints.id}

for itemid in footprints.id:
    box = footprints[footprints.id == itemid].geometry.iloc[0]

    # unchecked holds index labels, so look points up by label (.loc)
    inside = [i for i in unchecked if box.contains(validation_pts.geometry.loc[i])]
    samples[itemid].extend(inside)

    # drop the assigned points in one pass instead of repeated list.remove
    assigned = set(inside)
    unchecked = [i for i in unchecked if i not in assigned]

  arr = construct_1d_object_array_from_listlike(values)


In [5]:

# Sample the NAIP bands and the canopy height rasters at the validation points,
# one scene at a time; sampled_points collects one GeoDataFrame per scene.
sampled_points = []

# The auxiliary canopy height rasters are the same for every scene: open them
# once and let the with-block close them when sampling is done.
with rasterio.open(lidar_fps[0]) as maxs_rast_r, \
     rasterio.open(lidar_fps[1]) as mins_rast_r, \
     rasterio.open(lidar_fps[2]) as avg_rast_r:

    for itemid, pt_ids in samples.items():

        # scenes without validation points have nothing to sample
        if len(pt_ids) == 0:
            continue

        # ---------------------------------------------------
        # sample spectral bands from NAIP
        item = sr.get_item_from_id(itemid)
        rast_reader = sr.get_raster_from_item(item)

        # pt_ids are index labels of validation_pts, so select by label
        to_sample = validation_pts.loc[pt_ids].geometry
        to_sample_match = to_sample.to_crs(rast_reader.crs)

        rast_band_names = ['r', 'g', 'b', 'nir']
        spectral_bands = sr.sample_raster_from_pts(to_sample_match, rast_reader, rast_band_names).set_index(to_sample.index)

        # ---------------------------------------------------
        ## Sample canopy_height at point, and max, min and avg canopy height around point
        # reproject again: the lidar rasters may use a different CRS than NAIP
        to_sample_match = to_sample.to_crs(lidar_rast_r.crs)

        lidar_samples = sr.sample_raster_from_pts(to_sample_match, lidar_rast_r, ['lidar'])
        max_samples = sr.sample_raster_from_pts(to_sample_match, maxs_rast_r, ['max_lidar'])
        min_samples = sr.sample_raster_from_pts(to_sample_match, mins_rast_r, ['min_lidar'])
        avg_samples = sr.sample_raster_from_pts(to_sample_match, avg_rast_r, ['avg_lidar'])

        # ---------------------------------------------------
        # concatenate sampled data, re-indexed with the points' own labels so
        # the columns line up with to_sample
        lidar_bands = pd.concat([lidar_samples, max_samples, min_samples, avg_samples], axis=1).set_index(to_sample.index)
        df = pd.concat([to_sample, spectral_bands, lidar_bands], axis=1)

        # ---------------------------------------------------
        # add date and naipid information
        df['year'] = item.datetime.year
        df['month'] = item.datetime.month
        df['day_in_year'] = sr.day_in_year(item.datetime.day, item.datetime.month, item.datetime.year )
        df['naip_id'] = itemid

        # ---------------------------------------------------
        sampled_points.append(gpd.GeoDataFrame(df))

In [6]:
# Stack the per-scene samples back into one frame ordered by point index, then
# attach the validation attributes (everything except the duplicate geometry).
# NOTE: this rebinds `samples` from the scene -> points dictionary to the final
# table, so the sampling cell cannot be re-run after this one without first
# re-running the cell that builds the dictionary.
column_order = [
    'geometry',
    'r', 'g', 'b', 'nir',
    'lidar', 'max_lidar', 'min_lidar', 'avg_lidar',
    'year', 'month', 'day_in_year',
    'naip_id', 'pl_which_raster',
    'email', 'analysis_duration', 'low_confidence',
    'map_class', 'ref_class',
]

sampled_by_index = pd.concat(sampled_points).sort_index()
point_attributes = validation_pts.drop(columns=['geometry'])

samples = pd.concat([sampled_by_index, point_attributes], axis=1)
samples = samples[column_order]

samples

Unnamed: 0,geometry,r,g,b,nir,lidar,max_lidar,min_lidar,avg_lidar,year,month,day_in_year,naip_id,pl_which_raster,email,analysis_duration,low_confidence,map_class,ref_class
0,POINT (-120.37323 34.45406),114,135,144,36,-9999,-9999,-9999,-2717.222168,2020,6,159,ca_m_3412037_ne_10_060_20200607,0,galaz-garcia@nceas.ucsb.edu,9.3 secs,0.0,3,3
1,POINT (-119.58537 34.42428),91,98,81,154,2,4,0,2.111111,2020,5,142,ca_m_3411936_sw_11_060_20200521,2,galaz-garcia@nceas.ucsb.edu,5.9 secs,0.0,1,0
2,POINT (-120.16952 34.47180),109,114,104,131,1,3,0,1.111111,2020,5,143,ca_m_3412039_ne_10_060_20200522,0,galaz-garcia@nceas.ucsb.edu,222.9 secs,0.0,0,0
3,POINT (-120.00898 34.46674),103,119,109,159,2,3,1,1.777778,2020,5,143,ca_m_3412040_ne_10_060_20200522,0,galaz-garcia@nceas.ucsb.edu,155.6 secs,0.0,1,0
4,POINT (-119.97703 34.45705),123,130,107,155,0,2,0,0.555556,2020,5,143,ca_m_3411933_nw_11_060_20200522,2,galaz-garcia@nceas.ucsb.edu,155.7 secs,100.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,POINT (-120.34612 34.46073),23,33,48,31,4,5,1,3.555556,2020,5,144,ca_m_3412038_nw_10_060_20200523,0,galaz-garcia@nceas.ucsb.edu,6.3 secs,0.0,0,0
461,POINT (-120.14253 34.47797),191,179,159,182,0,0,0,0.000000,2020,5,143,ca_m_3412039_ne_10_060_20200522,0,galaz-garcia@nceas.ucsb.edu,4.7 secs,0.0,2,2
462,POINT (-119.85058 34.40829),68,83,72,145,8,13,5,8.555555,2020,5,142,ca_m_3411934_sw_11_060_20200521,2,galaz-garcia@nceas.ucsb.edu,10.8 secs,0.0,0,0
463,POINT (-120.27393 34.47502),61,67,71,64,1,2,1,1.111111,2020,5,143,ca_m_3412038_ne_10_060_20200522,0,galaz-garcia@nceas.ucsb.edu,10.3 secs,0.0,2,0


In [7]:
# Write the augmented validation points to a shapefile inside a folder of the
# same name in the current working directory.
folder = os.path.join(os.getcwd(), 
                      'validation_augmented_spectral_'+str(year))
# exist_ok=True replaces the check-then-create pair and keeps the cell re-runnable
os.makedirs(folder, exist_ok=True)

# NOTE(review): the ESRI Shapefile format limits field names to 10 characters,
# so the longer column names will presumably be truncated on write -- confirm
# this is acceptable for downstream readers.
samples.to_file(os.path.join(folder, 'validation_augmented_spectral_'+str(year)+'.shp'))

  samples.to_file(os.path.join(folder, 'validation_results_spectral_'+str(year))+'.shp')
