In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import geopandas as gpd

import data_sampling_workflow.sample_rasters as sr

from skimpy import clean_columns

from rasterio.crs import CRS

In [2]:
def ref_class_column(df):
    """Derive the reference class of every row from its ``category_*`` columns.

    A row is assigned a class when the corresponding category column equals
    100 (presumably a percentage of agreement -- confirm against the
    validation export). Columns are tested in this priority order, so the
    first match wins:

    ====  ========================================
    code  column that must equal 100
    ====  ========================================
    0     ``category_non_iceplant_vegetation``
    1     ``category_iceplant``
    2     ``category_low_ndvi_impervious_surface``
    3     ``category_water``
    ====  ========================================

    Rows where none of the four columns equals 100 receive the sentinel
    value 100.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four ``category_*`` columns listed above.

    Returns
    -------
    list of int
        One class code per row of ``df``, in row order.
    """
    # Order matters: np.select returns the choice of the FIRST true condition,
    # which reproduces the original if/elif priority.
    conditions = [
        (df['category_non_iceplant_vegetation'] == 100).to_numpy(),
        (df['category_iceplant'] == 100).to_numpy(),
        (df['category_low_ndvi_impervious_surface'] == 100).to_numpy(),
        (df['category_water'] == 100).to_numpy(),
    ]
    class_codes = [0, 1, 2, 3]

    # default=100 flags rows with no unanimous category. The previous code
    # tried `ref_class[j] = 100` with an undefined `j` and crashed here.
    ref_class = np.select(conditions, class_codes, default=100)

    return ref_class.tolist()

In [3]:
# Year of the validation results to process; reused by later cells.
year = 2018

# The validation CSV is looked up under ./map_validation relative to the
# current working directory.
fp = os.path.join(
    os.getcwd(),
    'map_validation',
    'validation_results_' + str(year) + '.csv',
)

# Read the CSV as a GeoDataFrame using the lon/lat columns in EPSG:4326
# (presumably these become point geometries -- see sample_rasters), then
# normalise the column names with skimpy.
validation_pts = clean_columns(
    sr.geodataframe_from_csv(
        fp,
        'center_lon',
        'center_lat',
        CRS.from_epsg(4326),
    )
)

  arr = construct_1d_object_array_from_listlike(values)


In [4]:
# Attach the reference class computed from the category_* columns.
validation_pts['ref_class'] = ref_class_column(validation_pts)

# Columns that are not needed once ref_class has been derived.
validation_pts = validation_pts.drop(
    columns=[
        'center_lon', 'center_lat', 'shape', 'size_m', 'sample_points',
        'flagged', 'flagged_reason',
        'total_securewatch_dates', 'common_securewatch_date',
        'validation_finished_yes_high_confidence',
        'collection_time',
        'category_low_ndvi_impervious_surface',
        'category_non_iceplant_vegetation',
        'category_iceplant', 'category_water', 'plotid',
    ]
)

# Shorter, clearer names for the two columns kept from the validation export.
validation_pts = validation_pts.rename(
    columns={
        'pl_class': 'map_class',
        'validation_finished_no_low_confidence': 'low_confidence',
    }
)

In [5]:
# Read the NAIP scene footprints shapefile (path relative to the working
# directory) and keep only the scenes flown in `year`.
footprints = gpd.read_file(
    os.path.join(
        os.getcwd(),
        'separating_naip_flights',
        'naip_scenes_footprints',
        'naip_scenes_footprints.shp',
    )
)

# `year` is compared as a string here, so cast the integer year first.
# Renumber the surviving rows from 0.
footprints = footprints[footprints.year == str(year)].reset_index(drop=True)

In [6]:
# Assign each validation point to the first footprint (in `footprints` order)
# whose polygon contains it. `samples` maps footprint id -> list of point
# positions; footprints that contain no points keep an empty list.
#
# Points are tracked by integer POSITION (0..n-1) because they are read with
# `.iloc`. Using index labels here would silently select the wrong rows, or
# raise, whenever `validation_pts` does not have a default 0..n-1 index.
unchecked = list(range(len(validation_pts)))
samples = {key: [] for key in footprints.id}

for itemid in footprints.id:
    # Geometry of the (first) footprint row carrying this id.
    box = footprints.loc[footprints.id == itemid, 'geometry'].iloc[0]

    # Rebuild the list of unassigned points instead of calling list.remove()
    # repeatedly, which is quadratic.
    still_unchecked = []
    for i in unchecked:
        point = validation_pts.geometry.iloc[i]
        if box.contains(point):
            samples[itemid].append(i)
        else:
            still_unchecked.append(i)

    unchecked = still_unchecked

In [7]:
# For every NAIP scene that has at least one validation point, sample the
# scene's raster at those points and collect one GeoDataFrame per scene.
sampled_points = []
for itemid, point_positions in samples.items():

    # Scenes without any validation point are skipped entirely.
    if not point_positions:
        continue

    item = sr.get_item_from_id(itemid)
    rast_reader = sr.get_raster_from_item(item)

    # Points falling in this scene, plus a copy reprojected to the raster CRS
    # so that sampling happens in the raster's own coordinates.
    to_sample = validation_pts.iloc[point_positions].geometry
    to_sample_match = to_sample.to_crs(rast_reader.crs)

    rast_band_names = ['r', 'g', 'b', 'nir']
    spectral_bands = sr.sample_raster_from_pts(
        to_sample_match, rast_reader, rast_band_names
    )
    # Re-key the sampled values by the original point index so they line up
    # with `to_sample` in the column-wise concat below.
    spectral_bands = spectral_bands.set_index(to_sample.index)

    acquired = item.datetime
    df = pd.concat([to_sample, spectral_bands], axis=1).assign(
        year=acquired.year,
        month=acquired.month,
        day_in_year=sr.day_in_year(acquired.day, acquired.month, acquired.year),
        naip_id=itemid,
    )

    sampled_points.append(gpd.GeoDataFrame(df))

In [9]:
# Stack the per-scene frames and sort them by point index.
stacked_samples = pd.concat(sampled_points).sort_index()

# Join the remaining validation attributes column-wise (aligned on the index).
# The geometry column is dropped first so it is not duplicated.
validation_attrs = validation_pts.drop(columns=['geometry'])
samples = pd.concat([stacked_samples, validation_attrs], axis=1)
samples

Unnamed: 0,geometry,r,g,b,nir,year,month,day_in_year,naip_id,email,analysis_duration,map_class,pl_which_raster,low_confidence,ref_class
0,POINT (-119.74256 34.40704),97,99,90,159,2018,7,205,ca_m_3411935_sw_11_060_20180724_20190209,brun@nceas.ucsb.edu,22.6 secs,0,2,0.0,0
1,POINT (-119.50527 34.38426),76,95,132,44,2018,7,205,ca_m_3411936_se_11_060_20180724_20190209,brun@nceas.ucsb.edu,6.6 secs,3,2,0.0,3
2,POINT (-119.63961 34.41325),125,128,133,50,2018,7,205,ca_m_3411935_se_11_060_20180724_20190209,brun@nceas.ucsb.edu,8.8 secs,3,2,0.0,3
3,POINT (-119.86679 34.40905),63,81,86,17,2018,7,203,ca_m_3411934_sw_11_060_20180722_20190209,brun@nceas.ucsb.edu,8.6 secs,3,2,0.0,3
4,POINT (-120.48848 34.49597),124,109,93,122,2018,9,256,ca_m_3412037_nw_10_060_20180913_20190208,brun@nceas.ucsb.edu,118.2 secs,2,0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,POINT (-119.74973 34.40378),145,148,140,84,2018,7,205,ca_m_3411935_sw_11_060_20180724_20190209,brun@nceas.ucsb.edu,10.5 secs,2,2,100.0,3
446,POINT (-120.04774 34.46608),158,143,118,162,2018,7,196,ca_m_3412040_ne_10_060_20180715_20190209,brun@nceas.ucsb.edu,8.9 secs,2,0,0.0,2
447,POINT (-120.47107 34.46718),66,66,62,83,2018,9,256,ca_m_3412037_nw_10_060_20180913_20190208,brun@nceas.ucsb.edu,101.9 secs,0,0,0.0,0
448,POINT (-119.64451 34.41721),73,84,91,143,2018,7,205,ca_m_3411935_se_11_060_20180724_20190209,brun@nceas.ucsb.edu,97.1 secs,1,2,0.0,0


In [13]:
# Write the combined samples to
#   ./data_sampling_workflow/validation_results_spectral_<year>/validation_results_spectral_<year>.shp
# relative to the current working directory.
folder = os.path.join(os.getcwd(),
                      'data_sampling_workflow',
                      'validation_results_spectral_'+str(year))

# makedirs(exist_ok=True) creates missing parent directories as well and does
# not fail when the folder already exists, so the cell can be re-run safely.
os.makedirs(folder, exist_ok=True)

samples.to_file(os.path.join(folder, 'validation_results_spectral_'+str(year)+'.shp'))

  samples.to_file(os.path.join(folder, 'validation_results_spectral_'+str(year))+'.shp')
