In [1]:
import os
import time
import pandas as pd
import numpy as np

import rasterio

from skimage.filters.rank import entropy

from rasterio.crs import CRS

import sample_rasters as sr

In [2]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

itemids = pd.read_csv(os.path.join(os.getcwd(),'temp', 'aoi_naip_itemids.csv')).itemid

csv_name = 'spectral_window_test_set.csv'
df = pd.read_csv(os.path.join(os.getcwd(), csv_name))

# ***************************************************
# ***************************************************

In [3]:
pts_list = []

for itemid in itemids:
    print('PROCESSING: ', itemid)
    # ***************************************************
    # ------------------------------
    # Open NAIP scene and calculate auxiliary spectral rasters
    # locate raster
    naip_rast_r = sr.get_raster_from_item(sr.get_item_from_id(itemid))
    
    # temporary folder for aux rasters
    folp = os.path.join(os.getcwd(),'temp','aux_naip_rasters')
    if os.path.exists(folp) == False:
        os.mkdir(folp)
        
    # ------------------------------
    t0 = time.time()
    # make auxiliary R,G,B,NIR spectral rasters from NAIP scene
    band_names = ['r_', 'g_', 'b_', 'nir_']
    tags = ['_entrs']
    window_fps = []
    window_cols = []

    for name, band in zip(band_names,range(1,5)):
        rast_name = name+itemid
        sr.entropy_raster(rast_reader = naip_rast_r, band=band, rast_name=rast_name, n=3, folder_path=folp)
        
        for tag in tags:
            window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))        
            window_cols.append( name.replace('_','')+tag.replace('s',''))

    # ------------------------------    
    print('CREATED R,G,B,NIR ENTROPY RASTERS (sec): ',time.time() -t0)

    # ------------------------------
    # make auxiliary NDVI from NAIP scene
    t0 = time.time()
    red_band = naip_rast_r.read([1]).squeeze().astype('int16') 
    nir_band = naip_rast_r.read([4]).squeeze().astype('int16')
    ndvi = ((nir_band - red_band) / (nir_band + red_band)*100)+100

    band_names.append('ndvi_')
    rast_name = 'ndvi_'+itemid
    sr.entropy_raster(rast_data=ndvi.astype('uint8'), crs=naip_rast_r.crs, transf=naip_rast_r.transform, rast_name=rast_name, n=3, folder_path=folp)
    
    for tag in tags:
        window_fps.append(os.path.join(folp, rast_name + tag + '.tif'))        
        window_cols.append( 'ndvi'+tag.replace('s',''))
        
    # ------------------------------    
    print('CREATED NDVI ENTROPY RASTER (sec): ',time.time() -t0)

    # ***************************************************
    # Find points in test and train sets with this itemid
    pts = df.loc[df['naip_id'] == itemid]

    # ------------------------------
    # Convert df to geopandas
    crs = CRS.from_string(pts.pts_crs.iloc[0])
    pts = sr.geodataframe_from_csv(df=pts, lon_label='x', lat_label='y', crs=crs)
    pts_col = pts.to_crs(naip_rast_r.crs).geometry

    # ***************************************************
    ## Sample canopy_height at point, and max, min and avg canopy height around point
    t0 = time.time()
    samples = []
    for fp, col_name in zip(window_fps, window_cols):
        rast_r = rasterio.open(fp)
        sample = sr.sample_raster_from_pts(pts_col, rast_r, [col_name])    
        samples.append(sample)

    print('SAMPLED RASTERS (sec): ',time.time() - t0)


    # ------------------------------
    ## Add all derived spectral data to pts dataframe
    new_features = pd.concat(samples, axis = 1)
    pts = pd.concat([pts, new_features.set_index(pts.index)], axis=1)    

    # ***************************************************
    # Clean dataframe
    #pts.drop(['geometry'],axis=1, inplace=True) # remove geometry column (already have lat,lon and CRS)
    pts = pts[['x', 'y', 'pts_crs', #  point location
             'aoi', 'naip_id', 'polygon_id',  # sampling info
             'r', 'r_max', 'r_min', 'r_diff', 'r_avg', 'r_entr',
             'g', 'g_max', 'g_min', 'g_diff', 'g_avg', 'g_entr',
             'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'b_entr',
             'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg', 'nir_entr',
             'ndvi', 'ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg', 'ndvi_entr',   # spectral
             'year', 'month', 'day_in_year', # date
             'lidar', 'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar', # lidar
             'iceplant'
             ]] 
    pts_list.append(pts)
    print('finished scene')

    # ***************************************************
    # Delete auxiliary NAIP rasters created for this scene
    for fp in window_fps:
        os.remove(fp)


# ***************************************************
# ***************************************************
all_pts = pd.concat(pts_list, axis =0)

PROCESSING:  ca_m_3412037_nw_10_060_20200607
CREATED R,G,B,NIR ENTROPY RASTERS (sec):  566.315943479538
CREATED NDVI ENTROPY RASTER (sec):  114.39288663864136


  arr = construct_1d_object_array_from_listlike(values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geometry'] = xy


SAMPLED RASTERS (sec):  19.473827123641968


KeyError: "['ndvi_max', 'ndvi_min', 'ndvi_diff', 'ndvi_avg'] not in index"

In [5]:
pts.columns

Index(['x', 'y', 'pts_crs', 'aoi', 'naip_id', 'polygon_id', 'r', 'r_max',
       'r_min', 'r_diff', 'r_avg', 'g', 'g_max', 'g_min', 'g_diff', 'g_avg',
       'b', 'b_max', 'b_min', 'b_diff', 'b_avg', 'nir', 'nir_max', 'nir_min',
       'nir_diff', 'nir_avg', 'ndvi', 'year', 'month', 'day_in_year', 'lidar',
       'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar', 'iceplant',
       'geometry', 'r_entr', 'g_entr', 'b_entr', 'nir_entr', 'ndvi_entr'],
      dtype='object')