In [1]:
import os
import pandas as pd
import numpy as np
import time

import rasterio
import sample_rasters as sr
import planetary_computer as pc
import geopandas as gpd

from scipy.ndimage import maximum_filter as maxf2D
from scipy.ndimage import minimum_filter as minf2D
from scipy.ndimage import convolve as conf2D

from shapely.geometry import Point
from rasterio.crs import CRS

In [2]:
# *********************************************************************

def min_raster(rast_reader, band, rast_name, n, folder_path=''):  
    """
        Creates a new raster by replacing each pixel p in given raster R by the minimum value in a nxn window centered at p.
        The raster with minimum values is saved in a temp folder in the current working directory if no folder_path is given.
            Parameters: 
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the minimum values in a window
                        rast_name (str):
                            name of raster. The resulting raster will be saved as rast_name_maxs.tif.
                        n (int):
                            Side length (in pixels) of the square window over which to compute minimum values for each pixel.
                        folder_path (str):
                            directory where to save raster. If none is given, then it saves the raster in a temp folder in the cwd.
            Return: None    
    """
    rast = rast_reader.read([band]).squeeze() # read raster values
    mins = minf2D(rast, size=(n,n))    # calculate min in window
    
    if not folder_path:                         # if needed, create temp directory to save files 
        folder_path = make_directory('temp')
    
    dtype = rasterio.dtypes.get_minimum_dtype(mins)  # parameters for saving
    
    fp = os.path.join(folder_path, rast_name +'_mins.tif')      # save raster
    sr.save_raster(mins, 
                fp, 
                rast.shape,
                1,
                rast_reader.crs, 
                rast_reader.transform, 
                dtype)  
    return

# ------------------------------------------------------------------------------

def max_raster(rast_reader, band, rast_name, n, folder_path=''):  
    """
        Creates a new raster by replacing each pixel p in given raster R by the max value in a nxn window centered at p.
        The raster with maximum values is saved in a temp folder in the current working directory if no folder_path is given.
            Parameters: 
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the maximum values in a window
                        rast_name (str):
                            name of raster. The resulting raster will be saved as rast_name_maxs.tif.
                        n (int):
                            Side length (in pixels) of the square window over which to compute maximum values for each pixel.
                        folder_path (str):
                            directory where to save raster. If none is given, then it saves the raster in a temp folder in the cwd.
            Return: None    
    """
    rast = rast_reader.read([band]).squeeze() # read raster values
    maxs = maxf2D(rast, size=(n,n))    # calculate min in window
    
    if not folder_path:                         # if needed, create temp directory to save files 
        folder_path = make_directory('temp')
    
    dtype = rasterio.dtypes.get_minimum_dtype(maxs)  # parameters for saving
    
    fp = os.path.join(folder_path, rast_name +'_maxs.tif')      # save raster
    sr.save_raster(maxs, 
                fp, 
                rast.shape,
                1,
                rast_reader.crs, 
                rast_reader.transform, 
                dtype)  
    return

# ------------------------------------------------------------------------------

def avg_raster(rast_reader, band, rast_name, n, folder_path=''): 
    """
        Creates a new raster by replacing each pixel p in given raster R by the avg value in a nxn window centered at p.
        The raster with averege values is saved in a temp folder in the current working directory if no folder_path is given.
            Parameters: 
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the average values in a window
                        rast_name (str):
                            name of raster. The resulting raster will be saved as rast_name_avgs.tif.
                        n (int):
                            Side length (in pixels) of the square window over which to compute average values for each pixel.
                        folder_path (str):
                            directory where to save raster. If none is given, then it saves the raster in a temp folder in the cwd.
            Return: None    
    """
    rast = rast_reader.read([band]).squeeze() # read raster values

    w = np.ones(n*n).reshape(n,n)      # calculate averages in window
    avgs = conf2D(rast, 
             weights=w,
             mode='constant')
    avgs = avgs/(n*n)
    
    # if needed, create temp directory to save files 
    if not folder_path:  
        folder_path = make_directory('temp')
            
    # parameters for saving   
    fp = os.path.join(folder_path, rast_name +'_avgs.tif')                
    dtype = rasterio.dtypes.get_minimum_dtype(avgs)
            
    sr.save_raster(avgs,    # save rasters
                fp, 
                rast.shape, 
                1,
                rast_reader.crs, 
                rast_reader.transform, 
                dtype)  
    return

In [3]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

itemids = pd.read_csv(os.path.join(os.getcwd(),'temp', 'aoi_naip_itemids.csv')).itemid
#itemids = ['ca_m_3411934_sw_11_.6_20160713_20161004']
# open csv with all itemids

In [4]:
df = pd.read_csv(os.path.join(os.getcwd(), 'test_set.csv'))
# add train set

# ***************************************************
# ***************************************************

In [5]:
t0 = time.time()

In [6]:
pts_list = []

for itemid in itemids:
    print('********************************************')
    print('PROCESSING: ', itemid)
    # ------------------------------
    # Filter points in test set and train set with this itemid
    pts = df.loc[df['naip_id'] == itemid]

    # ------------------------------
    # Open NAIP scene and calculate auxiliary spectral rasters
    item = sr.get_item_from_id(itemid)    # locate raster
    href = pc.sign(item.assets["image"].href)
    lidar_rast_r = rasterio.open(href)

    folp = os.path.join(os.getcwd(),'temp')

    rast_name = 'r_'+itemid 
    min_raster(rast_reader = lidar_rast_r, band=1, rast_name = rast_name, n=3, folder_path=folp)
    max_raster(rast_reader = lidar_rast_r, band=1,  rast_name = rast_name, n=3, folder_path=folp)
    avg_raster(rast_reader = lidar_rast_r, band=1, rast_name = rast_name, n=3, folder_path=folp)

    rast_name = 'g_'+itemid 
    min_raster(rast_reader = lidar_rast_r, band=2, rast_name = rast_name, n=3, folder_path=folp)
    max_raster(rast_reader = lidar_rast_r, band=2,  rast_name = rast_name, n=3, folder_path=folp)
    avg_raster(rast_reader = lidar_rast_r, band=2, rast_name = rast_name, n=3, folder_path=folp)

    rast_name = 'b_'+itemid 
    min_raster(rast_reader = lidar_rast_r, band=3, rast_name = rast_name, n=3, folder_path=folp)
    max_raster(rast_reader = lidar_rast_r, band=3,  rast_name = rast_name, n=3, folder_path=folp)
    avg_raster(rast_reader = lidar_rast_r, band=3, rast_name = rast_name, n=3, folder_path=folp)

    rast_name = 'nir_'+itemid 
    min_raster(rast_reader = lidar_rast_r, band=4, rast_name = rast_name, n=3, folder_path=folp)
    max_raster(rast_reader = lidar_rast_r, band=4,  rast_name = rast_name, n=3, folder_path=folp)
    avg_raster(rast_reader = lidar_rast_r, band=4, rast_name = rast_name, n=3, folder_path=folp)

    # ------------------------------
    # Convert csv to geopandas

    ## Get points information from csv
    crs = CRS.from_string(pts.pts_crs.iloc[0])
    #TO DO: change geodataframe_from_csv to use pandas df and not open file there
    #    pts = sr.geodataframe_from_csv(pts_fp, 'x','y',crs)
    if 'geometry' in pts.columns:           # rename geometry column if it exists
            pts = pts.rename(columns = {'geometry': 'geometry_0'})

    # recreate geometry column as shapely Points
    xy = []
    for x,y in zip(pts['x'], pts['y']):
        xy.append(Point(x,y))

    pts_col = gpd.GeoDataFrame(pd.DataFrame(xy, columns=['geometry']), crs=crs)
    pts_col = pts_col.to_crs(lidar_rast_r.crs).geometry

    # ------------------------------
    ## Sample canopy_height at point, and max, min and avg canopy height around point

    samples = []
    for band in ['r_','g_','b_','nir_']:
        for tag in ['_maxs', '_mins', '_avgs']:

            fp = os.path.join(os.getcwd(), 'temp', band + itemid + tag + '.tif')
            col_name = band.replace('_', '') + tag.replace('s','')

            rast_r = rasterio.open(fp)
            sample = sr.sample_raster_from_pts(pts_col, rast_r, [col_name])

            samples.append(sample)

    new_features = pd.concat(samples, axis = 1)

    # ------------------------------
    ## Add all derived spectral data to pts dataframe

    pts = pd.concat([pts, new_features], axis=1)
    pts

    for band in ['r_','g_','b_','nir_']:
        col_name = band + 'diff'
        pts[col_name] = pts[band +'max'] - pts[band +'min']

    # ------------------------------    
    # Clean dataframe
    #pts.drop(['geometry'],axis=1, inplace=True) # remove geometry column (already have lat,lon and CRS)
    pts = pts[['x', 'y', 'pts_crs', #  point location
             'aoi', 'naip_id', 'polygon_id',  # sampling info
             'r', 'r_max', 'r_min', 'r_diff', 'r_avg',
             'g', 'g_max', 'g_min', 'g_diff', 'g_avg',
             'b', 'b_max', 'b_min', 'b_diff', 'b_avg',
             'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg',
             'ndvi',     # spectral
             'year', 'month', 'day_in_year', # date
             'lidar', 'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar', # lidar
             'iceplant'
             ]] 

    # ------------------------------
    # Delete auxiliary LIDAR rasters created for this year
    for band in ['r_','g_','b_','nir_']:
        for tag in ['_maxs', '_mins', '_avgs']:
            fp = os.path.join(os.getcwd(), 'temp', band + itemid + tag + '.tif')   
            os.remove(fp)

    pts_list.append(pts)

all_pts = pd.concat(pts_list, axis =0)
print(time.time() -t0)

********************************************
PROCESSING:  ca_m_3412037_nw_10_060_20200607
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412037_nw_10_060_20180913_20190208
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412037_nw_10_1_20140603_20141030
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412037_nw_10_1_20120518_20120730
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412039_nw_10_060_20200522
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412039_nw_10_060_20180724_20190209
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412039_nw_10_.6_20160616_20161004
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412039_nw_10_1_20140603_20141030
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3412039_nw_10_1_20120518_20120730
filtered pts
finished R aux rasters
finished G aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411934_sw_11_060_20200521
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411934_sw_11_060_20180722_20190209
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411934_sw_11_.6_20160713_20161004
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411934_sw_11_1_20140601_20141030
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411934_sw_11_1_20120505_20120730
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411936_se_11_060_20200521
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411936_se_11_060_20180724_20190209
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411936_se_11_.6_20160713_20161004
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411936_se_11_1_20140901_20141030
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


********************************************
PROCESSING:  ca_m_3411936_se_11_1_20120505_20120730
filtered pts
finished R aux rasters
finished G aux rasters
finished B aux rasters
finished NIR aux rasters


  arr = construct_1d_object_array_from_listlike(values)


958.6162986755371


In [7]:
# ------------------------------
## Save points with added spectral data
ptslidar_fp = os.path.join(os.getcwd(), 
                           'temp', 
                           aoi +'_pts_spectral_lidar_'+str(year)+'.csv')
pts.to_csv(all_pts, index=False)

# ------------------------------
## Delete original csv files (points without LIDAR)
if delete_pts == True:
    os.remove(pts_fp)

NameError: name 'aoi' is not defined

In [11]:
all_pts.r_max

111798      NaN
111799      NaN
111800      NaN
111801      NaN
111802      NaN
          ...  
6305      100.0
6306      146.0
6307       97.0
6308      141.0
6309       91.0
Name: r_max, Length: 311016, dtype: float64