In [1]:
import os
import pandas as pd
import numpy as np
import time

import rasterio
import sample_rasters as sr
import planetary_computer as pc
import geopandas as gpd

from scipy.ndimage import maximum_filter as maxf2D
from scipy.ndimage import minimum_filter as minf2D
from scipy.ndimage import convolve as conf2D

from shapely.geometry import Point
from rasterio.crs import CRS

In [2]:
# *********************************************************************

def min_raster(rast_reader, band, rast_name, n, folder_path=''):
    """
        Creates a new single-band raster by replacing each pixel p in one band of the given raster R
        by the minimum value in a nxn window centered at p.
        The raster with minimum values is saved in a temp folder in the current working directory
        if no folder_path is given.
            Parameters:
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the minimum values in a window
                        band (int):
                            index of the band of the raster to read (passed to rast_reader.read).
                        rast_name (str):
                            name of raster. The resulting raster will be saved as rast_name_mins.tif.
                        n (int):
                            Side length (in pixels) of the square window over which to compute minimum values for each pixel.
                        folder_path (str):
                            directory where to save raster. If none is given, then it saves the raster in a
                            temp folder in the cwd (the folder is created if it does not exist).
            Return: None
    """
    # read([band]) returns a (1, rows, cols) array; take the first plane so the result is always 2D,
    # even for rasters that are a single row or column wide
    rast = rast_reader.read([band])[0]

    # minimum in the nxn window; pixels at the border use scipy's default border handling
    mins = minf2D(rast, size=(n, n))

    # if needed, create temp directory to save files
    if not folder_path:
        folder_path = os.path.join(os.getcwd(), 'temp')
        os.makedirs(folder_path, exist_ok=True)

    # smallest dtype able to hold the computed values
    dtype = rasterio.dtypes.get_minimum_dtype(mins)

    # save raster with the georeferencing (crs, transform) of the input raster
    fp = os.path.join(folder_path, rast_name + '_mins.tif')
    sr.save_raster(mins,
                   fp,
                   rast.shape,
                   1,
                   rast_reader.crs,
                   rast_reader.transform,
                   dtype)
    return

# ------------------------------------------------------------------------------

def max_raster(rast_reader, band, rast_name, n, folder_path=''):
    """
        Creates a new single-band raster by replacing each pixel p in one band of the given raster R
        by the max value in a nxn window centered at p.
        The raster with maximum values is saved in a temp folder in the current working directory
        if no folder_path is given.
            Parameters:
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the maximum values in a window
                        band (int):
                            index of the band of the raster to read (passed to rast_reader.read).
                        rast_name (str):
                            name of raster. The resulting raster will be saved as rast_name_maxs.tif.
                        n (int):
                            Side length (in pixels) of the square window over which to compute maximum values for each pixel.
                        folder_path (str):
                            directory where to save raster. If none is given, then it saves the raster in a
                            temp folder in the cwd (the folder is created if it does not exist).
            Return: None
    """
    # read([band]) returns a (1, rows, cols) array; take the first plane so the result is always 2D,
    # even for rasters that are a single row or column wide
    rast = rast_reader.read([band])[0]

    # maximum in the nxn window; pixels at the border use scipy's default border handling
    maxs = maxf2D(rast, size=(n, n))

    # if needed, create temp directory to save files
    if not folder_path:
        folder_path = os.path.join(os.getcwd(), 'temp')
        os.makedirs(folder_path, exist_ok=True)

    # smallest dtype able to hold the computed values
    dtype = rasterio.dtypes.get_minimum_dtype(maxs)

    # save raster with the georeferencing (crs, transform) of the input raster
    fp = os.path.join(folder_path, rast_name + '_maxs.tif')
    sr.save_raster(maxs,
                   fp,
                   rast.shape,
                   1,
                   rast_reader.crs,
                   rast_reader.transform,
                   dtype)
    return

# ------------------------------------------------------------------------------

def avg_raster(rast_reader, band, rast_name, n, folder_path=''):
    """
        Creates a new single-band raster by replacing each pixel p in one band of the given raster R
        by the avg value in a nxn window centered at p.
        Pixels outside the raster count as 0 (mode='constant'), so averages next to the border
        are lower than the average of the valid pixels alone.
        The raster with average values is saved in a temp folder in the current working directory
        if no folder_path is given.
            Parameters:
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the average values in a window
                        band (int):
                            index of the band of the raster to read (passed to rast_reader.read).
                        rast_name (str):
                            name of raster. The resulting raster will be saved as rast_name_avgs.tif.
                        n (int):
                            Side length (in pixels) of the square window over which to compute average values for each pixel.
                        folder_path (str):
                            directory where to save raster. If none is given, then it saves the raster in a
                            temp folder in the cwd (the folder is created if it does not exist).
            Return: None
    """
    # read([band]) returns a (1, rows, cols) array; take the first plane so the result is always 2D,
    # even for rasters that are a single row or column wide
    rast = rast_reader.read([band])[0]

    # Sum over the nxn window, then divide by the window size.
    # The sum is requested as float64: by default scipy stores the result in the input dtype,
    # and the sum of n*n values of an integer band (e.g. uint8) does not fit in that dtype.
    w = np.ones((n, n))
    window_sums = conf2D(rast,
                         weights=w,
                         output=np.float64,
                         mode='constant')
    avgs = window_sums / (n * n)

    # if needed, create temp directory to save files
    if not folder_path:
        folder_path = os.path.join(os.getcwd(), 'temp')
        os.makedirs(folder_path, exist_ok=True)

    # parameters for saving
    fp = os.path.join(folder_path, rast_name + '_avgs.tif')
    dtype = rasterio.dtypes.get_minimum_dtype(avgs)

    # save raster with the georeferencing (crs, transform) of the input raster
    sr.save_raster(avgs,
                   fp,
                   rast.shape,
                   1,
                   rast_reader.crs,
                   rast_reader.transform,
                   dtype)
    return

In [3]:
# ***************************************************
# ************* NOTEBOOK VARIABLES ******************

# Ids of the NAIP scenes to process: 'itemid' column of temp/aoi_naip_itemids.csv (relative to the cwd)
itemids = pd.read_csv(
    os.path.join(os.getcwd(), 'temp', 'aoi_naip_itemids.csv')
)['itemid']

# Points to which the window statistics are added; the csv is read from the cwd
csv_name = 'test_set.csv'
df = pd.read_csv(
    os.path.join(os.getcwd(), csv_name)
)

# ***************************************************
# ***************************************************

In [4]:
pts_list = []

# Folder for the auxiliary rasters of the scene being processed.
# Created once, together with any missing parent folder.
folp = os.path.join(os.getcwd(), 'temp', 'aux_naip_rasters')
os.makedirs(folp, exist_ok=True)

# file-name prefix of each band -> band index in the NAIP scene
band_index = {'r_': 1, 'g_': 2, 'b_': 3, 'nir_': 4}
# file-name tag of each window statistic -> function that writes that raster
aux_writers = {'_maxs': max_raster, '_mins': min_raster, '_avgs': avg_raster}
# side length (in pixels) of the window used for the statistics
window_size = 3

for itemid in itemids:
    print('PROCESSING: ', itemid)

    # ***************************************************
    # Find points with this itemid; skip the scene before doing any raster work if there are none
    pts = df.loc[df['naip_id'] == itemid]
    if pts.empty:
        print('NO POINTS FOR: ', itemid)
        continue

    # paths of the auxiliary rasters written for this scene, keyed by (band prefix, statistic tag)
    aux_fps = {(band, tag): os.path.join(folp, band + itemid + tag + '.tif')
               for band in band_index
               for tag in aux_writers}

    # ------------------------------
    # Open NAIP scene
    item = sr.get_item_from_id(itemid)    # locate raster
    naip_rast_r = sr.get_raster_from_item(item)

    try:
        # ------------------------------
        # Calculate auxiliary spectral rasters (max, min, avg in window for every band)
        t0 = time.time()
        for band, idx in band_index.items():
            for tag, write_raster in aux_writers.items():
                write_raster(rast_reader=naip_rast_r,
                             band=idx,
                             rast_name=band + itemid,
                             n=window_size,
                             folder_path=folp)
        print('CREATED RASTERS (sec): ', time.time() - t0)

        # ------------------------------
        # Build point geometries in the CRS of the NAIP scene

        ## all points of a scene are taken to share the CRS of the first one
        crs = CRS.from_string(pts.pts_crs.iloc[0])
        #TO DO: change geodataframe_from_csv to use pandas df and not open file there
        if 'geometry' in pts.columns:           # rename geometry column if it exists
            pts = pts.rename(columns={'geometry': 'geometry_0'})

        ## vectorized construction of the points (no Python list of shapely objects)
        pts_col = gpd.GeoSeries(gpd.points_from_xy(pts['x'], pts['y']),
                                name='geometry',
                                crs=crs)
        pts_col = pts_col.to_crs(naip_rast_r.crs)

        # ***************************************************
        ## Sample max, min and avg of each band in the window around every point
        t0 = time.time()
        samples = []
        for band in band_index:
            for tag in aux_writers:
                # e.g. band 'r_' and tag '_maxs' give column 'r_max'
                col_name = band.rstrip('_') + tag[:-1]

                # close each auxiliary raster after sampling so it can be deleted afterwards
                with rasterio.open(aux_fps[(band, tag)]) as rast_r:
                    samples.append(sr.sample_raster_from_pts(pts_col, rast_r, [col_name]))

        print('SAMPLED RASTERS (sec): ', time.time() - t0)
    finally:
        # ***************************************************
        # Delete auxiliary NAIP rasters created for this scene (also when a step above failed)
        for fp in aux_fps.values():
            if os.path.exists(fp):
                os.remove(fp)
        # NOTE(review): assumes get_raster_from_item returns a rasterio dataset reader — confirm
        naip_rast_r.close()

    new_features = pd.concat(samples, axis=1)

    # ------------------------------
    ## Add all derived spectral data to pts dataframe
    ## (samples come back positionally aligned with pts_col, so they take over the index of pts)
    pts = pd.concat([pts, new_features.set_index(pts.index)], axis=1)

    # ***************************************************
    # create difference in window columns
    for band in band_index:
        pts[band + 'diff'] = pts[band + 'max'] - pts[band + 'min']

    # ***************************************************
    # Clean dataframe: keep and order the columns of interest
    pts = pts[['x', 'y', 'pts_crs', #  point location
             'aoi', 'naip_id', 'polygon_id',  # sampling info
             'r', 'r_max', 'r_min', 'r_diff', 'r_avg',
             'g', 'g_max', 'g_min', 'g_diff', 'g_avg',
             'b', 'b_max', 'b_min', 'b_diff', 'b_avg',
             'nir', 'nir_max', 'nir_min', 'nir_diff', 'nir_avg',
             'ndvi',     # spectral
             'year', 'month', 'day_in_year', # date
             'lidar', 'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar', # lidar
             'iceplant'
             ]]

    pts_list.append(pts)


all_pts = pd.concat(pts_list, axis=0)


PROCESSING:  ca_m_3412037_nw_10_060_20200607
CREATED RASTERS (sec):  66.56072163581848


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  45.24656414985657
PROCESSING:  ca_m_3412037_nw_10_060_20180913_20190208
CREATED RASTERS (sec):  58.3112006187439


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  21.044336080551147
PROCESSING:  ca_m_3412037_nw_10_1_20140603_20141030
CREATED RASTERS (sec):  22.75282645225525


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  28.578367233276367
PROCESSING:  ca_m_3412037_nw_10_1_20120518_20120730
CREATED RASTERS (sec):  22.31207013130188


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  20.55971050262451
PROCESSING:  ca_m_3412039_nw_10_060_20200522
CREATED RASTERS (sec):  62.47792387008667


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  12.514458894729614
PROCESSING:  ca_m_3412039_nw_10_060_20180724_20190209
CREATED RASTERS (sec):  56.60197186470032


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  12.36085295677185
PROCESSING:  ca_m_3412039_nw_10_.6_20160616_20161004
CREATED RASTERS (sec):  59.45795440673828


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  11.275569677352905
PROCESSING:  ca_m_3412039_nw_10_1_20140603_20141030
CREATED RASTERS (sec):  22.571632862091064


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  7.794757604598999
PROCESSING:  ca_m_3412039_nw_10_1_20120518_20120730
CREATED RASTERS (sec):  22.64870285987854


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  8.85718297958374
PROCESSING:  ca_m_3411934_sw_11_060_20200521
CREATED RASTERS (sec):  60.241153717041016


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  20.94252324104309
PROCESSING:  ca_m_3411934_sw_11_060_20180722_20190209
CREATED RASTERS (sec):  61.069207429885864


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  34.64697313308716
PROCESSING:  ca_m_3411934_sw_11_.6_20160713_20161004
CREATED RASTERS (sec):  58.82361912727356


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  15.606934309005737
PROCESSING:  ca_m_3411934_sw_11_1_20140601_20141030
CREATED RASTERS (sec):  25.830581665039062


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  10.508309841156006
PROCESSING:  ca_m_3411934_sw_11_1_20120505_20120730
CREATED RASTERS (sec):  25.25995659828186


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  12.695674180984497
PROCESSING:  ca_m_3411936_se_11_060_20200521
CREATED RASTERS (sec):  60.02749824523926


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  22.660482168197632
PROCESSING:  ca_m_3411936_se_11_060_20180724_20190209
CREATED RASTERS (sec):  59.04326009750366


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  21.285736560821533
PROCESSING:  ca_m_3411936_se_11_.6_20160713_20161004
CREATED RASTERS (sec):  59.754981994628906


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  18.803821325302124
PROCESSING:  ca_m_3411936_se_11_1_20140901_20141030
CREATED RASTERS (sec):  23.806742429733276


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  10.934678554534912
PROCESSING:  ca_m_3411936_se_11_1_20120505_20120730
CREATED RASTERS (sec):  22.341187953948975


  arr = construct_1d_object_array_from_listlike(values)


SAMPLED RASTERS (sec):  12.440812587738037


In [5]:
# ------------------------------
## Save points with added spectral data
## (written to the cwd, prefixing the name of the input csv; the index is not written)
fp = os.path.join(os.getcwd(), f'spectral_window_{csv_name}')
all_pts.to_csv(path_or_buf=fp, index=False)

# ------------------------------
## Delete original csv files (points without LIDAR) -- currently disabled
# if delete_pts == True:
#     os.remove(pts_fp)