In [1]:
import os

import numpy as np
import pandas as pd
import planetary_computer as pc
import rasterio
import rasterio.warp

from scipy.ndimage import maximum_filter as maxf2D
from scipy.ndimage import minimum_filter as minf2D
from scipy.ndimage import convolve as conf2D

import data_sampling_workflow.sample_rasters as sr
import raster_to_features as rf

'/home/jovyan/msai4earth-esa/iceplant_detection/data_sampling_workflow'

In [None]:
# *********************************************************************

def min_raster(rast_reader, band=1, *, rast_name, n, folder_path=''):
    """
        Creates a new raster by replacing each pixel p in one band of the given raster R by the
        minimum value in a nxn window centered at p.
        The raster with minimum values is saved in a temp folder in the current working directory if no folder_path is given.
            Parameters:
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the minimum values in a window
                        band (int):
                            1-based index of the band to read from rast_reader. Default is 1.
                        rast_name (str, keyword-only):
                            name of raster. The resulting raster will be saved as rast_name_mins.tif.
                        n (int, keyword-only):
                            Side length (in pixels) of the square window over which to compute minimum values for each pixel.
                        folder_path (str, keyword-only):
                            directory where to save raster. If none is given, then it saves the raster in a temp folder in the cwd.
            Return: None
    """
    # rast_name and n are keyword-only: they have no defaults but come after
    # `band=1`, which is a SyntaxError for ordinary positional parameters.

    # Reading a single band index returns a 2D (rows, cols) array directly,
    # so no squeeze is needed (squeeze would also collapse 1-pixel-wide rasters).
    rast = rast_reader.read(band)
    mins = minf2D(rast, size=(n, n))    # minimum over the nxn window around each pixel

    if not folder_path:
        # Default output location: <cwd>/temp, created on demand.
        folder_path = os.path.join(os.getcwd(), 'temp')
        os.makedirs(folder_path, exist_ok=True)

    dtype = rasterio.dtypes.get_minimum_dtype(mins)  # smallest dtype able to hold the values

    fp = os.path.join(folder_path, rast_name + '_mins.tif')
    # Single-band output sharing the georeferencing (crs, transform) of the input raster.
    sr.save_raster(mins,
                   fp,
                   rast.shape,
                   1,
                   rast_reader.crs,
                   rast_reader.transform,
                   dtype)

# ------------------------------------------------------------------------------

def max_raster(rast_reader, band=1, *, rast_name, n, folder_path=''):
    """
        Creates a new raster by replacing each pixel p in one band of the given raster R by the
        max value in a nxn window centered at p.
        The raster with maximum values is saved in a temp folder in the current working directory if no folder_path is given.
            Parameters:
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the maximum values in a window
                        band (int):
                            1-based index of the band to read from rast_reader. Default is 1.
                        rast_name (str, keyword-only):
                            name of raster. The resulting raster will be saved as rast_name_maxs.tif.
                        n (int, keyword-only):
                            Side length (in pixels) of the square window over which to compute maximum values for each pixel.
                        folder_path (str, keyword-only):
                            directory where to save raster. If none is given, then it saves the raster in a temp folder in the cwd.
            Return: None
    """
    # rast_name and n are keyword-only: they have no defaults but come after
    # `band=1`, which is a SyntaxError for ordinary positional parameters.

    # Reading a single band index returns a 2D (rows, cols) array directly,
    # so no squeeze is needed (squeeze would also collapse 1-pixel-wide rasters).
    rast = rast_reader.read(band)
    maxs = maxf2D(rast, size=(n, n))    # maximum over the nxn window around each pixel

    if not folder_path:
        # Default output location: <cwd>/temp, created on demand.
        folder_path = os.path.join(os.getcwd(), 'temp')
        os.makedirs(folder_path, exist_ok=True)

    dtype = rasterio.dtypes.get_minimum_dtype(maxs)  # smallest dtype able to hold the values

    fp = os.path.join(folder_path, rast_name + '_maxs.tif')
    # Single-band output sharing the georeferencing (crs, transform) of the input raster.
    sr.save_raster(maxs,
                   fp,
                   rast.shape,
                   1,
                   rast_reader.crs,
                   rast_reader.transform,
                   dtype)

# ------------------------------------------------------------------------------

def avg_raster(rast_reader, band=1, *, rast_name, n, folder_path=''):
    """
        Creates a new raster by replacing each pixel p in one band of the given raster R by the
        avg value in a nxn window centered at p.
        The raster with average values is saved in a temp folder in the current working directory if no folder_path is given.
        Pixels outside the raster are treated as 0 (mode='constant'), so averages along the
        raster border are pulled towards 0.
            Parameters:
                        rast_reader (rasterio.io.DatasetReader):
                            reader to the raster from which to compute the average values in a window
                        band (int):
                            1-based index of the band to read from rast_reader. Default is 1.
                        rast_name (str, keyword-only):
                            name of raster. The resulting raster will be saved as rast_name_avgs.tif.
                        n (int, keyword-only):
                            Side length (in pixels) of the square window over which to compute average values for each pixel.
                        folder_path (str, keyword-only):
                            directory where to save raster. If none is given, then it saves the raster in a temp folder in the cwd.
            Return: None
    """
    # rast_name and n are keyword-only: they have no defaults but come after
    # `band=1`, which is a SyntaxError for ordinary positional parameters.

    # Cast to float before convolving: scipy.ndimage.convolve returns an array
    # of the input dtype by default, so the nxn window sum would overflow for
    # integer rasters (e.g. uint8 imagery) before the division by n*n.
    rast = rast_reader.read(band).astype('float64')

    window = np.ones((n, n))
    avgs = conf2D(rast, weights=window, mode='constant') / (n * n)

    if not folder_path:
        # Default output location: <cwd>/temp, created on demand.
        folder_path = os.path.join(os.getcwd(), 'temp')
        os.makedirs(folder_path, exist_ok=True)

    fp = os.path.join(folder_path, rast_name + '_avgs.tif')
    dtype = rasterio.dtypes.get_minimum_dtype(avgs)  # smallest dtype able to hold the values

    # Single-band output sharing the georeferencing (crs, transform) of the input raster.
    sr.save_raster(avgs,
                   fp,
                   rast.shape,
                   1,
                   rast_reader.crs,
                   rast_reader.transform,
                   dtype)

In [None]:
# ===================================================
# ============== NOTEBOOK VARIABLES =================

# NAIP item ids to process.
# (original note: "open csv with all itemids" -- presumably a TODO to load the full list from a csv)
itemids = ['ca_m_3412040_ne_10_060_20200522']

# Points table, read from test_set.csv in the current working directory.
# (original note: "add train set" -- presumably a TODO to also load the train set)
all_pts = pd.read_csv(
    os.path.join(os.getcwd(), 'test_set.csv')
)

# ===================================================
# ===================================================

In [None]:

# For every NAIP item: build 3x3 min/max/avg rasters for each spectral band,
# sample them at the points belonging to that item, and save the augmented
# points table as a csv in <cwd>/temp.

# Band prefix -> 1-based band index in the NAIP image (same pairing as the
# original per-band calls: R=1, G=2, B=3, NIR=4).
band_indices = {'R_': 1, 'G_': 2, 'B_': 3, 'NIR_': 4}
window_tags = ['maxs', 'mins', 'avgs']   # suffixes written by max_raster/min_raster/avg_raster
window_size = 3                          # side length (pixels) of the moving window

# Columns holding the point coordinates.
# NOTE(review): 'x'/'y' are taken from the commented-out call
# sr.geodataframe_from_csv(pts_fp, 'x', 'y', crs) -- confirm against test_set.csv.
lon_label, lat_label = 'x', 'y'

# Auxiliary rasters and the output csv all live in <cwd>/temp.
temp_dir = os.path.join(os.getcwd(), 'temp')
os.makedirs(temp_dir, exist_ok=True)

for itemid in itemids:

    # ------------------------------
    # Filter points with this itemid.
    # The index is reset so positional access and the later column-wise concat
    # line up row by row (the filtered frame otherwise keeps all_pts' labels).
    pts = all_pts.loc[all_pts['itemid'] == itemid].reset_index(drop=True)
    if pts.empty:
        print('no points found for ' + itemid)
        continue

    # ------------------------------
    # Open NAIP scene and calculate auxiliary spectral rasters
    item = sr.get_item_from_id(itemid)    # locate raster
    href = pc.sign(item.assets["image"].href)

    aux_fps = []   # file paths to auxiliary spectral rasters (deleted at the end)
    with rasterio.open(href) as naip_r:
        naip_crs = naip_r.crs
        for band, band_index in band_indices.items():
            rast_name = band + itemid
            min_raster(rast_reader=naip_r, band=band_index, rast_name=rast_name,
                       n=window_size, folder_path=temp_dir)
            max_raster(rast_reader=naip_r, band=band_index, rast_name=rast_name,
                       n=window_size, folder_path=temp_dir)
            avg_raster(rast_reader=naip_r, band=band_index, rast_name=rast_name,
                       n=window_size, folder_path=temp_dir)
            print('finished ' + band[:-1] + ' aux rasters')

            for tag in window_tags:
                aux_fps.append(os.path.join(temp_dir, rast_name + '_' + tag + '.tif'))

    # ------------------------------
    # Reproject point coordinates to the CRS of the NAIP scene.
    # The CRS string of the first point is used for all points of this item
    # (assumes every point of an item shares one CRS -- TODO confirm).
    pts_crs = rasterio.crs.CRS.from_string(pts['pts_crs'].iloc[0])
    xs, ys = rasterio.warp.transform(pts_crs,
                                     naip_crs,
                                     pts[lon_label].tolist(),
                                     pts[lat_label].tolist())
    coords = list(zip(xs, ys))

    # ------------------------------
    # Sample max, min and avg of each band around every point.
    # Columns are named <band>_<tag>, e.g. R_maxs, NIR_avgs.
    samples = {}
    for band in band_indices:
        for tag in window_tags:
            fp = os.path.join(temp_dir, band + itemid + '_' + tag + '.tif')
            # context manager closes the reader so the file can be removed below
            with rasterio.open(fp) as rast_r:
                # each sample is an array with one value per band; aux rasters have one band
                samples[band + tag] = [values[0] for values in rast_r.sample(coords)]

    # ------------------------------
    # Add all window features to pts dataframe
    new_features = pd.DataFrame(samples, index=pts.index)
    pts = pd.concat([pts, new_features], axis=1)
    for band in band_indices:
        # max >= min within the same window, so the difference cannot underflow
        pts[band + 'diff'] = pts[band + 'maxs'] - pts[band + 'mins']

    # ------------------------------
    # Save points with added spectral data.
    # No geometry column is created here, so there is nothing to rename or drop.
    # The file is named after the item id; the original name used `aoi` and
    # `year`, which are not defined anywhere in this notebook.
    pts_out_fp = os.path.join(temp_dir, itemid + '_pts_spectral_aux.csv')
    pts.to_csv(pts_out_fp, index=False)

    # NOTE(review): the original also removed `pts_fp` when `delete_pts` was True.
    # Neither name is defined in this notebook and the points come from the
    # shared test_set.csv, so that step is intentionally not performed.

    # ------------------------------
    # Delete auxiliary rasters created for this item
    for fp in aux_fps:
        os.remove(fp)