In [1]:
import os
import time
import pandas as pd
import numpy as np

import rasterio
import rioxarray as rioxr
import geopandas as gpd

import dask_gateway
import dask.array as da

# custom modules
import data_sampling_workflow.sample_rasters as sr
#import iceplant_detection_functions as ipf
import refactoring_modules as rm

In [2]:
# **************************************************************
# ********* SPECIFY ITEMID AND LIDAR YEAR TO MATCH HERE ********

itemid = 'ca_m_3412039_nw_10_060_20200522'
filename = 'LS_preds_'+itemid + '.tif'

# **************************************************************
# **************************************************************

In [3]:
# open pre-trained random forest classifier
#rfc = load('spectral_rfc.joblib') 
#print('loaded model')

# ---------------------------------------
# open NAIP scene
raster = rm.rioxr_from_itemid(itemid)
year = raster.datetime.year


In [4]:
#year = ### GET YEAR FOR LIDAR AND CHECKPOINT

# ---------------------------------------
# find vegetation pixels to go into model
# keep ndices of water and low-ndvi pixels
# adds ndvi and ndwi features for each pixel
t0 = time.time()
is_veg, water_index, not_veg_index = rm.add_spectral_features(df = rm.raster_as_df(raster.to_numpy(), 
                                                                                   ['r','g','b','nir']), #names of bands
                                                           ndwi_thresh = 0.3, 
                                                           ndvi_thresh = 0.05) 
# select features
is_veg.drop('ndwi', axis=1, inplace=True)
is_veg = rm.add_date_features(is_veg, raster.datetime)
print('assembled pixels dataframe with features\n   time taken to assemble: ', time.time() - t0,' s')

assembled pixels dataframe with features
   time taken to assemble:  26.73867130279541  s


In [5]:
t0 = time.time()
# Create auxiliary LIDAR files to sample from

lidar_rast_r = rasterio.open(sr.path_to_lidar(year))
rast_name = 'SB_canopy_height_'+str(year) # give a name to read raster

sr.min_raster(rast_reader = lidar_rast_r, rast_name = rast_name, n=3)  # save aux rasters in temp folder
sr.max_raster(rast_reader = lidar_rast_r, rast_name = rast_name, n=3)
sr.avg_raster(rast_reader = lidar_rast_r, rast_name = rast_name, n=3)

print('time to make auxiliary rasters: ', (time.time()-t0), 'seconds')

time to make auxiliary rasters:  9.629270553588867 seconds


In [12]:
# Resample lidar layers to match NAIP scene resolution and extent

lidar_fps = [sr.path_to_lidar(year)]
for tag in ['_avgs', '_maxs', '_mins']:
    lidar_fps.append(os.path.join(os.getcwd(),'temp',rast_name+tag+'.tif'))

lidar_values = []
for fp in lidar_fps:
    match = sr.open_and_match(fp, raster)
    match_vector = match.to_numpy().reshape(match.shape[0]*match.shape[1])
    lidar_values.append(match_vector)

    
    # TO DO!!!! SUPER IMPORTANT::: change the order of lidar features TO THIS ONE !!!!
df_lidar = pd.DataFrame(dict(zip(['lidar','avg_lidar', 'max_lidar', 'min_lidar'], lidar_values)))
df_lidar.assign(min_max_diff =  df_lidar.max_lidar - df_lidar.min_lidar)


for fp in lidar_fps:
    os.remove(fp)

df_lidar

In [18]:
# add LIDAR features to vegetation dataframe

# FEATURES MUST BE IN THIS ORDER TO MATCH THE MODEL:
# ['r', 'g', 'b', 'nir', 'ndvi', 
#  'year', 'month', 'day_in_year',
#  'lidar', 'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar'])

scene_features = pd.concat([is_veg, df_lidar.iloc[is_veg.index]], axis=1)
scene_features

Unnamed: 0,r,g,b,nir,ndvi,year,month,day_in_year,lidar,avg_lidar,max_lidar,min_lidar
0,64,78,75,84,0.135135,2020,5,143,2,2.000000,3,1
1,47,62,59,78,0.248000,2020,5,143,2,2.000000,3,1
2,51,59,63,76,0.196850,2020,5,143,2,2.000000,3,1
3,63,70,72,81,0.125000,2020,5,143,2,2.000000,3,1
4,67,74,77,81,0.094595,2020,5,143,2,2.000000,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
131609247,110,190,151,140,0.120000,2020,5,143,-9999,-2717.222168,-9999,-9999
131774176,20,33,49,33,0.245283,2020,5,143,-9999,-2717.222168,-9999,-9999
131774177,19,31,50,33,0.269231,2020,5,143,-9999,-2717.222168,-9999,-9999
131784736,19,33,47,29,0.208333,2020,5,143,-9999,-2717.222168,-9999,-9999
