In [1]:
import os
import time
import pandas as pd
import numpy as np

import rasterio

import dask_gateway
import dask.array as da

from joblib import load

import refactoring_modules as rm

In [2]:
# --------------------------------------------------------------
# Run configuration: id of the NAIP scene to process and the name
# of the GeoTIFF file that will hold its predictions.
# --------------------------------------------------------------

itemid = 'ca_m_3412039_nw_10_060_20200522'
filename = f'S_preds_{itemid}.tif'

In [3]:
# Start a Dask Gateway cluster and connect a client to it.
# TODO: maybe pass the dask client as a parameter to the next python script
#       so it can be reused over multiple NAIP scenes.
n_workers = 30  # number of workers to scale the cluster to
cluster = dask_gateway.GatewayCluster()
client = cluster.get_client()
cluster.scale(n_workers)

# keep `client` as the last expression so the cell displays its summary
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.fd8146005d1d44b39145b598772d4b8a/status,


In [4]:
# Load the pre-trained random forest classifier from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
model_path = 'spectral_rfc.joblib'
rfc = load(model_path)
print('loaded model')

# ---------------------------------------
# open the NAIP scene for the configured item id
raster = rm.rioxr_from_itemid(itemid)

# Build the per-pixel feature table:
#   - is_veg: vegetation pixels that go into the model
#   - water_index / not_veg_index: indices of water and low-NDVI pixels
#   - NDVI and NDWI features are added for each pixel
band_names = ['r', 'g', 'b', 'nir']
t0 = time.time()
is_veg, water_index, not_veg_index = rm.add_spectral_features(
    df=rm.raster_as_df(raster.to_numpy(), band_names),
    ndwi_thresh=0.3,
    ndvi_thresh=0.05,
)

# select features: drop NDWI so it is not passed to the model,
# then add the date features taken from the scene's datetime
is_veg.drop(columns='ndwi', inplace=True)
is_veg = rm.add_date_features(is_veg, raster.datetime)
print('assembled pixels dataframe with features\n   time taken to assemble: ', time.time() - t0,' s')

# ---------------------------------------
# Wrap the feature table in a chunked dask array and predict with the model.
chunk_size = 728802
da_pixels = da.from_array(np.array(is_veg), chunks=chunk_size)
scene_preds = rfc.predict(da_pixels)

# NOTE(review): the timer starts after predict(); this assumes predict() is
# lazy and the actual work happens in compute() -- confirm.
t0 = time.time()
preds = scene_preds.compute()
elapsed = time.time() - t0
print('finished predicting\n   time taken to predict: ', elapsed,' s')

# ---------------------------------------
# Attach each prediction to its pixel index (same index as is_veg), then
# split the pixel indices by predicted class (1 = iceplant, 0 = not iceplant).
preds_df = pd.DataFrame(preds, index=is_veg.index, columns=['is_iceplant'])
is_iceplant_index = preds_df.loc[preds_df['is_iceplant'].eq(1)].index.to_numpy()
non_iceplant_index = preds_df.loc[preds_df['is_iceplant'].eq(0)].index.to_numpy()

# ---------------------------------------
# Rebuild the classified image from the groups of pixel indices.
# Each pair is (pixel indices, value assigned to those pixels in the image).
class_groups = [
    (non_iceplant_index, 0),
    (is_iceplant_index, 1),
    (not_veg_index, 2),
    (water_index, 3),
]
indices = [group for group, _ in class_groups]
values = [value for _, value in class_groups]

# NOTE(review): back_value is presumably the fill for pixels that belong to
# none of the groups -- confirm against rm.indices_to_image.
t0 = time.time()
reconstruct = rm.indices_to_image(
    raster.shape[1],
    raster.shape[2],
    indices,
    values,
    back_value=100,
)
print('reconstructed image\n   time taken to reconstruct: ', time.time() - t0,' s')

# ---------------------------------------
# save raster
# Writes the reconstructed image as a single-band uint8 GeoTIFF at
# <current working directory>/temp/<filename>, reusing the CRS and
# transform of the source NAIP scene.
out_dir = os.path.join(os.getcwd(), 'temp')

# Opening a file for writing fails if its parent directory is missing, so
# make sure the output folder exists (no-op when it is already there).
os.makedirs(out_dir, exist_ok=True)

with rasterio.open(
    os.path.join(out_dir, filename),  # file path
    'w',                              # w = write
    driver='GTiff',                   # format
    height=reconstruct.shape[0],
    width=reconstruct.shape[1],
    count=1,                          # number of raster bands in the dataset
    dtype=rasterio.uint8,
    crs=raster.rio.crs,
    transform=raster.rio.transform(),
) as dst:
    # cast to uint8 to match the dtype declared for the dataset; write to band 1
    dst.write(reconstruct.astype(rasterio.uint8), 1)

loaded model
assembled pixels dataframe with features
   time taken to assemble:  24.993847846984863  s
finished predicting
   time taken to predict:  152.48782396316528  s
reconstructed image
   time taken to reconstruct:  4.890034914016724  s
