In [1]:
import os
import time
import pandas as pd
import numpy as np

import rasterio

import dask_gateway
import dask.array as da

from joblib import load

import raster_to_features as rm

In [2]:
#scene_ids = pd.read_csv(os.path.join(os.getcwd(),'temp','coastal_scenes_ids_2020.csv')).itemid.loc[19:26]
itemid = 'ca_m_3411934_sw_11_060_20200521'

In [3]:
# open pre-trained random forest classifier
rfc = load('spectral_rfc.joblib') 
print('loaded model')

loaded model


In [6]:
# initialize DASK cluster
cluster = dask_gateway.GatewayCluster()
cluster.scale(30)

client = cluster.get_client()
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.10fecebbafc34a32a4585a67692e727a/status,


In [3]:
#for itemid in scene_ids:

# ---------------------------------------
# open NAIP scene
t_alpha = time.time()
raster = rm.rioxr_from_itemid(itemid)

In [5]:
raster

In [4]:
rm.raster_as_df(raster.to_numpy(), ['r','g','b','nir'])

Unnamed: 0,r,g,b,nir
0,204,196,189,175
1,208,199,192,181
2,183,181,177,160
3,167,170,164,140
4,166,171,163,139
...,...,...,...,...
132249995,28,46,59,12
132249996,27,50,63,13
132249997,26,47,59,9
132249998,29,56,60,11


In [3]:
# find vegetation pixels to go into model
# keep ndices of water and low-ndvi pixels
# adds ndvi and ndwi features for each pixel
t0 = time.time()
is_veg, water_index, not_veg_index = rm.add_spectral_features(df = rm.raster_as_df(raster.to_numpy(), 
                                                                                   ['r','g','b','nir']), #names of bands
                                                           ndwi_thresh = 0.3, 
                                                           ndvi_thresh = 0.05) 
# select features
is_veg.drop('ndwi', axis=1, inplace=True)
is_veg = rm.add_date_features(is_veg, raster.datetime)
print('assembled pixels dataframe with features\n   time taken to assemble: ', time.time() - t0,' s')

assembled pixels dataframe with features
   time taken to assemble:  18.74682641029358  s


In [4]:
is_veg

Unnamed: 0,r,g,b,nir,ndvi,year,month,day_in_year
83,79,101,86,147,0.300885,2020,5,142
84,71,98,73,158,0.379913,2020,5,142
85,95,118,82,180,0.309091,2020,5,142
86,105,125,93,188,0.283276,2020,5,142
87,155,164,138,185,0.088235,2020,5,142
...,...,...,...,...,...,...,...,...
131964172,32,49,54,38,0.085714,2020,5,142
131974752,33,45,53,39,0.083333,2020,5,142
132047704,50,92,76,66,0.137931,2020,5,142
132174895,78,121,130,88,0.060241,2020,5,142


In [5]:
np.array(is_veg)

array([[  79.,  101.,   86., ..., 2020.,    5.,  142.],
       [  71.,   98.,   73., ..., 2020.,    5.,  142.],
       [  95.,  118.,   82., ..., 2020.,    5.,  142.],
       ...,
       [  50.,   92.,   76., ..., 2020.,    5.,  142.],
       [  78.,  121.,  130., ..., 2020.,    5.,  142.],
       [  68.,  100.,  106., ..., 2020.,    5.,  142.]])

In [7]:
# ---------------------------------------
# convert into dask.array and predict using model
da_pixels = da.from_array(np.array(is_veg), chunks=728802)
da_pixels

Unnamed: 0,Array,Chunk
Bytes,1.15 GiB,44.48 MiB
Shape,"(19293593, 8)","(728802, 8)"
Count,27 Tasks,27 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.15 GiB 44.48 MiB Shape (19293593, 8) (728802, 8) Count 27 Tasks 27 Chunks Type float64 numpy.ndarray",8  19293593,

Unnamed: 0,Array,Chunk
Bytes,1.15 GiB,44.48 MiB
Shape,"(19293593, 8)","(728802, 8)"
Count,27 Tasks,27 Chunks
Type,float64,numpy.ndarray


In [9]:
scene_preds = rfc.predict(da_pixels)    

In [5]:
    t0 = time.time()
    preds = scene_preds.compute()
    print('finished predicting\n   time taken to predict: ', time.time() - t0,' s')

In [5]:
    # ---------------------------------------
    # recover pixel indices for iceplant classifications
    preds_df = pd.DataFrame(preds, 
                         columns=['is_iceplant'], 
                         index = is_veg.index)
    is_iceplant_index = preds_df[preds_df.is_iceplant == 1].index.to_numpy()
    non_iceplant_index = preds_df[preds_df.is_iceplant == 0].index.to_numpy()

    # ---------------------------------------
    # reconstruct indices into image
    indices = [non_iceplant_index,
               is_iceplant_index, 
               not_veg_index,
               water_index]
    values = [0,    # values assigned to pixels from each index
              1,
              2,
              3]
    t0 = time.time()
    reconstruct = rm.indices_to_image(raster.shape[1], raster.shape[2], indices, values, back_value=100)
    print('reconstructed image\n   time taken to reconstruct: ', time.time() - t0,' s')

    # ---------------------------------------
    # save raster 
    filename = 'S_preds_' + itemid +'.tif'

    with rasterio.open(
        os.path.join(os.getcwd(),'temp',filename),  # file path
        'w',           # w = write
        driver = 'GTiff', # format
        height = reconstruct.shape[0], 
        width = reconstruct.shape[1],
        count = 1,  # number of raster bands in the dataset
        dtype = rasterio.uint8,
        crs = raster.rio.crs,
        transform = raster.rio.transform(),
    ) as dst:
        dst.write(reconstruct.astype(rasterio.uint8), 1)
    # ---------------------------------------
    print('total tme:', time.time() - t_alpha)
    print('FINISHED: ', itemid , '\n')