In [1]:
import os
import time
import pandas as pd
import numpy as np

import rasterio

import dask_gateway
import dask.array as da

from joblib import load

import raster_to_features as rf

import geopandas as gpd
from shapely.geometry import mapping

In [2]:
# Read the Santa Barbara coastal buffer shapefile and turn every geometry into
# a GeoJSON-like mapping, the form later passed to `rio.clip`.
fp = os.path.join(
    os.getcwd(),
    'separating_naip_flights',
    'SB_coastal_buffer',
    'SB_coastal_buffer.shp',
)
coast = gpd.read_file(fp)
coast_geo = coast.geometry.apply(mapping)

In [3]:
# Load the pre-trained random forest classifier from disk.
# NOTE(review): joblib files are pickle-based, so loading one executes
# arbitrary code; only load model files that come from a trusted source.
model_path = 'spectral_rfc.joblib'
rfc = load(model_path)
print('loaded model')

loaded model


In [4]:
# Start a Dask Gateway cluster, ask for a fixed number of workers and
# connect a client to it.
n_workers = 30
cluster = dask_gateway.GatewayCluster()
cluster.scale(n_workers)

client = cluster.get_client()
client  # last expression: show the client summary (includes the dashboard link)

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.0a13b51c95c346ae96c88121d648fc65/status,


In [5]:
# Run configuration
year = 2020          # only scenes whose 'year' column equals this value are processed
save_rasters = True  # when True, each scene's classification is written to a GeoTIFF

In [6]:
# Read the table of coastal NAIP scenes and keep the item ids for `year`.
scene_ids_csv = os.path.join(os.getcwd(), 'separating_naip_flights', 'coastal_scenes_ids.csv')
all_scenes = pd.read_csv(scene_ids_csv)
scenes_in_year = all_scenes.loc[all_scenes['year'] == year]

# reset_index() renumbers the rows 0..n-1; only the itemid column is kept
scene_ids = scenes_in_year.reset_index().itemid
scene_ids

0     ca_m_3412037_nw_10_060_20200607
1     ca_m_3412037_ne_10_060_20200607
2     ca_m_3412029_sw_10_060_20200607
3     ca_m_3412004_sw_10_060_20200607
4     ca_m_3412003_se_10_060_20200607
5     ca_m_3412003_ne_10_060_20200607
6     ca_m_3412038_nw_10_060_20200523
7     ca_m_3412040_nw_10_060_20200522
8     ca_m_3412040_ne_10_060_20200522
9     ca_m_3412039_nw_10_060_20200522
10    ca_m_3412039_ne_10_060_20200522
11    ca_m_3412038_ne_10_060_20200522
12    ca_m_3411933_nw_11_060_20200522
13    ca_m_3411933_ne_11_060_20200522
14    ca_m_3411945_nw_11_060_20200521
15    ca_m_3411937_sw_11_060_20200521
16    ca_m_3411936_sw_11_060_20200521
17    ca_m_3411936_se_11_060_20200521
18    ca_m_3411935_sw_11_060_20200521
19    ca_m_3411935_se_11_060_20200521
20    ca_m_3411934_sw_11_060_20200521
21    ca_m_3411934_se_11_060_20200521
22    ca_m_3411933_sw_11_060_20200521
23    ca_m_3411933_se_11_060_20200521
Name: itemid, dtype: object

In [None]:
# Classify every coastal NAIP scene listed in `scene_ids`.
# The three lists below receive exactly one entry per scene, in iteration
# order, so they line up with `scene_ids` when assembled into a dataframe.
times = []      # total seconds spent on the scene (0 when the scene is skipped)
processed = []  # 'Y' when the model ran on the scene, 'N' when it was skipped
reason = []     # short explanation matching the `processed` flag

# Create the output folder up front: opening a GeoTIFF for writing does not
# create missing parent directories, so without this a fresh checkout would
# fail on the first scene only after clipping and prediction were paid for.
raster_dir = os.path.join(os.getcwd(), 'temp')
if save_rasters:
    os.makedirs(raster_dir, exist_ok=True)

# values assigned to pixels from each index group, in the order:
# non-iceplant, iceplant, not-vegetation (low ndvi), water
values = [0, 1, 2, 3]

for itemid in scene_ids:

    # ---------------------------------------
    # open NAIP scene and clip to coast
    t_alpha = time.time()
    t0 = time.time()

    # Fetch the scene once and remember its acquisition datetime now, instead
    # of opening the scene a second time later just to read that attribute.
    # NOTE(review): assumes every scene returned by rf.rioxr_from_itemid
    # exposes `.datetime` - confirm against the helper.
    scene = rf.rioxr_from_itemid(itemid)
    scene_datetime = scene.datetime
    raster = scene.rio.clip(coast_geo, coast.crs)
    del scene  # the unclipped scene is not needed any more
    print('clipped raster', time.time() - t0,' s')

    df = rf.raster_as_df(raster.to_numpy(), ['r','g','b','nir'])
    # keep pixels where at least one band is non-zero
    df = df.loc[(df['nir'] != 0) | (df['r'] != 0) | (df['g'] != 0) | (df['b'] != 0)]
    print('selected non-zero pixels')

    if df.shape[0] == 0:
        processed.append('N')
        reason.append('no data in intersection')
        times.append(0)
        print('no data at intersection of scene with coastal buffer')
        print('FINISHED: ', itemid , '\n')
        continue

    # find vegetation pixels to go into model
    # keep indices of water and low-ndvi pixels
    # adds ndvi and ndwi features for each pixel
    t0 = time.time()
    is_veg, water_index, not_veg_index = rf.add_spectral_features(df,
                                                                  ndwi_thresh = 0.3,
                                                                  ndvi_thresh = 0.05)
    if is_veg.shape[0] == 0:
        print('no vegetation pixels at intersection of scene data with coastal buffer')
        processed.append('N')
        # NOTE(review): the misspelling is kept on purpose - this string ends
        # up in the results CSV and downstream code may match on it.
        reason.append('no vegeatation in intersection')
        times.append(0)
        print('FINISHED: ', itemid , '\n')
        continue

    # select features (non-inplace drop: never mutate the helper's frame)
    is_veg = is_veg.drop(columns='ndwi')
    is_veg = rf.add_date_features(is_veg, scene_datetime)
    print('assembled pixels dataframe with features\n   time taken to assemble: ', time.time() - t0,' s')

    # ---------------------------------------
    # convert into dask.array and predict using model
    da_pixels = da.from_array(np.array(is_veg), chunks=728802)

    scene_preds = rfc.predict(da_pixels)

    t0 = time.time()
    preds = scene_preds.compute()
    print('finished predicting\n   time taken to predict: ', time.time() - t0,' s')

    # ---------------------------------------
    # recover pixel indices for iceplant classifications
    preds_df = pd.DataFrame(preds,
                            columns=['is_iceplant'],
                            index = is_veg.index)
    is_iceplant_index = preds_df[preds_df.is_iceplant == 1].index.to_numpy()
    non_iceplant_index = preds_df[preds_df.is_iceplant == 0].index.to_numpy()

    # ---------------------------------------
    # reconstruct indices into image
    indices = [non_iceplant_index,
               is_iceplant_index,
               not_veg_index,
               water_index]
    t0 = time.time()
    reconstruct = rf.indices_to_image(raster.shape[1], raster.shape[2], indices, values, back_value=100)
    print('reconstructed image\n   time taken to reconstruct: ', time.time() - t0,' s')

    # ---------------------------------------
    # save raster
    if save_rasters:
        filename = 'S_clip_preds_' + itemid +'.tif'

        with rasterio.open(
            os.path.join(raster_dir, filename),  # file path
            'w',           # w = write
            driver = 'GTiff', # format
            height = reconstruct.shape[0],
            width = reconstruct.shape[1],
            count = 1,  # number of raster bands in the dataset
            dtype = rasterio.uint8,
            crs = raster.rio.crs,
            transform = raster.rio.transform(),
        ) as dst:
            dst.write(reconstruct.astype(rasterio.uint8), 1)

    # ---------------------------------------
    # record the outcome only once the scene is completely done, so the three
    # bookkeeping lists always have the same length
    total = time.time() - t_alpha
    processed.append('Y')
    reason.append('processed')
    times.append(total)
    print('total time:', total)
    print('FINISHED: ', itemid , '\n')

clipped raster 21.873346090316772  s
selected non-zero pixels
assembled pixels dataframe with features
   time taken to assemble:  4.7049641609191895  s
finished predicting
   time taken to predict:  45.68573713302612  s
reconstructed image
   time taken to reconstruct:  2.0914859771728516  s
total time: 79.16330742835999
FINISHED:  ca_m_3412037_nw_10_060_20200607 

clipped raster 18.33892583847046  s
selected non-zero pixels
assembled pixels dataframe with features
   time taken to assemble:  3.203667640686035  s
finished predicting
   time taken to predict:  22.80280375480652  s
reconstructed image
   time taken to reconstruct:  1.1246931552886963  s
total time: 47.64914083480835
FINISHED:  ca_m_3412037_ne_10_060_20200607 

clipped raster 12.64512586593628  s
selected non-zero pixels
assembled pixels dataframe with features
   time taken to assemble:  0.6901860237121582  s
finished predicting
   time taken to predict:  14.882543087005615  s
reconstructed image
   time taken to recons

In [None]:
# Assemble the per-scene bookkeeping (item id, processed flag, reason and
# processing time) into a single dataframe, one row per scene.
D = dict(
    itemid=scene_ids,
    processed=processed,
    reason=reason,
    process_time=times,
)
processing_df = pd.DataFrame(D)
processing_df

In [None]:
# Write the per-scene processing summary for `year` to
# processing_results/processing_results_<year>.csv under the working directory.
fp = os.path.join(os.getcwd(),'processing_results')

# makedirs(exist_ok=True) creates the folder when missing and is a no-op when
# it already exists, so there is no separate existence check that could race
# with another process creating the folder.
os.makedirs(fp, exist_ok=True)

processing_df.to_csv(os.path.join(fp, 'processing_results_' + str(year) + '.csv'))