In [None]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier

import rasterio
import geopandas as gpd

import iceplant_detection_functions as ipf
import model_prep_and_evals as mpe

import warnings

import planetary_computer as pc
import rioxarray as rioxr

import gc # garbage collector

import dask
import dask_gateway
import dask.array as da
from dask_ml.wrappers import ParallelPostFit

from shapely.geometry import Polygon

# Import train set

In [None]:
# Read the training CSV a single time, then split it into features and labels
# (the file was previously parsed twice, once per variable).
train_set = pd.read_csv(os.path.join(os.getcwd(), 'data_sampling_workflow', 'train_set.csv'))
# Label-based column slice: every column from 'r' through 'avg_lidar', both ends included.
X_train = train_set.loc[:, 'r':'avg_lidar']
y_train = train_set.loc[:, 'iceplant']

In [None]:
# Remove the five columns listed below so they are not used as training features.
# NOTE: re-running this cell without re-running the load cell raises KeyError,
# because the columns are already gone.
lidar_columns = ['lidar', 'max_lidar', 'min_lidar', 'min_max_diff', 'avg_lidar']
X_train = X_train.drop(columns=lidar_columns)
X_train.head(3)

## Train model

In [None]:
# Fit a 100-tree random forest (fixed seed) and report the wall-clock fit time.
t0 = time.time()

# ParallelPostFit wraps the estimator; fitting is delegated to the wrapped forest.
base_forest = RandomForestClassifier(n_estimators=100, random_state=42)
rfc = ParallelPostFit(base_forest)
rfc.fit(X_train, y_train)

elapsed_seconds = time.time() - t0
print('time to train: ', elapsed_seconds)

# Probabilities over aoi

## Specify aoi

In [None]:
# Load the areas-of-interest shapefile from the working directory and display it.
aois_path = os.path.join(os.getcwd(), 'areas_of_interest', 'areas_of_interest.shp')
aois = gpd.read_file(aois_path)
aois

In [None]:
# Fields next to Goleta Slough
# Rectangle given by its bounding coordinates; the values are (x, y) pairs that
# small_raster later interprets as EPSG:4326, i.e. longitude/latitude.
west, east = -119.8284196946, -119.8101541026
south, north = 34.4162731913, 34.4353838099
reduce_box = Polygon([
    [west, south],
    [east, south],
    [east, north],
    [west, north],
    [west, south],  # repeat the first vertex to close the ring
])

In [None]:
# Select the scene to process and open only the part of it inside reduce_box.
# NOTE(review): the original comment labels row 4 as the "Campus Lagoon" aoi, yet the
# window opened below is the Goleta fields box -- confirm the row is the intended scene.
aoi = aois.iloc[4]
geometry = reduce_box
# 'itemid_12' presumably holds the 2012 item id (the config cell sets year = 2012) -- TODO confirm.
itemid = aoi['itemid_12']
image = ipf.open_window_in_scene(itemid, geometry)

In [None]:
# Run configuration: whether to write the result to disk, and the pieces of the output name.
save_raster = True
year = 2012
aoi = 'goleta_fields'  # from here on `aoi` is a plain label string, not a row of `aois`
# Output name pattern: SPECTRAL_<aoi>_<year>_probabilities.tif
filename = '_'.join(['SPECTRAL', aoi, str(year), 'probabilities.tif'])

## Add features for prediction

In [None]:
# Build the feature table used for prediction from the scene item and the opened window.
# NOTE(review): `thresh` is passed straight through to ipf.features_over_aoi; its exact
# meaning is defined there -- presumably a vegetation cut-off, confirm before tuning.
scene_item = ipf.get_item_from_id(itemid)
veg_df = ipf.features_over_aoi(scene_item, image, thresh=0.05)
veg_df.head(3)

## Calculate probabilities

In [None]:
# Predict class probabilities for every row of veg_df (passed as a bare array, without column names).
feature_matrix = veg_df.to_numpy()
scene_probs = rfc.predict_proba(feature_matrix)
scene_probs

## Assemble dataframe with iceplant and non-iceplant probabilities

In [None]:
# Iceplant probability as an integer percentage (0-100).
# scene_probs[:, 1] selects the same elements as the former flatten-and-stride ([1::2]):
# the second probability of every row. It is taken to be the iceplant class --
# TODO confirm against rfc.classes_.
# Round before casting: a bare integer cast truncates, so 0.29 * 100 ==
# 28.999999999999996 would be stored as 28 instead of 29.
ip_probs = np.rint(scene_probs[:, 1] * 100).astype('int16')

In [None]:
# Display the int16 iceplant percentages computed in the previous cell.
ip_probs

In [None]:
# Non-iceplant probability as an integer percentage (0-100).
# scene_probs[:, 0] selects the same elements as the former flatten-and-stride ([::2]):
# the first probability of every row. It is taken to be the non-iceplant class --
# TODO confirm against rfc.classes_.
# Round before casting: a bare integer cast truncates (e.g. 28.999999999999996 -> 28),
# which can make the two percentages of a row sum to 99 instead of 100.
nonip_probs = np.rint(scene_probs[:, 0] * 100).astype('int16')

In [None]:
# Display the int16 non-iceplant percentages computed in the previous cell.
nonip_probs

In [None]:
# Put both integer percentages side by side, one row per prediction.
probs_df = pd.DataFrame({'ip_prob': ip_probs, 'non_ip_prob': nonip_probs})
probs_df

In [None]:
# Sanity check: add the two percentages of every row and list the distinct totals.
x = probs_df['ip_prob'].add(probs_df['non_ip_prob'])
x.unique()

## Reconstruct iceplant probs into raster

In [None]:
def probs_backto_image(nrows, ncols, index, probs):
    """Scatter per-pixel values back into a 2-D array of shape (nrows, ncols).

    Parameters
    ----------
    nrows, ncols : int
        Shape of the output array.
    index : sequence of int
        Flattened, row-major pixel positions: entry ``n`` lands on row
        ``n // ncols`` and column ``n % ncols``.
    probs : sequence of numbers
        Value for each entry of ``index``, matched by position. Values beyond
        ``len(index)`` are ignored.

    Returns
    -------
    numpy.ndarray
        Float array, zero everywhere except at the listed positions, which hold
        the corresponding non-zero value from ``probs``.

    Raises
    ------
    IndexError
        If a position falls outside the array or ``probs`` is shorter than ``index``.
    """
    reconstruct = np.zeros((nrows, ncols))

    flat_positions = np.asarray(index, dtype=np.int64)
    values = np.asarray(probs)[:flat_positions.size]

    # Zero values are skipped: the array already holds 0 there, and skipping means a
    # zero can never overwrite a non-zero value written for a repeated position.
    keep = values != 0

    # Integer divmod replaces int(n / ncols): no float round-trip, and the whole
    # scatter is one vectorised assignment instead of a per-pixel Python loop.
    rows, cols = np.divmod(flat_positions[keep], ncols)
    reconstruct[rows, cols] = values[keep]
    return reconstruct

In [None]:
# Rebuild a 2-D image of iceplant percentages; image.shape[1] and image.shape[2]
# supply the row and column counts, and veg_df.index supplies the flattened pixel positions.
n_rows, n_cols = image.shape[1], image.shape[2]
probs_image = probs_backto_image(nrows=n_rows, ncols=n_cols, index=veg_df.index, probs=ip_probs)

In [None]:
# Plot the reconstructed iceplant-probability image.
fig, ax = plt.subplots(figsize=(15, 15))
# The title is built from the run configuration: the previous hard-coded text
# ("2020 whole naip scene") did not match year = 2012 or the clipped area being shown.
ax.set_title('PREDICTIONS : {} {}'.format(year, aoi))
ax.imshow(probs_image)
plt.show()

In [None]:
def small_raster(itemid, reduce_box):
    """Open the "image" asset of an item and clip it to the bounds of a box.

    Parameters
    ----------
    itemid :
        Identifier handed to ``ipf.get_item_from_id`` to look up the item.
    reduce_box : shapely geometry
        Area to keep, with coordinates in EPSG:4326.

    Returns
    -------
    The opened raster clipped to the bounding box of ``reduce_box`` after the box
    has been reprojected to the raster's CRS.
    """
    # Sign the asset URL before opening it.
    signed_href = pc.sign(ipf.get_item_from_id(itemid).assets["image"].href)
    scene = rioxr.open_rasterio(signed_href)

    # Reproject the box into the raster's CRS so its bounds are in raster coordinates.
    box_in_scene_crs = gpd.GeoDataFrame(
        {'geometry': [reduce_box]}, crs="EPSG:4326"
    ).to_crs(scene.rio.crs)

    minx, miny, maxx, maxy = box_in_scene_crs.total_bounds
    return scene.rio.clip_box(minx, miny, maxx, maxy)

In [None]:
# Save the probability image as a single-band uint8 GeoTIFF when requested.

if save_raster:

    # The clipped scene provides the georeferencing (CRS and affine transform).
    # NOTE(review): this assumes the clipped scene has the same pixel grid as
    # probs_image (both derive from the same box) -- confirm the shapes match.
    small = small_raster(itemid, geometry)

    # Create the output folder if needed so a fresh checkout without temp/ does
    # not fail when the file is opened for writing.
    out_dir = os.path.join(os.getcwd(), 'temp')
    os.makedirs(out_dir, exist_ok=True)

    with rasterio.open(
        os.path.join(out_dir, filename),  # file path
        'w',            # w = write
        driver='GTiff',  # format
        height=probs_image.shape[0],
        width=probs_image.shape[1],
        count=1,  # number of raster bands in the dataset
        dtype=rasterio.uint8,
        crs=small.rio.crs,
        transform=small.rio.transform(),
    ) as dst:
        # Percentages are in 0-100, so they fit in uint8.
        dst.write(probs_image.astype(rasterio.uint8), 1)