This notebook generates water masks from new Planet scenes. It expects an 8-band or 4-band PlanetScope image, together with its metadata file (`*.xml`), to be located in the scene folder.

The water mask will be written to a file named `model_classification.tif` in the same folder.

In [None]:
# GIS imports
import rasterio

# NumPy, scikit-learn and scikit-image imports
import numpy as np
from sklearn.ensemble import HistGradientBoostingClassifier
from skimage.segmentation import felzenszwalb
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support as score
from skimage import filters, exposure

# misc imports
from pathlib import Path
import joblib
from joblib import dump
from tqdm import tqdm

# local imports
from tools import get_superpixel_stds_as_features, get_superpixel_means_as_features, get_array_from_features, get_segment_sizes
from rf_funcs import calc_ndwi, calc_ndvi, return_grn_indices, return_img_bands, return_reflectance_coeffs

# Seed NumPy's global random number generator so that any code relying on it
# produces the same results from one run of the notebook to the next.
np.random.seed(42)

In [None]:
# Run parameters. The values below are defaults; they can be overridden with
# papermill when the notebook is executed programmatically.
PLANET_ID = '20210903_150800_60_2458' # name of the Planet scene; also the scene folder name under DATA_PATH and the prefix of the image file name
DATA_PATH = '../data' # relative path of the directory that contains the Planet scene folder
MODEL_PATH = "../trained_models/gb_tree_model.joblib" # relative path of the trained model file (loaded with joblib)

In [None]:
# FELZENSZWALB PARAMETERS
# These are passed to skimage.segmentation.felzenszwalb as `scale`, `min_size`
# and `sigma` when the scene is split into superpixels.
# Per the scikit-image documentation: a higher `scale` favours larger segments,
# `min_size` is the minimum segment size enforced in post-processing, and
# `sigma` is the width of the Gaussian smoothing applied before segmentation.
F_SCALE = 20
F_MINSIZE = 20
# NOTE(review): sigma = 0 presumably means no pre-smoothing -- confirm.
F_SIGMA = 0

# Path object for DATA_PATH so scene folders can be joined with the `/` operator.
data_path = Path(DATA_PATH)

In [None]:
# Let's make inferences on the given planet images
def generate_inference_helper(rf, img:str|Path, xml_file:str|Path, denoising_weight=0.):
    """
    Run the superpixel classifier on a single Planet image.

    The image is segmented into superpixels, per-superpixel features are
    computed, the model predicts one value per superpixel, and the predictions
    are mapped back through the segment map.

    Parameters
    ----------
    rf :
        Fitted model; only its ``predict(X)`` method is used.
    img :
        Path of the Planet image.
    xml_file :
        Path of the metadata file that accompanies ``img``.
    denoising_weight :
        Forwarded unchanged to ``return_img_bands``.

    Returns
    -------
    The output of ``get_array_from_features`` for the predicted labels
    (presumably one label per pixel, laid out like the input raster --
    TODO confirm against ``tools.get_array_from_features``).
    """
    band_idxs = return_grn_indices(xml_file)
    coeffs = return_reflectance_coeffs(xml_file, band_idxs)

    bands = return_img_bands(img, band_idxs, denoising_weight=denoising_weight)

    # Scale each band by the coefficient read from the metadata for that band.
    green = bands[0] * coeffs[band_idxs[0]]
    red = bands[1] * coeffs[band_idxs[1]]
    nir = bands[2] * coeffs[band_idxs[2]]

    ndwi = calc_ndwi(green, nir)
    ndvi = calc_ndvi(red, nir)

    # Segmentation runs on an edge map of the NIR band (Scharr filter) that is
    # histogram-equalised and then gamma-adjusted, not on the raw band values.
    nir_edges = filters.scharr(nir)
    edge_img = exposure.adjust_gamma(exposure.equalize_hist(nir_edges, nbins=64), gamma=20)
    print(f"Starting segmentation for {img}")
    segments = felzenszwalb(edge_img, scale=F_SCALE, min_size=F_MINSIZE, sigma=F_SIGMA)
    print(f"Completed segmentation for {img}")

    # for inference we include other channels as well
    img_stack = np.stack([red, nir, green, ndwi, ndvi], axis=-1)
    std_features = get_superpixel_stds_as_features(segments, img_stack)
    mean_features = get_superpixel_means_as_features(segments, img_stack)
    segment_sizes = get_segment_sizes(segments)

    print(f"starting inference {img}")
    # NOTE(review): the column order (means, stds, sizes) presumably has to
    # match the feature layout the model was trained with -- confirm.
    X = np.concatenate([mean_features, std_features, segment_sizes], axis=1)
    y = rf.predict(X)
    print(f"finished inference {img}")

    # One prediction per superpixel -> expand to a single feature column and
    # map back through the segment map.
    return get_array_from_features(segments, np.expand_dims(y, axis=1))

def generate_inference(planet_id):
    """
    Run the model on one Planet scene and write the classification to disk.

    The scene folder ``data_path / planet_id`` must contain a ``*.xml``
    metadata file and an image matching ``{planet_id}*.tif``. The result is
    written to ``model_classification.tif`` in that folder as a single-band
    uint8 raster; pixels equal to the image's nodata value in band 1 are set
    to 255, which is also declared as the output's nodata value.

    Parameters
    ----------
    planet_id :
        Name of the Planet scene (folder name and image file prefix).

    Raises
    ------
    FileNotFoundError
        If the scene folder contains no metadata file or no matching image.
    """
    current_img_path = data_path / planet_id

    # glob() gives no ordering guarantee, so sort the matches to make the
    # chosen file reproducible when more than one file matches.
    xml_candidates = sorted(current_img_path.glob('*.xml'))
    if not xml_candidates:
        raise FileNotFoundError(f"No metadata file (*.xml) found in {current_img_path}")
    xml_file = xml_candidates[0]

    img_candidates = sorted(current_img_path.glob(f'{planet_id}*.tif'))
    if not img_candidates:
        raise FileNotFoundError(f"No image matching '{planet_id}*.tif' found in {current_img_path}")
    img = img_candidates[0]

    output_filename = current_img_path / 'model_classification.tif'

    print("Test file name:", img)

    rf = joblib.load(MODEL_PATH)

    inference = generate_inference_helper(rf, img, xml_file)
    print(f"Returned from inference generation {planet_id}")

    # use planet image to mask out regions of no data in the model inference
    print(f"writing inference {img}")
    with rasterio.open(img) as src_ds:
        out_profile = src_ds.profile
        nodata_value = out_profile.get('nodata')
        first_band = src_ds.read(1)

    if nodata_value is not None:
        # Only mask when the source actually declares a nodata value.
        inference[first_band == nodata_value] = 255

    # Reuse the source georeferencing, but describe a single uint8 band.
    out_profile.update({'count':1, 'dtype':np.uint8, 'nodata':255})

    # write out model inference
    with rasterio.open(output_filename, 'w', **out_profile) as dst_ds:
        dst_ds.write(inference.astype(np.uint8).reshape(1, *inference.shape))

    print(f"Completed inference for planet id {planet_id}")
    print("Output filename: ", output_filename)

In [None]:
# Run inference for the scene selected by PLANET_ID; the classification raster
# is written into that scene's folder.
_ = generate_inference(PLANET_ID)