# DenseCRF Hyperparameter Tuning

This notebook tunes the hyperparameters of the DenseCRF by comparing to a ground truth raster.

In [None]:
# Paths to the input raster, GLM model output, and ground truth labels.
IMAGE_PATH = 'para_condensed_stacks_2020_h7v3_crop.tif' # path to input raster (only the first 4 bands are kept when loaded)
MODEL_PATH = 'para_model_2020_h7v3_crop.tif'            # path to GLM raster output (float32, values in the [0, 1] range)
GROUNDTRUTH_PATH = 'para_gt_2020_h7v3_crop.tif'         # path to ground truth label raster (0 or 1)

# Number of internal inference iterations for DenseCRF.  Higher is better but slower.
CRF_ITER = 5

# Maximum number of evaluations for the hyperparameter search.  Higher is better but slower.
MAX_ITER = 100

# Output path for the tuned DenseCRF hyperparameters (written as YAML).
PARAMS_PATH = 'params.yml'

### Import necessary packages

In [None]:
import pandas as pd
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax, create_pairwise_bilateral, create_pairwise_gaussian
from tqdm import trange
from sklearn.metrics import f1_score

import cv2

from hyperopt import hp, fmin, tpe, Trials

### Load rasters

In [None]:
# Read the input raster and its metadata, then move the band axis last so
# the array is laid out as (rows, cols, bands).
with rasterio.open(IMAGE_PATH, 'r') as f:
    meta = f.meta
    image = np.transpose(f.read(), (1, 2, 0))

# Keep at most the first four bands; the CRF below is configured with
# four per-channel bilateral scales.
if image.shape[-1] > 4:
    image = image[..., :4]

H, W = image.shape[:2]

In [None]:
# Read the GLM output raster, drop singleton axes, and work on a float32 copy.
with rasterio.open(MODEL_PATH, 'r') as f:
    probs = np.squeeze(f.read().transpose([1, 2, 0]))
    probs = probs.astype('float32')

# Treat pixels without a prediction (NaN) as probability 0.
probs[np.isnan(probs)] = 0

# The probabilities should be between 0 and 1.  Validate with an explicit
# exception rather than `assert`, which is stripped when Python runs with -O
# and reports nothing about the offending values.
probs_min, probs_max = np.min(probs), np.max(probs)
if probs_min < 0 or probs_max > 1:
    raise ValueError(
        'probabilities in {} must lie in [0, 1]; found range [{}, {}]'.format(
            MODEL_PATH, probs_min, probs_max))

In [None]:
# Read the ground truth raster with the band axis moved last, then drop
# singleton axes.
with rasterio.open(GROUNDTRUTH_PATH, 'r') as f:
    gt = np.transpose(f.read(), (1, 2, 0))
gt = np.squeeze(gt)

In [None]:
# Sanity check: report the shape and dtype of the loaded image array.
print('image:',image.shape,image.dtype)

In [None]:
# Sanity check: report the shape and dtype of the loaded probability array.
print('probs:',probs.shape,probs.dtype)

In [None]:
# Sanity check: report the shape and dtype of the loaded ground truth array.
print('gt:',gt.shape,gt.dtype)

### Hyperparameter tuning

In [None]:
def run_crf(image, probs, num_iter=5,
            gaussian_sdim=3, gaussian_compat=3,
            bilateral_sdim=80,
            bilateral_schan1=13, bilateral_schan2=13, bilateral_schan3=13, bilateral_schan4=13,
            bilateral_compat=10,
           *args, **kwargs):
    """Refine a binary probability map with a dense CRF and return hard labels.

    Parameters
    ----------
    image : ndarray of shape (rows, cols, channels)
        Guide image used for the bilateral pairwise term (channel axis last).
    probs : ndarray of shape (rows, cols)
        Probability of the positive class (label 1) for every pixel; the
        negative class is taken as ``1 - probs``.
    num_iter : int
        Number of inference steps to run.
    gaussian_sdim, gaussian_compat
        Spatial scale (applied to both axes) and compatibility of the
        Gaussian pairwise term.
    bilateral_sdim, bilateral_compat
        Spatial scale (applied to both axes) and compatibility of the
        bilateral pairwise term.
    bilateral_schan1 .. bilateral_schan4
        Scale for image channels 0..3 in the bilateral term.  Only as many
        scales as the image has channels are used.
    *args, **kwargs
        Accepted and ignored, so callers may pass a parameter dict holding
        extra keys.

    Returns
    -------
    ndarray of shape (rows, cols)
        Per-pixel label (0 or 1): the argmax of the CRF marginals.
    """
    height, width = image.shape[:2]

    # Two-class "softmax" stacked on the label axis: [negative, positive].
    unary = unary_from_softmax(np.stack([1 - probs, probs], axis=0))

    # The per-channel scales are applied by channel index, so passing more
    # scales than the image has channels fails.  Trim the list so images
    # with fewer than four bands work as well.
    channel_scales = [bilateral_schan1, bilateral_schan2,
                      bilateral_schan3, bilateral_schan4][:image.shape[2]]

    pairwise_gaussian = create_pairwise_gaussian(
        sdims=[gaussian_sdim] * 2, shape=(height, width))
    pairwise_bilateral = create_pairwise_bilateral(
        sdims=[bilateral_sdim] * 2, schan=channel_scales, img=image, chdim=2)

    # DenseCRF2D takes (width, height, number of labels).
    d = dcrf.DenseCRF2D(width, height, 2)
    d.setUnaryEnergy(unary)
    d.addPairwiseEnergy(pairwise_gaussian, compat=gaussian_compat,
                        kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)
    d.addPairwiseEnergy(pairwise_bilateral, compat=bilateral_compat,
                        kernel=dcrf.DIAG_KERNEL,
                        normalization=dcrf.NORMALIZE_SYMMETRIC)

    # Step the inference manually (with a progress bar) instead of calling
    # a single all-in-one inference routine.
    Q, tmp1, tmp2 = d.startInference()
    for _ in trange(num_iter):
        d.stepInference(Q, tmp1, tmp2)

    # Q holds one row of marginals per label; pick the most likely label per
    # pixel and restore the image layout.
    soln = np.argmax(Q, axis=0).reshape((height, width))
    return soln

In [None]:
# Search space: every tuned bilateral parameter is drawn uniformly from
# [1, 100].  The dict is wrapped in a single-option `hp.choice` labelled 'a'.
space = hp.choice('a', [{
    name: hp.uniform(name, 1, 100)
    for name in (
        'bilateral_sdim',
        'bilateral_schan1',
        'bilateral_schan2',
        'bilateral_schan3',
        'bilateral_schan4',
    )
}])

def run_hyperopt_search(image,probs,gt,space):
    """Search `space` for the CRF parameters that maximize F1 against `gt`.

    Each candidate parameter set is passed to `run_crf` (with `CRF_ITER`
    inference steps) and scored by the F1 of the resulting label map against
    the ground truth.  hyperopt minimizes, so the objective is ``1 - F1``.
    At most `MAX_ITER` candidates are evaluated using the TPE algorithm.

    Parameters
    ----------
    image, probs
        Forwarded unchanged to `run_crf`.
    gt : ndarray
        Ground truth labels with as many pixels as the CRF output.
    space
        hyperopt search space; each sample must be a dict of `run_crf`
        keyword arguments.

    Returns
    -------
    dict
        The best point found, as returned by `hyperopt.fmin`.
    """
    trials = Trials()

    # The ground truth never changes between evaluations, so flatten it once
    # here instead of copying the whole raster on every objective call.
    gt_flat = gt.ravel()

    def objective(params):
        print(params)
        soln = run_crf(image, probs, num_iter=CRF_ITER, **params)
        score = f1_score(gt_flat, soln.ravel())
        return 1 - score

    best = fmin(objective, space, algo=tpe.suggest, max_evals=MAX_ITER, trials=trials)

    return best

In [None]:
# Run the hyperparameter search; `best_params` is the dict returned by hyperopt's `fmin`.
best_params = run_hyperopt_search(image,probs,gt,space)

In [None]:
# Display the best hyperparameters found (notebook cell output).
best_params

### Selection by connected component size
This will find the size of the smallest connected component in the ground truth raster.  Any predicted connected component smaller than this will be removed.

In [None]:
# Label the 8-connected foreground regions of the ground truth and collect
# per-region statistics.
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(gt.astype('uint8'), connectivity=8)

# Row 0 of `stats` describes the background label, so drop it and keep only
# the pixel area of each foreground component.
sizes = stats[1:, cv2.CC_STAT_AREA]
nb_components = nb_components - 1

# Without any foreground component there is no minimum size to take; fail
# with a clear message instead of numpy's empty-reduction error.
if nb_components == 0:
    raise ValueError('ground truth raster contains no foreground pixels; cannot derive min_size')
min_size = np.min(sizes)

### Saving hyperparameters to file

In [None]:
# Collect the tuned values as plain Python floats/ints so they serialize
# cleanly, then append the minimum component size.
params = {
    key: float(best_params[key])
    for key in (
        'bilateral_schan1',
        'bilateral_schan2',
        'bilateral_schan3',
        'bilateral_schan4',
        'bilateral_sdim',
    )
}
params['min_size'] = int(min_size)

In [None]:
# Display the parameter dict that will be written to disk (notebook cell output).
params

In [None]:
import yaml

# Serialize the tuned parameters as YAML directly into the output file.
with open(PARAMS_PATH, 'w') as f:
    yaml.dump(params, f)