# Prediction post-processing

## Imports and settings

In [None]:
#!pip3 install plotly

In [None]:
import os
import numpy as np
import cv2 as cv
import pandas as pd
import PIL
import matplotlib.pyplot as plt
import plotly.express as px

from os import path
from glob import glob
from matplotlib import gridspec

In [None]:
# visualization
def visualize(**images):
    """Show the given images side by side in one row.

    Every keyword argument is one image. Its keyword becomes the subplot
    title: underscores are replaced by spaces and the result is title-cased.
    """
    plt.figure(figsize=(16, 10))
    total = len(images)
    for position, label in enumerate(images, start=1):
        plt.subplot(1, total, position)
        plt.title(label.replace('_', ' ').title())
        plt.imshow(images[label])
    plt.show()
    
# non-keyworded images: image1, image2, ...
def visualize_grid(*images):
    """Show the given images in a four-column grid with hidden axes.

    Images are passed positionally and placed row by row. Nothing is drawn
    when no image is given.
    """
    n, cols = len(images), 4
    if n == 0:
        # a grid with zero rows cannot be built, so there is nothing to lay out
        return
    rows = int(np.ceil(n / cols))
    gs = gridspec.GridSpec(rows, cols)
    fig = plt.figure(figsize=(16, 4*rows))
    for i, image in enumerate(images):
        ax = fig.add_subplot(gs[i])
        ax.imshow(image)
        ax.axis('off')
    # tight_layout only arranges axes that already exist, so it has to run
    # after the subplots were added (before, it ran on an empty figure)
    fig.tight_layout()
    plt.show()

In [None]:
!ls /raid/bruengel/Wound/projects/dfuc2022/predictions/baseline/test/

In [None]:
# base folders (values look like placeholders — set them per environment)
DIR_PREDICTIONS = 'root_dir'
DIR_IMAGES = 'image_dir'

# name parts of the prediction run and of the post-processing variant
NAME_PREDICTION = 'prediction_dir'
NAME_DATASET = 'baseline'
NAME_PART = 'test'
NAME_POSTPROC = 'pp_geq01_fill_open2'

# the input folder holds the raw predictions; the output folder sits next to
# it and carries the post-processing name as a '__' separated suffix
DIR_PREDICTIONS_IN = path.join(DIR_PREDICTIONS, NAME_PREDICTION)
DIR_PREDICTIONS_OUT = path.join(
    DIR_PREDICTIONS,
    '__'.join((NAME_PREDICTION, NAME_POSTPROC)),
)
print('in:', DIR_PREDICTIONS_IN)
print('out:', DIR_PREDICTIONS_OUT)

In [None]:
!ls $DIR_PREDICTIONS_IN/

## Prediction visualization

In [None]:
# view samples
# list both folders once instead of re-globbing and re-sorting per sample;
# image and prediction are paired by their position in the sorted listings
# (assumes both folders hold matching file sets — TODO confirm)
files_image = sorted(glob(path.join(DIR_IMAGES, "*.png")))
files_pred = sorted(glob(path.join(DIR_PREDICTIONS_IN, "*.png")))

samples = range(140, 170, 1)
for sample in samples:
    fi = files_image[sample]
    fp = files_pred[sample]
    print('\nfile:', fp)

    # original image (converted from OpenCV's BGR order to RGB for plotting)
    image = cv.imread(fi, cv.IMREAD_COLOR)
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    # prediction as single-channel mask
    pred_in = cv.imread(fp, cv.IMREAD_GRAYSCALE)

    # extract: image pixels where the prediction mask is non-zero
    extract = cv.bitwise_and(image, image, mask=pred_in*255)
    visualize(original_image=image, extract=extract)
    #visualize(original_image=image, predicted_mask=pred_in, extract=extract)

## Prediction analysis

In [None]:
# containers: one row per file and one row per detected instance
rows_file = []
rows_inst = []

# process all
files = sorted(glob(path.join(DIR_PREDICTIONS_IN, "*.png")))
for f in files:
    fname = path.basename(f)

    # read as rgb image
    mask_in = cv.imread(f, cv.IMREAD_COLOR)
    mask_in = cv.cvtColor(mask_in, cv.COLOR_BGR2RGB)

    # append file row (name, width, height)
    rows_file.append([fname, mask_in.shape[1], mask_in.shape[0]])

    # set as binary mask: every non-zero gray value counts as foreground
    # (the image is RGB at this point, so the RGB conversion code is the matching one)
    gray = cv.cvtColor(mask_in, cv.COLOR_RGB2GRAY)
    mask = cv.inRange(gray, 1, 255)

    # detect contours and analyze
    # NOTE(review): RETR_TREE also returns nested contours (e.g. holes), so
    # these are listed as instances of their own — confirm this is intended
    contours, _ = cv.findContours(mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    instance = 0
    for instance, c in enumerate(contours, start=1):

        # area (truncated to whole pixels), arc length of the closed contour
        area = int(cv.contourArea(c))
        arcl = cv.arcLength(c, True)

        # rectangle (bbox): shorter and longer side
        w, h = cv.boundingRect(c)[2:]
        bbox_min, bbox_max = min(w, h), max(w, h)

        # min area rotated rectangle (rbox)
        # sloppy opencv definitions lead to slightly inconsistent results between boundingRect and minAreaRect
        # https://stackoverflow.com/questions/69911364/whats-the-difference-in-results-of-cvboundingrect-and-cvminarearect
        rect = cv.minAreaRect(c)
        # corner points truncated to integers; np.intp is the type the removed
        # np.int0 alias used to stand for
        rbox = cv.boxPoints(rect).astype(np.intp)
        a, b = cv.norm(rbox[0], rbox[1]), cv.norm(rbox[1], rbox[2])
        rbox_min, rbox_max = min(a, b), max(a, b)

        # append instance row
        rows_inst.append([fname, instance, area, arcl, bbox_min, bbox_max, rbox_min, rbox_max])

    # exceptional case of no contour: keep the file with instance index 0
    if instance == 0:
        rows_inst.append([fname, 0, None, None, None, None, None, None])

# create dfs
df_file = pd.DataFrame(rows_file, columns=[
    'file', 'width', 'height'])
df_inst = pd.DataFrame(rows_inst, columns=[
    'file', 'inst', 'area', 'arc_len', 'bbox_min', 'bbox_max', 'rbox_min', 'rbox_max'])

In [None]:
# extension of instances data frame: every instance row gets the size of the
# image it belongs to (left join of the file table on the file name)
df_inst = df_file.join(df_inst.set_index('file'), on='file')

# derived features, all computed from the joined columns
area_sqrt = np.sqrt(df_inst['area'])
area_perc = df_inst['area'].div(df_inst['width'] * df_inst['height']).mul(100)
bbox_ratio = df_inst['bbox_max'].div(df_inst['bbox_min'])
rbox_ratio = df_inst['rbox_max'].div(df_inst['rbox_min'])

# insert in this order; each position refers to the frame as it looks after
# the preceding insertions, placing every feature right behind its source columns
derived = [
    (5, 'area_sqrt', area_sqrt),     # square root of area
    (6, 'area_perc', area_perc),     # area percentage
    (10, 'bbox_ratio', bbox_ratio),  # bbox side ratio
    (13, 'rbox_ratio', rbox_ratio),  # rbox side ratio
]
for position, column, values in derived:
    df_inst.insert(position, column, values)

# remove file-related cols
df_inst = df_inst.drop(columns=['width', 'height'])

# basic description
df_inst.describe()

In [None]:
# instances per image
# histogram over the instance index column (bins of width 1 starting at 0)
# with a box plot as marginal
fig = px.histogram(df_inst, x='inst', marginal='box')
fig.update_traces(
    selector={'type': 'histogram'},
    xbins={'start': 0.0, 'size': 1.0},
)
fig.show()

In [None]:
# instance area (percent)
# histogram with bins of width 0.05 starting at 0, box plot as marginal,
# all data frame columns shown on hover
fig = px.histogram(
    df_inst,
    x='area_perc',
    marginal='box',
    hover_data=df_inst.columns,
)
fig.update_traces(
    selector={'type': 'histogram'},
    xbins={'start': 0.0, 'size': 0.05},
)
fig.show()

## Post-processing pipeline

In [None]:
# Threshold used by postprocessing(): an instance whose area, as a percentage
# of the whole image area, is below this value is erased when the prediction
# contains more than one instance.
MIN_AREA_PERC_TO_KEEP = 0.02 # min percentage instance area to keep

In [None]:
def postprocessing(pred_in, min_area_perc=None):
    """Clean up one predicted mask: drop tiny instances, fill holes, open mildly.

    Args:
        pred_in: single-channel mask (2-D array, as read with
            ``cv.IMREAD_GRAYSCALE``); non-zero pixels are foreground for the
            contour detection.
        min_area_perc: minimum instance area in percent of the image area.
            Smaller instances are erased if the mask holds more than one
            instance. Defaults to the module-level ``MIN_AREA_PERC_TO_KEEP``.

    Returns:
        The processed mask, built on a copy of ``pred_in``. Kept instances
        are drawn filled with value 1, erased ones with value 0.

    Raises:
        ValueError: if ``pred_in`` is None (e.g. the image could not be read).
    """
    if pred_in is None:
        raise ValueError('pred_in is None, prediction could not be read')
    if min_area_perc is None:
        min_area_perc = MIN_AREA_PERC_TO_KEEP

    h, w = pred_in.shape
    pred_out = pred_in.copy()

    ###
    # step 1: instance area filtering for more than one instance, potential filling
    # Only outer contours count as instances. With nested contours included,
    # a hole inside a single instance raised the instance count, so a lone
    # small instance with a hole was erased. Filling an outer contour covers
    # its holes anyway.
    contours, _ = cv.findContours(pred_in, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv.contourArea)
    instances = len(contours)
    print(instances, 'instances')
    for c in contours:
        area_perc = (cv.contourArea(c) / (w * h)) * 100 # area percent

        # blacken if too small and more than one instance in prediction
        if instances > 1 and area_perc < min_area_perc:
            cv.drawContours(pred_out, [c], contourIdx=-1, color=0, thickness=-1, lineType=cv.LINE_AA)
            print(area_perc, 'removed')
            continue # skip rest to avoid restoring of removed contours

        # kept: either the only instance or large enough
        print(area_perc, 'kept (only instance or large enough)')

        # fill potential holes in masks
        cv.drawContours(pred_out, [c], contourIdx=-1, color=1, thickness=-1, lineType=cv.LINE_AA)
        print(area_perc, 'potential holes closed')

    ###
    # step 2: mild opening to remove contour removal artifacts
    # NOTE(review): the artifacts presumably stem from the anti-aliased
    # (LINE_AA) drawing above — confirm before changing either step
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (2, 2))
    pred_out = cv.morphologyEx(pred_out, cv.MORPH_OPEN, kernel)
    print('mildly opened')

    return pred_out

In [None]:
# check effects on subset
# list both folders once instead of re-globbing and re-sorting per sample;
# image and prediction are paired by their position in the sorted listings
# (assumes both folders hold matching file sets — TODO confirm)
files_image = sorted(glob(path.join(DIR_IMAGES, "*.png")))
files_pred = sorted(glob(path.join(DIR_PREDICTIONS_IN, "*.png")))

samples = range(200, 250, 1)
for sample in samples:
    fi = files_image[sample]
    fp = files_pred[sample]
    print('\nfile:', fp)

    # original image (converted from OpenCV's BGR order to RGB), prediction, extract
    image = cv.imread(fi, cv.IMREAD_COLOR)
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    pred_in = cv.imread(fp, cv.IMREAD_GRAYSCALE)
    extract_in = cv.bitwise_and(image, image, mask=pred_in*255)

    # post-processed prediction, extract
    pred_out = postprocessing(pred_in)
    extract_out = cv.bitwise_and(image, image, mask=pred_out*255)

    # first row: masks before/after, second row: image regions before/after
    visualize(image=image, prediction_in=pred_in, prediction_out=pred_out)
    visualize(image=image, extract_in=extract_in, extract_out=extract_out)
    print('-'*80)

## Export

In [None]:
# create dir if not existing (exist_ok avoids the check-then-create race)
os.makedirs(DIR_PREDICTIONS_OUT, exist_ok=True)

# post-process predictions
print('out:', DIR_PREDICTIONS_OUT)
files = sorted(glob(path.join(DIR_PREDICTIONS_IN, "*.png")))
for f in files:
    print('\nfile:', f)

    # process; cv.imread signals an unreadable file by returning None
    pred_in = cv.imread(f, cv.IMREAD_GRAYSCALE)
    if pred_in is None:
        raise OSError('could not read prediction: ' + f)
    pred_out = postprocessing(pred_in)

    # write under the same file name; cv.imwrite reports failure through its
    # return value, so check it instead of silently producing no output file
    f_out = path.join(DIR_PREDICTIONS_OUT, path.basename(f))
    if not cv.imwrite(f_out, pred_out):
        raise OSError('could not write prediction: ' + f_out)