# Analysis of network model results
To do:
* implement, test/check multi-label computations
* use threshold at breakeven point to generate labels
* write the generated labels as GeoTIFFs to a dedicated directory (e.g. data/test/predict_process)
* implement masks for selecting no_img pixels


In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np

from keras.models import load_model
from src.data import utils
from src.models.data import *
from src.models.model import *
from src.models.predict_model import *
from src.visualization.visualize import *

from src.data.utils import get_tile_prefix
from src.models.metrics_img import auc_roc

import matplotlib
import matplotlib.pyplot as plt

import skimage.io as io

from pathlib import Path
import os, shutil
import sys
%matplotlib inline

## User settings

In [None]:
# Project root directories to add to the module search path
sys.path.extend([
    "/home/ubuntu/roaddetection/",
    "/media/hh/hd_internal/hh/DSR_Berlin_2018/roaddetection/",
])

# Base directory with the data (image tiles) to be analyzed
# (alternative: dir_eval = "../../data/validate")
dir_eval = "../../data/train"
# Subdirectories of dir_eval: dir_x for the input tiles, dir_y for the label tiles
dir_x = "sat"
dir_y = "map"

# Upper limit on the number of samples (files) to analyze (prediction takes a long time)
max_num_x = 3

# Number of samples to plot in detail
num_x_show = 2

# Image size in pixels
# (alternative: target_size = (256, 256))
target_size = (512, 512)

# Path to and file name of the model to analyze
trained_model_fn = "../../models/models_unet_borneo_and_harz_03_09_16_15.hdf5"

# Any custom loss or metric functions of the model must be listed here
custom_objects = dict(auc_roc=auc_roc)

# Individual samples to be shown: either None, a list of indexes, or any of 'random', 'head_tail'
# (head_tail = picked from top and bottom of the list according to the metric)
mode_sample_choice = "random"

# Colormap used for the prediction scores
cmap_yscore = "gnuplot"

### Load complete model
The additional input argument `custom_objects` is required if the model was compiled with custom loss or metric functions.

In [None]:
# Restore the trained model from disk, passing the custom loss/metric functions it needs
model = load_model(trained_model_fn, custom_objects=custom_objects)
# The model counts as binary if the output of its last layer has size 1 along axis 3;
# any other size is treated as multiclass
model_is_binary = model.get_layer(index=-1).output_shape[3] == 1

### Some constants, preparatory computations & definitions

In [None]:
# List of sample files in the input-tile directory and (presumably) their count
file_list_x, num_x = utils.get_list_samplefiles(os.path.join(dir_eval, dir_x))

# Number of files that will actually be analyzed: capped by the number of files available
num_x_use = max_num_x if max_num_x < num_x else num_x

# Number of samples that can actually be shown: capped by the number of analyzed samples
num_x_show = np.min((num_x_show, num_x_use))

### Loop over files, collecting data & predictions (takes a long time)

In [None]:

# Per-file pass: read each input tile and its label tile, clean up the labels,
# run the model, and collect labels, prediction scores and a per-sample metric.

# mapping from class name to the value that encodes the class in the label arrays
CLASS_DICT = get_class_dict()
# number of pixels per image
img_size = np.prod(target_size)
# preallocate arrays collecting the label (y) values and y scores of all samples
# NOTE(review): arr_yscore holds one score per pixel; a model with several output
# channels would not fit into its slice below - confirm before using multiclass models
arr_y = np.empty(img_size * num_x_use, dtype=np.uint8)
arr_yscore = np.empty(img_size * num_x_use, dtype=np.float32)
# array collecting the key metric for each sample individually
arr_metric = np.empty(num_x_use)

for i, fn in enumerate(file_list_x[:num_x_use]):
    # read sat image tile
    x = io.imread(os.path.join(dir_eval, dir_x, fn))
    # --------------------------------------------------------------------------------
    # revise this part if/once non-covered parts of image tiles are labeled as such
    # --------------------------------------------------------------------------------
    # determine invalid pixels (for now defined as those with a value of zero
    # in the first band). Variable mask could be used to create a masked array,
    # but scikit-learn does not support masked arrays
    mask = x[:, :, 0] == 0
    # scale x
    x = x / 255.0
    # read corresponding label tile
    y = io.imread(os.path.join(dir_eval, dir_y, fn))
    # set masked values: first, set zeros in label file to 'no road' value...
    y[np.logical_and(np.logical_not(mask), np.logical_not(y))] = CLASS_DICT["no_road"]
    # then set pixel positions found to not belong to image to 'no_img' value
    y[mask] = CLASS_DICT["no_img"]
    # if the model used for prediction is a binary one, set any value above no_road to any_road
    if model_is_binary:
        y[y > CLASS_DICT["no_road"]] = CLASS_DICT["any_road"]
    # copy flattened labels in array
    arr_y[i*img_size:(i+1)*img_size] = y.ravel()
    # predict
    print("analyzing {0:s} ({1:0.0f} % non-image pixels)...".format(fn, 100*np.sum(mask)/img_size))
    # add a leading batch axis of length 1; the number of bands is taken from the tile
    # itself instead of being hard-coded
    yscore = model.predict(x.reshape((1,) + target_size + (x.shape[-1],)))
    # copy flattened prediction in array
    arr_yscore[i*img_size:(i+1)*img_size] = yscore.ravel()
    # compute and store metric used for sorting
    _, _, roc_auc_dict, _, _, pr_auc_dict, _, _, _ = multiclass_roc_pr(y.ravel(), yscore.ravel())

    if not pr_auc_dict:
        # no metric available for this sample: mark explicitly as not-a-number
        arr_metric[i] = np.nan
    elif len(pr_auc_dict) == 1:
        # binary labels: use the single entry
        arr_metric[i] = next(iter(pr_auc_dict.values()))
    else:
        # pick average
        arr_metric[i] = pr_auc_dict["avg"]

### Compute and plot metrics on ensemble of data

In [None]:
# Compute the ROC / precision-recall quantities on the pooled labels and scores of all samples
(
    fpr_dict, tpr_dict, roc_auc_dict,
    precision_dict, recall_dict, pr_auc_dict,
    beven_ix_dict, beven_thresh_dict,
    reduced_class_dict,
) = multiclass_roc_pr(arr_y, arr_yscore)

# Summary figure with a 2x2 grid of axes; only the top row is filled here
fig_sum, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
# top left: output of plot_pr
plot_pr(recall_dict, precision_dict, pr_auc_dict, beven_ix_dict, beven_thresh_dict, axs[0][0])
# top right: output of plot_roc
plot_roc(fpr_dict, tpr_dict, roc_auc_dict, axs[0][1])
plt.show()

In [None]:
# Build the index of the samples to be shown individually, based on the
# chosen selection mode and the per-sample metric
samples_ix = utils.gen_sample_index(
    num_x_use,
    num_x_show,
    mode_sample_choice=mode_sample_choice,
    metric=arr_metric,
)

### Show individual samples

In [None]:
for ix in samples_ix:
    fn = file_list_x[ix]
    # position of this sample's pixels within the flat arrays filled during prediction
    px_range = slice(ix * img_size, (ix + 1) * img_size)
    # re-read the sat image tile (only labels and scores were kept in memory)
    x = io.imread(os.path.join(dir_eval, dir_x, fn))
    # labels and prediction scores, restored to image shape
    y = arr_y[px_range].reshape(target_size)
    yscore = arr_yscore[px_range].reshape(target_size)

    fig_sample = show_sample_prediction(x, y, yscore, cmap=cmap_yscore, title=fn)
    


In [None]:
# halt: stop execution here; the cells that follow are marked as outdated
# and are not meant to run as part of the analysis
sys.exit()

## Outdated stuff
which is not used currently but may come in handy later

In [None]:
# a quick test of multiclass_roc
# NOTE(review): multiclass_roc is not defined in the visible cells (the active cells call
# multiclass_roc_pr) - confirm that it still exists. The second argument is np.empty(8),
# i.e. uninitialized values, so any result depending on it is not reproducible.
multiclass_roc(np.r_[0, 40, 40, 0, 255, 255, 0, 255], np.empty(8))

In [None]:

# Keyword arguments for Keras' ImageDataGenerator - be sure not to include any image augmentation here!
data_gen_args = {"data_format": "channels_last"}

# Batch size for summary stats without visualization
# (larger is more efficient, but limited by memory)
batch_size = 3

# Value for the 'steps' argument of evaluate_generator: the number of complete batches
steps = num_x_use // batch_size


### Run evaluation: only numeric values

In [None]:
# set up test gen with a batch size as large as possible for efficiency reasons;
# the directories are the ones defined in the user settings (dir_eval, dir_x, dir_y)
# NOTE(review): argument order (base dir, image subdir, label subdir) is assumed from
# the previous variable names - confirm against trainGenerator's signature
test_gen = trainGenerator(batch_size, dir_eval, dir_x, dir_y,
                        data_gen_args, save_to_dir = None, image_color_mode="rgba", target_size=target_size)
# evaluate the model on 'steps' batches drawn from the generator
res = model.evaluate_generator(test_gen, steps=steps, workers=1, use_multiprocessing=True, verbose=1)

In [None]:
# Show the metric names together with the evaluation result. A notebook cell only
# displays the value of its last expression, so both are combined into one tuple.
model.metrics_names, res

### Run prediction for display of images and more sophisticated evaluation

In [None]:
# Run the model on 'steps' batches from the generator and keep the predictions
pred = model.predict_generator(
    test_gen,
    steps=steps,
    workers=1,
    use_multiprocessing=True,
    verbose=1,
)

In [None]:
# Display the prediction stored at index 5, reshaped to image size, in grayscale
# and with a color scale next to it
plt.imshow(
    pred[5].reshape(target_size),
    cmap="gray",
)
plt.colorbar()

### Set up ImageDataGenerator

In [None]:
# this generator is supposed to yield single images and matching labels, hence batch size = 1
#batch1_test_gen = trainGenerator(1, eval_dir, img_dir, label_dir,
#                        data_gen_args, save_to_dir = None, image_color_mode="rgba", target_size=target_size)
# preallocate linear arrays for collecting flattened prediction and label data