In [None]:
import pandas as pd
import numpy as np
from metrics import metrics
from matplotlib import pyplot as plt

In [None]:
# Number of test samples to load; also the exclusive upper bound of the
# zero-based index substituted into the filename templates below.
n_test_img = 90

# Filename templates: "{0}" is filled with the sample index via str.format.
path_to_input = "/dccstor/geofm-finetuning/flood_mapping/inferences/inputs/input{0}.npy"
path_to_pred = "/dccstor/geofm-finetuning/flood_mapping/inferences/pred{0}.npy"
path_to_label = "/dccstor/geofm-finetuning/flood_mapping/inferences/label{0}.npy"

# One list per array kind; position k in each list belongs to sample k.
inputs, preds, labels = [], [], []

# For every sample, read its input, prediction and label (in that order) and
# append each array to the matching list.
for sample_no in range(n_test_img):
    for template, bucket in (
        (path_to_input, inputs),
        (path_to_pred, preds),
        (path_to_label, labels),
    ):
        bucket.append(np.load(template.format(sample_no)))

# Sanity checks on the number of loaded predictions and labels.
assert n_test_img == len(preds), "Number of images does not match number of predictions"
assert n_test_img == len(labels), "Number of images does not match number of labels"


In [None]:
# Number of test samples to visualise, one per subplot row.
n_examples = 5
# Fixed seed so that a fresh "Restart & Run All" always shows the same samples.
example_seed = 0

# Draw distinct sample indices. Sampling without replacement prevents the same
# sample from filling several rows, and the min() guard keeps the draw valid
# when fewer than n_examples samples are available.
rand_idx = np.random.default_rng(example_seed).choice(
    n_test_img, size=min(n_examples, n_test_img), replace=False)

# squeeze=False keeps axarr two-dimensional even when only one row is drawn,
# so the axarr[row, col] indexing below works for any number of examples.
f, axarr = plt.subplots(len(rand_idx), 3, figsize=(18, 18), squeeze=False)

# Each row shows one sample: input (left), prediction (middle), label (right).
for i, img_index in enumerate(rand_idx):
    # Keep the first three entries along the leading axis of the squeezed
    # input and move that axis last, which is the layout imshow expects for
    # colour images.
    # NOTE(review): presumably these three entries are the bands meant to be
    # shown as R, G, B and are already scaled for display — confirm.
    input_img = inputs[img_index].squeeze()[0:3].transpose([1, 2, 0])
    axarr[i, 0].imshow(input_img)
    axarr[i, 0].set_title("Input {0}".format(img_index))

    # NOTE(review): the *255 / *120 factors are kept unchanged; their purpose
    # is not evident from this cell — confirm whether they are still needed.
    axarr[i, 1].imshow(preds[img_index]*255, cmap="gray")
    axarr[i, 1].set_title("Prediction {0}".format(img_index))
    axarr[i, 2].imshow(labels[img_index]*120, cmap="gray")
    axarr[i, 2].set_title("Label {0}".format(img_index))

In [None]:
# Per-image accumulators for the aggregate scores returned by metrics();
# each list receives exactly one value per prediction.
test_accuracy, test_bal_accuracy = [], []
test_precision, test_precision_weighted = [], []
test_recall, test_recall_weighted = [], []
test_iou = []
test_f1, test_f1_micro, test_f1_macro = [], [], []
test_f0_5, test_f0_1, test_f10 = [], [], []

# Per-class accumulators ("class 1" = entry 0, "class 2" = entry 1 of the
# per-class results). These only receive a value for images where metrics()
# reports more than one class, so they may end up shorter than the lists above.
test_precision_class_1, test_precision_class_2 = [], []
test_recall_class_1, test_recall_class_2 = [], []
test_fscore_class_1, test_fscore_class_2 = [], []
test_iou_class_1, test_iou_class_2 = [], []

for sample_idx, pred in enumerate(preds):
    # metrics() is imported from the project-local `metrics` module; its 18
    # return values are unpacked positionally, so the order here must mirror
    # the order in which that function returns them.
    (accuracy, bal_accuracy,
     precision, precision_weighted,
     recall, recall_weighted,
     iou_score,
     f1_score, f1_micro, f1_macro,
     f0_5, f0_1, f10,
     precision_per_class, recall_per_class, fscore_per_class,
     support_per_class, iou_score_per_class) = metrics(
        y_pred=pred, y_true=labels[sample_idx])

    # Aggregate scores: recorded for every image.
    test_accuracy.append(accuracy)
    test_bal_accuracy.append(bal_accuracy)
    test_precision.append(precision)
    test_precision_weighted.append(precision_weighted)
    test_recall.append(recall)
    test_recall_weighted.append(recall_weighted)
    test_iou.append(iou_score)
    test_f1.append(f1_score)
    test_f1_micro.append(f1_micro)
    test_f1_macro.append(f1_macro)
    test_f0_5.append(f0_5)
    test_f0_1.append(f0_1)
    test_f10.append(f10)

    # Per-class scores: skip images for which at most one class is reported.
    if len(precision_per_class) <= 1:
        continue
    test_precision_class_1.append(precision_per_class[0])
    test_precision_class_2.append(precision_per_class[1])
    test_recall_class_1.append(recall_per_class[0])
    test_recall_class_2.append(recall_per_class[1])
    test_fscore_class_1.append(fscore_per_class[0])
    test_fscore_class_2.append(fscore_per_class[1])
    test_iou_class_1.append(iou_score_per_class[0])
    test_iou_class_2.append(iou_score_per_class[1])

# Report column -> per-image values. Insertion order fixes the column order of
# the summary table.
mean_sources = {
    "Test IoU": test_iou,
    "Test Acc": test_accuracy,
    "Test Prec": test_precision,
    "Test Prec Weighted": test_precision_weighted,
    "Test Recall": test_recall,
    "Test Recall Weighted": test_recall_weighted,
    "Test Bal Acc": test_bal_accuracy,
    "Test F1": test_f1,
    "Test f1_micro": test_f1_micro,
    "Test f1_macro": test_f1_macro,
    "Test F0.1": test_f0_1,
    "Test F0.5": test_f0_5,
    "Test F10": test_f10,
    "Test F1 class 1": test_fscore_class_1,
    "Test F1 class 2": test_fscore_class_2,
    "Test Recall class 1": test_recall_class_1,
    "Test Recall class 2": test_recall_class_2,
    "Test Precision class 1": test_precision_class_1,
    "Test Precision class 2": test_precision_class_2,
    "Test IoU class 1": test_iou_class_1,
    "Test IoU class 2": test_iou_class_2,
}

# Single-row table holding the mean of every metric over the images that
# contributed a value to it.
results_df = pd.DataFrame(
    {column: [np.mean(values)] for column, values in mean_sources.items()})
print(results_df)