[![Open In SageMaker Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/SatelliteVu/SatelliteVu-AWS-Disaster-Response-Hackathon/blob/main/deep_learning/evaluate_and_visualize.ipynb)

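In this notebook, we load a trained model, evaluate it on the test dataset using sklearn metrics (alongside a baseline that reuses the previous day's fire mask), and visualize a few of its predictions.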

In [None]:
import os
import sys
import glob
import numpy as np
import keras
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib import colors
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

from config import common_config, dataset_config, training_config, model_config, test_config
from datagen import get_dataset
import model_resunet

In [None]:
# Parameters: where the test data lives and where the trained model is stored.
# Both root directories are read from the project configuration.
input_data_dir = os.path.join(common_config["INPUT_DIR"], "data")
model_dir = os.path.join(common_config["OUTPUT_DIR"], "model")

# Pattern of the test dataset files. The configured pattern is appended to the
# data directory as-is (no path separator is inserted here).
test_dataset_pattern = "".join(
    (input_data_dir, dataset_config["TEST_DATASET_PATTERN"])
)

# Glob pattern for ".h5" files in model_dir whose name contains the configured
# model nickname.
model_nickname = test_config["wandb_model_nickname"]
model_pattern = model_dir + "/*{}*.h5".format(model_nickname)

In [None]:
# Look for a model: exactly one file must match model_pattern, otherwise it is
# unclear which weights should be evaluated.
model_paths = glob.glob(model_pattern)
if len(model_paths) != 1:
    # Passing a message to sys.exit() prints it to stderr and produces a
    # non-zero exit status, so a scripted run is reported as failed rather
    # than as a silent success.
    sys.exit(
        "One model must be provided: pattern {!r} matched {} file(s): {}".format(
            model_pattern, len(model_paths), model_paths
        )
    )

# Define ResUNet model. The input shape is built from the first two entries of
# IMG_SIZE followed by the number of configured input features.
model = model_resunet.get_model(
    [
        model_config["IMG_SIZE"][0],
        model_config["IMG_SIZE"][1],
        len(dataset_config["INPUT_FEATURES"]),
    ]
)

# Load model weights from the single matching file.
model.load_weights(model_paths[0])

In [None]:
# Load the TFRecord test dataset.
# The same IMG_SIZE entry is used for both the stored data size and the sample
# size; cropping and shuffling are switched off for evaluation.
img_side = model_config["IMG_SIZE"][0]
dataset_options = dict(
    data_size=img_side,
    sample_size=img_side,
    batch_size=training_config["BATCH_SIZE"],
    num_in_channels=len(dataset_config["INPUT_FEATURES"]),
    compression_type=None,
    clip_and_normalize=False,
    clip_and_rescale=True,
    random_crop=False,
    center_crop=False,
    shuffle=False,
)
test_dataset = get_dataset(test_dataset_pattern, **dataset_options)

In [None]:
# Get all samples from TFRecords in arrays.
# Batches are collected in lists and concatenated once at the end: calling
# np.concatenate inside the loop would re-copy everything accumulated so far
# on every batch.
input_batches = []
target_batches = []
for x, y in test_dataset:
    input_batches.append(np.asarray(x))
    target_batches.append(np.asarray(y))

# Fail with an explicit message instead of an undefined-variable error when
# the dataset yields no batches.
if not input_batches:
    raise ValueError(
        "Test dataset is empty: no samples were read from {!r}.".format(
            test_dataset_pattern
        )
    )

# Stack the batches along the sample axis.
test_data_inputs = np.concatenate(input_batches, axis=0)
test_data_targets = np.concatenate(target_batches, axis=0)
y_true = np.asarray(test_data_targets)

In [None]:
# Predict
# Run inference on the whole test set with a single batched call instead of
# invoking model.predict once per sample in a Python loop, which pays the
# per-call overhead for every sample.
predictions = model.predict(
    test_data_inputs, batch_size=training_config["BATCH_SIZE"]
)

# Keep the layout the per-sample loop produced: a list with one entry per
# sample, each entry carrying a leading axis of length 1. Later cells rely on
# it (np.array(y_pred).squeeze() and masks.squeeze(axis=1)).
y_pred = [np.expand_dims(pred, axis=0) for pred in predictions]

# Round the predicted values to the nearest integer to obtain the masks.
masks = np.round(y_pred)

In [None]:
# Evaluate using sklearn
from sklearn.metrics import precision_recall_fscore_support as score


def _report_metrics(title, true_labels, predicted_labels):
    """Print and return the sklearn metrics for one set of predicted labels.

    Args:
        title: Heading printed before the metrics so that the outputs of
            different evaluations can be told apart.
        true_labels: Flattened ground-truth labels.
        predicted_labels: Flattened predicted labels, aligned with
            ``true_labels``.

    Returns:
        Tuple ``(accuracy, score_a, precision, recall, f1, matrix)`` holding
        the values that were printed.
    """
    print("=== %s ===" % title)
    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(true_labels, predicted_labels)
    print('Accuracy: %f' % accuracy)
    # result of precision_recall_fscore_support with its default arguments
    score_a = score(true_labels, predicted_labels)
    print("score_a", score_a)
    # precision tp / (tp + fp)
    precision = precision_score(true_labels, predicted_labels)
    print("precision", precision)
    # recall: tp / (tp + fn)
    recall = recall_score(true_labels, predicted_labels)
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(true_labels, predicted_labels)
    print('F1 score: %f' % f1)
    # confusion matrix
    matrix = confusion_matrix(true_labels, predicted_labels)
    print(matrix)
    return accuracy, score_a, precision, recall, f1, matrix


# Flatten results before evaluation.
# The last input channel is used as the previous fire mask; reusing it
# unchanged as the prediction gives the persistence baseline.
previous_fire_mask_flat = test_data_inputs[:, :, :, -1].flatten()
persistence_mask_flat = previous_fire_mask_flat
# Raw (unrounded) model outputs, flattened. Not used by the metrics below.
y_pred_mask_flat = np.array(y_pred).flatten()
masks_flat = masks.flatten()
y_true_flat = y_true.flatten()

# Metrics for predictions
_report_metrics("Model predictions", y_true_flat, masks_flat)

# Metrics for baseline. The returned values are bound to the same names the
# cell has always left behind (they describe the baseline, evaluated last).
accuracy, score_a, precision, recall, f1, matrix = _report_metrics(
    "Persistence baseline", y_true_flat, persistence_mask_flat
)

In [None]:
# Output visualisation: one row per test sample, one column per panel
# (previous fire mask, ground truth, prediction).
output_titles = ["Input previous day fire mask", "Ground truth next day fire mask", "Predicted next day fire mask" ]

# Show at most 5 samples, and never more than are available, so that small
# test sets do not raise an IndexError.
n_rows = min(5, len(test_data_inputs))
n_features = len(output_titles)

CMAP = colors.ListedColormap(['silver', 'orangered'])
# NOTE(review): BOUNDS describes a single interval for a two-colour map;
# confirm that mask value 1 is rendered with the second colour as intended.
BOUNDS = [0., 1.]
NORM = colors.BoundaryNorm(BOUNDS, CMAP.N)

# Drop the per-sample singleton axis once, rather than on every loop iteration.
predicted_masks = masks.squeeze(axis=1)

fig = plt.figure(figsize=(15,6.5))

for i in range(n_rows):
    # Images of row i, in the same order as output_titles.
    panels = (
        test_data_inputs[i, :, :, -1],
        y_true[i, :, :, 0],
        predicted_masks[i, :, :, 0],
    )
    for j, panel in enumerate(panels):
        # The grid has exactly one column per panel, so no empty column is
        # left at the right-hand side of the figure.
        plt.subplot(n_rows, n_features, i * n_features + j + 1)
        if i == 0:
            plt.title(output_titles[j], fontsize=13)
        plt.imshow(panel, cmap=CMAP, norm=NORM)
        plt.axis('off')
plt.tight_layout()