# Import Libraries

In [1]:
import glob
import tensorflow as tf

# Load Model & Metrics

Which metrics will we use?
- Accuracy is a commonly used metric to evaluate the performance of classification models. It simply measures the percentage of correct predictions out of all predictions.
- Precision measures the proportion of correctly classified pixels among those predicted as belonging to the class of interest. It is calculated as the number of true positives (pixels correctly classified as belonging to the class of interest) divided by the total number of pixels predicted as belonging to the class of interest. The higher it is, the better the segmentation quality.
- Recall measures the proportion of correctly classified pixels among all pixels actually belonging to the class of interest. It is calculated as the number of true positives divided by the total number of pixels belonging to the class of interest.
- Mean Intersection over Union is a performance measure commonly used in image segmentation. It measures the similarity between the predicted segmentation and the reference segmentation by computing the ratio of the intersection between the two segments over their union.
- The F1 score is a performance measure commonly used in classification and can also be applied to segmentation problems. It is a harmonic mean of precision and recall for a class of interest.
- The EDI (Error Dispersion Index) is an index for evaluating the quality of an image segmentation. More precisely, it measures the dispersion of the prediction errors relative to the dispersion of the true class labels. In this notebook the EDI is computed as the variance of the absolute prediction errors divided by the sum of that variance and the variance of the true class labels, so it always lies between 0 and 1.
- The EII (Error Imbalanced Index) is a measure of the difference between the average prediction errors for the most and least frequent classes in a multi-class dataset. This measure quantifies the ability of a model to predict the less frequent classes well.

In [2]:
def recall(y_true, y_pred):
    """Global recall: true positives over actual positives.

    An element counts as a true positive when ``y_true * y_pred``, clipped to
    [0, 1], rounds to 1; it counts as an actual positive when ``y_true``,
    clipped to [0, 1], rounds to 1. Both counts are summed over every element
    of the tensors (no per-class or per-sample split).

    Args:
        y_true: ground-truth tensor (presumably one-hot masks -- confirm with caller).
        y_pred: prediction tensor broadcastable against ``y_true``.

    Returns:
        Scalar tensor ``tp / (positives + 1e-8)``; the epsilon avoids a
        division by zero when there are no positives.
    """
    hit_mask = tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1))
    actual_mask = tf.math.round(tf.clip_by_value(y_true, 0, 1))
    hit_count = tf.math.reduce_sum(hit_mask)
    actual_count = tf.math.reduce_sum(actual_mask)
    return hit_count / (actual_count + 1e-8)

def precision(y_true, y_pred):
    """Global precision: true positives over predicted positives.

    An element counts as a true positive when ``y_true * y_pred``, clipped to
    [0, 1], rounds to 1; it counts as a predicted positive when ``y_pred``,
    clipped to [0, 1], rounds to 1. Both counts are summed over every element
    of the tensors.

    Args:
        y_true: ground-truth tensor (presumably one-hot masks -- confirm with caller).
        y_pred: prediction tensor broadcastable against ``y_true``.

    Returns:
        Scalar tensor ``tp / (predicted + 1e-8)``; the epsilon avoids a
        division by zero when nothing is predicted positive.
    """
    hit_mask = tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1))
    predicted_mask = tf.math.round(tf.clip_by_value(y_pred, 0, 1))
    hit_count = tf.math.reduce_sum(hit_mask)
    predicted_count = tf.math.reduce_sum(predicted_mask)
    return hit_count / (predicted_count + 1e-8)

def f1(y_true, y_pred):
    """F1 score: harmonic mean of ``precision`` and ``recall``.

    Args:
        y_true: ground-truth tensor, forwarded to ``precision`` and ``recall``.
        y_pred: prediction tensor, forwarded to ``precision`` and ``recall``.

    Returns:
        Scalar tensor ``2 * p * r / (p + r + 1e-8)``; the epsilon keeps the
        result finite when both precision and recall are zero.
    """
    p, r = precision(y_true, y_pred), recall(y_true, y_pred)
    half_f1 = (p * r) / (p + r + 1e-8)
    return 2 * half_f1

def edi(y_true, y_pred):
    """Error Dispersion Index, averaged over every (sample, class) entry.

    For each entry left after reducing over axes 1 and 2, the variance of the
    absolute error ``|y_true - y_pred|`` is divided by the sum of that variance
    and the variance of ``y_true``. Each ratio therefore lies in [0, 1].

    Args:
        y_true: ground-truth tensor, reduced over axes 1 and 2
            (assumed (batch, height, width, classes) -- TODO confirm).
        y_pred: prediction tensor with the same shape as ``y_true``.

    Returns:
        Scalar tensor: the mean of the ratios, where an entry whose
        denominator is zero contributes 0.
    """
    var_error = tf.math.reduce_variance(tf.math.abs(tf.math.subtract(y_true, y_pred)), axis=[1, 2])
    var_true = tf.math.reduce_variance(y_true, axis=[1, 2])
    # The denominator already contains var_error, so the ratio can never be inf;
    # the only degenerate case is 0/0 (constant labels AND constant error), which
    # a plain divide turns into NaN and which would poison the mean below.
    # divide_no_nan maps exactly that case to 0 and needs no Python len() on a tensor.
    ratio = tf.math.divide_no_nan(var_error, var_true + var_error)
    return tf.reduce_mean(ratio)

def eii(y_true, y_pred):
    """Error Imbalance Index per sample.

    For every sample, takes the mean absolute error of the class with the
    largest label sum and subtracts the mean absolute error of the class with
    the smallest label sum, then divides by the mean absolute error over the
    whole batch.

    Args:
        y_true: ground-truth tensor, reduced over axes 1 and 2
            (assumed (batch, height, width, classes) one-hot -- TODO confirm).
        y_pred: prediction tensor with the same shape as ``y_true``.

    Returns:
        Tensor of shape (batch,) with one index value per sample; 0 where the
        batch-wide mean error is exactly zero.
    """
    freq = tf.math.reduce_sum(y_true, axis=[1, 2])
    max_idx, min_idx = tf.argmax(freq, axis=1), tf.argmin(freq, axis=1)
    errors = tf.reduce_mean(tf.math.abs(tf.math.subtract(y_true, y_pred)), axis=[1, 2])
    # batch_dims=1 pairs row i of `errors` with index i only. Without it, gather
    # returns a (batch, batch) cross-product that applies one sample's class
    # index to every other sample, which is only correct when batch size is 1.
    err_most_frequent = tf.gather(errors, max_idx, axis=1, batch_dims=1)
    err_least_frequent = tf.gather(errors, min_idx, axis=1, batch_dims=1)
    # divide_no_nan yields 0 instead of NaN when every prediction is exact.
    return tf.math.divide_no_nan(err_most_frequent - err_least_frequent, tf.reduce_mean(errors))

In [3]:
# Metrics reported by model.evaluate: four Keras built-ins followed by the
# three custom functions defined in this notebook.
# NOTE(review): Precision/Recall (and the custom precision/recall/f1) threshold
# or round the predictions, which presumably expects probabilities rather than
# raw logits -- confirm against the model's output activation.
metrics = [
    tf.keras.metrics.CategoricalAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
    tf.keras.metrics.OneHotMeanIoU(num_classes=40),
    edi,
    eii,
    f1,
]

In [4]:
# Restore the saved U-Net, then re-compile it so that evaluate() reports the
# loss and the metric list configured for this notebook.
model = tf.keras.models.load_model('output/model_unetV2.h5')
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=metrics,
)







# Load Data

In [5]:
# Validation images and their label-id masks. Both lists are sorted so that
# the i-th image lines up with the i-th mask when they are zipped later.
src_test, seg_test = (
    sorted(glob.glob(pattern))
    for pattern in (
        'data/leftImg8bit/val/*/*.png',
        'data/gtFine/val/*/*_labelIds.png',
    )
)

# Pre-Processing Data

In [6]:
def process_path(src_path, seg_path):
    """Load one (image, mask) pair from disk.

    Args:
        src_path: path of the PNG source image.
        seg_path: path of the PNG label-id mask.

    Returns:
        Tuple ``(src, seg)``: ``src`` is the 3-channel image converted to
        float32, ``seg`` is the single-channel mask one-hot encoded with
        depth 40 on a new last axis.
    """
    # Source image: raw bytes -> 3-channel PNG decode -> float32.
    src_bytes = tf.io.read_file(src_path)
    src = tf.image.convert_image_dtype(tf.image.decode_png(src_bytes, channels=3), tf.float32)

    # Mask: raw bytes -> 1-channel PNG decode -> drop the channel axis -> one-hot.
    seg_bytes = tf.io.read_file(seg_path)
    label_ids = tf.squeeze(tf.image.decode_png(seg_bytes, channels=1), -1)
    return src, tf.one_hot(label_ids, depth=40)

def preprocess(image, mask):
    """Resize an (image, mask) pair to 128x256 and rescale the image.

    Args:
        image: image tensor to resize and divide by 255.
        mask: mask tensor to resize; its values are left untouched.

    Returns:
        Tuple ``(input_src, input_seg)`` of the rescaled image and the
        resized mask.
    """
    target_hw = (128, 256)
    # Nearest-neighbour resizing never blends values, so mask entries stay
    # exactly as they were.
    resized_mask = tf.image.resize(mask, target_hw, method='nearest')
    resized_image = tf.image.resize(image, target_hw, method='nearest')
    # NOTE(review): process_path already converts the image to float32 with
    # convert_image_dtype, which scales uint8 input to [0, 1]; dividing by 255
    # again shrinks it further. Kept as-is because the saved model was
    # presumably trained with the same pipeline -- confirm before changing.
    return resized_image / 255.0, resized_mask

# Create Test Dataset

In [7]:
# Datasets listing the raw files in list order (shuffle disabled).
# Nothing below in this cell consumes them.
src_list_d = tf.data.Dataset.list_files(src_test, shuffle=False)
seg_list_d = tf.data.Dataset.list_files(seg_test, shuffle=False)

# Constant string tensors built from the two Python lists of paths.
src_filenames = tf.constant(src_test)
seg_filenames = tf.constant(seg_test)

# One element per (image path, mask path) pair -- still only paths here.
dataset = tf.data.Dataset.from_tensor_slices((src_filenames, seg_filenames))

# Paths -> decoded tensors -> resized / rescaled tensors.
src_ds = dataset.map(process_path)
processed_src_ds = src_ds.map(preprocess)

# Evaluate Model

In [8]:
# Cache the preprocessed samples, then feed them to the model one per batch.
test_dataset = processed_src_ds.cache()
test_dataset = test_dataset.batch(1)
print(test_dataset.element_spec)
res = model.evaluate(test_dataset)

(TensorSpec(shape=(None, 128, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 128, 256, 40), dtype=tf.float32, name=None))


2023-05-11 03:32:59.804188: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [500]
	 [[{{node Placeholder/_1}}]]




In [9]:
# Keep the results under their own name: rebinding `metrics` here would replace
# the list of metric objects that the compile cell passes to model.compile, so
# re-running that cell after this one would receive a dict of tensors instead.
metric_results = model.get_metrics_result()
print(metric_results)

{'loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.6595203>, 'categorical_accuracy': <tf.Tensor: shape=(), dtype=float32, numpy=0.8279995>, 'precision': <tf.Tensor: shape=(), dtype=float32, numpy=0.11982077>, 'recall': <tf.Tensor: shape=(), dtype=float32, numpy=0.98107916>, 'one_hot_mean_io_u': <tf.Tensor: shape=(), dtype=float32, numpy=0.27362415>, 'edi': <tf.Tensor: shape=(), dtype=float32, numpy=0.9990971>, 'eii': <tf.Tensor: shape=(), dtype=float32, numpy=0.12334498>, 'f1': <tf.Tensor: shape=(), dtype=float32, numpy=0.21432428>}


- If the EII is zero, it means that the model has equal performance on all classes. If the EII is positive, it indicates that the model has lower performance on less frequent classes, while if the EII is negative, it indicates that the model has higher performance on less frequent classes.

- If the EDI is close to zero, it means that the dispersion of prediction errors is small compared to the dispersion of true class labels. This indicates that the segmentation is of good quality. If the EDI is high, it means that the dispersion of the prediction errors is large compared to the dispersion of the true class labels. This indicates that the segmentation is of poor quality.