#### Import statements

In [1]:
import torch
import shutil
import os
import numpy as np

#### Load the drive folder containing all required files

In [2]:
# mount the drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# access the drive folder containing everything we need
%cd /content/drive/My Drive/Colab environments/Risiko! DL

# check that we are in the desired folder
%ls

Mounted at /content/drive
/content/drive/My Drive/Colab environments/Risiko! DL
 [0m[01;34m3D_models[0m/
 [01;34mbackgrounds[0m/
 coco_risiko.yaml
 custom_complete_yolo.yaml
 custom_yolo.yaml
 [01;34mdatasets[0m/
 [01;34mpre_trained_weights[0m/
 [01;34mreal_images[0m/
'Risiko!_Synthetic_Dataset_Creator.ipynb'
'Risiko! Test.ipynb'
 Split_train_test_val.ipynb
 [01;34msynthetic_dataset[0m/
 [01;34msynthetic_images[0m/
 Tanks_flags_detection_training.ipynb
 Test_detection.ipynb
 test_example.txt
 test.txt
 [01;34mtraining_results[0m/
 [01;34myolov5[0m/


####Load the weights of the models
- $only\_synthetic$ contains the training results of the model trained using only synthetic images;
- $synthetic\_and\_real$ contains the training results of the model trained using both synthetic and real images.

In [3]:
# generic path to the weights folder
weights_folder = 'training_results'
weights_path = os.path.join(os.getcwd(), weights_folder)

# specific path to weigths obtained with only synthetic images
specific_folder_syn = 'only_synthetic/weights/best.pt'
syn_weights_path = os.path.join(weights_path, specific_folder_syn)

# specific path to weigths obtained with synthetic and real images
specific_folder_compl = 'synthetic_and_real/weights/best.pt'
compl_weights_path = os.path.join(weights_path, specific_folder_compl)

#### Clone the GitHub repository yolov5 and install requirements

In [4]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -qr requirements.txt

fatal: destination path 'yolov5' already exists and is not an empty directory.
/content/drive/My Drive/Colab environments/Risiko! DL/yolov5
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m628.0/628.0 kB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h

#### Load the models

In [5]:
# set the device
device = torch.device("cuda" if torch.cuda.is_available else "cpu")

# model trained only on synthetic images
syn_model = torch.hub.load(os.getcwd(), 'custom', path=syn_weights_path, source='local', force_reload=True)
syn_model.to(device)

# model trained only on synthetic images
compl_model = torch.hub.load(os.getcwd(), 'custom', path=compl_weights_path, source='local', force_reload=True)
compl_model.to(device)

YOLOv5 🚀 2023-7-12 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 157 layers, 7042489 parameters, 0 gradients, 15.9 GFLOPs
Adding AutoShape... 
YOLOv5 🚀 2023-7-12 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 157 layers, 7042489 parameters, 0 gradients, 15.9 GFLOPs
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 32, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
  

####Load the paths to the test images and the paths to the labels files

In [6]:
# set the models in evaluation mode
syn_model.eval()
compl_model.eval()


# IMAGES

# path to the synthetic dataset
syn_path = '/content/drive/My Drive/Colab environments/Risiko! DL/datasets/test/synthetic/images'
# path to the real dataset
real_path = '/content/drive/My Drive/Colab environments/Risiko! DL/datasets/test/real/images'

# Get the lists of files in the directories
syn_files = os.listdir(syn_path)
real_files = os.listdir(real_path)

# create the list of paths to the synthetic test images
test_synthetic_images = []

for file_name in syn_files:
    # full file path
    file_path = os.path.join(syn_path, file_name)
    # add the image to the list
    test_synthetic_images.append(file_path)

# create the list of paths to the real test images
test_real_images = []

for file_name in real_files:
    # full file path
    file_path = os.path.join(real_path, file_name)
    # add the image to the list
    test_real_images.append(file_path)


# LABELS

# paths to the labels
syn_labels_path = '/content/drive/My Drive/Colab environments/Risiko! DL/datasets/test/synthetic/labels'
real_labels_path = '/content/drive/My Drive/Colab environments/Risiko! DL/datasets/test/real/labels'

# get the lists of files in each directory
syn_files = os.listdir(syn_labels_path)
real_files = os.listdir(real_labels_path)

# create the list of synthetic test labels
test_synthetic_labels = []

for file_name in syn_files:
    # full file path
    file_path = os.path.join(syn_labels_path, file_name)
    # add the image to the list
    test_synthetic_labels.append(file_path)

# create the list of real test labels
test_real_labels = []

for file_name in real_files:
    # full file path
    file_path = os.path.join(real_labels_path, file_name)
    # add the image to the list
    test_real_labels.append(file_path)

####Since the ground truth labels contain the bounding boxes in format [x_center, y_center, width, height], we use the function below to convert them in the format [x_min, y_min, x_max, y_max]

In [7]:
# Convert the input bbox from [x_center, y_center, width, height] format to
# [x_min, y_min, x_max, y_max] format.
def convert_bbox(bbox):

    x_center = bbox[0]
    y_center = bbox[1]
    width = bbox[2]
    height = bbox[3]

    x_min = x_center - width / 2
    y_min = y_center - height / 2
    x_max = x_center + width / 2
    y_max = y_center + height / 2

    return [x_min, y_min, x_max, y_max]

####Extract bounding boxes and class labels from the files containing the ground truth of the test images

In [8]:
# Extract bboxes and labels from .txt files.
# $labels is the list of file paths to the .txt files.
# Return two lists:
#   - $true_bboxes containing the bboxes;
#   - $true_labels containing the labels.
def extract_bboxes_and_labels(labels):

    # store the true bboxes values of synthetic images
    true_bboxes = []

    # store the true class label values of synthetic images
    true_labels = []

    # iterate over the labels
    for label_file in labels:
        # open the current file
        with open(label_file, "r") as f:
            # class label values in the current file
            current_file_classes = []
            # bboxes values in the current file
            current_file_bboxes = []

            # iterate over the lines: each line is associated with a true instance
            for line in f:
                # extract values from a line
                string_values = line.split()
                # append the class value in the current line of the current file
                current_file_classes.append(float(string_values[0]))
                # values of the bbox in the current line of the current file
                bbox = []
                for i in range(1, len(string_values)):
                    bbox.append(float(string_values[i]))
                # convert the bbox format from (x_center, y_center, width, height)
                # to (x_min, y_min, x_max, y_max)
                conv_bbox = convert_bbox(bbox)
                # append the bbox in the current line of the current file
                current_file_bboxes.append(conv_bbox)

            # append the bboxes related to the current file
            true_bboxes.append(current_file_bboxes)
            # append the class labels related to the current file
            true_labels.append(current_file_classes)

    return true_bboxes, true_labels

In [9]:
# store the true bboxes values and the true class label values of synthetic images
syn_true_bboxes, syn_true_labels = extract_bboxes_and_labels(test_synthetic_labels)

# store the true bboxes values and the true class label values of real images
real_true_bboxes, real_true_labels = extract_bboxes_and_labels(test_real_labels)

##Inference of the models
We compute inference for:
- $syn\_model$ (model trained only with synthetic images) on the test set containing only synthetic images;
- $syn\_model$ (model trained only with synthetic images) on the test set containing only real images;
- $compl\_model$ (model trained with both synthetic and real images) on the test set containing only synthetic images;
- $compl\_model$ (model trained with both synthetic and real images) on the test set containing only real images;
- $syn\_model$ (model trained only with synthetic images) on the whole test set;
- $compl\_model$ (model trained with both synthetic and real images) on the whole test set;

In [10]:
# inference on synthetic images of the model trained only on synthetic images
results_sm_si = syn_model(test_synthetic_images, size=640)

# inference on real images of the model trained only on synthetic images
results_sm_ri = syn_model(test_real_images, size=640)

# inference on synthetic images of the model trained on both synthetic and real images
results_cm_si = compl_model(test_synthetic_images, size=640)

# inference on real images of the model trained on both synthetic and real images
results_cm_ri = compl_model(test_real_images, size=640)

# create complete test set and ground truth
test_images = test_synthetic_images + test_real_images
true_bb = syn_true_bboxes + real_true_bboxes
true_lb = syn_true_labels + real_true_labels

# inference on all test images of the model trained on only synthetic images
results_sm = syn_model(test_images, size=640)

# inference on all test images of the model trained on both synthetic and real images
results_cm = compl_model(test_images, size=640)

## Evaluation measures
The evaluation measures that we have decided to compute are:
- $class\_mean\_IOU$: is the average Intersection Over Union for each class label, considering all images in a given test set;
- $mean\_IOU$: is the mean Intersection Over Union over all class labels, considering all images in a given test set;
- $class\_precision$: precision for a single class label;
- $average\_precision$: is the average of the precisions for each class label;
- $class\_recall$: recall for a single class label;
- $average\_recall$: is the average of the recalls for each class label;
- $f1\_class$: f1 for a single class label;
- $average\_f1$: is the average of the f1 scores for each class label.

####Implementation of the evaluation measures

In [17]:
IMAGES_WIDTH = 1920
IMAGES_HEIGHT = 1280

CONFIDENCE_THR = 0.5
IOU_THR = 0.3

# Normalize a bbox in image of size IMAGES_WIDTH x IMAGES_HEIGHT to values in [0, 1].
# The bbox must be in format [x_min, y_min, x_max, y_max].
# Return the normalized bbox.
def normalize_predicted_bbox(bbox):

    x_min = bbox[0] / IMAGES_WIDTH
    y_min = bbox[1] / IMAGES_HEIGHT
    x_max = bbox[2] / IMAGES_WIDTH
    y_max = bbox[3] / IMAGES_HEIGHT

    return [x_min, y_min, x_max, y_max]


# Print all (key, value) pairs in the dictionary $dic.
# The dictionary must have float numbers as values.
# As a first row, the string $s is printed before the dictionary.
def print_dictionary(dic, s):
    print(s)
    for key in dic.keys():
        print(str(key) + ": " + "{:.3f}".format(dic[key]))




# Compute the IOU between bboxes $bbox_1 and $bbox_2.
# $bbox_1 and $bbox_2 are in [x_min, y_min, x_max, y_max] format.
# Return the IOU between $bbox_1 and $bbox_2.
def calculate_iou(bbox_1, bbox_2):

    # Compute the intersection coordinates
    x_left = max(bbox_1[0], bbox_2[0])
    y_top = max(bbox_1[1], bbox_2[1])
    x_right = min(bbox_1[2], bbox_2[2])
    y_bottom = min(bbox_1[3], bbox_2[3])

    # compute the areas of both bounding bboxes
    area_bbox_1 = (bbox_1[2] - bbox_1[0] + 1) * (bbox_1[3] - bbox_1[1] + 1)
    area_bbox_2 = (bbox_2[2] - bbox_2[0] + 1) * (bbox_2[3] - bbox_2[1] + 1)

    # compute the area of the intersection
    intersection_area = max(0, (x_right - x_left + 1)) * max(0, (y_bottom - y_top + 1))

    # calculate the IOU
    return intersection_area / float(area_bbox_1 + area_bbox_2 - intersection_area)


# Compute the mean IOU for each class between predicted bboxes and true ones in a given image.
# $predicted_bboxes: predicted bboxes in the image.
# $true_bboxes: ground truth bboxes in the image.
# $predicted_classes: predicted class labels for each bbox in the image.
# $true_classes: true class labels of true bboxes in the image.
# Return a dictionary $class_mean_IOU with classes as keys and mean IOU score for that class in the image.
def class_mean_IOU_image(predicted_bboxes, true_bboxes, predicted_classes, true_classes):

    # dictionary where each key is a class label and the value is the list of IOU for that label
    class_IOU_list = {}

    # iterate over the predicted bboxes
    for i in range(len(predicted_bboxes)):

        # list of IOU scores between the current predicted bbox and all the true bboxes with the same class
        current_bbox_IOU_scores = []

        # class label of the current predicted bbox
        predicted_class = predicted_classes[i].tolist()

        # initialize the entry of the dictionary
        class_IOU_list[predicted_class] = []

        # iterate over the true bboxes to compute the IOU scores for the current predicted bbox
        for j in range(len(true_bboxes)):
            if predicted_class == true_classes[j]:
                current_bbox_IOU_scores.append(
                    calculate_iou(normalize_predicted_bbox(predicted_bboxes[i].tolist()), true_bboxes[j]))
        if len(current_bbox_IOU_scores) > 0:
            class_IOU_list[predicted_class].append(np.max(current_bbox_IOU_scores))
        else:
            class_IOU_list[predicted_class].append(0)

    # dictionary where each class label is a key and the associated value is the mean IOU with that label
    class_mean_IOU = {}

    # fill the dictionary
    for class_label in class_IOU_list.keys():
        class_mean_IOU[class_label] = np.mean(class_IOU_list[class_label])

    return class_mean_IOU


# Compute the mean IOU for each class between predicted bboxes and true ones for a dataset of images.
# $results is the result of the application of yolov5 model to a batch of images.
# $true_bboxes: ground truth bboxes. $true_bboxes[i] contains true bboxes in image $i in the batch of images.
# $true_classes: true class labels of true bboxes. $true_classes[i] contains the class label of $true_bboxes[i].
# Return a dictionary $class_mean_IOU with classes as keys and mean IOU score for that class in the set of images.
def class_mean_IOU(results, true_bboxes, true_classes):

    # dictionary with class labels as keys and the corresponding lists of IOU scores as values
    IOU_scores = {}

    # iterate over the images
    for i in range(len(results.xyxy)):

        # result of inference on image $i in the batch
        predictions = results.xyxy[i]
        # predicted bboxes
        pred_boxes = predictions[:, :4] # [xmin, ymin, xmax, ymax] for each bbox
        # corresponding predicted class labels
        pred_labels = predictions[:, 5] # label for each prediction in the image

        # dictionary of mean IOU values for the current image
        current_image_IOU = class_mean_IOU_image(pred_boxes, true_bboxes[i], pred_labels, true_classes[i])

        # iterate over the dictionary to fill the global list
        for class_label in current_image_IOU.keys():
            if class_label not in IOU_scores:
                IOU_scores[class_label] = []
            IOU_scores[class_label].append(current_image_IOU[class_label])

    # compute the mean IOU for each class and return it
    class_mean_IOU = {}
    for class_label in IOU_scores.keys():
        class_mean_IOU[class_label] = np.mean(IOU_scores[class_label])

    return class_mean_IOU


# Compute the mean IOU, considering all class mean IOU values.
# $classes_mean_IOU is the dictionary containing the class labels as keys and related class mean IOU as value.
# Return the value $mean_IOU: mean IOU considering each class mean IOU.
def mean_IOU(classes_mean_IOU):

    # sum IOU over all classes
    sum_IOU = 0

    # number of labels
    num_classes = 0

    # iterate over the dictionary
    for class_label in classes_mean_IOU.keys():
        sum_IOU += classes_mean_IOU[class_label]
        num_classes += 1

    return sum_IOU / num_classes


# Compute the true positives and false positives for each class.
# $results is the result of the application of the yolov5 model to a batch of images.
# $true_bboxes: ground truth bboxes of a batch of images.
# $true_classes: true class labels of true bboxes.
# $conf_thr is the confidence threshold.
# $iou_thr is the IOU threshold.
# Return two dictionaries:
#   - $class_tp_dic contains the number of true positives for each class;
#   - $class_fp_dic contains the number of false positives for each class.
def class_tp_fp(results, true_bboxes, true_classes, conf_thr=CONFIDENCE_THR, iou_thr=IOU_THR):

    # dictionary where each key is a class label and the value is the number of true positives of that class
    # in the image
    class_tp_dic = {}
    # dictionary where each key is a class label and the value is the number of false positives of that class
    # in the image
    class_fp_dic = {}

    # iterate over the images
    for i in range(len(results.xyxy)):

        # result of inference on image $i in the batch
        predictions = results.xyxy[i]
        # predicted bboxes
        pred_bboxes = predictions[:, :4] # [xmin, ymin, xmax, ymax] for each bbox
        # corresponding confidence scores
        confidence_scores = predictions[:, 4]
        # corresponding predicted class labels
        predicted_classes = predictions[:, 5] # label for each prediction in the image

        # iterate over the predicted bboxes in the current image
        for j in range(len(pred_bboxes)):

            # list of IOU scores between the current predicted bbox and all the true bboxes with the same class
            current_bbox_IOU_scores = []

            # class label of the current predicted bbox
            predicted_class = predicted_classes[j].tolist()

            # initialize the dictionaries entries if they have not been added to the dictionaries yet
            if predicted_class not in class_tp_dic:
                class_tp_dic[predicted_class] = 0
            if predicted_class not in class_fp_dic:
                class_fp_dic[predicted_class] = 0

            # iterate over the true bboxes to compute the IOU scores for the current predicted bbox
            for k in range(len(true_bboxes[i])):
                if predicted_class == true_classes[i][k]:
                    current_bbox_IOU_scores.append(
                        calculate_iou(normalize_predicted_bbox(pred_bboxes[j].tolist()), true_bboxes[i][k]))

            # check if the current predicted bbox is a false positive or a true positive
            if len(current_bbox_IOU_scores) > 0:
                if confidence_scores[j] >= conf_thr and np.max(current_bbox_IOU_scores) >= iou_thr:
                    class_tp_dic[predicted_class] += 1
                else:
                    class_fp_dic[predicted_class] += 1
            else:
                    class_fp_dic[predicted_class] += 1

    return class_tp_dic, class_fp_dic

# Compute the precision for each class.
# $tp_dic is a dictionary containing as keys the class labels and the true positives for that class label as value.
# $fp_dic is a dictionary containing as keys the class labels and the false positives for that class label as value.
# Return a dictionary with class labels as keys and related class precisions as values.
def class_precision(tp_dic, fp_dic):

    precision_dic = {}

    # iterate over the classes in the dictionaries
    for class_label in tp_dic.keys():
        precision_dic[class_label] = tp_dic[class_label] / (tp_dic[class_label] + fp_dic[class_label])

    return precision_dic


# Compute the average precision, taking into account all classes.
# $tp_dic is a dictionary containing as keys the class labels and the true positives for that class label as value.
# $fn_dic is a dictionary containing as keys the class labels and the false negatives for that class label as value.
# Return the average precision.
def average_precision(tp_dic, fp_dic):

    # overall number of tp taking into account all classes
    sum_tp = 0
    # overall number of positives taking into account all classes
    sum_pred_positives = 0

    # iterate over the dictionaries
    for class_label in tp_dic.keys():
        sum_tp += tp_dic[class_label]
        sum_pred_positives += tp_dic[class_label] + fp_dic[class_label]

    return sum_tp / sum_pred_positives


# Compute the true positives for each class.
# $results is the result of the application of the yolov5 model to a batch of images.
# $true_bboxes: ground truth bboxes of a batch of images.
# $true_classes: true class labels of true bboxes.
# $conf_thr is the confidence threshold.
# $iou_thr is the IOU threshold.
# Return the dictionary $class_tp_dic, which contains the number of true positives for each class.
def class_tp(results, true_bboxes, true_classes, conf_thr=CONFIDENCE_THR, iou_thr=IOU_THR):

    # dictionary where each key is a class label and the value is the number of true positives of that class
    # in the image
    class_tp_dic = {}

    # iterate over the images
    for i in range(len(results.xyxy)):

        # result of inference on image $i in the batch
        predictions = results.xyxy[i]
        # predicted bboxes
        pred_bboxes = predictions[:, :4] # [xmin, ymin, xmax, ymax] for each bbox
        # corresponding confidence scores
        confidence_scores = predictions[:, 4]
        # corresponding predicted class labels
        predicted_classes = predictions[:, 5] # label for each prediction in the image

        # iterate over the predicted bboxes in the current image
        for j in range(len(pred_bboxes)):

            # list of IOU scores between the current predicted bbox and all the true bboxes with the same class
            current_bbox_IOU_scores = []

            # class label of the current predicted bbox
            predicted_class = predicted_classes[j].tolist()

            # initialize the dictionary entry if it has not been added yet
            if predicted_class not in class_tp_dic:
                class_tp_dic[predicted_class] = 0

            # iterate over the true bboxes to compute the IOU scores for the current predicted bbox
            for k in range(len(true_bboxes[i])):
                if predicted_class == true_classes[i][k]:
                    current_bbox_IOU_scores.append(
                        calculate_iou(normalize_predicted_bbox(pred_bboxes[j].tolist()), true_bboxes[i][k]))

            # check if the current predicted bbox is a true positive
            if len(current_bbox_IOU_scores) > 0:
                if confidence_scores[j] >= conf_thr and np.max(current_bbox_IOU_scores) >= iou_thr:
                    class_tp_dic[predicted_class] += 1

    return class_tp_dic


# Compute the false negatives for each class.
# $results is the result of the application of the yolov5 model to a batch of images.
# $true_bboxes: ground truth bboxes of a batch of images.
# $true_classes: true class labels of true bboxes.
# Return the dictionary $class_fn_dic, which contains the number of false negatives for each class.
def class_fn(results, true_bboxes, true_classes, conf_thr=CONFIDENCE_THR, iou_thr=IOU_THR):

    # dictionary where each key is a class label and the value is the number of false negatives of that class
    # in the image
    class_fn_dic = {}

    # iterate over the images
    for i in range(len(results.xyxy)):

        # result of inference on image $i in the batch
        predictions = results.xyxy[i]
        # predicted bboxes
        pred_bboxes = predictions[:, :4] # [xmin, ymin, xmax, ymax] for each bbox
        # corresponding confidence scores
        confidence_scores = predictions[:, 4]
        # corresponding predicted class labels
        predicted_classes = predictions[:, 5] # label for each prediction in the image

        # iterate over the true bboxes in the current image
        for j in range(len(true_bboxes[i])):

            # list of IOU scores between the current true bbox and all the predicted bboxes with the same class
            current_bbox_IOU_scores = []

            # class label of the current true bbox
            true_class = true_classes[i][j]

            # initialize the dictionary entry if it has not been added yet
            if true_class not in class_fn_dic:
                class_fn_dic[true_class] = 0

            # iterate over the predicted bboxes to compute the IOU scores with the current true bbox
            for k in range(len(pred_bboxes.tolist())):
                if true_class == predicted_classes[k].tolist():
                    current_bbox_IOU_scores.append(
                        calculate_iou(normalize_predicted_bbox(pred_bboxes[k].tolist()), true_bboxes[i][j]))

            # check if the current predicted bbox is a false negative
            if len(current_bbox_IOU_scores) != 0:
                if (confidence_scores[np.argmax(current_bbox_IOU_scores)] < conf_thr or
                    np.max(current_bbox_IOU_scores) < iou_thr):
                    class_fn_dic[true_class] += 1
            else:
                class_fn_dic[true_class] += 1

    return class_fn_dic


# Compute the recall for each class.
# $tp_dic is a dictionary containing as keys the class labels and the true positives for that class label as value.
# $fn_dic is a dictionary containing as keys the class labels and the false negatives for that class label as value.
# Return a dictionary with class labels as keys and related class recalls as values.
def class_recall(tp_dic, fn_dic):

    recall_dic = {}

    # iterate over the classes in the dictionaries
    for class_label in tp_dic.keys():
        recall_dic[class_label] = tp_dic[class_label] / (tp_dic[class_label] + fn_dic[class_label])

    return recall_dic


# Compute the average recall, taking into account all classes.
# $tp_dic is a dictionary containing as keys the class labels and the true positives for that class label as value.
# $fn_dic is a dictionary containing as keys the class labels and the false negatives for that class label as value.
# Return the average recall.
def average_recall(tp_dic, fn_dic):

    # overall number of tp taking into account all classes
    sum_tp = 0
    # overall number of real positives taking into account all classes
    sum_real_positives = 0

    # iterate over the dictionaries
    for class_label in tp_dic.keys():
        sum_tp += tp_dic[class_label]
        sum_real_positives += tp_dic[class_label] + fn_dic[class_label]

    return sum_tp / sum_real_positives


# Compute the f1 score for each class.
# $class_precision is a dictionary containing precision for each class.
# $class_recall is a dictionary containing recall for each class.
# Return a dictionary with the f1 score for each class.
def class_f1(class_precision, class_recall):

    # dictionary with class labels as keys and class f1 scores as values
    class_f1 = {}

    # fill the dictionary
    for label in class_precision.keys():
        if class_precision[label] != 0 and class_recall[label] != 0:
            class_f1[label] = 2 * class_precision[label] * class_recall[label] / (class_precision[label] + class_recall[label])
        else:
            class_f1[label] = 0


    # return the dictionary
    return class_f1

def mean_f1(class_f1):

    # sum of f1 scores
    f1 = 0
    # number of classes
    classes = 0

    # iterate over the classes
    for label in class_f1.keys():
        f1 += class_f1[label]
        classes +=1

    # return the mean f1 over all classes
    return f1 / classes


# Function that applies the above evaluations and prints them.
# $string_print contains information to be printed before the results.
# $true_bboxes: ground truth bboxes of a batch of images.
# $true_classes: true class labels of true bboxes.
def evaluate_and_print(string_print, results, true_bboxes, true_labels):

    # print information regarding the model
    print()
    print("-" * 50)
    print("\n" + string_print)

    # compute the class mean IOU
    class_mean_iou = class_mean_IOU(results, true_bboxes, true_labels)
    # sort the dictionary by key, i.e. by class
    class_mean_iou = dict(sorted(class_mean_iou.items()))
    print_dictionary(class_mean_iou, "Mean IOU for each class:")

    mean_iou = mean_IOU(class_mean_iou)
    print("\nMean IOU over all classes: " + "{:.3f}".format(mean_iou))
    print("\n")


    # true positives and false positives for each class
    cl_tp, cl_fp = class_tp_fp(results, true_bboxes, true_labels)

    # sort the dictionaries by key, i.e. by class
    cl_tp = dict(sorted(cl_tp.items()))
    cl_fp = dict(sorted(cl_fp.items()))

    # compute precision for each class
    class_prec = class_precision(cl_tp, cl_fp)
    print_dictionary(class_prec, "Precision for each class:")

    # compute average precision over all classes
    av_precision = average_precision(cl_tp, cl_fp)
    print("\nAverage precision over all classes: " + "{:.3f}".format(av_precision))
    print("\n")


    # compute true positives and false negatives
    cl_tp = class_tp(results, true_bboxes, true_labels)
    cl_fn = class_fn(results, true_bboxes, true_labels)

    # sort the dictionaries by key, i.e. by class
    cl_tp = dict(sorted(cl_tp.items()))
    cl_fn = dict(sorted(cl_fn.items()))

    # compute recall for each class
    class_rec = class_recall(cl_tp, cl_fn)
    print_dictionary(class_rec, "Recall for each class:")

    # compute average recall
    av_recall = average_recall(cl_tp, cl_fn)
    print("\nAverage recall over all classes: " + "{:.3f}".format(av_recall))
    print("\n")


    # compute f1 score for each class
    f1_class = class_f1(class_prec, class_rec)
    print_dictionary(f1_class, "F1 score for each class:")

    # compute average f1 score
    f1 = mean_f1(f1_class)
    print("\nAverage F1 score over all classes: " + "{:.3f}".format(f1))
    print("\n")


####Application of the above evaluation measures to all the inference results (different models on synthetic and real test sets)

In [18]:
# MODEL TRAINED ON SYNTHETIC IMAGES: SYNTHETIC TEST SET
str_pr = "MODEL TRAINED ON SYNTHETIC IMAGES: SYNTHETIC TEST SET"
evaluate_and_print(str_pr, results_sm_si, syn_true_bboxes, syn_true_labels)

# MODEL TRAINED ON SYNTHETIC IMAGES: REAL TEST SET
str_pr = "MODEL TRAINED ON SYNTHETIC IMAGES: REAL TEST SET"
evaluate_and_print(str_pr, results_sm_ri, real_true_bboxes, real_true_labels)

# MODEL TRAINED ON ALL IMAGES: SYNTHETIC TEST SET
str_pr = "MODEL TRAINED ON ALL IMAGES: SYNTHETIC TEST SET"
evaluate_and_print(str_pr, results_cm_si, syn_true_bboxes, syn_true_labels)

# MODEL TRAINED ON ALL IMAGES: REAL TEST SET
str_pr = "MODEL TRAINED ON ALL IMAGES: REAL TEST SET"
evaluate_and_print(str_pr, results_cm_ri, real_true_bboxes, real_true_labels)

# MODEL TRAINED ONLY ON SYNTHETIC IMAGES: COMPLETE TEST SET
str_pr = "MODEL TRAINED ONLY ON SYNTHETIC IMAGES: COMPLETE TEST SET"
evaluate_and_print(str_pr, results_sm, true_bb, true_lb)

# MODEL TRAINED ON ALL IMAGES: COMPLETE TEST SET
str_pr = "MODEL TRAINED ON ALL IMAGES: COMPLETE TEST SET"
evaluate_and_print(str_pr, results_cm, true_bb, true_lb)


--------------------------------------------------

MODEL TRAINED ON SYNTHETIC IMAGES: SYNTHETIC TEST SET
Mean IOU for each class:
0.0: 0.910
1.0: 0.922
2.0: 0.925
3.0: 0.940
4.0: 0.923
5.0: 0.918
6.0: 0.905
7.0: 0.936
8.0: 0.947
9.0: 0.905
10.0: 0.932
11.0: 0.959

Mean IOU over all classes: 0.927


Precision for each class:
0.0: 0.894
1.0: 0.877
2.0: 0.841
3.0: 0.865
4.0: 0.871
5.0: 0.886
6.0: 0.855
7.0: 0.900
8.0: 0.852
9.0: 0.812
10.0: 0.926
11.0: 0.872

Average precision over all classes: 0.873


Recall for each class:
0.0: 1.000
1.0: 1.000
2.0: 0.995
3.0: 1.000
4.0: 1.000
5.0: 1.000
6.0: 1.000
7.0: 0.973
8.0: 0.986
9.0: 0.951
10.0: 0.987
11.0: 1.000

Average recall over all classes: 0.996


F1 score for each class:
0.0: 0.944
1.0: 0.934
2.0: 0.912
3.0: 0.928
4.0: 0.931
5.0: 0.940
6.0: 0.922
7.0: 0.935
8.0: 0.914
9.0: 0.876
10.0: 0.955
11.0: 0.932

Average F1 score over all classes: 0.927



--------------------------------------------------

MODEL TRAINED ON SYNTHETIC IMAGES: REA