# Mattia Sarti's Notebook
### The following source code is illustrated [here](https://github.com/MattiaSarti/yolo-to-help-protect-the-great-barrier-reef)

#### Settings

In [None]:
! if [ -d "/kaggle/working/cache" ]; then rm -r /kaggle/working/cache; fi

In [None]:
! mkdir /kaggle/working/cache

In [None]:
__name__ = 'main_by_mattia'

#### Common constants

In [None]:
"""
Convenient definitions of common constants.
"""


from typing import Tuple

from numpy import arange, meshgrid, ndarray, stack
# pylint: disable=import-error,no-name-in-module
from tensorflow import float32 as tf_float32, uint8 as tf_uint8
# pylint: enable=import-error,no-name-in-module


def compute_grid_cell_centers_xy_coords() -> Tuple[ndarray, ndarray]:
    """
    Build the (x, y) coordinates of both the centers and the top-left corners
    of all the output grid cells.
    ---
        Outputs' Shapes:
            - (OUTPUT_GRID_N_ROWS, OUTPUT_GRID_N_COLUMNS, 2)
            - (OUTPUT_GRID_N_ROWS, OUTPUT_GRID_N_COLUMNS, 2)
    ---
        Outputs' Meanings:
            - centers: indexing by (grid cell row, grid cell column) gives
            the (x, y) coordinates of the center of that grid cell
            - corners: indexing by (grid cell row, grid cell column) gives
            the (x, y) coordinates of the top-left corner of that grid cell
    """
    # offsets of a cell center from the top-left corner of the same cell:
    half_cell_n_columns = int(OUTPUT_GRID_CELL_N_COLUMNS / 2)
    half_cell_n_rows = int(OUTPUT_GRID_CELL_N_ROWS / 2)

    # values that x and y can take, one per grid column/row, for cell centers
    # and for cell top-left corners:
    x_values_of_centers = arange(
        half_cell_n_columns, IMAGE_N_COLUMNS, OUTPUT_GRID_CELL_N_COLUMNS
    )
    y_values_of_centers = arange(
        half_cell_n_rows, IMAGE_N_ROWS, OUTPUT_GRID_CELL_N_ROWS
    )
    x_values_of_corners = arange(
        0, IMAGE_N_COLUMNS, OUTPUT_GRID_CELL_N_COLUMNS
    )
    y_values_of_corners = arange(
        0, IMAGE_N_ROWS, OUTPUT_GRID_CELL_N_ROWS
    )

    # there must be exactly one value per grid column/row:
    for x_values in (x_values_of_centers, x_values_of_corners):
        assert x_values.shape == (OUTPUT_GRID_N_COLUMNS,)
    for y_values in (y_values_of_centers, y_values_of_corners):
        assert y_values.shape == (OUTPUT_GRID_N_ROWS,)

    # combining x and y values into grids whose last axis is (x, y):
    grid_of_centers = stack(
        meshgrid(x_values_of_centers, y_values_of_centers),
        axis=-1
    )
    grid_of_corners = stack(
        meshgrid(x_values_of_corners, y_values_of_corners),
        axis=-1
    )

    return (grid_of_centers, grid_of_corners)


def compute_weights_to_balance_anchors_emptiness() -> Tuple[float, float]:
    """
    Return the weights, for the loss function terms, that balance full vs
    empty anchors.
    ---
        Output Meaning:
            - (weight of full anchors, weight of empty anchors): each weight
            is inversely proportional to the average number of anchors of
            the respective kind per image, and the two weights sum to 1
    """
    # NOTE: this assumes that each bounding box fills exactly one anchor, so
    # that the average number of full anchors per image equals the average
    # number of bounding boxes per image (it is a count of anchors, not a
    # fraction of them, to be comparable with the count of empty anchors):
    average_n_full_anchors_per_image = AVERAGE_N_BOUNDING_BOXES_PER_IMAGE
    average_n_empty_anchors_per_image = (
        N_ANCHORS_PER_IMAGE - average_n_full_anchors_per_image
    )

    # NOTE: with f full and e empty anchors, the inverse-frequency weights
    # 1/f and 1/e normalized by their sum simplify to e/(f+e) and f/(f+e),
    # a form that does not divide by zero when there are no full anchors:
    normalized_full_anchors_weight = (
        average_n_empty_anchors_per_image / N_ANCHORS_PER_IMAGE
    )
    normalized_empty_anchors_weight = (
        average_n_full_anchors_per_image / N_ANCHORS_PER_IMAGE
    )

    return normalized_full_anchors_weight, normalized_empty_anchors_weight


# average number of bounding boxes per image, used to balance the loss terms:
AVERAGE_N_BOUNDING_BOXES_PER_IMAGE = 0.51

# data types of the tensors representing samples and labels, respectively:
DATA_TYPE_FOR_INPUTS = tf_uint8
DATA_TYPE_FOR_OUTPUTS = tf_float32

DOWNSAMPLING_STEPS = 4

IMAGE_N_CHANNELS = 3
IMAGE_N_COLUMNS = 1280
IMAGE_N_ROWS = 720

# MINIMUM_BOUNDING_BOX_HEIGHT = 13  # [pixels]
# MINIMUM_BOUNDING_BOX_WIDTH = 17  # [pixels]

N_OUTPUTS_PER_ANCHOR = 5

# each anchor is a (width weight, height weight) pair:
ANCHORS_WIDTH_VS_HEIGHT_WEIGHTS = (
    (0.6, 0.4),
    (0.5, 0.5),
    # (0.4, 0.6)  # NOTE: empirically observed: this anchor is less relevant
)
# NOTE: the two weights of each anchor must sum to 1, which is checked with a
# tolerance since an exact equality between floats is fragile:
assert all(
    abs(width_weight + height_weight - 1) < 1e-9
    for (width_weight, height_weight) in ANCHORS_WIDTH_VS_HEIGHT_WEIGHTS
)
N_ANCHORS_PER_CELL = len(
    ANCHORS_WIDTH_VS_HEIGHT_WEIGHTS
)

OUTPUT_GRID_CELL_N_COLUMNS = 16  # NOTE: this may vary with the architecture
OUTPUT_GRID_CELL_N_ROWS = 16  # NOTE: this may vary with the architecture
# NOTE: common divisors of 1280 and 720: {1, 2, 4, 5, 8, 10, 16, 20, 40, 80},
# and the ones that respect the training-plus-validation set bounding boxes'
# distinction when using a single anchor are: {1, 2, 4, 5, 8, 10, 16}

# number of grid cells along each image dimension:
OUTPUT_GRID_N_COLUMNS = IMAGE_N_COLUMNS // OUTPUT_GRID_CELL_N_COLUMNS
OUTPUT_GRID_N_ROWS = IMAGE_N_ROWS // OUTPUT_GRID_CELL_N_ROWS

N_ANCHORS_PER_IMAGE = (
    OUTPUT_GRID_N_COLUMNS * OUTPUT_GRID_N_ROWS * N_ANCHORS_PER_CELL
)

(
    OUTPUT_GRID_CELL_CENTERS_XY_COORDS,
    OUTPUT_GRID_CELL_CORNERS_XY_COORDS
) = compute_grid_cell_centers_xy_coords()

(
    LOSS_CONTRIBUTE_IMPORTANCE_OF_FULL_ANCHORS,
    LOSS_CONTRIBUTE_IMPORTANCE_OF_EMPTY_ANCHORS
) = compute_weights_to_balance_anchors_emptiness()
# FIXME: is this balancing reasonable?  with 0.999999990162037 vs
# 9.837962962962963e-09, using float32 will truncate the second term to 0!!
# NOTE: the computed weights above are deliberately overridden here with
# equal weights:
(
    LOSS_CONTRIBUTE_IMPORTANCE_OF_FULL_ANCHORS,
    LOSS_CONTRIBUTE_IMPORTANCE_OF_EMPTY_ANCHORS
) = (0.5, 0.5)


#### Samples and labels

In [None]:
#pylint: disable=too-many-lines
"""
Sample and label extraction from the raw dataset files, inspection and
preprocessing for feeding the model.
"""


from csv import reader as csv_reader
from itertools import combinations
from json import loads as json_loads
from math import sqrt
from os import getcwd, pardir
from os.path import join as path_join
from typing import Dict, List, Tuple

from matplotlib.patches import Rectangle
from matplotlib.pyplot import (
    clf as plt_clf,
    close as plt_close,
    figure as plt_figure,
    hist as plt_hist,
    get_current_fig_manager,
    pause as plt_pause,
    savefig as plt_savefig,
    show as plt_show,
    subplots,
    title as plt_title,
    xticks as plt_xticks
)
from numpy import argmin, sum as np_sum, unravel_index, zeros
# pylint: disable=import-error,no-name-in-module
from tensorflow import convert_to_tensor, py_function, Tensor
from tensorflow.data import AUTOTUNE, Dataset
from tensorflow.io import decode_jpeg, read_file
# pylint: enable=import-error,no-name-in-module

# only when NOT running everything in a unified notebook on Kaggle's servers:
if __name__ != 'main_by_mattia':
    from common_constants import (
        ANCHORS_WIDTH_VS_HEIGHT_WEIGHTS,
        DATA_TYPE_FOR_INPUTS,
        DATA_TYPE_FOR_OUTPUTS,
        IMAGE_N_COLUMNS,
        IMAGE_N_ROWS,
        N_ANCHORS_PER_CELL,
        N_OUTPUTS_PER_ANCHOR,
        OUTPUT_GRID_CELL_CENTERS_XY_COORDS,
        OUTPUT_GRID_CELL_CORNERS_XY_COORDS,
        OUTPUT_GRID_CELL_N_COLUMNS,
        OUTPUT_GRID_CELL_N_ROWS,
        OUTPUT_GRID_N_COLUMNS,
        OUTPUT_GRID_N_ROWS
    )


MINI_BATCH_SIZE = 8  # TODO
VALIDATION_SET_PORTION_OF_DATA = 0.1  # 0.3

if __name__ == 'main_by_mattia':
    # when running everything in a unified notebook on Kaggle's servers, the
    # dataset is under the 'input' directory:
    DATASET_DIR = path_join(
        getcwd(), pardir, 'input', 'tensorflow-great-barrier-reef'
    )
else:
    DATASET_DIR = path_join(getcwd(), pardir, 'tensorflow-great-barrier-reef')

# directory and files where the datasets' end-results are cached:
CACHE_DIR = path_join(getcwd(), 'cache')
CACHE_FILE_PATH_FOR_STATISTICS_SET = path_join(CACHE_DIR, 'statistics.tmp')
CACHE_FILE_PATH_FOR_TRAINING_SET = path_join(CACHE_DIR, 'training.tmp')
CACHE_FILE_PATH_FOR_VALIDATION_SET = path_join(CACHE_DIR, 'validation.tmp')

# CSV file with the labels of all the images:
LABELS_FILE_PATH = path_join(DATASET_DIR, 'train.csv')

# directory where the plotted pictures are saved:
PICTURES_DIR = path_join(getcwd(), pardir, 'docs', 'pictures')

SHOW_BOUNDING_BOXES_STATISTICS = False
SHOW_DATASET_MOVIES = False


def get_cell_containing_bounding_box_center(
        center_absolute_x_and_y_coords: Tuple[float, float]
) -> Tuple[int, int, int, int]:
    """
    Pick the output grid cell whose center is the closest one to the input
    bounding box center and return, as a 4-element list, its row and column
    indexes in the grid followed by the x and y coordinates of its top-left
    corner.
    ---
        Output Shape:
            - (4,)
    ---
        Output Meaning:
            - [
                grid cell row index,
                grid cell column index,
                x coordinate of cell top-left corner,
                y coordinate of cell top-left corner
            ]
    """
    # grid of squared distances between each grid cell center and the bounding
    # box center, i.e. the objective minimized to find the closest cell:
    offsets_from_cell_centers = (
        OUTPUT_GRID_CELL_CENTERS_XY_COORDS - center_absolute_x_and_y_coords
    )
    squared_distances_from_cell_centers = np_sum(
        a=(offsets_from_cell_centers ** 2),
        axis=-1
    )

    # NOTE: in case of equivalent minima, the first one is picked
    flat_index_of_closest_cell = argmin(a=squared_distances_from_cell_centers)

    # turning the flat index into the row and column indexes of the cell:
    (  # pylint: disable=unbalanced-tuple-unpacking
        closest_cell_row_index,
        closest_cell_column_index
    ) = unravel_index(
        indices=flat_index_of_closest_cell,
        shape=(OUTPUT_GRID_N_ROWS, OUTPUT_GRID_N_COLUMNS),
        order='C'
    )

    # [x coordinate of cell corner, y coordinate of cell corner]:
    closest_cell_corner_x_and_y_coords = OUTPUT_GRID_CELL_CORNERS_XY_COORDS[
        closest_cell_row_index,
        closest_cell_column_index,
        :
    ].tolist()

    return (
        [closest_cell_row_index, closest_cell_column_index] +
        closest_cell_corner_x_and_y_coords
    )


def get_index_of_anchor_with_closest_aspect_ratio(
        absolute_width: float,
        absolute_height: float
) -> int:
    """
    Return the index of the anchor whose width-vs-height weights are the
    closest ones to those of the bounding box with the input absolute width
    and absolute height; in case of ties, the anchor listed first wins.
    """
    bounding_box_width_weight = (
        absolute_width / (absolute_width + absolute_height)
    )
    bounding_box_height_weight = (
        absolute_height / (absolute_width + absolute_height)
    )

    def weights_mismatch_with_anchor(anchor_index: int) -> float:
        # sum of the absolute differences between the anchor weights and the
        # bounding box ones:
        anchor_weights = ANCHORS_WIDTH_VS_HEIGHT_WEIGHTS[anchor_index]
        return (
            abs(anchor_weights[0] - bounding_box_width_weight) +
            abs(anchor_weights[1] - bounding_box_height_weight)
        )

    # NOTE: sorting is stable, so equally distant anchors keep their order
    anchor_indexes_by_increasing_mismatch = sorted(
        range(len(ANCHORS_WIDTH_VS_HEIGHT_WEIGHTS)),
        key=weights_mismatch_with_anchor
    )

    return anchor_indexes_by_increasing_mismatch[0]


def dataset_of_samples_and_bounding_boxes() -> Dataset:
    """
    Create a TensorFlow dataset iterating over all the images listed in
    IMAGE_PATHS_TO_BOUNDING_BOXES, each paired with the label made of its
    bounding boxes.
    """
    def load_sample_with_bounding_boxes(image_path):
        # the loading function is plain Python code, so it is wrapped to be
        # usable as a dataset transformation:
        return py_function(
            func=load_sample_and_get_bounding_boxes,
            inp=[image_path],
            Tout=(DATA_TYPE_FOR_INPUTS, DATA_TYPE_FOR_OUTPUTS)
        )

    # dataset of image paths, i.e. the keys of the dictionary:
    samples_and_bounding_boxes_dataset = Dataset.from_tensor_slices(
        tensors=list(IMAGE_PATHS_TO_BOUNDING_BOXES)
    ).map(
        map_func=load_sample_with_bounding_boxes,
        num_parallel_calls=AUTOTUNE,
        deterministic=True
    )

    # only when running everything in a unified notebook on Kaggle's servers:
    if __name__ == 'main_by_mattia':
        # caching end-results on file to optimize performances:
        samples_and_bounding_boxes_dataset = (
            samples_and_bounding_boxes_dataset.cache(
                filename=CACHE_FILE_PATH_FOR_STATISTICS_SET
            )
        )

    # pre-fetching final elements to optimize performances:
    return samples_and_bounding_boxes_dataset.prefetch(buffer_size=AUTOTUNE)


def dataset_of_samples_and_model_outputs(shuffle: bool = True) -> Dataset:
    """
    Create a TensorFlow dataset iterating over all the images listed in
    IMAGE_PATHS_TO_MODEL_OUTPUTS, each paired with the label made of its
    model outputs; the order is shuffled, with a fixed seed, unless the input
    flag is disabled.
    """
    def load_sample_with_model_outputs(image_path):
        # the loading function is plain Python code, so it is wrapped to be
        # usable as a dataset transformation:
        return py_function(
            func=load_sample_and_get_model_outputs,
            inp=[image_path],
            Tout=(DATA_TYPE_FOR_INPUTS, DATA_TYPE_FOR_OUTPUTS)
        )

    # dataset of image paths, i.e. the keys of the dictionary:
    paths_dataset = Dataset.from_tensor_slices(
        tensors=list(IMAGE_PATHS_TO_MODEL_OUTPUTS)
    )

    # NOTE: paths are shuffled, before loading anything, so that a shuffling
    # buffer as large as the whole set is affordable in memory:
    if shuffle:
        paths_dataset = paths_dataset.shuffle(
            buffer_size=N_TRAINING_PLUS_VALIDATION_SAMPLES,
            seed=0,
            reshuffle_each_iteration=False  # NOTE: relevant when splitting
        )

    # NOTE: caching and pre-fetching are not applied here but later, after
    # training/validation splitting and batching, to optimize performances

    return paths_dataset.map(
        map_func=load_sample_with_model_outputs,
        num_parallel_calls=AUTOTUNE,
        deterministic=True
    )


def _average_or_nan(total: float, count: int) -> float:
    """
    Return the input total divided by the input count, or NaN when the count
    is zero, i.e. when there is nothing to average.
    """
    return total / count if count else float('nan')


def _plot_histogram_and_save_picture(  # pylint: disable=too-many-arguments
        values: List[float],
        bins: List[int],
        color: str,
        what_it_represent: str,
        ticks: List[int],
        small_rotated_tick_labels: bool = False,
        maximize_window: bool = False
) -> None:
    """
    Draw the histogram of the input values on the current figure, save it in
    PICTURES_DIR as a PNG picture named after its title, show it for one
    second and clear the figure.
    """
    plt_hist(
        x=values,
        bins=bins,
        align='left',
        color=color,
        rwidth=0.8
    )
    plt_title(label=what_it_represent)
    if small_rotated_tick_labels:
        plt_xticks(ticks=ticks, fontsize=6, rotation=90)
    else:
        plt_xticks(ticks=ticks)

    if maximize_window:
        figure_manager = get_current_fig_manager()
        # NOTE: maximization is best-effort: it is skipped when the figure
        # manager exposes no window with a 'maxsize' method (presumably any
        # non-Tk backend - to be confirmed)
        window = getattr(figure_manager, 'window', None)
        if hasattr(window, 'maxsize'):
            figure_manager.resize(*window.maxsize())

    plt_savefig(
        fname=path_join(
            PICTURES_DIR,
            what_it_represent + '.png'
        ),
        bbox_inches='tight'
    )
    plt_show(block=False)
    plt_pause(interval=1)
    plt_clf()


def inspect_bounding_boxes_statistics_on_training_n_validation_set() -> None:  # noqa: E501 pylint: disable=too-many-locals
    """
    Inspect and print the following statistics of bounding boxes in the
    training-plus-validation set:
        - total number of bounding boxes
        - total number of images
        - average number of bounding boxes per image
        - minimum number of bounding boxes per image
        - maximum number of bounding boxes per image
        - total number of empty images
        - average bounding box height [pixels]
        - average bounding box width [pixels]
        - average bounding boxes' centers distance [pixels]
        - average bounding boxes' centers x-coord distance [pixels]
        - average bounding boxes' centers y-coord distance [pixels]
        - minimum bounding box height [pixels]
        - minimum bounding box width [pixels]
        - minimum bounding boxes' centers distance [pixels]
        - minimum bounding boxes' centers x-coord distance [pixels]
        - minimum bounding boxes' centers y-coord distance [pixels]
        - maximum bounding box height [pixels]
        - maximum bounding box width [pixels]
        - maximum bounding boxes' centers distance [pixels]
        - maximum bounding boxes' centers x-coord distance [pixels]
        - maximum bounding boxes' centers y-coord distance [pixels]
        - histogram of number of bounding boxes per image
        - histogram of bounding boxes' centers distance [pixels]
        - histogram of bounding boxes' centers x-coord distance [pixels]
        - histogram of bounding boxes' centers y-coord distance [pixels]
    Distances are measured between the centers of each pair of bounding boxes
    belonging to the same image. Statistics that cannot be computed because
    there is no data (e.g. no image with at least two bounding boxes) are
    reported as NaN. Histograms are also saved as pictures in PICTURES_DIR.
    """
    not_available = float('nan')

    n_bounding_boxes_per_image = []
    bounding_box_heights = []
    bounding_box_widths = []
    centers_distances = []
    centers_x_coord_distances = []
    centers_y_coord_distances = []

    for image_bounding_boxes in IMAGE_PATHS_TO_BOUNDING_BOXES.values():
        n_bounding_boxes_per_image.append(len(image_bounding_boxes))

        centers_x_and_y_coords = []
        for bounding_box in image_bounding_boxes:
            bounding_box_heights.append(bounding_box['height'])
            bounding_box_widths.append(bounding_box['width'])
            # NOTE: 'x' and 'y' are assumed to be the coordinates of the
            # top-left corner of the bounding box, so its center is half of
            # its size away from them:
            centers_x_and_y_coords.append(
                (
                    bounding_box['x'] + bounding_box['width'] / 2,
                    bounding_box['y'] + bounding_box['height'] / 2
                )
            )

        # NOTE: no pair is generated for images with less than two bounding
        # boxes
        for (first_center, second_center) in combinations(
                iterable=centers_x_and_y_coords,
                r=2
        ):
            x_coord_difference = abs(first_center[0] - second_center[0])
            y_coord_difference = abs(first_center[1] - second_center[1])

            centers_x_coord_distances.append(x_coord_difference)
            centers_y_coord_distances.append(y_coord_difference)
            centers_distances.append(
                sqrt(x_coord_difference**2 + y_coord_difference**2)
            )

    total_n_images = len(IMAGE_PATHS_TO_BOUNDING_BOXES)
    total_n_bounding_boxes = sum(n_bounding_boxes_per_image)
    total_n_centers_pairs = len(centers_distances)
    maximum_n_bounding_boxes_per_image = max(
        n_bounding_boxes_per_image,
        default=0
    )

    statistics = (
        (
            "total number of bounding boxes",
            total_n_bounding_boxes
        ),
        (
            "total number of images",
            total_n_images
        ),
        (
            "average number of bounding boxes per image",
            round(
                number=_average_or_nan(
                    total_n_bounding_boxes, total_n_images
                ),
                ndigits=2
            )
        ),
        (
            "minimum number of bounding boxes per image",
            min(n_bounding_boxes_per_image, default=0)
        ),
        (
            "maximum number of bounding boxes per image",
            maximum_n_bounding_boxes_per_image
        ),
        (
            "total number of empty images",
            n_bounding_boxes_per_image.count(0)
        ),
        (
            "average bounding box height [pixels]",
            round(
                number=_average_or_nan(
                    sum(bounding_box_heights), total_n_bounding_boxes
                ),
                ndigits=2
            )
        ),
        (
            "average bounding box width [pixels]",
            round(
                number=_average_or_nan(
                    sum(bounding_box_widths), total_n_bounding_boxes
                ),
                ndigits=2
            )
        ),
        (
            "average bounding boxes' centers distance [pixels]",
            round(
                number=_average_or_nan(
                    sum(centers_distances), total_n_centers_pairs
                ),
                ndigits=2
            )
        ),
        (
            "average bounding boxes' centers x-coord distance [pixels]",
            round(
                number=_average_or_nan(
                    sum(centers_x_coord_distances), total_n_centers_pairs
                ),
                ndigits=2
            )
        ),
        (
            "average bounding boxes' centers y-coord distance [pixels]",
            round(
                number=_average_or_nan(
                    sum(centers_y_coord_distances), total_n_centers_pairs
                ),
                ndigits=2
            )
        ),
        (
            "minimum bounding box height [pixels]",
            min(bounding_box_heights, default=not_available)
        ),
        (
            "minimum bounding box width [pixels]",
            min(bounding_box_widths, default=not_available)
        ),
        (
            "minimum bounding boxes' centers distance [pixels]",
            round(
                number=min(centers_distances, default=not_available),
                ndigits=2
            )
        ),
        (
            "minimum bounding boxes' centers x-coord distance [pixels]",
            min(centers_x_coord_distances, default=not_available)
        ),
        (
            "minimum bounding boxes' centers y-coord distance [pixels]",
            min(centers_y_coord_distances, default=not_available)
        ),
        (
            "maximum bounding box height [pixels]",
            max(bounding_box_heights, default=not_available)
        ),
        (
            "maximum bounding box width [pixels]",
            max(bounding_box_widths, default=not_available)
        ),
        (
            "maximum bounding boxes' centers distance [pixels]",
            round(
                number=max(centers_distances, default=not_available),
                ndigits=2
            )
        ),
        (
            "maximum bounding boxes' centers x-coord distance [pixels]",
            max(centers_x_coord_distances, default=not_available)
        ),
        (
            "maximum bounding boxes' centers y-coord distance [pixels]",
            max(centers_y_coord_distances, default=not_available)
        ),
        (
            "histogram of number of bounding boxes per image",
            "see plot"
        ),
        (
            "histogram of bounding boxes' centers distance [pixels]",
            "see plot"
        ),
        (
            "histogram of bounding boxes' centers x-coord distance [pixels]",
            "see plot"
        ),
        (
            "histogram of bounding boxes' centers y-coord distance [pixels]",
            "see plot"
        )
    )

    print('- ' * 30)
    print("Bounding Boxes' Statistics:")
    for (description, value) in statistics:
        print("\t- " + description + ":", value)

    plt_figure()

    # NOTE: one unit-wide bin per possible number of bounding boxes, the
    # maximum one included, so that no two different numbers share a bin:
    _plot_histogram_and_save_picture(
        values=n_bounding_boxes_per_image,
        bins=list(range(maximum_n_bounding_boxes_per_image + 2)),
        color='skyblue',
        what_it_represent="Histogram of Number of Bounding Boxes per Image",
        ticks=list(range(maximum_n_bounding_boxes_per_image + 1))
    )

    # the image diagonal is the upper bound of distances between centers:
    image_diagonal = int(sqrt(IMAGE_N_COLUMNS**2 + IMAGE_N_ROWS**2))
    _plot_histogram_and_save_picture(
        values=centers_distances,
        bins=list(range(image_diagonal)),
        color='chartreuse',
        what_it_represent=(
            "Histogram of Bounding Boxes' Centers Distance [pixels]"
        ),
        ticks=list(range(0, image_diagonal, 20)),
        small_rotated_tick_labels=True,
        maximize_window=True
    )

    _plot_histogram_and_save_picture(
        values=centers_x_coord_distances,
        bins=list(range(IMAGE_N_COLUMNS)),
        color='mediumslateblue',
        what_it_represent=(
            "Histogram of Bounding Boxes' Centers X-Coordinate Distance "
            "[pixels]"
        ),
        ticks=list(range(0, IMAGE_N_COLUMNS, 20)),
        small_rotated_tick_labels=True
    )

    _plot_histogram_and_save_picture(
        values=centers_y_coord_distances,
        bins=list(range(IMAGE_N_ROWS)),
        color='violet',
        what_it_represent=(
            "Histogram of Bounding Boxes' Centers Y-Coordinate Distance "
            "[pixels]"
        ),
        ticks=list(range(0, IMAGE_N_ROWS, 20)),
        small_rotated_tick_labels=True
    )

    plt_close()

    print('- ' * 30)


def label_line_to_image_path_2_bounding_boxes_and_2_model_output(
        csv_label_line_segments: List[str]
) -> Tuple[
        Dict[str, List[Dict[str, int]]],
        Dict[str, List[List[Tuple[int, int, int, int]]]]
]:
    """
    Convert the segments of a line of the CSV labels file, whose original
    format is
    'video_id,sequence,video_frame,sequence_frame,image_id,annotations', into
    two single-item dictionaries sharing the same key, i.e. the respective
    image file path encoded as UTF-8 bytes: the value of the former is the
    list of the respective bounding boxes, while the value of the latter is
    the respective model outputs.
    """
    video_id = csv_label_line_segments[0]
    video_frame = csv_label_line_segments[2]
    raw_annotations = csv_label_line_segments[5]

    image_path = path_join(
        DATASET_DIR,
        'train_images',
        'video_' + video_id,
        video_frame + '.jpg'
    )

    # turning the raw annotations, which quote strings with single quotes,
    # into a string that can be parsed as JSON:
    annotations_as_json = raw_annotations.replace('"', '"""')
    annotations_as_json = annotations_as_json.replace("'", '"')
    bounding_boxes = json_loads(annotations_as_json)

    image_path_to_bounding_boxes = {
        bytes(image_path, 'utf-8'): bounding_boxes
    }
    image_path_to_model_outputs = {
        bytes(image_path, 'utf-8'): turn_bounding_boxes_to_model_outputs(
            raw_bounding_boxes=bounding_boxes
        )
    }

    return (image_path_to_bounding_boxes, image_path_to_model_outputs)


def load_labels_as_paths_to_bounding_boxes_and_model_outputs_dicts() -> Tuple[
        Dict[str, List[Dict[str, int]]],
        Dict[str, List[List[Tuple[int, int, int, int]]]]
]:
    """
    Load the labels' information from the CSV file at LABELS_FILE_PATH and
    return them as two dictionaries, the former associating image file paths
    to respective bounding boxes and the latter associating image file paths
    to respective model outputs. The first line of the file is skipped as a
    header, and so are blank lines.
    """
    image_paths_to_bounding_boxes = {}
    image_paths_to_model_outputs = {}

    # NOTE: the csv module recommends opening files with newline='' so that
    # it handles line endings itself; the encoding is made explicit not to
    # depend on the platform default one
    with open(
            LABELS_FILE_PATH,
            'r',
            encoding='utf-8',
            newline=''
    ) as labels_file:
        labels_reader = csv_reader(
            labels_file,
            delimiter=',',
            quotechar='"'
        )

        # skipping the header line, if any:
        next(labels_reader, None)

        for line_segments in labels_reader:
            # NOTE: blank lines are read as empty lists of segments
            if not line_segments:
                continue

            # turning the label from the raw format into processed
            # dictionaries to retrieve bounding boxes and model outputs of
            # images easily from respective image file paths:
            (
                image_path_to_bounding_boxes,
                image_path_to_model_outputs
            ) = label_line_to_image_path_2_bounding_boxes_and_2_model_output(
                csv_label_line_segments=line_segments
            )
            image_paths_to_bounding_boxes.update(image_path_to_bounding_boxes)
            image_paths_to_model_outputs.update(image_path_to_model_outputs)

    return (image_paths_to_bounding_boxes, image_paths_to_model_outputs)


def load_sample_and_get_bounding_boxes(image_path: Tensor) -> Tuple[
        Tensor, Tensor
]:
    """
    Read and decode the JPEG image at the input path and pair it with its
    label, i.e. a tensor with one [x, y, width, height] row for each of its
    bounding boxes in IMAGE_PATHS_TO_BOUNDING_BOXES.
    """
    sample = decode_jpeg(contents=read_file(filename=image_path))

    # NOTE: the dictionary is keyed by the path as held by the input tensor
    bounding_boxes_as_rows = [
        [
            bounding_box_dict['x'],
            bounding_box_dict['y'],
            bounding_box_dict['width'],
            bounding_box_dict['height']
        ] for bounding_box_dict in
        IMAGE_PATHS_TO_BOUNDING_BOXES[image_path.numpy()]
    ]
    label = convert_to_tensor(
        value=bounding_boxes_as_rows,
        dtype=DATA_TYPE_FOR_OUTPUTS
    )

    return (sample, label)


def load_sample_and_get_model_outputs(image_path: Tensor) -> Tuple[
        Tensor, Tensor
]:
    """
    Read and decode the JPEG image at the input path and pair it with its
    label, i.e. the tensor of the model outputs associated to the same path
    in IMAGE_PATHS_TO_MODEL_OUTPUTS.
    """
    sample = decode_jpeg(contents=read_file(filename=image_path))

    # NOTE: the dictionary is keyed by the path as held by the input tensor
    label = convert_to_tensor(
        value=IMAGE_PATHS_TO_MODEL_OUTPUTS[image_path.numpy()],
        dtype=DATA_TYPE_FOR_OUTPUTS
    )

    return (sample, label)


def _batch_cache_and_prefetch(
        samples_and_labels: Dataset,
        cache_file_suffix: str = ''
) -> Dataset:
    """
    Divide the input dataset into mini-batches, cache the mini-batches on
    disk - only when running everything in a unified notebook on Kaggle's
    servers - and pre-fetch them.

    The cache file path is CACHE_FILE_PATH_FOR_TRAINING_SET followed by the
    input suffix, so that different subsets never share the same cache file.
    """
    batched_set = samples_and_labels.batch(
        batch_size=MINI_BATCH_SIZE,
        drop_remainder=False,
        num_parallel_calls=AUTOTUNE,
        deterministic=True
    )
    # only when running everything in a unified notebook on Kaggle's servers
    # - NOTE: the cache path constant is referenced only inside this branch,
    # as it may not be defined in the other running modes:
    if __name__ == 'main_by_mattia':
        # optimizing performances by caching end-results:
        batched_set = batched_set.cache(
            filename=f"{CACHE_FILE_PATH_FOR_TRAINING_SET}{cache_file_suffix}"
        )
    # optimizing performances by pre-fetching final elements:
    return batched_set.prefetch(buffer_size=AUTOTUNE)


def split_dataset_into_batched_training_and_validation_sets(
        training_plus_validation_set: Dataset
) -> Tuple[Dataset, Dataset]:
    """
    Split the input dataset into a training set and a validation set, both
    already divided into mini-batches.

    The last VALIDATION_SET_PORTION_OF_DATA fraction of the
    N_TRAINING_PLUS_VALIDATION_SAMPLES samples (rounded down) forms the
    validation set, while all the preceding samples form the training set.
    ---
        Outputs' Meanings:
            - the batched (and eventually cached) and pre-fetched training
            set
            - the batched (and eventually cached) and pre-fetched validation
            set
    """
    n_samples_in_validation_set = int(
        VALIDATION_SET_PORTION_OF_DATA * N_TRAINING_PLUS_VALIDATION_SAMPLES
    )
    n_samples_in_training_set = (
        N_TRAINING_PLUS_VALIDATION_SAMPLES - n_samples_in_validation_set
    )

    training_set = _batch_cache_and_prefetch(
        # selecting only the training samples and labels:
        samples_and_labels=training_plus_validation_set.take(
            count=n_samples_in_training_set
        )
    )

    validation_set = _batch_cache_and_prefetch(
        # selecting only the validation samples and labels:
        samples_and_labels=(
            training_plus_validation_set
            .skip(count=n_samples_in_training_set)
            .take(count=n_samples_in_validation_set)
        ),
        # NOTE: the validation set must not be cached to the same file as the
        # training set, otherwise the two caches collide - NOTE(review): a
        # dedicated path constant may replace this derived path, if preferred
        cache_file_suffix='_validation'
    )

    return (training_set, validation_set)


def show_dataset_as_movie(
        ordered_samples_and_labels: Dataset,
        bounding_boxes_or_model_outputs: str = 'bounding_boxes'
) -> None:
    """
    Play the dataset as a movie: images are shown one after the other, in the
    dataset order, each with its own label drawn on top - either the
    bounding boxes themselves ('bounding_boxes') or the output grid cells
    whose label is not empty ('model_outputs').
    """
    assert (
        bounding_boxes_or_model_outputs in ('bounding_boxes', 'model_outputs')
    ), "Invalid 'bounding_boxes_or_model_outputs' input."

    _, axes = subplots(1, 1)

    def outline_rectangle(top_left_corner_xy, width, height) -> None:
        """
        Draw an unfilled green rectangle over the current frame.
        """
        axes.add_patch(
            p=Rectangle(
                xy=top_left_corner_xy,
                width=width,
                height=height,
                linewidth=2,
                edgecolor='#00ff00',
                facecolor='none'
            )
        )

    # an all-zeros reference, to recognize the grid cells with empty labels:
    empty_cell_label = zeros(
        shape=(N_ANCHORS_PER_CELL, N_OUTPUTS_PER_ANCHOR)
    ) if bounding_boxes_or_model_outputs == 'model_outputs' else None

    # every sample-label pair is rendered as a single frame:
    for index, sample_and_label in enumerate(ordered_samples_and_labels):
        # logging the progress from time to time:
        if index % 1000 == 0:
            print(f"{index} frames shown")

        # removing whatever was drawn for the previous frame and showing the
        # current image:
        axes.clear()
        axes.imshow(sample_and_label[0].numpy())

        if bounding_boxes_or_model_outputs == 'bounding_boxes':
            # labels as bounding boxes, each drawn over the image:
            for x, y, width, height in (
                    box[:4] for box in sample_and_label[1].numpy().tolist()
            ):
                outline_rectangle(
                    top_left_corner_xy=(x, y),
                    width=width,
                    height=height
                )

        elif bounding_boxes_or_model_outputs == 'model_outputs':
            # labels as model outputs: the whole grid cell is highlighted
            # whenever at least one of its label values is not zero:
            cells_labels = sample_and_label[1].numpy()
            for cell_row_index in range(OUTPUT_GRID_N_ROWS):
                for cell_column_index in range(OUTPUT_GRID_N_COLUMNS):
                    cell_label = cells_labels[
                        cell_row_index, cell_column_index, :, :
                    ]
                    if (cell_label != empty_cell_label).any():
                        outline_rectangle(
                            top_left_corner_xy=(
                                OUTPUT_GRID_CELL_CORNERS_XY_COORDS[
                                    cell_row_index,
                                    cell_column_index
                                ]
                            ),
                            width=OUTPUT_GRID_CELL_N_COLUMNS,
                            height=OUTPUT_GRID_CELL_N_ROWS
                        )

        else:
            # only reachable when assertions are disabled:
            raise Exception("Ill-conceived code.")

        # refreshing the plot without blocking and pausing very briefly, so
        # as to let the frame be observed before going ahead with the next
        # one:
        plt_show(block=False)
        plt_pause(interval=0.000001)


def turn_bounding_boxes_to_model_outputs(
        raw_bounding_boxes: List[Dict[str, int]]
) -> 'ndarray':
    """
    Turn the input, raw list of bounding boxes' position information into the
    equivalent information from the model outputs' perspective, as direct
    supervision labels - for a single image.

    Each bounding box is assigned to the grid cell containing its center and,
    within that cell, to the anchor with the closest aspect ratio. All the
    anchors without any bounding box assigned keep an all-zeros label.
    ---
        Input Meaning:
            - each bounding box is a dictionary with 'x' and 'y', the
            absolute coordinates of its top-left corner, and 'width' and
            'height', the absolute sizes of its sides
    ---
        Output Shape:
            - (
                OUTPUT_GRID_N_ROWS,
                OUTPUT_GRID_N_COLUMNS,
                N_ANCHORS_PER_CELL,
                N_OUTPUTS_PER_ANCHOR
            )
    ---
        Output Meaning:
            - for each anchor with a bounding box assigned, the last
            dimension contains: a 1.0 score, the x and y offsets of the
            bounding box center from the cell top-left corner normalized by
            the cell sides, the bounding box width and height normalized by
            the image sides
    ---
        Raises:
            - Exception, when two bounding boxes of the same image compete
            for the same anchor of the same grid cell
    """
    labels = zeros(
        shape=(
            OUTPUT_GRID_N_ROWS,
            OUTPUT_GRID_N_COLUMNS,
            N_ANCHORS_PER_CELL,
            N_OUTPUTS_PER_ANCHOR
        )
    )

    # for each bounding box in the image:
    for bounding_box in raw_bounding_boxes:
        box_width = bounding_box['width']
        box_height = bounding_box['height']

        # computing the absolute x and y coordinates of the bounding box
        # center:
        center_absolute_x_coord = bounding_box['x'] + (box_width / 2)
        center_absolute_y_coord = bounding_box['y'] + (box_height / 2)

        # getting the required information about the grid cell that contains
        # its center:
        (
            cell_row_index,
            cell_column_index,
            cell_corner_x_coord,
            cell_corner_y_coord
        ) = get_cell_containing_bounding_box_center(
            center_absolute_x_and_y_coords=(
                center_absolute_x_coord,
                center_absolute_y_coord
            )
        )

        # transforming and normalizing the bounding box coordinates, now
        # meaning respectively for x and y the relative center offset from the
        # enclosing cell top-left corner normalized to the grid cell size and
        # for width and height the relative widths and heights with respect
        # to the image sides:
        relative_x_coord = (
            (center_absolute_x_coord - cell_corner_x_coord) /
            OUTPUT_GRID_CELL_N_COLUMNS
        )
        relative_y_coord = (
            (center_absolute_y_coord - cell_corner_y_coord) /
            OUTPUT_GRID_CELL_N_ROWS
        )
        relative_width = box_width / IMAGE_N_COLUMNS
        relative_height = box_height / IMAGE_N_ROWS

        # getting the index of the anchor with closest aspect ratio to the
        # considered bounding box:
        label_anchor_index = get_index_of_anchor_with_closest_aspect_ratio(
            absolute_width=box_width,
            absolute_height=box_height
        )

        # associating the bounding box attributes to the respective anchor
        # labels - after checking there are no intrinsic limitations of the
        # employed design choices, i.e. that the anchor label is still empty
        # (any assigned label is never all-zeros as its score is 1.0):
        anchor_label_already_assigned = (
            labels[cell_row_index, cell_column_index, label_anchor_index, :] !=
            [.0] * N_OUTPUTS_PER_ANCHOR
        ).any()
        if anchor_label_already_assigned:
            raise Exception(
                f"Either more than {N_ANCHORS_PER_CELL} anchors or a " +
                "better output resolution are required, as more bounding " +
                "boxes than the set number of anchors are falling within " +
                "the same output cell in this sample."
            )
        labels[cell_row_index, cell_column_index, label_anchor_index, :] = [
            1.0,  # FIXME: supposed to be an objectiveness score or an IoU?
            relative_x_coord,
            relative_y_coord,
            relative_width,
            relative_height
        ]

    return labels


# labels are loaded only once, at definition time, as look-up dictionaries
# from image file paths to, respectively, raw bounding boxes and labels from
# the model outputs' perspective:
IMAGE_PATHS_TO_BOUNDING_BOXES, IMAGE_PATHS_TO_MODEL_OUTPUTS = (
    load_labels_as_paths_to_bounding_boxes_and_model_outputs_dicts()
)

# one entry per image is counted as one sample:
N_TRAINING_PLUS_VALIDATION_SAMPLES = len(IMAGE_PATHS_TO_BOUNDING_BOXES)


# manual inspection of samples and labels, executed only when this code is
# run as a script - NOTE: in the unified notebook '__name__' is overwritten
# with 'main_by_mattia', so this section is skipped there:
if __name__ == '__main__':
    # optionally inspecting bounding boxes' statistics:
    if SHOW_BOUNDING_BOXES_STATISTICS:
        inspect_bounding_boxes_statistics_on_training_n_validation_set()

    # dataset whose labels are raw bounding boxes:
    samples_n_bounding_boxes_dataset = dataset_of_samples_and_bounding_boxes()

    # optionally showing the frames with their bounding boxes drawn over:
    if SHOW_DATASET_MOVIES:
        show_dataset_as_movie(
            ordered_samples_and_labels=samples_n_bounding_boxes_dataset,
            bounding_boxes_or_model_outputs='bounding_boxes'
        )

    # dataset whose labels are expressed from the model outputs' perspective:
    samples_n_model_outputs_dataset = dataset_of_samples_and_model_outputs(
        # not shuffling when needing adjacent frames for showing the movie:
        shuffle=(not SHOW_DATASET_MOVIES)
    )

    # optionally showing the frames with their non-empty grid cells
    # highlighted:
    if SHOW_DATASET_MOVIES:
        show_dataset_as_movie(
            ordered_samples_and_labels=samples_n_model_outputs_dataset,
            bounding_boxes_or_model_outputs='model_outputs'
        )

    # splitting the latter dataset into batched training and validation sets:
    (
        training_dataset, validation_dataset
    ) = split_dataset_into_batched_training_and_validation_sets(
        training_plus_validation_set=samples_n_model_outputs_dataset
    )


#### Inference

In [None]:
"""
Utilities for inference time, for converting model outputs to bounding boxes'
predictions.
"""


from typing import List, Tuple, Union

# pylint: disable=import-error,no-name-in-module
from tensorflow import (
    cast,
    clip_by_value,
    concat,
    convert_to_tensor,
    expand_dims,
    reshape,
    squeeze,
    stack,
    Tensor,
    tile
)
from tensorflow.image import combined_non_max_suppression
from tensorflow.math import (
    add,
    divide,
    subtract,
    multiply
)
# pylint: enable=import-error,no-name-in-module

# only when running everything in a unified notebook on Kaggle's servers:
if __name__ != 'main_by_mattia':
    from common_constants import (
        DATA_TYPE_FOR_OUTPUTS,
        IMAGE_N_COLUMNS,
        IMAGE_N_ROWS,
        N_ANCHORS_PER_CELL,
        N_OUTPUTS_PER_ANCHOR,
        OUTPUT_GRID_CELL_CORNERS_XY_COORDS,
        OUTPUT_GRID_CELL_N_COLUMNS,
        OUTPUT_GRID_CELL_N_ROWS
    )
    from model_architecture import YOLOv3Variant
    from samples_and_labels import (
        dataset_of_samples_and_model_outputs,
        split_dataset_into_batched_training_and_validation_sets
    )


# IoU threshold passed to the non-maximum suppression when the model outputs
# are intended as predictions (not as labels):
IOU_THRESHOLD_FOR_NON_MAXIMUM_SUPPRESSION = 0.5
# NOTE(review): not referenced in the visible part of this section -
# presumably a tolerance on the minimum side of valid bounding boxes, to be
# confirmed against its usages:
MINIMUM_BOUNDING_BOX_SIDE_DIMENSION_TOLERANCE = 0.1
# maximum number of bounding boxes kept by the non-maximum suppression for
# each sample, used both as per-class and as total limit:
MAXIMUM_N_BOUNDING_BOXES_AFTER_NMS = 100
# score threshold passed to the non-maximum suppression when the model
# outputs are intended as predictions (not as labels):
SCORE_THRESHOLD_FOR_NON_MAXIMUM_SUPPRESSION = 0.5


def batched_anchors_rel_to_real_abs_x_y_w_h(
        batched_anchors_relative_x_y_w_h: Tensor,
        batched_anchors_corners_absolute_x_y: Tensor
) -> Tensor:
    """
    Convert batches of anchors expressed as relative (x center, y center, w,
    h) values into batches of the same anchors expressed as absolute (x
    top-left corner, y top-left corner, w, h) values.

    In the inputs, x and y are the offsets of the anchor center from the
    top-left corner of its grid cell, normalized by the grid cell sides, and
    w and h are the width and height normalized by the image sides. In the
    output, x and y are the absolute coordinates of the anchor top-left
    corner and w and h are the absolute width and height.
    NOTE: this function changes not only the scale but also the meaning of x
    and y
    ---
        Input Shapes:
            - (
                VARIABLE_N_SAMPLES,
                N_ANCHORS_PER_IMAGE,
                1,
                4
            )
            - (
                VARIABLE_N_SAMPLES,
                N_ANCHORS_PER_IMAGE,
                2
            )
    ---
        Output Shape:
            - (
                VARIABLE_N_SAMPLES,
                N_ANCHORS_PER_IMAGE,
                1,
                4
            )
    """
    # isolating every relative attribute:
    relative_center_x = batched_anchors_relative_x_y_w_h[..., 0]
    relative_center_y = batched_anchors_relative_x_y_w_h[..., 1]
    relative_w = batched_anchors_relative_x_y_w_h[..., 2]
    relative_h = batched_anchors_relative_x_y_w_h[..., 3]
    # shapes -> (samples, anchors_per_image, 1)

    # aligning rank and data type of the grid cell corners to the anchors'
    # ones - NOTE: corners are already discretized, so any truncation due to
    # casting is not relevant:
    cells_corners_absolute_x_y = cast(  # noqa: E501 pylint: disable=unexpected-keyword-arg,no-value-for-parameter
        x=expand_dims(input=batched_anchors_corners_absolute_x_y, axis=2),
        dtype=DATA_TYPE_FOR_OUTPUTS
    )  # shape -> (samples, anchors_per_image, 1, 2)

    # un-normalizing sides with respect to the image sides:
    absolute_w = multiply(x=relative_w, y=IMAGE_N_COLUMNS)
    absolute_h = multiply(x=relative_h, y=IMAGE_N_ROWS)
    # shapes -> (samples, anchors_per_image, 1)

    # un-normalizing center offsets with respect to the grid cell sides and
    # translating them by the grid cell corners:
    absolute_center_x = add(
        x=multiply(x=relative_center_x, y=float(OUTPUT_GRID_CELL_N_COLUMNS)),
        y=cells_corners_absolute_x_y[..., 0]
    )
    absolute_center_y = add(
        x=multiply(x=relative_center_y, y=float(OUTPUT_GRID_CELL_N_ROWS)),
        y=cells_corners_absolute_x_y[..., 1]
    )
    # shapes -> (samples, anchors_per_image, 1)

    # moving from centers to top-left corners, half a side away:
    absolute_x = subtract(
        x=absolute_center_x,
        y=divide(x=absolute_w, y=float(2))
    )
    absolute_y = subtract(
        x=absolute_center_y,
        y=divide(x=absolute_h, y=float(2))
    )
    # shapes -> (samples, anchors_per_image, 1)

    absolute_x_y_w_h = concat(  # noqa: E501 pylint: disable=unexpected-keyword-arg,no-value-for-parameter
        values=(absolute_x, absolute_y, absolute_w, absolute_h),
        axis=-1
    )  # shape -> (samples, anchors_per_image, 4)

    return expand_dims(input=absolute_x_y_w_h, axis=2)
    # shape -> (samples, anchors_per_image, 1, 4)


def batched_anchors_x_y_w_h_to_x1_y1_x2_y2(
        batched_anchors_absolute_x_y_w_h: Tensor
) -> Tensor:
    """
    Convert batches of anchors expressed as absolute (x, y, w, h) values into
    batches of the same anchors expressed as absolute (x1, y1, x2, y2)
    values, clipping every output coordinate so that it falls inside the
    image.

    x and y (as well as x1 and y1) are the coordinates of the top-left
    corner, w and h are the width and height of sides, x2 and y2 are the
    coordinates of the bottom-right corner.
    ---
        Input Shape:
            - (
                VARIABLE_N_SAMPLES,
                N_ANCHORS_PER_IMAGE,
                1,
                4
            )
    ---
        Output Shape:
            - (
                VARIABLE_N_SAMPLES,
                N_ANCHORS_PER_IMAGE,
                1,
                4
            )
    """
    def clip_inside_image(coordinates: Tensor, image_side: int) -> Tensor:
        """
        Constrain the input coordinates between the first and the last pixel
        index along the image side of interest.
        """
        return clip_by_value(
            t=coordinates,
            clip_value_min=0,
            clip_value_max=(image_side - 1)
        )

    # isolating every input attribute:
    absolute_x = batched_anchors_absolute_x_y_w_h[..., 0]
    absolute_y = batched_anchors_absolute_x_y_w_h[..., 1]
    absolute_w = batched_anchors_absolute_x_y_w_h[..., 2]
    absolute_h = batched_anchors_absolute_x_y_w_h[..., 3]
    # shapes -> (samples, anchors_per_image, 1)

    # top-left corners:
    absolute_x1 = clip_inside_image(absolute_x, IMAGE_N_COLUMNS)
    absolute_y1 = clip_inside_image(absolute_y, IMAGE_N_ROWS)
    # shapes -> (samples, anchors_per_image, 1)

    # bottom-right corners, a full side away from the top-left ones:
    absolute_x2 = clip_inside_image(
        add(x=absolute_x, y=absolute_w),
        IMAGE_N_COLUMNS
    )
    absolute_y2 = clip_inside_image(
        add(x=absolute_y, y=absolute_h),
        IMAGE_N_ROWS
    )
    # shapes -> (samples, anchors_per_image, 1)

    absolute_x1_y1_x2_y2 = concat(  # noqa: E501 pylint: disable=unexpected-keyword-arg,no-value-for-parameter
        values=(absolute_x1, absolute_y1, absolute_x2, absolute_y2),
        axis=-1
    )  # shape -> (samples, anchors_per_image, 4)

    return expand_dims(input=absolute_x1_y1_x2_y2, axis=2)
    # shape -> (samples, anchors_per_image, 1, 4)


def batched_anchors_x1_y1_x2_y2_to_x_y_w_h(
        batched_anchors_absolute_x1_y1_x2_y2: Tensor
) -> Tensor:
    """
    Convert batches of anchors expressed as absolute (x1, y1, x2, y2) values
    into batches of the same anchors expressed as absolute (x, y, w, h)
    values.

    x1 and y1 (as well as x and y) are the coordinates of the top-left
    corner, x2 and y2 are the coordinates of the bottom-right corner, w and h
    are the width and height of sides.
    ---
        Input Shape:
            - (
                VARIABLE_N_SAMPLES,
                VARIABLE_N_BOUNDING_BOXES,
                4
            )
    ---
        Output Shape:
            - (
                VARIABLE_N_SAMPLES,
                VARIABLE_N_BOUNDING_BOXES,
                4
            )
    """
    # the top-left corners are kept as they are:
    top_left_corners_x_y = batched_anchors_absolute_x1_y1_x2_y2[..., :2]
    # shape -> (samples, boxes, 2)

    bottom_right_corners_x_y = batched_anchors_absolute_x1_y1_x2_y2[..., 2:4]
    # shape -> (samples, boxes, 2)

    # widths and heights are the distances between opposite corners along
    # x and y, respectively:
    sides_w_h = subtract(
        x=bottom_right_corners_x_y,
        y=top_left_corners_x_y
    )  # shape -> (samples, boxes, 2)

    return concat(  # noqa: E501 pylint: disable=unexpected-keyword-arg,no-value-for-parameter
        values=(top_left_corners_x_y, sides_w_h),
        axis=-1
    )  # shape -> (samples, boxes, 4)


def convert_batched_bounding_boxes_to_final_format(
        batched_bounding_boxes: Tensor,
        batched_n_valid_bounding_boxes: Tensor,
        predicting_online: bool = True,
        as_strings: bool = True
) -> Union[
        Union[str, List[List[Union[float, int]]]],
        List[Union[str, List[List[Union[float, int]]]]]
]:
    """
    Convert batched bounding boxes into their final format, sample by
    sample, keeping only the valid bounding boxes of each sample and rounding
    all their absolute coordinates' values to integers, to respect the
    physical constraint of representing image pixels.

    When predicting online, the mini-batch must contain a single sample and
    the converted bounding boxes of that sample are returned directly;
    otherwise, a list with the converted bounding boxes of each sample is
    returned. Each sample is converted either to a single string or to a list
    of bounding boxes, according to 'as_strings'.
    ---
        Input Shapes:
            - (
                VARIABLE_N_SAMPLES,
                VARIABLE_N_BOUNDING_BOXES,
                N_OUTPUTS_PER_ANCHOR
            )
            - (
                VARIABLE_N_SAMPLES,
            )
    """
    # if the batched inputs represent a single sample:
    if predicting_online:
        # NOTE: this also automatically asserts that the mini-batch contains
        # only a single sample:
        n_valid_image_bounding_boxes = int(batched_n_valid_bounding_boxes)

        return convert_bounding_boxes_to_final_format(
            image_bounding_boxes=squeeze(
                input=batched_bounding_boxes,
                axis=0
            ),
            n_valid_bounding_boxes=n_valid_image_bounding_boxes,
            as_string=as_strings
        )

    # if the batched inputs contain more than a single sample, each sample is
    # converted on its own - NOTE: the number of valid bounding boxes is
    # turned into a plain integer here as well, as required by the
    # single-sample conversion and consistently with the branch above:
    return [
        convert_bounding_boxes_to_final_format(
            image_bounding_boxes=current_image_bounding_boxes,
            n_valid_bounding_boxes=int(n_valid_image_bounding_boxes),
            as_string=as_strings
        )
        for current_image_bounding_boxes, n_valid_image_bounding_boxes in zip(
            batched_bounding_boxes, batched_n_valid_bounding_boxes
        )
    ]


def convert_bounding_boxes_to_final_format(
        image_bounding_boxes: Tensor,
        n_valid_bounding_boxes: int,
        as_string: bool = True
) -> Union[str, Tuple[float, int, int, int, int]]:
    """
    Convert the bounding boxes of a single image into their final format,
    considering only the first 'n_valid_bounding_boxes' of them.

    Each bounding box is read as (confidence, x, y, width, height): the
    confidence is kept as it is while the other attributes are rounded to
    integers. The result is either a single string where all attributes of
    all bounding boxes are separated by single spaces or a list with a
    [confidence, x, y, width, height] list for each bounding box - an empty
    string or an empty list when no bounding box is valid.
    """
    no_bounding_boxes = '' if as_string else []
    if n_valid_bounding_boxes == 0:
        return no_bounding_boxes

    valid_bounding_boxes = (
        image_bounding_boxes.numpy().tolist()[:n_valid_bounding_boxes]
    )

    # discretizing all attributes but the confidence:
    discretized_bounding_boxes = [
        [
            attributes[0],
            round(attributes[1]),
            round(attributes[2]),
            round(attributes[3]),
            round(attributes[4]),
        ]
        for attributes in valid_bounding_boxes
    ]

    if not as_string:
        return discretized_bounding_boxes

    return ' '.join(
        f'{confidence} {x} {y} {width} {height}'
        for confidence, x, y, width, height in discretized_bounding_boxes
    )


def get_bounding_boxes_from_model_outputs(
        model_outputs: Tensor,
        from_labels: bool = False
) -> Tuple[Tensor, Tensor]:
    """
    Post-process model outputs by applying format conversion, un-normalization
    and reconstruction, and also non-maximum suppression in case the inputs
    are not intended as labels but as predictions, to turn batched model
    outputs into batches of bounding boxes expressed as (score, x, y, w, h),
    where x, y, w, and h respectively represent the top-left corner absolute
    x and y coordinates and the absolute width and height, all in pixels.

    When the inputs are intended as labels, only the anchors with a full
    score are selected and none of them is suppressed, not even when they
    overlap.

    NOTE: in my approach, anchors are just used to create labels as relative
    aspect ratios, neither to recreate predictions nor as absolute sizes -
    that's why anchors are not used here
    ---
        Input Shape:
            - (
                VARIABLE_N_SAMPLES,
                OUTPUT_GRID_N_ROWS,
                OUTPUT_GRID_N_COLUMNS,
                N_ANCHORS_PER_CELL,
                N_OUTPUTS_PER_ANCHOR
            )
    ---
        Output Shapes:
            - (
                VARIABLE_N_SAMPLES,
                VARIABLE_N_BOUNDING_BOXES,
                N_OUTPUTS_PER_ANCHOR
            )
            - (
                VARIABLE_N_SAMPLES,
            )
    ---
        Outputs' Meanings:
            - the bounding boxes of each sample, as (score, x, y, w, h)
            - the number of valid bounding boxes of each sample, as returned
            by the non-maximum suppression
    """
    n_mini_batch_samples = model_outputs.shape[0]

    # turning the model outputs into flattened (except along the batch
    # dimension) anchor predictions:
    anchors_outputs = reshape(
        tensor=model_outputs,
        shape=(n_mini_batch_samples, -1, N_OUTPUTS_PER_ANCHOR)
    )  # shape -> (samples, anchors_per_image, attributes)

    # creating a corresponding tensor of flattened (except along the batch
    # dimension) anchor corners' absolute (x, y) coordinates - NOTE:
    # TensorFlow uses a row-major ordering for reshaping, but the following
    # procedure ensures that the same ordering as for flattened anchor outputs
    # is followed, as grid cell corners are first repeated along the same
    # dimensions as the model outputs and only then flattened the same way:
    cells_corners_absolute_x_y = expand_dims(
        input=expand_dims(
            input=convert_to_tensor(
                value=OUTPUT_GRID_CELL_CORNERS_XY_COORDS
            ),
            # shape -> (rows, columns, 2)
            axis=0
        ),  # shape -> (1, rows, columns, 2)
        axis=3
    )  # shape -> (1, rows, columns, 1, 2)
    anchors_corners_absolute_x_y = reshape(
        tensor=tile(
            input=cells_corners_absolute_x_y,
            multiples=(n_mini_batch_samples, 1, 1, N_ANCHORS_PER_CELL, 1)
        ),  # shape -> (samples, rows, columns, anchors_per_cell, 2)
        shape=(n_mini_batch_samples, -1, 2)
    )  # shape -> (samples, anchors_per_image, 2)

    # adding a dummy class dimension for the later Tensorflow's function
    # application - NOTE: a single class is considered in the task of
    # interest:
    anchors_outputs = expand_dims(input=anchors_outputs, axis=2)
    # shape -> (samples, anchors_per_image, 1, attributes)

    anchors_scores = anchors_outputs[..., 0]
    # shape -> (samples, anchors_per_image, 1)

    anchors_relative_x_y_w_h = anchors_outputs[..., 1:]
    # shape -> (samples, anchors_per_image, 1, 4)

    anchors_absolute_x_y_w_h = batched_anchors_rel_to_real_abs_x_y_w_h(
        batched_anchors_relative_x_y_w_h=anchors_relative_x_y_w_h,
        batched_anchors_corners_absolute_x_y=anchors_corners_absolute_x_y
    )  # shape -> (samples, anchors_per_image, 1, 4)

    anchors_absolute_x1_y1_x2_y2 = batched_anchors_x_y_w_h_to_x1_y1_x2_y2(
        batched_anchors_absolute_x_y_w_h=anchors_absolute_x_y_w_h
    )  # shape -> (samples, anchors_per_image, 1, 4)

    # applying non-maximum suppression to generate robust bounding box
    # candidates with respective reliability scores when the model outputs
    # are intended as predictions - when the model outputs are intended as
    # labels, instead, the same function is only used to select the anchors
    # with a full score:
    if from_labels:
        # NOTE: boxes are suppressed only when their IoU with an already
        # selected box exceeds the threshold, so the maximum possible
        # threshold is required not to discard overlapping labels - a null
        # threshold would instead suppress any label overlapping another one:
        iou_threshold = 1.0
        # only scores equal to 1 - up to numerical tolerance - are labels:
        score_threshold = (1 - 1e-6)
    else:
        iou_threshold = IOU_THRESHOLD_FOR_NON_MAXIMUM_SUPPRESSION
        score_threshold = SCORE_THRESHOLD_FOR_NON_MAXIMUM_SUPPRESSION

    # NOTE: boxes are fed as (x1, y1, x2, y2) and given back with the same
    # attributes' order, as box clipping is disabled and the IoU does not
    # change when consistently swapping x and y:
    (
        boxes_absolute_x1_y1_x2_y2,  # shape -> (samples, boxes, 4)
        boxes_scores,  # shape -> (samples, boxes)
        _,  # classes of boxes for each sample, not relevant here
        n_valid_bounding_boxes  # shape -> (samples,)
    ) = combined_non_max_suppression(
        boxes=anchors_absolute_x1_y1_x2_y2,
        scores=anchors_scores,
        # NOTE: a single class is considered in the task of interest:
        max_output_size_per_class=MAXIMUM_N_BOUNDING_BOXES_AFTER_NMS,
        max_total_size=MAXIMUM_N_BOUNDING_BOXES_AFTER_NMS,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        pad_per_class=False,
        clip_boxes=False
    )

    boxes_absolute_x_y_w_h = batched_anchors_x1_y1_x2_y2_to_x_y_w_h(
        batched_anchors_absolute_x1_y1_x2_y2=boxes_absolute_x1_y1_x2_y2
    )  # shape -> (samples, boxes, 4)

    bounding_boxes_scores_plus_absolute_x_y_w_h = concat(  # noqa: E501 pylint: disable=unexpected-keyword-arg,no-value-for-parameter
        values=(
            expand_dims(input=boxes_scores, axis=-1),
            # shape -> (samples, boxes, 1)
            boxes_absolute_x_y_w_h
            # shape -> (samples, boxes, 4)
        ),
        axis=-1
    )  # shape -> (samples, boxes, attributes)

    return (
        bounding_boxes_scores_plus_absolute_x_y_w_h,
        # shape -> (samples, boxes, attributes)
        n_valid_bounding_boxes
        # shape -> (samples,)
    )


# manual smoke test of the post-processing, executed only when this code is
# run as a script - NOTE: in the unified notebook '__name__' is overwritten
# with 'main_by_mattia', so this section is skipped there:
if __name__ == '__main__':
    # batched, not shuffled training and validation sets:
    (
        training_samples_and_labels, validation_samples_and_labels
    ) = split_dataset_into_batched_training_and_validation_sets(
        training_plus_validation_set=dataset_of_samples_and_model_outputs(
            shuffle=False
        )
    )

    # NOTE: the model is only instantiated here, no weights are loaded in
    # this section:
    model = YOLOv3Variant()

    # first check - on a whole mini-batch of samples: only the shapes of the
    # resulting tensors are printed
    for samples_and_labels in training_samples_and_labels:
        print('\n' + '-'*90)

        # bounding boxes reconstructed from labels:
        (
            expected_bounding_boxes,
            n_valid_expected_bounding_boxes
        ) = get_bounding_boxes_from_model_outputs(
            model_outputs=samples_and_labels[1],
            from_labels=True
        )
        print(
            expected_bounding_boxes.shape,
            '-',
            n_valid_expected_bounding_boxes.shape
        )

        predictions = model(samples_and_labels[0])

        # bounding boxes reconstructed from predictions:
        (
            inferred_bounding_boxes,
            n_valid_inferred_bounding_boxes
        ) = get_bounding_boxes_from_model_outputs(
            model_outputs=predictions,
            from_labels=False
        )
        print(
            inferred_bounding_boxes.shape,
            '-',
            n_valid_inferred_bounding_boxes.shape
        )

        # only the first mini-batch is considered:
        break

    print('\n' + '_'*120)

    # second check - on a single sample, i.e. as when predicting online: the
    # final format of bounding boxes is printed as well
    training_samples_and_labels = (
        # re-arranging samples into mini-batches of a single sample each:
        training_samples_and_labels.unbatch().batch(1)
    )
    for samples_and_labels in training_samples_and_labels:
        print('\n' + '-'*90)

        # bounding boxes reconstructed from labels:
        (
            expected_bounding_boxes,
            n_valid_expected_bounding_boxes
        ) = get_bounding_boxes_from_model_outputs(
            model_outputs=samples_and_labels[1],
            from_labels=True
        )
        print(
            expected_bounding_boxes.shape,
            '-',
            n_valid_expected_bounding_boxes.shape
        )

        submissions = convert_batched_bounding_boxes_to_final_format(
            batched_bounding_boxes=expected_bounding_boxes,
            batched_n_valid_bounding_boxes=n_valid_expected_bounding_boxes,
            predicting_online=True,
            as_strings=True
        )
        print(submissions)

        predictions = model(samples_and_labels[0])

        # bounding boxes reconstructed from predictions:
        (
            inferred_bounding_boxes,
            n_valid_inferred_bounding_boxes
        ) = get_bounding_boxes_from_model_outputs(
            model_outputs=predictions,
            from_labels=False
        )
        print(
            inferred_bounding_boxes.shape,
            '-',
            n_valid_inferred_bounding_boxes.shape
        )

        submissions = convert_batched_bounding_boxes_to_final_format(
            batched_bounding_boxes=inferred_bounding_boxes,
            batched_n_valid_bounding_boxes=n_valid_inferred_bounding_boxes,
            predicting_online=True,
            as_strings=True
        )
        print(submissions)

        # only the first sample is considered:
        break


#### Model architecture

In [None]:
"""
Model architecture definition.
"""


# pylint: disable=import-error,no-name-in-module
from tensorflow import Tensor
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.activations import sigmoid
from tensorflow.keras.layers import (
    BatchNormalization,
    Convolution2D,
    LeakyReLU,
    MaxPooling2D,
    Reshape
)
from tensorflow.keras.layers.experimental.preprocessing import (
    RandomFlip,
    Rescaling
)
# pylint: enable=import-error,no-name-in-module

# skipped when running everything in a unified notebook on Kaggle's servers,
# where all these names are already defined in the shared namespace:
if __name__ != 'main_by_mattia':
    from common_constants import (
        DOWNSAMPLING_STEPS,
        IMAGE_N_CHANNELS,
        IMAGE_N_COLUMNS,
        IMAGE_N_ROWS,
        N_ANCHORS_PER_CELL,
        N_OUTPUTS_PER_ANCHOR,
        OUTPUT_GRID_N_COLUMNS,
        OUTPUT_GRID_N_ROWS
    )


# keyword arguments shared by all the convolutional layers of the model:
# 3x3 kernels, unitary strides and 'same' padding, with no built-in
# activation function
CONVOLUTIONAL_LAYERS_COMMON_KWARGS = dict(
    kernel_size=(3, 3),
    strides=(1, 1),
    padding='same',
    data_format='channels_last',
    dilation_rate=(1, 1),
    groups=1,
    activation=None,
    use_bias=True,
)

# number of filters of the convolutional layers in the first iso-resolution
# block (marked as TODO in the original code, presumably still to be tuned):
FIRST_LAYER_N_CONVOLUTIONAL_FILTERS = 16  # TODO

# input pixels' intensities are multiplied by the rescaling factor and then
# summed to the offset:
INPUT_NORMALIZATION_OFFSET = 0.0
INPUT_NORMALIZATION_RESCALING_FACTOR = 1.0 / 255

# slope of the leaky rectified linear units for negative inputs:
LEAKY_RELU_NEGATIVE_SLOPE = 0.1

# number of enriched convolutional layers stacked before each downsampling:
N_CONVOLUTIONS_AT_SAME_RESOLUTION = 3

# keyword arguments shared by all the pooling layers of the model: 2x2
# windows with 2x2 strides and 'valid' padding
POOLING_LAYERS_COMMON_KWARGS = dict(
    pool_size=(2, 2),
    strides=(2, 2),
    padding='valid',
    data_format='channels_last',
)


class YOLOv3Variant(Model):  # noqa: E501 pylint: disable=abstract-method, too-many-ancestors
    """
    Customized architecture variant of YOLOv3.

    The model wraps a fully-convolutional network ('yolov3_fcn') that maps a
    batch of images to a grid of anchors' attributes, all squashed into the
    [0;1] range: no bounding boxes' post-processing (filtering &
    aggregation) is part of the forward propagation.
    """

    @staticmethod
    def conv_plus_norm_plus_activation(
            n_of_filters: int
    ) -> Sequential:
        """
        Return an instance of an enriched convolutional layer block composed,
        going from inputs to outputs, of:
        - a 2D convolutional layer without any non-linearity;
        - a batch-normalization layer;
        - a leaky rectified linear unit activation function.
        ---
            Inputs:
                - n_of_filters: number of filters (i.e. output channels) of
                the convolutional layer
        """
        return Sequential(
            [
                Convolution2D(
                    filters=n_of_filters,
                    **CONVOLUTIONAL_LAYERS_COMMON_KWARGS
                ),
                BatchNormalization(),
                LeakyReLU(
                    alpha=LEAKY_RELU_NEGATIVE_SLOPE
                )
            ]
        )

    @staticmethod
    def build_fully_convolutional_yolov3_architecture() -> Model:
        """
        Return an instance of the herein defined YOLOv3 model architecture
        that represents its fully-convolutional part, that is excluding
        bounding boxes' postprocessing (filtering & aggregation).
        ---
            Input Shape (batch dimension excluded):
                - (IMAGE_N_ROWS, IMAGE_N_COLUMNS, IMAGE_N_CHANNELS)
        ---
            Output Shape (batch dimension excluded):
                - (
                    OUTPUT_GRID_N_ROWS,
                    OUTPUT_GRID_N_COLUMNS,
                    N_ANCHORS_PER_CELL,
                    N_OUTPUTS_PER_ANCHOR
                )
        """
        inputs = Input(
            shape=(IMAGE_N_ROWS, IMAGE_N_COLUMNS, IMAGE_N_CHANNELS)
        )

        # rescaling the input image to normalize its pixels' intensities:
        outputs = Rescaling(
            scale=INPUT_NORMALIZATION_RESCALING_FACTOR,
            offset=INPUT_NORMALIZATION_OFFSET
        )(inputs)

        # NOTE: no random horizontal flip is applied inside the model: such a
        # layer would mirror only the input images during training, while
        # the labels fed to the loss would keep referring to the unmirrored
        # images, thus pairing flipped samples with wrong targets - any
        # flipping augmentation has to mirror samples and labels jointly,
        # i.e. where the training samples and labels are built

        current_n_of_filters = FIRST_LAYER_N_CONVOLUTIONAL_FILTERS
        # for each iso-resolution block of convolutional processing ended by a
        # downsampling:
        for _ in range(DOWNSAMPLING_STEPS):
            # for each enriched convolutional layer in the current
            # iso-resolution block:
            for _ in range(N_CONVOLUTIONS_AT_SAME_RESOLUTION):
                outputs = YOLOv3Variant.conv_plus_norm_plus_activation(
                    n_of_filters=current_n_of_filters
                )(outputs)

            # downsampling, ending the iso-resolution block:
            outputs = MaxPooling2D(**POOLING_LAYERS_COMMON_KWARGS)(outputs)

            # updating the number of filters for the next iso-resolution
            # convolutional layers (by doubling them):
            current_n_of_filters *= 2

        # final 1x1 convolutions to predict bounding boxes' attributes from
        # grid anchors' feature maps:
        outputs = Convolution2D(  # pylint: disable=repeated-keyword
            filters=(N_ANCHORS_PER_CELL * N_OUTPUTS_PER_ANCHOR),
            **(
                dict(CONVOLUTIONAL_LAYERS_COMMON_KWARGS, kernel_size=(1, 1))
            )
        )(outputs)
        # NOTE: now bounding boxes' attributes respect the order of meaning
        # (object centered probability, x, y, width, height)

        # asserting the correctness of the current outputs' shape:
        assert (
            outputs.shape[1:] == (
                OUTPUT_GRID_N_ROWS,
                OUTPUT_GRID_N_COLUMNS,
                N_ANCHORS_PER_CELL * N_OUTPUTS_PER_ANCHOR
            )
        ), "Unmatched expectations between outputs and labels shape."

        # reshaping the last output dimension to split anchors and their
        # features along two separate dimensions:
        outputs = Reshape(
            target_shape=(
                OUTPUT_GRID_N_ROWS,
                OUTPUT_GRID_N_COLUMNS,
                N_ANCHORS_PER_CELL,
                N_OUTPUTS_PER_ANCHOR
            )
        )(outputs)

        # applying an element-wise sigmoidal activation function as all 5
        # bounding boxes' output attributes must belong to [0;1] range,
        # since they are either probabilities of a single class (the first
        # attribute) or relative coordinates (the second and third one) or
        # relative sizes (the fourth and fifth one):
        outputs = sigmoid(outputs)
        # NOTE: the sigmoids are computed here, instead of being fused with
        # the loss computation (and repeated at inference time), because
        # fusing them would not improve gradients during training: only the
        # objectness score undergoes a binary cross-entropy, which could be
        # fused with its sigmoid, while the other attributes of the anchors
        # represent coordinates and sizes - not likelihoods/probabilities -
        # and are compared to their labels through a mean absolute error

        return Model(
            inputs=inputs,
            outputs=outputs
        )

    def __init__(self) -> None:
        super().__init__()
        self.yolov3_fcn = self.build_fully_convolutional_yolov3_architecture()

    def call(self, inputs: Tensor, training: bool = False) -> Tensor:  # noqa: E501 pylint: disable=arguments-differ
        """
        Forward propagation definition.
        ---
            Inputs:
                - inputs: batch of images fed to the fully-convolutional
                network
                - training: flag forwarded to the fully-convolutional
                network to select between training and inference behavior
        ---
            Output:
                - the raw outputs of the fully-convolutional network, both
                at training and at inference time
        """
        # NOTE: bounding boxes are deliberately not post-processed here
        # (filtered and aggregated through
        # 'get_bounding_boxes_from_model_outputs') at inference time,
        # because the validation loss - computed with training = False -
        # would not otherwise receive the same kind of inputs as during
        # training: post-processing is left to the callers
        return self.yolov3_fcn(
            inputs=inputs,
            training=training
        )


if __name__ == '__main__':
    # when run as a standalone script: instantiating the model and printing
    # the layer-by-layer summary of its fully-convolutional part
    model = YOLOv3Variant()

    model.yolov3_fcn.summary()


#### Loss and metrics

In [None]:
"""
Definitions of the employed loss function and metrics.

NOTE on the employed metric, citing the competition explanation:
------------------------------------------------------------------------------
"This competition is evaluated on the F2 Score at different intersection over
union (IoU) thresholds. The F2 metric weights recall more heavily than
precision, as in this case it makes sense to tolerate some false positives
in order to ensure very few starfish are missed.

The metric sweeps over IoU thresholds in the range of 0.3 to 0.8 with a step
size of 0.05, calculating an F2 score at each threshold. For example, at a
threshold of 0.5, a predicted object is considered a "hit" if its IoU with a
ground truth object is at least 0.5.

A true positive is the first (in confidence order, see details below)
submission box in a sample with an IoU greater than the threshold against an
unmatched solution box.

Once all submission boxes have been evaluated, any unmatched submission boxes
are false positives; any unmatched solution boxes are false negatives.

The final F2 Score is calculated as the mean of the F2 scores at each IoU
threshold. Within each IoU threshold the competition metric uses micro
averaging; every true positive, false positive, and false negative has equal
weight compared to each other true positive, false positive, and false
negative.

In your submission, you are also asked to provide a confidence level for each
bounding box. Bounding boxes are evaluated in order of their confidence
levels. This means that bounding boxes with higher confidence will be checked
first for matches against solutions, which determines what boxes are
considered true and false positives."
------------------------------------------------------------------------------
"""


from typing import List, Tuple

# pylint: disable=import-error,no-name-in-module
from tensorflow import (
    expand_dims,
    stack,
    Tensor,
    tile,
    where,
    zeros
)
from tensorflow.keras.losses import binary_crossentropy, mean_absolute_error
from tensorflow.math import (
    add,
    greater_equal,
    logical_not,
    multiply,
    reduce_mean,
    reduce_sum
)
# pylint: enable=import-error,no-name-in-module

# skipped when running everything in a unified notebook on Kaggle's servers,
# where all these names are already defined in the shared namespace:
if __name__ != 'main_by_mattia':
    from common_constants import (
        LOSS_CONTRIBUTE_IMPORTANCE_OF_EMPTY_ANCHORS,
        LOSS_CONTRIBUTE_IMPORTANCE_OF_FULL_ANCHORS,
        OUTPUT_GRID_N_ROWS,
        OUTPUT_GRID_N_COLUMNS,
        N_ANCHORS_PER_CELL,
        N_OUTPUTS_PER_ANCHOR
    )
    from inference import (
        get_bounding_boxes_from_model_outputs,
        convert_batched_bounding_boxes_to_final_format
    )
    from samples_and_labels import (
        MINI_BATCH_SIZE
    )


# small constant added to the F2 scores' denominators so as to avoid
# divisions by zero:
EPSILON = 1e-7

# IoU thresholds swept by the metric - from 0.30 to 0.80 with a step of 0.05
# - derived from integer percentages so as to get exactly the same values as
# the respective decimal literals:
IOU_THRESHOLDS = [percentage / 100 for percentage in range(30, 81, 5)]

# shape of a full mini-batch of labels (and of model outputs):
LABELS_FULL_SHAPE = (MINI_BATCH_SIZE,) + (
    OUTPUT_GRID_N_ROWS,
    OUTPUT_GRID_N_COLUMNS,
    N_ANCHORS_PER_CELL,
    N_OUTPUTS_PER_ANCHOR,
)


def compute_intersection_over_union(
        x_y_w_h_first_box: Tuple[int, int, int, int],
        x_y_w_h_second_box: Tuple[int, int, int, int]
) -> float:
    """
    Compute the intersection over union (IoU) between two boxes represented
    by the two sets of {top-left corner x coordinate, top-left corner y
    coordinate, box width, box height} given as inputs.
    ---
        Output:
            - the ratio between the intersection area and the union area of
            the two boxes, or 0.0 when the union area is not positive (e.g.
            when both boxes are degenerate, with null width or height),
            instead of failing with a division by zero
    """
    # NOTE: only the first four attributes of each box are considered
    first_x, first_y, first_width, first_height = x_y_w_h_first_box[:4]
    second_x, second_y, second_width, second_height = x_y_w_h_second_box[:4]

    # side lengths of the intersection rectangle, clipped to zero when the
    # boxes do not overlap along the considered axis:
    intersection_width = max(
        (
            min(first_x + first_width, second_x + second_width)
            - max(first_x, second_x)
        ),
        0
    )
    intersection_height = max(
        (
            min(first_y + first_height, second_y + second_height)
            - max(first_y, second_y)
        ),
        0
    )
    boxes_intersection_area = intersection_width * intersection_height

    boxes_union_area = (
        (first_width * first_height)  # 1st box area
        + (second_width * second_height)  # 2nd box area
        - boxes_intersection_area
    )

    # degenerate boxes cover no area, so they cannot overlap with anything:
    if boxes_union_area <= 0:
        return 0.0

    return boxes_intersection_area / boxes_union_area


def compute_mean_f2_scores(
        images_matches: List[Tuple[float, float, float]]
) -> float:
    """
    Return the mean of the F2 scores of the mini-batch samples, given - for
    each sample - its numbers of true positives, false positives and false
    negatives (in this order) as inputs.
    ---
        Output:
            - the arithmetic mean of the per-sample F2 scores, or 0.0 when
            no sample is given
    ---
    NOTE: each F2 score is computed as TP / (TP + 0.8*FN + 0.2*FP), which is
    equivalent to 5*TP / (5*TP + 4*FN + FP), with a small constant added to
    the denominator to avoid divisions by zero - therefore, a sample without
    any true positive scores 0, even when nothing had to be detected.
    NOTE(review): scores are averaged over samples here, while the
    competition explanation quoted in the module docstring mentions micro
    averaging - to be confirmed whether this difference is intended.
    """
    f2_scores = [
        true_positives / (
            true_positives + 0.8*false_negatives + 0.2*false_positives
            + EPSILON
        )
        for true_positives, false_positives, false_negatives in images_matches
    ]

    # no samples, no scores to average:
    if not f2_scores:
        return 0.0

    return sum(f2_scores) / len(f2_scores)


def evaluate_batched_bounding_boxes_matching(
        expected_bounding_boxes: List[Tuple[float, int, int, int, int]],
        predicted_bounding_boxes: List[Tuple[float, int, int, int, int]],
        iou_threshold: float
) -> List[Tuple[int, int, int]]:
    """
    Return the true positives, false positives, false negatives - according
    to the competition metric definition - for each pair of arrays of
    predicted vs expected bounding boxes in the batched inputs.
    ---
        Inputs:
            - expected_bounding_boxes: for each image, its expected bounding
            boxes, each indexable as (score, x, y, width, height)
            - predicted_bounding_boxes: for each image, its predicted
            bounding boxes, each indexable as (confidence score, x, y,
            width, height)
            - iou_threshold: minimum IoU for a predicted bounding box to
            match an expected one
    ---
        Output:
            - for each image, the triplet [true positives, false positives,
            false negatives]
    ---
    NOTE: the inputs are never modified, so the very same lists can be
    evaluated again against different IoU thresholds.
    """
    matches = []
    for image_expected_bounding_boxes, image_predicted_bounding_boxes in zip(
            expected_bounding_boxes, predicted_bounding_boxes
    ):
        # sorting the predicted bounding boxes of the considered image by
        # relevance according to the predicted confidence score:
        best_to_worst_predicted_bounding_boxes = sorted(
            image_predicted_bounding_boxes,
            key=lambda bounding_box_attributes: bounding_box_attributes[0],
            reverse=True
        )
        # NOTE: the expected bounding boxes are equally important, no sorting
        # is required

        # working on a copy of the expected bounding boxes, from which the
        # matched ones are progressively discarded, so as not to alter the
        # caller's data:
        unmatched_expected_bounding_boxes = list(
            image_expected_bounding_boxes
        )

        true_positives = 0
        false_positives = 0

        for predicted_bounding_box in best_to_worst_predicted_bounding_boxes:
            for index, expected_bounding_box in enumerate(
                    unmatched_expected_bounding_boxes
            ):
                if (
                        compute_intersection_over_union(
                            x_y_w_h_first_box=predicted_bounding_box[1:],
                            x_y_w_h_second_box=expected_bounding_box[1:]
                        ) >= iou_threshold
                ):
                    # each expected bounding box can be matched only once,
                    # so it is discarded - by position - after its match:
                    del unmatched_expected_bounding_boxes[index]
                    true_positives += 1
                    break
            else:
                # no expected bounding box was matched by the predicted one
                # (the loop ended without breaking):
                false_positives += 1

        # the expected bounding boxes left unmatched were missed:
        false_negatives = len(unmatched_expected_bounding_boxes)

        matches.append([true_positives, false_positives, false_negatives])

    return matches


def iou_threshold_averaged_f2_score(y_true: Tensor, y_pred: Tensor) -> Tensor:
    """
    Metric used to validate the model goodness - according to the competition
    aim - that represents the F2 score, as they decided to favor recall over
    precision, averaged over the different IoU thresholds employed to
    consider bounding boxes as detected or not, with these thresholds being
    the ones listed in IOU_THRESHOLDS.
    """
    def as_lists_of_bounding_boxes(bounding_boxes, n_valid_bounding_boxes):
        """
        Convert batched bounding boxes to their final, non-textual format.
        """
        return convert_batched_bounding_boxes_to_final_format(
            batched_bounding_boxes=bounding_boxes,
            batched_n_valid_bounding_boxes=n_valid_bounding_boxes,
            predicting_online=False,
            as_strings=False
        )

    # both labels and predictions are turned from model outputs into
    # bounding boxes, i.e. into the format that predictions assume at
    # inference time, when they undergo an additional post-processing that is
    # not carried out during training:
    expected_boxes, n_valid_expected_boxes = (
        get_bounding_boxes_from_model_outputs(
            model_outputs=y_true,
            from_labels=True
        )
    )
    inferred_boxes, n_valid_inferred_boxes = (
        get_bounding_boxes_from_model_outputs(
            model_outputs=y_pred,
            from_labels=False
        )
    )

    expected_as_lists = as_lists_of_bounding_boxes(
        expected_boxes, n_valid_expected_boxes
    )
    inferred_as_lists = as_lists_of_bounding_boxes(
        inferred_boxes, n_valid_inferred_boxes
    )

    # one mean F2 score for each IoU threshold, in order:
    scores_per_threshold = [
        compute_mean_f2_scores(
            images_matches=evaluate_batched_bounding_boxes_matching(
                expected_bounding_boxes=expected_as_lists,
                predicted_bounding_boxes=inferred_as_lists,
                iou_threshold=threshold
            )
        )
        for threshold in IOU_THRESHOLDS
    ]

    # averaging over the IoU thresholds:
    stacked_scores = stack(values=scores_per_threshold, axis=-1)
    return reduce_mean(input_tensor=stacked_scores, axis=-1)


def yolov3_variant_loss(y_true: Tensor, y_pred: Tensor) -> Tensor:  # noqa: E501 pylint: disable=too-many-locals
    """
    Loss function minimized to train the defined YOLOv3 variant.

    Anchors are split, according to the labels, into full ones (objectness
    label of at least 0.5) and empty ones: full anchors contribute with the
    binary cross-entropy of their objectness scores plus the mean absolute
    errors of their coordinates and sizes, empty anchors contribute with the
    binary cross-entropy of their objectness scores only. The two
    contributes are averaged over all the anchors of each sample and then
    combined through a weighted sum.
    ---
        Input Shapes:
            - (
                n. of samples,
                OUTPUT_GRID_N_ROWS,
                OUTPUT_GRID_N_COLUMNS,
                N_ANCHORS_PER_CELL,
                N_OUTPUTS_PER_ANCHOR
            )
            - (
                n. of samples,
                OUTPUT_GRID_N_ROWS,
                OUTPUT_GRID_N_COLUMNS,
                N_ANCHORS_PER_CELL,
                N_OUTPUTS_PER_ANCHOR
            )
    ---
        Output Shape:
            - (n. of samples,)
    ---
    NOTE: the number of samples is not required to equal MINI_BATCH_SIZE, so
    that incomplete mini-batches are supported as well.
    """
    # NOTE: imported locally so that this function is self-sufficient
    from tensorflow import zeros_like  # noqa: E501 pylint: disable=import-outside-toplevel,import-error,no-name-in-module

    # zero-filled placeholders that replace the attributes of the anchors
    # not considered by a loss term, so that they yield no loss - shaped
    # (and typed) after the actual inputs instead of after a fixed
    # mini-batch size:
    zeros_like_labels = zeros_like(input=y_true)
    zeros_like_predictions = zeros_like(input=y_pred)

    true_anchors_with_objects_flags = tile(
        input=expand_dims(
            input=greater_equal(
                x=y_true[..., 0],
                y=0.5
            ),
            axis=-1
        ),
        multiples=(1, 1, 1, 1, N_OUTPUTS_PER_ANCHOR)
    )  # shape -> (samples, rows, columns, anchors, attributes)
    true_anchors_without_objects_flags = logical_not(
        x=true_anchors_with_objects_flags
    )  # shape -> (samples, rows, columns, anchors, attributes)

    y_true_full_anchors = expand_dims(
        input=where(
            condition=true_anchors_with_objects_flags,
            x=y_true,
            y=zeros_like_labels
        ),
        axis=-1
    )  # shape -> (samples, rows, columns, anchors, attributes, 1)
    y_true_empty_anchors = expand_dims(
        input=where(
            condition=true_anchors_without_objects_flags,
            x=y_true,
            y=zeros_like_labels
        ),
        axis=-1
    )  # shape -> (samples, rows, columns, anchors, attributes, 1)
    y_pred_full_anchors = expand_dims(
        input=where(
            condition=true_anchors_with_objects_flags,
            x=y_pred,
            y=zeros_like_predictions
        ),
        axis=-1
    )  # shape -> (samples, rows, columns, anchors, attributes, 1)
    y_pred_empty_anchors = expand_dims(
        input=where(
            condition=true_anchors_without_objects_flags,
            x=y_pred,
            y=zeros_like_predictions
        ),
        axis=-1
    )  # shape -> (samples, rows, columns, anchors, attributes, 1)

    # objectness scores are the first attribute of each anchor:
    full_anchors_objectness_loss_per_anchor = binary_crossentropy(
        y_true=y_true_full_anchors[..., 0, :],
        y_pred=y_pred_full_anchors[..., 0, :],
        from_logits=False,
        axis=-1,
    )  # shape -> (samples, rows, columns, anchors)

    empty_anchors_objectness_loss_per_anchor = binary_crossentropy(
        y_true=y_true_empty_anchors[..., 0, :],
        y_pred=y_pred_empty_anchors[..., 0, :],
        from_logits=False,
        axis=-1,
    )  # shape -> (samples, rows, columns, anchors)

    # coordinates' offsets are the second and third attributes of each
    # anchor, whose absolute errors are summed together:
    full_anchors_coordinates_offsets_loss_per_anchor = reduce_sum(
        input_tensor=mean_absolute_error(
            y_true=y_true_full_anchors[..., 1:3, :],
            y_pred=y_pred_full_anchors[..., 1:3, :],
        ),
        axis=-1
    )  # shape -> (samples, rows, columns, anchors)
    # sizes are the remaining attributes of each anchor, whose absolute
    # errors are summed together:
    full_anchors_coordinates_scales_loss_per_anchor = reduce_sum(
        input_tensor=mean_absolute_error(
            y_true=y_true_full_anchors[..., 3:, :],
            y_pred=y_pred_full_anchors[..., 3:, :],
        ),
        axis=-1
    )  # shape -> (samples, rows, columns, anchors)

    full_anchors_coordinates_loss_per_anchor = add(
        x=full_anchors_coordinates_offsets_loss_per_anchor,
        y=full_anchors_coordinates_scales_loss_per_anchor
    )  # shape -> (samples, rows, columns, anchors)

    full_anchors_mean_loss = reduce_mean(
        input_tensor=add(
            x=full_anchors_objectness_loss_per_anchor,
            y=full_anchors_coordinates_loss_per_anchor
        ),
        axis=[1, 2, 3]
    )  # shape -> (samples,)

    empty_anchors_mean_loss = reduce_mean(
        input_tensor=empty_anchors_objectness_loss_per_anchor,
        axis=[1, 2, 3]
    )  # shape -> (samples,)

    # NOTE: without weighting, here, after mean reduction, it means that both
    # terms will have the same weight, irrespectively of their imbalance
    return add(
        x=multiply(
            x=full_anchors_mean_loss,
            y=LOSS_CONTRIBUTE_IMPORTANCE_OF_FULL_ANCHORS
        ),
        y=multiply(
            x=empty_anchors_mean_loss,
            y=LOSS_CONTRIBUTE_IMPORTANCE_OF_EMPTY_ANCHORS
        )
    )  # shape -> (samples,)


if __name__ == '__main__':
    # no standalone demonstration is implemented for the loss and metrics:
    raise NotImplementedError


#### Training and validation

In [None]:
"""
Execution of the defined model training and validation on the respective
preprocessed dataset splits, optimizing the defined loss and monitoring the
metrics of interest.
"""


from os import getcwd, pardir
from os.path import join as path_join
from typing import List

from matplotlib.pyplot import (
    close,
    figure,
    pause,
    plot,
    savefig,
    show,
    xlabel,
    ylabel
)
# pylint: disable=import-error,no-name-in-module
from tensorflow.data import Dataset
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
# pylint: enable=import-error,no-name-in-module

# skipped when running everything in a unified notebook on Kaggle's servers,
# where all these names are already defined in the shared namespace:
if __name__ != 'main_by_mattia':
    from loss_and_metrics import (
        iou_threshold_averaged_f2_score,
        yolov3_variant_loss
    )
    from model_architecture import YOLOv3Variant
    from samples_and_labels import (
        dataset_of_samples_and_model_outputs,
        split_dataset_into_batched_training_and_validation_sets
    )


# step size of the optimizer:
LEARNING_RATE = 1e-3
# number of passes over the training set:
N_EPOCHS = 10

# epochs after which the model is validated, i.e. halfway through and at the
# end of training
# NOTE: these are 1-based indexes:
EPOCHS_WHEN_VALIDATION_CARRIED_OUT = [N_EPOCHS // 2, N_EPOCHS]

# directory where the plots of the training and validation statistics are
# saved: the current working directory when running everything in a unified
# notebook on Kaggle's servers, the pictures folder of the documentation -
# reached from the current working directory - otherwise
TRAINING_AND_VALIDATION_STATISTICS_DIR = (
    getcwd() if __name__ == 'main_by_mattia'
    else path_join(getcwd(), pardir, 'docs', 'pictures')
)


def plot_and_save_training_and_validation_statistics(
        training_epoch_numbers: List[int],
        training_loss_values: List[float],
        validation_epoch_numbers: List[int],
        validation_loss_values: List[float],
        validation_metric_values: List[float],
) -> None:
    """
    Draw two figures - the first one with the training and validation loss
    trends with epochs, the second one with the validation metric trend with
    epochs - saving each of them as a picture in the statistics directory
    and showing it for some seconds before closing it.
    """
    def label_save_show_and_close(
            y_axis_label: str,
            picture_name: str
    ) -> None:
        """
        Label the axes of the current figure, save it as a picture with the
        given file name, show it without blocking for 5 seconds and close it.
        """
        xlabel(xlabel="Epoch Number")
        ylabel(ylabel=y_axis_label)

        picture_path = path_join(
            TRAINING_AND_VALIDATION_STATISTICS_DIR,
            picture_name
        )
        savefig(fname=picture_path, bbox_inches='tight')

        show(block=False)
        pause(interval=5)

        close()

    # loss trends: training values as a line, validation values as red
    # dots:
    figure()
    plot(training_epoch_numbers, training_loss_values)
    plot(validation_epoch_numbers, validation_loss_values, 'ro')
    label_save_show_and_close(
        "Loss",
        'Training and Validation Loss Trends.png'
    )

    # metric trend: validation values as red dots:
    figure()
    plot(validation_epoch_numbers, validation_metric_values, 'ro')
    label_save_show_and_close(
        "Metric",
        'Training and Validation Metric Trends.png'
    )


def train_and_validate_model(
        model_instance: Model,
        training_set: Dataset,
        validation_set: Dataset
) -> None:
    """
    Compile (in TensorFlow's language acceptation, i.e. associate optimizer,
    loss function and metrics to) the input model instance and alternately
    train and validate it on the respective input datasets, eventually
    plotting and saving training and validation statistics.
    ---
        Inputs:
            - model_instance: model to be trained, modified in place
            - training_set: dataset the model is fit on, for one pass at
            each epoch
            - validation_set: dataset the model is evaluated on, only after
            the epochs listed in EPOCHS_WHEN_VALIDATION_CARRIED_OUT
    """
    # the same optimizer is referenced throughout all the training procedure
    # so as not to lose its internal states/weights, since it's a stateful
    # optimizer whose parameters are updated during training - as well as the
    # model ones:
    optimizer = Adam(learning_rate=LEARNING_RATE)

    # initializing the training and validation statistics:
    epoch_numbers = []
    training_loss_trend = []
    # NOTE: the epochs when validation actually took place are recorded as
    # they occur, so that the validation trends always have as many points
    # as epoch numbers, even when some of the configured validation epochs
    # fall outside of the training epochs' range
    validated_epoch_numbers = []
    validation_loss_trend = []
    validation_metric_trend = []

    # for each epoch:
    for epoch_number in range(1, (N_EPOCHS + 1)):
        epoch_numbers.append(epoch_number)

        # training:

        # re-compiling the model to avoid the eager metric computation:
        model_instance.compile(
            optimizer=optimizer,
            loss=yolov3_variant_loss,
            # NOTE: the defined metric cannot be run when not in eager mode,
            # so it is not evaluated while training:
            metrics=[]
        )
        # training the model (on the training set):
        training_history = model_instance.fit(
            x=training_set,
            epochs=1,
        )
        training_loss_trend.append(training_history.history['loss'][0])

        if epoch_number in EPOCHS_WHEN_VALIDATION_CARRIED_OUT:
            # validation:

            # re-compiling the model to allow for the eager metric
            # computation:
            model_instance.compile(
                optimizer=optimizer,
                loss=yolov3_variant_loss,
                metrics=[iou_threshold_averaged_f2_score],
                # NOTE: the defined metric can only be run in eager mode:
                run_eagerly=True
            )
            # validating the model (on the validation set):
            loss_and_metric = model_instance.evaluate(
                x=validation_set
            )
            validated_epoch_numbers.append(epoch_number)
            validation_loss_trend.append(loss_and_metric[0])
            validation_metric_trend.append(loss_and_metric[1])

    plot_and_save_training_and_validation_statistics(
        training_epoch_numbers=epoch_numbers,
        training_loss_values=training_loss_trend,
        validation_epoch_numbers=validated_epoch_numbers,
        validation_loss_values=validation_loss_trend,
        validation_metric_values=validation_metric_trend,
    )


if __name__ == '__main__':
    # when run as a standalone script: splitting the dataset of samples and
    # labels into batched training and validation sets
    (
        training_samples_and_labels, validation_samples_and_labels
    ) = split_dataset_into_batched_training_and_validation_sets(
        training_plus_validation_set=dataset_of_samples_and_model_outputs()
    )

    model = YOLOv3Variant()

    # training and validating on the first 4 and 3 elements of the
    # respective sets only - presumably to keep this trial run quick
    train_and_validate_model(
        model_instance=model,
        training_set=training_samples_and_labels.take(4),
        validation_set=validation_samples_and_labels.take(3)
    )


#### Main

In [None]:
"""
Execution of the proposed competition solution.
"""


from random import seed as random_seed

from numpy.random import seed as numpy_seed
# pylint: disable=import-error,no-name-in-module
from tensorflow import convert_to_tensor, expand_dims
from tensorflow.keras import Model
from tensorflow.random import set_seed
# pylint: enable=import-error,no-name-in-module

# skipped when running everything in a unified notebook on Kaggle's servers,
# where all these names are already defined in the shared namespace:
if __name__ != 'main_by_mattia':
    from common_constants import DATA_TYPE_FOR_INPUTS
    from inference import (
        convert_batched_bounding_boxes_to_final_format,
        get_bounding_boxes_from_model_outputs
    )
    from model_architecture import YOLOv3Variant
    from samples_and_labels import (
        dataset_of_samples_and_model_outputs,
        split_dataset_into_batched_training_and_validation_sets
    )
    from training_and_validation import train_and_validate_model


def fix_seeds_for_reproducible_results(seed: int = 0) -> None:
    """
    Make the subsequent instructions produce reproducible outputs by fixing
    all the relevant seeds, i.e. the ones of Python's built-in random number
    generator, of NumPy's global random number generator and of TensorFlow's
    global random number generator.
    ---
        Inputs:
            - seed: value all the seeds are set to, 0 by default
    """
    random_seed(a=seed)
    numpy_seed(seed=seed)
    set_seed(seed=seed)


def infer_on_test_set_and_submit(trained_model_instance: Model) -> None:
    """
    Predict bounding boxes on all test set images, while submitting
    predictions, in an online fashion: one sample at a time.
    NOTE: the logic is the same as specified in the Kaggle competition's
    rules.
    NOTE: the 'pixel_array's served by the competition API iterator are Numpy
    arrays with shape (720, 1280, 3), thus a single sample at a time is
    served, actually having to predict online.
    """
    import greatbarrierreef  # noqa: E501 pylint: disable=import-outside-toplevel,import-error

    # initializing the competition environment, whose iterator loops over
    # the test set samples and the respective submission data frames:
    competition_environment = greatbarrierreef.make_env()

    for (
            pixel_array, sample_prediction_df
    ) in competition_environment.iter_test():
        # turning the single image into a mini-batch of one sample:
        image_as_tensor = convert_to_tensor(
            value=pixel_array,
            dtype=DATA_TYPE_FOR_INPUTS
        )
        single_sample_batch = expand_dims(input=image_as_tensor, axis=0)

        # inferring and post-processing the bounding boxes:
        (
            inferred_bounding_boxes,
            n_valid_inferred_bounding_boxes
        ) = get_bounding_boxes_from_model_outputs(
            model_outputs=trained_model_instance(single_sample_batch),
            from_labels=False
        )

        # formatting the predictions as required for submission:
        sample_prediction_df['annotations'] = (
            convert_batched_bounding_boxes_to_final_format(
                inferred_bounding_boxes,
                n_valid_inferred_bounding_boxes,
                predicting_online=True,
                as_strings=True
            )
        )

        # registering the predictions:
        competition_environment.predict(sample_prediction_df)


def main() -> None:
    """
    Run the whole proposed competition solution: fix the seeds, build the
    training and validation sets, train and validate a new model instance
    and eventually use it to predict on the test set while submitting.
    """
    fix_seeds_for_reproducible_results()

    # building the batched training and validation sets:
    whole_dataset = dataset_of_samples_and_model_outputs()
    training_set, validation_set = (
        split_dataset_into_batched_training_and_validation_sets(
            training_plus_validation_set=whole_dataset
        )
    )

    # training and validating a new model instance:
    detector = YOLOv3Variant()
    train_and_validate_model(
        model_instance=detector,
        training_set=training_set,
        validation_set=validation_set
    )

    # predicting on the test set and submitting:
    infer_on_test_set_and_submit(trained_model_instance=detector)


# only when running everything in a unified notebook on Kaggle's servers,
# where '__name__' is explicitly set to 'main_by_mattia' in the settings:
if __name__ == 'main_by_mattia':
    main()
