### Load dataset

In [1]:
import numpy as np
import pickle as pkl
from typing import List
from dataset_objects.bbox import BoundingBox
import os

SHAPE_FILE = "data/shape.pkl"

# Fail fast with a clear message when the shape description is missing: every
# array below is sized from it, so there is nothing sensible to fall back on.
# (Previously a missing file surfaced later as a NameError on `shape_array`,
# or silently reused a stale value left in the kernel.)
if not os.path.exists(SHAPE_FILE):
    raise FileNotFoundError(
        f"Shape file {SHAPE_FILE!r} not found; it is required to size the dataset arrays."
    )

# NOTE(security): pickle.load can execute arbitrary code. Only load files that
# were produced by a trusted step of this project.
with open(SHAPE_FILE, 'rb') as h:
    shape_array = pkl.load(h)

# The shape file holds exactly five values:
#   cells_width, cells_height -> first two dimensions of the region-center arrays
#   G                         -> last dimension of the region-center arrays
#   F                         -> number of columns in `input_features`
#   dataset_size              -> number of rows in `input_features` / `outputs`
[cells_width, cells_height, G, F, dataset_size] = shape_array

# Zero-filled placeholder for the region centers.
region_centers = np.zeros(
    shape=(
        cells_width,
        cells_height,
        G
    ),
    dtype=np.float64
)

# Zero-filled placeholder for the normalized region centers (same shape).
normalized_region_centers = np.zeros(
    shape=(
        cells_width,
        cells_height,
        G
    ),
    dtype=np.float64
)

# Zero-filled placeholder for the model inputs: one row per sample.
input_features = np.zeros(
    shape=(
        dataset_size,
        F
    ),
    dtype=np.float64
)

# Zero-filled placeholder for the model targets: one value per sample.
# `(dataset_size,)` is an explicit 1-tuple; `(dataset_size)` is just an int.
outputs = np.zeros(
    shape=(dataset_size,),
    dtype=np.float64
)

# Declaration only: no value is bound to `cells` here.
cells: List[List[BoundingBox]]

In [2]:
import os
import pickle as pkl

import warnings

FOLDER = "data/"
INPUTS_FILE = FOLDER + "inputs.pkl"
OUTPUTS_FILE = FOLDER + "outputs.pkl"
CENTERS_FILE = FOLDER + "centers.pkl"
N_CENTERS_FILE = FOLDER + "normal_centers.pkl"
REGIONS_FILE = FOLDER + "regions.pkl"

# Sentinel used to tell "file missing" apart from any value a pickle may hold.
_NOT_LOADED = object()


def _load_pickle_or_keep(path, fallback):
    """Return the object pickled at ``path``, or ``fallback`` if the file is missing.

    A missing file is reported with a warning instead of being skipped
    silently, so it is visible when later cells run on placeholder data.

    NOTE(security): pickle.load can execute arbitrary code; only load files
    produced by a trusted step of this project.
    """
    if not os.path.exists(path):
        warnings.warn(f"{path} not found; keeping the previous value.")
        return fallback
    with open(path, 'rb') as h:
        return pkl.load(h)


# Model in/out
input_features = _load_pickle_or_keep(INPUTS_FILE, input_features)
outputs = _load_pickle_or_keep(OUTPUTS_FILE, outputs)

# Regions
region_centers = _load_pickle_or_keep(CENTERS_FILE, region_centers)
normalized_region_centers = _load_pickle_or_keep(N_CENTERS_FILE, normalized_region_centers)

# `cells` may not be bound to any value yet, so it cannot be passed as its own
# fallback; it is only assigned when the file was actually loaded.
_loaded_cells = _load_pickle_or_keep(REGIONS_FILE, _NOT_LOADED)
if _loaded_cells is not _NOT_LOADED:
    cells = _loaded_cells

In [3]:
PERCENT_USED_FOR_TRAINING = 0.8

# Index of the first sample that goes to the test partition.
training_total = round(PERCENT_USED_FOR_TRAINING * dataset_size)

# The leading samples are used for fitting; everything after them is held out.
train_part = slice(None, training_total)
test_part = slice(training_total, None)

training_x, training_y = input_features[train_part], outputs[train_part]
testing_x, testing_y = input_features[test_part], outputs[test_part]

### Model parameters

In [4]:
# Number of hidden Dense layers stacked in the network.
LAYER_COUNT = 10
# Width (unit count) of every hidden Dense layer.
NEURONS_PER_LAYER = 32
# Number of passes over the training data.
EPOCHS = 10
# Fraction of the training data set aside for validation during fit.
PERCENT_USED_FOR_VALIDATION = 0.15
# Number of samples per gradient update.
BATCH_SIZE = 16

### Model building

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# The input shape is taken from a single sample, so the network follows the
# number of features present in the loaded data.
network_input = layers.Input(shape=np.shape(input_features[0]))

# Stack of LAYER_COUNT equally sized ReLU layers: the first one is attached to
# the input, the remaining LAYER_COUNT - 1 are chained onto the previous layer.
dense = layers.Dense(NEURONS_PER_LAYER, activation="relu")(network_input)
for _ in range(LAYER_COUNT - 1):
    dense = layers.Dense(NEURONS_PER_LAYER, activation="relu")(dense)

# Single sigmoid unit: the model emits one value in (0, 1) per sample.
final_dense = layers.Dense(1, activation="sigmoid")(dense)
final_model = keras.Model(network_input, final_dense)

# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
final_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5)]               0         
_________________________________________________________________
dense (Dense)                (None, 32)                192       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_4 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_5 (Dense)              (None, 32)                1056  

2022-09-03 17:00:50.767770: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2022-09-03 17:00:50.791445: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2899885000 Hz
2022-09-03 17:00:50.791884: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5654582737d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-09-03 17:00:50.791897: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2022-09-03 17:00:50.791958: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


### Train

In [6]:
# Optimizer used for training (default SGD settings).
sgd_optimizer = keras.optimizers.SGD()

# Error metrics reported alongside the loss during training and evaluation.
tracked_metrics = [
    tf.keras.metrics.MeanSquaredError(name="Mean Squared Error"),
    tf.keras.metrics.MeanAbsoluteError(name="Mean Absolute Error"),
    tf.keras.metrics.LogCoshError(name="Log Cosh"),
    tf.keras.metrics.RootMeanSquaredError(name="Root Mean Squared Error"),
]

final_model.compile(
    optimizer=sgd_optimizer,
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

# Every fit() argument is spelled out explicitly and collected in one place.
fit_options = dict(
    x=training_x,
    y=training_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    callbacks=None,
    validation_split=PERCENT_USED_FOR_VALIDATION,
    validation_data=None,
    shuffle='batch',
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None,
    validation_batch_size=None,
    validation_freq=1,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False,
)

# The returned history object is kept for later inspection.
fit_history = final_model.fit(**fit_options)

Epoch 1/10
 6225/62173 [==>...........................] - ETA: 5:50 - loss: 0.1666 - Mean Squared Error: 0.0385 - Mean Absolute Error: 0.0798 - Log Cosh: 0.0171 - Root Mean Squared Error: 0.1962

### Test

In [None]:
# Show the metric objects attached to the model before evaluating.
print(final_model.metrics)

# Evaluation settings for the held-out test partition, gathered in one place.
evaluation_options = dict(
    x=testing_x,
    y=testing_y,
    batch_size=BATCH_SIZE,
    verbose=1,
    sample_weight=None,
    steps=None,
    callbacks=None,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False,
    return_dict=False,
)

# Left as the final expression so the notebook displays the returned values.
final_model.evaluate(**evaluation_options)

In [None]:
# Identifier of this training run; it doubles as the base name of the saved file.
VERSION_NAME = "bce_10_10_32_sgd"
# The model is written next to the notebook with an .h5 extension.
FILENAME = f"{VERSION_NAME}.h5"

final_model.save(FILENAME)

In [None]:
def _lat_lon(center):
    """Build a latitude/longitude dict from the first two entries of ``center``."""
    return {"latitude": center[0], "longitude": center[1]}


def _corner(point):
    """Return a corner point as a ``[latitude, longitude]`` list."""
    return [point.latitude, point.longitude]


def _region_payload(col, row):
    """Assemble the outbound record for the grid cell at ``(col, row)``.

    ``regionId`` and ``predictor`` are sent empty and ``risk`` is sent as -1;
    the corners are listed south-west, south-east, north-east, north-west.
    """
    center = _lat_lon(region_centers[col][row])
    normalized_center = _lat_lon(normalized_region_centers[col][row])
    box = cells[col][row]
    return {
        "regionId": "",
        "predictor": "",
        "center": center,
        "normalizedCenter": normalized_center,
        "risk": -1,
        "bounds": {
            "coordinates": [
                _corner(box.south_west),
                _corner(box.south_east),
                _corner(box.north_east),
                _corner(box.north_west),
            ],
            "type": "Polygon"
        }
    }


# One record per grid cell, ordered column by column (x outer, y inner).
outbound_regions = []
for x in range(cells_width):
    for y in range(cells_height):
        outbound_regions.append(_region_payload(x, y))

In [None]:
import requests
import json
# Upload the saved model file together with the JSON-encoded region records.
with open(FILENAME, 'rb') as f:
    response = requests.post(
        url='http://localhost:8080/prediction/model/persist',
        data={"bounds": json.dumps(outbound_regions)},
        files=dict(predictiveModel=f),
        # (connect, read) timeouts in seconds: without a timeout, requests
        # waits forever if the server never answers.
        timeout=(5, 300),
    )

# Surface HTTP errors (4xx/5xx) instead of silently discarding the response.
response.raise_for_status()