# Predictor for content of bbox
- What is in the box, and with what scores?
- Can be used to filter out implausible boxes.
- Also tells us what kind of object is moving.



## IO
- input 
  - low res image taken from bbox
  - x, y, h, w of bbox
  - distance (if available)
- output: probabilities for
  - a clear category
  - nothing reasonable
  - a mix

In [1]:
import cv2 as cv
import numpy as np

In [2]:
import tensorflow as tf
tf.__version__

'2.7.0'

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (30, 8)

In [36]:
# Resolution of the low-res bbox crop fed to the network; these two values
# become the spatial dimensions of the image input.
xres, yres = 32, 64

In [37]:
# Possible extension: add residual connections, see
# https://keras.io/guides/functional_api/#a-toy-resnet-model

from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, BatchNormalization, Flatten


def _conv_stage(filters):
    """Return the layers of one convolutional stage.

    A stage is: 3x3 convolution with ReLU -> 2x2 max pooling ->
    batch normalization -> dropout (rate 0.3).
    """
    return [
        Conv2D(filters=filters, kernel_size=3, activation='relu'),
        MaxPooling2D(pool_size=2),
        BatchNormalization(),
        Dropout(0.3),
    ]


# Image branch: three conv stages with growing filter counts, followed by a
# 256-unit dense head. No input shape is given here, so the weights are only
# created once the model is called on an input.
image_model = tf.keras.Sequential(
    [
        *_conv_stage(32),
        *_conv_stage(64),
        *_conv_stage(128),
        Flatten(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
    ]
)

In [38]:
# Multi-input model built with the Keras functional API:
# https://keras.io/guides/functional_api/
#
# The image branch (image_model) is applied to the image input, its features
# are concatenated with the raw bbox and distance vectors, and a single softmax
# layer produces the category probabilities.

from tensorflow.keras import Model
from tensorflow.keras.layers import Input, concatenate

# Width of the softmax output, i.e. the number of categories being scored.
num_categories = 10

# dtypes need to match as we concatenate
# NOTE(review): Conv2D's default layout is (rows, cols, channels), so this
# declares 32 rows x 64 cols. Confirm the crops are produced in that
# orientation (e.g. cv.resize takes (width, height) and returns an array of
# shape (height, width)).
image_input = Input(shape=(xres, yres, 1), dtype="float32", name='image_input')

# Shapes are written as tuples: a bare int is accepted by older Keras releases
# but rejected by newer ones, while (4,) yields the same (None, 4) everywhere.
# presumably x, y, h, w of the bbox, per the IO notes -- verify against caller
bbox_input = Input(shape=(4,), dtype="float32", name='bbox_input')
# NOTE(review): the IO notes describe a single distance value, yet 4 features
# are declared here -- confirm whether this should be shape=(1,).
distance_input = Input(shape=(4,), dtype="float32", name='distance_input')

# todo: other inputs might also need a bit of hidden layers (but maybe not)
image_features = image_model(image_input)
x = concatenate([image_features, bbox_input, distance_input])

category_output = Dense(num_categories, activation='softmax', name='category_output')(x)

model = Model(
    name='composed',
    inputs=[image_input, bbox_input, distance_input],
    outputs=[category_output]
)

model.summary()

Model: "composed"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image_input (InputLayer)       [(None, 32, 64, 1)]  0           []                               
                                                                                                  
 sequential_19 (Sequential)     (None, 256)          488064      ['image_input[0][0]']            
                                                                                                  
 bbox_input (InputLayer)        [(None, 4)]          0           []                               
                                                                                                  
 distance_input (InputLayer)    [(None, 4)]          0           []                               
                                                                                           

In [24]:
# Draw the model graph including tensor shapes. Requires pydot and graphviz;
# in the recorded run they were missing, so only the install hint was printed.
tf.keras.utils.plot_model(model, show_shapes=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
