In [0]:
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
from keras.models import Model
from keras import Input
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import *
from sklearn.metrics import classification_report, confusion_matrix
from keras.callbacks import EarlyStopping, ModelCheckpoint

import random
import glob
import cv2

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Set the correct file path 
images_path = "/Render/"
# Value used when the window is missing
nonVector = -1000

# Load data and build model

**Load photos**

In [0]:
all_data_left = []
for i in glob.glob(images_path + '**/left*.png', recursive=True):
    all_data_left.append(cv2.cvtColor(cv2.imread(i), cv2.COLOR_BGR2GRAY))  

all_data_left = np.stack(all_data_left)

print(all_data_left.shape)

(628, 720, 1280)


In [0]:
all_data_right = []
for i in glob.glob(images_path + '**/right*.png', recursive=True):
    all_data_right.append(cv2.cvtColor(cv2.imread(i), cv2.COLOR_BGR2GRAY))  

all_data_right = np.stack(all_data_right)

print(all_data_right.shape)

(628, 720, 1280)


**Load Json with photos parameters**

In [0]:
target_windows = []
target_cam = []
maxSize = 0

In [0]:
import json

for file in glob.glob(images_path + '**/out.json', recursive=True):
  with open(file) as json_file:
    data = json_file.read()
    data = data.replace("\'", "\"")
    data = data.replace("][", "],[")
    json_data = json.loads(data)
    for record in json_data:
      single_target = [];
      camera = [record['tilt'],record['roll']]
      target_cam.append(np.asarray(camera))
      for window in record['windows']:
        single_target.append(window)
      target_windows.append(single_target)

for target in target_windows:
  if (len(target) > maxSize):
    maxSize = len(target)

for target in target_windows:
  if (len(target) != maxSize):
    for i in range(len(target), maxSize):
      target.append([nonVector,nonVector,nonVector])

In [0]:
target_windows = np.stack(target_windows)
print(target_windows.shape)

target_cam = np.stack(target_cam)
print(target_cam.shape)

(628, 144, 3)
(628, 2)


**Shuffle data**

In [0]:
shuffle_keys = []
for i in range (target_cam.shape[0]):
  shuffle_keys.append(i);

random.shuffle(shuffle_keys)

buf_data_left = all_data_left.copy()
buf_data_right = all_data_right.copy()
buf_target_win = target_windows.copy()
buf_target_cam = target_cam.copy()

for i in range (all_data_left.shape[0]):
  all_data_left[i] = buf_data_left[shuffle_keys[i]]
  all_data_right[i] = buf_data_right[shuffle_keys[i]]
  target_windows[i] = buf_target_win[shuffle_keys[i]]
  target_cam[i] = buf_target_cam[shuffle_keys[i]]

**Division into test and training sets**

In [0]:
train_data_left = all_data_left[0:int(len(all_data_left)*0.7)]
print(train_data_left.shape)

test_data_left = all_data_left[int(len(all_data_left)*0.7)-1:len(all_data_left)]
print(test_data_left.shape)

train_data_right = all_data_right[0:int(len(all_data_right)*0.7)]
print(train_data_right.shape)

test_data_right = all_data_right[int(len(all_data_right)*0.7)-1:len(all_data_right)]
print(test_data_right.shape)

train_target = target_windows[0:int(len(target_windows)*0.7)]
print(train_target.shape)

test_target = target_windows[int(len(target_windows)*0.7)-1:len(target_windows)]
print(test_target.shape)

train_target_cam = target_cam[0:int(len(target_cam)*0.7)]
print(train_target_cam.shape)

test_target_cam = target_cam[int(len(target_cam)*0.7)-1:len(target_cam)]
print(test_target_cam.shape)

(439, 720, 1280)
(190, 720, 1280)
(439, 720, 1280)
(190, 720, 1280)
(439, 144, 3)
(190, 144, 3)
(439, 2)
(190, 2)


**Huber and custom loss function**

In [0]:
import tensorflow as tf

def huber_loss(y_true, y_pred, clip_delta):
  error = y_true - y_pred
  cond  = tf.keras.backend.abs(error) < clip_delta

  squared_loss = 0.5 * tf.keras.backend.square(error)
  linear_loss  = clip_delta * (tf.keras.backend.abs(error) - 0.5 * clip_delta)

  return tf.where(cond, squared_loss, linear_loss)

'''
 ' Same as above but returns the mean loss.
'''
def huber_loss_mean(y_true, y_pred, clip_delta=500.0):
  return tf.keras.backend.mean(huber_loss(y_true, y_pred, clip_delta))

In [0]:
def my_loss(y_true, y_pred):
  error = y_true - y_pred
  cond  = y_true == nonVector
  cond2 = y_pred < nonVector/2

  squared_loss = 0.5 * tf.keras.backend.square(error)
  tmp = tf.where(cond, 0.0, squared_loss)

  return tf.where(cond2, tmp, squared_loss)

'''
 ' Same as above but returns the mean loss.
'''
def my_loss_mean(y_true, y_pred):
  return tf.keras.backend.mean(my_loss(y_true, y_pred))

**Build and fit model**

Testing with 3 loss functions Huber, MSE and custom 

In [0]:
def build_model():
  inp1 = Input((720,1280))
  inp2 = Input((720,1280))

  x = concatenate([inp1, inp2],axis=-1)
  x = Conv1D(filters=32, kernel_size=3, activation='relu')(x)
  x = MaxPooling1D(pool_size=2)(x)
  x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
  x = MaxPooling1D(pool_size=2)(x)
  x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
  x = MaxPooling1D(pool_size=2)(x)
  x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
  x = MaxPooling1D(pool_size=2)(x)

  # first output cam
  out1 = Flatten()(x)
  out1 = Dense(3000)(out1)
  out1 = Dense(2)(out1)

  # second output windows
  out2 = Flatten()(x)
  out2 = Dense(3000)(out2)
  out2 = Dense(maxSize*3)(out2)
  out2 = Reshape((maxSize, 3))(out2)

  model = Model([inp1,inp2], [out1, out2])
  return model

model = build_model();
# model.compile(optimizer='rmsprop', loss=huber_loss_mean, metrics=['mae'])
# model.compile(optimizer='rmsprop', loss=my_loss_mean, metrics=['mae'])
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 720, 1280)    0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 720, 1280)    0                                            
__________________________________________________________________________________________________
concatenate_3 (Concatenate)     (None, 720, 2560)    0           input_5[0][0]                    
                                                                 input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_9 (Conv1D)               (None, 718, 32)      245792      concatenate_3[0][0]        

In [0]:
es = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=5, min_delta=0.0001) 
mc = ModelCheckpoint('best_model.hdf5', monitor='loss', verbose=1, save_best_only=True, mode='min')

In [0]:
history=model.fit([train_data_left, train_data_right], [train_target_cam, train_target], epochs=50, callbacks=[es,mc], batch_size=1)

Epoch 1/50

Epoch 00001: loss improved from inf to 2705969.72288, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 2/50

Epoch 00002: loss improved from 2705969.72288 to 437308.24148, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 3/50

Epoch 00003: loss improved from 437308.24148 to 374395.83972, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 4/50

Epoch 00004: loss improved from 374395.83972 to 354170.10926, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 5/50

Epoch 00005: loss improved from 354170.10926 to 209018.78714, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 6/50

Epoch 00006: loss improved from 209018.78714 to 151118.34919, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 7/50

Epoch 00007: loss improved from 151118.34919 to 94986.74045, saving model to /content/drive/My Drive/best_model.hdf5
Epoch 8/50

Epoch 00008: loss improved from 94986.74045 to 79979.93548, saving model to /content/d

# Load model and analyze results

In [0]:
import keras.losses
keras.losses.huber_loss_mean = huber_loss_mean
keras.losses.my_loss_mean = my_loss_mean

In [0]:
from keras.models import load_model
model=load_model('best_model.hdf5')

In [0]:
[pred_target_cam,pred_target_windows] = model.predict([test_data_left,test_data_right])

In [0]:
loss, cam_loss, windows_loss, cam_metrics, windows_metrics  = model.evaluate([test_data_left,test_data_right], [test_target_cam, test_target])
print("loss :", loss)
print("cam_loss: ",cam_loss)
print("windows_loss: ", windows_loss)
print("cam_metrics: ", cam_metrics)
print("windows_metrics: ", windows_metrics)

loss : 77488.51332236842
cam_loss:  152.37286376953125
windows_loss:  77309.5546875
cam_metrics:  9.623960494995117
windows_metrics:  197.10873413085938


**Compare predict result with real**

In [0]:
print(pred_target_windows[2])
print(test_target[2])

[[  -10.515834     64.79555      71.478165 ]
 [  -54.41687      50.36494     -32.827316 ]
 [  -22.858803      4.8844266   -13.476168 ]
 [   -7.1158895   103.77395      22.437204 ]
 [  -87.36763      65.61594     -33.321327 ]
 [  -53.50479      23.25414      34.047752 ]
 [  -51.27337      89.25115      67.40274  ]
 [   26.89279     -12.319849     87.603714 ]
 [   64.358604     56.84823      37.91698  ]
 [   -2.9155014    12.66102      48.93273  ]
 [   31.673723     71.94319      -2.1186757]
 [ -113.99135      24.21956      -5.668258 ]
 [   -4.327773     76.17695      69.20424  ]
 [   35.575897    -41.41187       2.5408993]
 [   64.381584    128.70493      37.147488 ]
 [   93.36191      61.0635       32.88103  ]
 [    2.5550387   -19.793457    -86.08118  ]
 [   90.66158     -48.259876     43.168495 ]
 [   30.043365     41.95741     -25.178946 ]
 [  -91.85182      28.627132     21.682302 ]
 [ -890.6742     -886.5179     -869.7052   ]
 [ -880.9566     -885.84705    -858.17883  ]
 [ -885.69

In [0]:
print(pred_target_cam[55])
print(test_target_cam[55])

[  8.598923 -15.868231]
[ 1.53193271 -0.08726646]


In [0]:
[pred_target_cam,pred_target_windows] = model.predict([test_data_left,test_data_right])

for photo in range(len(pred_target_windows)):
  for window in range(len(pred_target_windows[photo])):
    for i in range(len(pred_target_windows[photo][window])):
      if(pred_target_windows[photo][window][i] < nonVector/3 ):
        pred_target_windows[photo][window][i] = nonVector

In [0]:
print(pred_target_windows[80])
print(test_target[80])

[[  -19.700275     24.04827      -8.77463  ]
 [   35.56099     103.716255    -14.366431 ]
 [    2.1803617    77.65384      37.39055  ]
 [  -76.0948       42.486557     23.201265 ]
 [    3.2261126    23.072895     19.079851 ]
 [   10.404868     67.888145     43.949615 ]
 [  -11.841255     37.873867     49.153297 ]
 [    2.3465698    87.359726     23.567162 ]
 [    6.116331     76.295         4.3454976]
 [  -27.171782     67.45649      -4.2823515]
 [   52.358414     92.91413      35.108612 ]
 [  -34.759552     85.58761      65.87465  ]
 [  -53.34285      17.753235     27.231865 ]
 [  -64.0562      -38.073917     47.79392  ]
 [   29.602238    161.32564      -7.1391864]
 [   31.782053     38.319866     36.245995 ]
 [    9.716111    100.68621     -69.691216 ]
 [   34.515858     15.955771     72.232605 ]
 [  -42.34761      84.776596   -116.26353  ]
 [  -32.76517      80.2694      -47.89468  ]
 [-1000.        -1000.        -1000.       ]
 [-1000.        -1000.        -1000.       ]
 [-1000.  

Windows counting accuracy

In [0]:

truePositive = 0
falsePositive = 0
trueNegative = 0
falseNegative = 0

for photo in range(len(pred_target_windows)):
  for window in range(len(pred_target_windows[photo])):
    for i in range(len(pred_target_windows[photo][window])):
      if(pred_target_windows[photo][window][i] == nonVector == test_target[photo][window][i]):
        trueNegative += 1
      elif(pred_target_windows[photo][window][i] != nonVector and test_target[photo][window][i] != nonVector):
        truePositive += 1
      elif(pred_target_windows[photo][window][i] == nonVector and test_target[photo][window][i] != nonVector):
        falseNegative += 1
      else:
        falsePositive += 1

print("True Positive: ", truePositive)
print("False Positive: ", falsePositive)
print("True Negative: ", trueNegative)
print("False Negative: ", falseNegative)

print("Acc: ", (truePositive+trueNegative)/(truePositive+falsePositive+trueNegative+falseNegative))


True Positive:  18721
False Positive:  1364
True Negative:  59608
False Negative:  2387
Acc:  0.9543006822612086
