# From images to coordinates

### Initial imports

In [1]:
import numpy as np
import cv2
from tqdm import tqdm
from glob import glob
from sklearn.model_selection import train_test_split

import keras
from keras.layers import Input, Conv2D, Flatten, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping
import keras.backend as K

### Loading images

In [2]:
# Dataset-dependent parameters
data_path = "data/1ball/"
num_balls = 1
num_systems = 10000
color = False
pix = 32


def load_frames(pattern, color=False):
    """Load every image matching `pattern` into one float32 array.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the image files.
    color : bool
        If True the images are read with 3 channels, otherwise as
        single-channel grayscale.

    Returns
    -------
    np.ndarray
        Stacked images divided by 255, one entry per file, in sorted
        filename order.

    Raises
    ------
    IOError
        If OpenCV cannot decode one of the matched files.
    """
    read_flag = cv2.IMREAD_COLOR if color else cv2.IMREAD_GRAYSCALE
    frames = []
    # glob() returns paths in arbitrary order; sorting makes the order
    # deterministic so that frame n of "image1_*" lines up with frame n of
    # "image2_*" (both lists share the same filename suffixes).
    for path in tqdm(sorted(glob(pattern))):
        im = cv2.imread(path, read_flag)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError("Could not read image: " + path)
        frames.append(keras.utils.img_to_array(im))
    return np.array(frames, dtype="float32") / 255


# First and second frame of every system
X1 = load_frames(data_path + "Input/image1_*.png", color)
X2 = load_frames(data_path + "Input/image2_*.png", color)

assert len(X1) == len(X2), "every first frame needs a matching second frame"

100%|██████████| 10000/10000 [00:01<00:00, 5964.74it/s]
100%|██████████| 10000/10000 [00:01<00:00, 5319.44it/s]


### Coordinates generation and data split

In [3]:
def nonwhite_centre(frames):
    """Return the mean row and column index of the non-white region of each frame.

    A pixel counts as non-white when any of its channels differs from 1.
    For every frame the result is the mean of the *distinct* row indices
    (y) and of the *distinct* column indices (x) that contain at least one
    non-white pixel. A frame without any non-white pixel yields NaN.

    Parameters
    ----------
    frames : np.ndarray
        Array indexed as (frame, row, column, channel).

    Returns
    -------
    (np.ndarray, np.ndarray)
        Per-frame mean row index (y) and mean column index (x).
    """
    # Reducing over the channel axis also makes this work for colour images,
    # where a per-pixel `!= 1` comparison has no single truth value.
    occupied = np.any(frames != 1, axis=-1)
    rows_hit = occupied.any(axis=2)   # (frame, row): row has a non-white pixel
    cols_hit = occupied.any(axis=1)   # (frame, column): column has a non-white pixel
    y = (rows_hit * np.arange(rows_hit.shape[1])).sum(axis=1) / rows_hit.sum(axis=1)
    x = (cols_hit * np.arange(cols_hit.shape[1])).sum(axis=1) / cols_hit.sum(axis=1)
    return y, x


if len(X1) < num_systems or len(X2) < num_systems:
    raise ValueError("num_systems is larger than the number of loaded images")

# Only the first `num_systems` frames and the top-left pix x pix window are used
y_t0, x_t0 = nonwhite_centre(X1[:num_systems, :pix, :pix])
y_t1, x_t1 = nonwhite_centre(X2[:num_systems, :pix, :pix])

# One row per system: position at t1 followed by the displacement t0 -> t1.
# NOTE(review): the position is stored as (y, x) but the displacement as
# (x, y) - confirm this ordering is what downstream code expects.
# The assignment only fits when num_balls == 1; the single-region centre
# above cannot separate several balls.
coordinates = np.zeros((num_systems, 4*num_balls))
coordinates[:] = np.stack((y_t1, x_t1, x_t1 - x_t0, y_t1 - y_t0), axis=1)

# Positions are scaled by the largest pixel index
coordinates[:, :2*num_balls] /= pix - 1
# presumably 4 is the largest per-frame displacement in pixels - TODO confirm
coordinates[:, 2*num_balls:] /= 4

# Concatenate both frames along the channel axis
X = np.concatenate((X1, X2), axis=3)

# Train-val-test split (80-10-10); a fixed seed keeps the split reproducible
split_seed = 42
x_train, x_test, coordinates_train, coordinates_test = train_test_split(
    X, coordinates, test_size=0.2, random_state=split_seed)
x_val, x_test, coordinates_val, coordinates_test = train_test_split(
    x_test, coordinates_test, test_size=0.5, random_state=split_seed)

100%|██████████| 10000/10000 [00:38<00:00, 256.90it/s]


### Model definition and training

In [4]:
# Reset Keras' global state BEFORE building the model, so that re-running this
# cell releases the previously built model instead of clearing the state the
# freshly created model was registered in.
K.clear_session()

# Input layer: both frames stacked along the channel axis
# (2 channels for grayscale, 2 + 4 = 6 channels for colour)
inputs = Input(shape=(pix, pix, 2 + 4*color))

# Hidden layers
# Each 3x3 convolution without padding shrinks the feature map by 2 pixels
x = Conv2D(1, (3, 3), activation="relu")(inputs)
x = Conv2D(1, (3, 3), activation="relu")(x)
# Kernel spanning the whole remaining (pix - 4) x (pix - 4) map, so the output
# is 1 x 1 x 6; for pix = 32 this is the original 28 x 28 kernel
x = Conv2D(6, (pix - 4, pix - 4), activation="relu")(x)
x = Flatten()(x)
# NOTE(review): no activation is set here, so this layer and the output layer
# together form a single linear map
x = Dense(12*num_balls)(x)

# Output layer: 4 values per ball
outputs = Dense(4*num_balls)(x)

model = Model(inputs, outputs, name="ImgToCoordinates")
model.summary()

Model: "ImgToCoordinates"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 2)]       0         
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 1)         19        
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 1)         10        
                                                                 
 conv2d_2 (Conv2D)           (None, 1, 1, 6)           4710      
                                                                 
 flatten (Flatten)           (None, 6)                 0         
                                                                 
 dense (Dense)               (None, 12)                84        
                                                                 
 dense_1 (Dense)             (None, 4)            

In [5]:
# Mean absolute error as training objective, Adam as optimizer
model.compile(optimizer='adam', loss='mae')

# Stop once val_loss has not improved by at least 1e-4 for 10 epochs in a row
# and roll the model back to the weights of the best epoch
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
)

model.fit(
    x_train,
    coordinates_train,
    batch_size=16,
    epochs=100,
    shuffle=True,
    validation_data=(x_val, coordinates_val),
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


<keras.callbacks.History at 0x1b4de185fd0>

### Model results

In [6]:
predictions = model.predict(x_test)

# Print the first five test samples next to their ground-truth labels
for sample in range(5):
    predicted = predictions[sample]
    actual = coordinates_test[sample]
    print(f"{sample}\nPrediction: {predicted}\nActual coordinates: {actual}")

0
Prediction: [6.7942172e-01 5.4686618e-01 5.1492453e-04 2.2205877e-01]
Actual coordinates: [0.67741935 0.5483871  0.5        0.25      ]
1
Prediction: [ 0.41604063  0.32963115 -0.00115312 -0.11232381]
Actual coordinates: [0.41935484 0.35483871 0.         0.5       ]
2
Prediction: [ 0.22514647  0.7461756   0.0020702  -0.31634507]
Actual coordinates: [ 0.22580645  0.74193548  0.         -0.5       ]
3
Prediction: [ 0.3331149   0.38561282 -0.00071788 -0.20929793]
Actual coordinates: [0.32258065 0.38709677 0.25       0.25      ]
4
Prediction: [ 3.9973035e-01  5.4567945e-01  5.1573664e-04 -1.1770392e-01]
Actual coordinates: [ 0.38709677  0.5483871  -0.25        0.25      ]


The predicted position coordinates are close to the actual values, but the predicted velocity components are not: they differ substantially from the actual values in all five samples shown above.