### Initially just some playing round with Keras

Input: Image<br>
Initial output: center of hand<br>
Is anchors needed? So the prediction is an offset?<br>


In [1]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.models import Model
from keras import backend as K
from keras.layers import Input, Conv2D, Conv3D, Reshape, Dense, Flatten
from keras.initializers import TruncatedNormal
from keras.regularizers import l2
from keras import optimizers
from tqdm import tqdm_notebook as tqdm

Using TensorFlow backend.


In [2]:
from SqueezeDetHelpers import fire_layer

In [3]:
print(tf.__version__)

1.11.0


- Grid over image size
    - Grid nodes will be anchors
    - Net predicts: Probability of class at anchor, and offset from anchor.
        - In later versions, several offsets will be predicted at each offset.
- The net is fully convolutional, meaning the output must be feature maps.
    - Amount of output filters will then be confidence+x_offset+y_offset
    - filter size will be the size of the anchor grid

In [4]:
num_out = 3

In [5]:
DATA_DIR = r"./data"
ANNOTATION_FILE = r"annot"
annotation = os.path.join(DATA_DIR, ANNOTATION_FILE)
print(annotation)

./data/annot


In [6]:
EPSILON = 1e-16

In [7]:
BATCHSIZE = 10

In [8]:
HEIGHT = 320
WIDTH = 320
CHANNELS = 3

In [9]:
WEIGHT_DECAY = 0.001
CLASSES = 1

In [10]:
ANCHOR_HEIGHT = 78
ANCHOR_WIDTH = 78

In [11]:
num_anchor_nodes = ANCHOR_HEIGHT * ANCHOR_WIDTH

print(f"Out dim: {ANCHOR_HEIGHT}x{ANCHOR_WIDTH}")
print(f"Number of anchor nodes: {num_anchor_nodes}")

Out dim: 78x78
Number of anchor nodes: 6084


In [12]:
def set_anchors():
    
    #anchors = np.zeros((num_anchor_nodes, 2))
    anchors = np.zeros((ANCHOR_HEIGHT, ANCHOR_WIDTH, 2))
    print(f"Number of anchors: {num_anchor_nodes}")
    
    print(f"Anchor dimension: ({ANCHOR_HEIGHT}, {ANCHOR_WIDTH})")
    print(f"Anchor shape: {anchors.shape}")
    
    #xs = np.arange(PIXELS_BETWEEN_ANCHORS, WIDTH, PIXELS_BETWEEN_ANCHORS)
    #ys = np.arange(PIXELS_BETWEEN_ANCHORS, HEIGHT, PIXELS_BETWEEN_ANCHORS)
    
    x_start = WIDTH / (ANCHOR_WIDTH + 1)
    x_end = WIDTH - x_start
    y_start = HEIGHT / (ANCHOR_HEIGHT + 1)
    y_end = HEIGHT - y_start
    xs = np.linspace(x_start, x_end, num=ANCHOR_WIDTH)
    ys = np.linspace(y_start, y_end, num=ANCHOR_HEIGHT)
    
    """
    counter = 0
    for cx in range(len(xs)):
        for cy in range(len(ys)):
            anchors[counter] = [xs[cx], ys[cy]]
            counter += 1
    """
    
    for ix in range(ANCHOR_HEIGHT):
        for iy in range(ANCHOR_WIDTH):
            anchors[ix, iy] = (xs[ix], ys[iy])
    
    return anchors
    
anchs = set_anchors()

Number of anchors: 6084
Anchor dimension: (78, 78)
Anchor shape: (78, 78, 2)


In [13]:
input_layer = Input(shape=(HEIGHT, WIDTH, CHANNELS), name="input")
print(f"input: {input_layer.shape}")

conv1 = Conv2D(name='conv1', filters=32, kernel_size=(3, 3), strides=(2, 2), padding="SAME", activation='relu',
               #use_bias=True,
               #kernel_initializer=TruncatedNormal(stddev=0.001),
               #kernel_regularizer=l2(WEIGHT_DECAY))
               )(input_layer)
print(f"conv1: {conv1.shape}")

conv2 = Conv2D(name='conv2', filters=64, kernel_size=(3, 3), strides=(2, 2), activation=None, padding="SAME",
               #filters=len(ANCHORS),
               #use_bias=True,
               #kernel_initializer=TruncatedNormal(stddev=0.001),
               #kernel_regularizer=l2(WEIGHT_DECAY)
               )(conv1)
print(f"conv2: {conv2.shape}")

preds = Conv2D(name='preds', filters=num_out, kernel_size=(3, 3), strides=(1, 1), activation='sigmoid', padding="VALID",
               #use_bias=True,
               #kernel_initializer=TruncatedNormal(stddev=0.001),
               #kernel_regularizer=l2(WEIGHT_DECAY)
               )(conv2)
print(f"preds: {preds.shape}")

#pred_reshaped = Reshape((-1, 1))(preds)
#print(pred_reshaped.shape)
"""
flat = Flatten()(conv2)

dense1 = Dense(256,
               name='dense1',
               activation='relu')(flat)
print(dense1.shape)

out = Dense(2,
            name='out',
            activation='sigmoid')(flat)
print(out.shape)
"""

input: (?, 320, 320, 3)
conv1: (?, 160, 160, 32)
conv2: (?, 80, 80, 64)
preds: (?, 78, 78, 3)


"\nflat = Flatten()(conv2)\n\ndense1 = Dense(256,\n               name='dense1',\n               activation='relu')(flat)\nprint(dense1.shape)\n\nout = Dense(2,\n            name='out',\n            activation='sigmoid')(flat)\nprint(out.shape)\n"

cross-entropy: q * -log(p) + (1-q) * -log(1-p)

In [14]:
def loss(y_pred, y_true):
    # We are predicting a batchsize x anchorwidth x anchorheight x 3 output.
    c_predictions = y_pred[:, :, :, 0]
    c_labels = y_true[:, :, :, 0]
    
    y_pred_coords = y_pred[:, :, :, 1:]
    y_true_coords = y_true[:, :, :, 1:]
    
    pred_conf = K.sigmoid(c_predictions)
    
    c_loss = K.sum(
        (c_labels * (-K.log(pred_conf + EPSILON))) + (1-c_labels) * (-K.log(1-pred_conf + EPSILON))
                  ) / BATCHSIZE
    
    l2_loss = K.sum(
        K.pow(y_pred_coords - y_true_coords, 2)
                    )
    
    total_loss = c_loss + l2_loss
    
    return total_loss

y_true_test = np.zeros((BATCHSIZE, ANCHOR_HEIGHT, ANCHOR_WIDTH, 3))
y_pred_test = np.zeros((BATCHSIZE, ANCHOR_HEIGHT, ANCHOR_WIDTH, 3))
l = loss(y_pred_test, y_true_test)
print(l)

Tensor("add_3:0", shape=(), dtype=float64)


In [15]:
c_labels = 0
c_predictions = 1
c_loss = (c_labels * (-np.log(c_predictions + EPSILON))) + (1-c_labels) * (-np.log(1-c_predictions + EPSILON))
print(c_loss)

36.841361487904734


In [16]:
#model = Model(inputs=input_layer, outputs=preds)
#model.compile(loss='mse', optimizer='adam')
#model.compile(loss=loss, optimizer='adam')

In [17]:
def load_data():
    
    with open(annotation, 'r') as f:
        lines = f.readlines()
    
    gt = [(None, None)] * len(lines)
    
    for l in lines:
        obj = l.split(',')
        pic_id = int(obj[0].split('.')[0])
        x = int(obj[1])
        y = int(obj[2])
        
        gt[pic_id] = (x, y)

    images = []
    
    for fi in os.listdir(DATA_DIR):
        if not fi.endswith('jpg'):
            continue
        im = cv2.imread(os.path.join(DATA_DIR, fi))
        images.append(im)
    
    return gt, images

labels_old, images_old = load_data()

In [18]:
def closest_anchor_map(x, y, anchor_coords):
    """ Create a anchor_height x anchor_width x 3 map.
        First entry is 1 if the anchor point is closest to true point. Zero otherwise.
        Second is x offset.
        Third is y offset. """
    closest = 10000
    closest_x = None
    closest_y = None
    closest_x_offset = None
    closest_y_offset = None
    
    res = np.zeros((ANCHOR_HEIGHT, ANCHOR_WIDTH, 3))
    for ix in range(ANCHOR_HEIGHT):
        for iy in range(ANCHOR_WIDTH):
            p_x, p_y = anchor_coords[ix, iy]
            dist = np.sqrt( (x - p_x)**2 + (y - p_y)**2 )
            #res[ix, iy, 1:] = (x - p_x, y - p_y)
            if dist < closest:
                closest = dist
                closest_x = ix
                closest_y = iy
                closest_x_offset = x - p_x
                closest_y_offset = y - p_y
    
    #print(f"({closest_x}, {closest_y}) -> {anchor_coords[closest_x, closest_y]}")
    res[closest_x, closest_y, 0] = 1
    res[closest_x, closest_y, 1:] = (closest_x_offset, closest_y_offset)
    
    return res
        
test_map = closest_anchor_map(20, 30, anchs)
print(test_map.shape)
print(np.count_nonzero(test_map[:,:, 0]))
print(np.mean(test_map[:, :, 1]))
print(np.mean(test_map[:, :, 2]))
print(test_map[4, 6])

(78, 78, 3)
1
-4.1611531387577765e-05
0.00027047495401925865
[ 1.         -0.25316456  1.64556962]


In [19]:
def load_data_with_anchors():
    # load images
    # labels will be:
    #   anchor_height x anchor_width x 3
    #     the last 3 entries is: 1 if closest gridpoint to a point. x and y offsets to closest point.
    with open(annotation, 'r') as f:
        lines = f.readlines()
    
    gt = np.zeros((len(lines), ANCHOR_HEIGHT, ANCHOR_WIDTH, 3))
    
    for c, l in enumerate(tqdm(lines)):
        obj = l.split(',')
        pic_id = int(obj[0].split('.')[0])
        x = int(obj[1])
        y = int(obj[2])
        
        gt[c, :, :] = closest_anchor_map(x, y, anchs)
    
    images = []
    
    for fi in tqdm(os.listdir(DATA_DIR)):
        if not fi.endswith('jpg'):
            continue
        im = cv2.imread(os.path.join(DATA_DIR, fi))
        images.append(im)
    
    images = np.array(images)
    
    return gt, images
        
labels, images = load_data_with_anchors()
print(labels.shape)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1001), HTML(value='')))


(1000, 78, 78, 3)


In [20]:
#labels = np.array(labels)#.reshape(1, 100, 2)
#print(labels.shape)
#print(labels[0])

In [21]:
#for c, i in enumerate(images):
#    model.fit(i.reshape(1, 320, 320, 3), labels[c].reshape(1, 2), epochs=1, verbose=1)

In [22]:
#model.fit(images.reshape(-1, 320, 320, 3), labels.reshape(-1, 2), batch_size=10, epochs=10, verbose=1)

In [23]:
#def loss(y_true, y_pred):
#    return K.sqrt(K.sum(K.square(y_true - y_pred)))

In [24]:
model = Model(inputs=input_layer, outputs=preds)
opt = optimizers.Adam(lr=0.0001)
#opt =optimizers.SGD()
model.compile(loss=loss, optimizer=opt)

In [25]:
#model.fit(images.reshape(-1, 320, 320, 3), labels.reshape(-1, 2), batch_size=10, epochs=10, verbose=1)
model.fit(images.reshape(-1, 320, 320, 3),
          labels.reshape(-1, ANCHOR_HEIGHT, ANCHOR_WIDTH, 3),
          batch_size=64,
          epochs=10,
          verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

KeyboardInterrupt: 

In [None]:
res = model.predict(images[1].reshape(1, 320, 320, 3)).reshape(78, 78, 3)
print(np.argmax(labels[1, :, :, 0], axis=0))
print(res.shape)

In [None]:
indicies = np.where(res[:,:,0] == res[:,:,0].max())
print(indicies)
print(res[indicies[0][0], indicies[1][0], 0])
print(res[indicies[0][5], indicies[1][5], 0])

In [None]:
print(np.max(res[:, :, 0]))
print(np.count_nonzero(res[:, :, 0] > 0.9))
print(np.argmax(res[:, :, 0]))

In [None]:
def ohe_encode_gt(num_data, gt):
    gt_ohe = np.zeros((num_data, num_output))
    print(gt_ohe.shape)
    
    lowest_index = [0, 0]
    for c, i in enumerate(gt):
        
        np.sqrt(np.sum(np.square(  )))
        
ohe_encode_gt(len(labels), labels)