In [2]:
import tensorflow as tf
from tqdm import tqdm
import pickle
from tensorflow.keras.preprocessing.image import load_img,img_to_array,array_to_img
import cv2
import tensorflow.keras.backend as K
import numpy as np
from tensorflow.keras.losses import categorical_crossentropy,mean_squared_error

In [3]:
class Yolo_Reshape(tf.keras.layers.Layer):
    """Reshape a flat prediction vector into an activated YOLO grid.

    The flat input is read as three consecutive segments:
    ``S*S*C`` class scores, ``S*S*B`` box confidences and ``S*S*B*4`` box
    values, where ``S`` comes from ``target_shape[:2]`` and ``C = 4``,
    ``B = 2`` are fixed inside ``call``.  Each segment is reshaped onto the
    grid, activated (softmax for classes, sigmoid for the rest) and the three
    grids are concatenated on the last axis.

    Args:
        target_shape: iterable whose first two entries are the grid height
            and width.
        **kwargs: standard ``tf.keras.layers.Layer`` arguments (``name``,
            ``trainable``, ``dtype``, ...).
    """

    def __init__(self, target_shape, **kwargs):
        # Forward the base-layer kwargs: get_config() emits them, so without
        # this from_config(get_config()) fails with an unexpected keyword.
        super(Yolo_Reshape, self).__init__(**kwargs)
        self.target_shape = tuple(target_shape)

    def get_config(self):
        """Return the layer config including ``target_shape``."""
        config = super().get_config().copy()
        config.update({
            'target_shape': self.target_shape
        })
        return config

    def call(self, input):
        """Split, reshape and activate the flat prediction tensor."""
        # grid size (rows, cols)
        S = [self.target_shape[0], self.target_shape[1]]
        # classes
        C = 4
        # no of bounding boxes per grid cell
        B = 2

        # Segment boundaries inside the flat vector.
        idx1 = S[0] * S[1] * C
        idx2 = idx1 + S[0] * S[1] * B

        # Batch size is taken dynamically so the layer works for any batch.
        batch = (K.shape(input)[0],)

        # class probabilities
        class_probs = K.reshape(input[:, :idx1], batch + (S[0], S[1], C))
        class_probs = K.softmax(class_probs)

        # confidence
        confs = K.reshape(input[:, idx1:idx2], batch + (S[0], S[1], B))
        confs = K.sigmoid(confs)

        # boxes
        boxes = K.reshape(input[:, idx2:], batch + (S[0], S[1], B * 4))
        boxes = K.sigmoid(boxes)

        # Last-axis layout: [classes | confidences | box values]
        outputs = K.concatenate([class_probs, confs, boxes])
        return outputs

### Loss Function

In [None]:

def xywh2minmax(xy, wh):
    """Convert centre/size boxes to corner form.

    Returns a ``(mins, maxes)`` pair: the centre minus and plus half the size.
    """
    return xy - wh / 2, xy + wh / 2


def iou(pred_mins, pred_maxes, true_mins, true_maxes):
    """Intersection-over-union of boxes given as min/max corners.

    The last axis of every argument holds the two corner coordinates; the
    result drops that axis.  No guard is applied against a zero union.
    """
    def _area(wh):
        # Product of the two extents stored on the last axis.
        return wh[..., 0] * wh[..., 1]

    # Overlap rectangle, with negative extents clipped to zero.
    overlap_wh = K.maximum(
        K.minimum(pred_maxes, true_maxes) - K.maximum(pred_mins, true_mins), 0.)
    overlap = _area(overlap_wh)

    pred_area = _area(pred_maxes - pred_mins)
    true_area = _area(true_maxes - true_mins)
    union = pred_area + true_area - overlap

    return overlap / union


def yolo_head(feats):
    """Convert cell-relative box values into 448-pixel image coordinates.

    ``feats`` is read with axes 1 and 2 as the grid dimensions and with the
    last axis starting with x, y, w, h.  The grid offset tensor built here is
    5-D (``[1, rows, cols, 1, 2]``), so ``feats`` is expected to broadcast
    against that shape (as the 5-D tensors passed by ``yolo_loss`` do).

    Returns:
        ``(box_xy, box_wh)``: centres as ``(xy + cell offset) / grid * 448``
        and sizes as ``wh * 448``.
    """
    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    # [0..rows-1] repeated cols times.
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    # Each width index repeated rows times: 0,0,...,1,1,...
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    # Pair the two index vectors; for a square grid the entry that lands at
    # grid position (row i, col j) after the reshape is [j, i], i.e. the
    # x offset followed by the y offset.
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    # [rows, cols] made broadcastable; note x is divided by rows and y by
    # cols, which is only equivalent for square grids.
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # 448 is the hard-coded image side length used for scaling.
    box_xy = (feats[..., :2] + conv_index) / conv_dims * 448
    box_wh = feats[..., 2:4] * 448

    return box_xy, box_wh


def yolo_loss(y_true, y_pred):
    """YOLO-style sum-of-squares loss for a 7x7 grid, 4 classes, 2 boxes.

    Channel layout expected on the last axis of both tensors (this is the
    layout written by ``read_label`` and produced by ``Yolo_Reshape``):

        0:4    class scores / one-hot class
        4:6    box confidences (``y_true[..., 4]`` is the objectness flag)
        6:10   first box  (x, y, w, h)
        10:14  second box (x, y, w, h)

    The ground-truth box is taken from channels 6:10.  The previous slice
    5:9 started on the duplicated objectness flag, so the regression target
    was ``[1, x, y, w]`` instead of ``[x, y, w, h]``.

    Args:
        y_true: label tensor of shape (batch, 7, 7, 14).
        y_pred: prediction tensor of shape (batch, 7, 7, 14).

    Returns:
        Scalar tensor: confidence + class + box loss, summed over the batch.
    """
    # The data pipeline emits float64 labels; align with the prediction dtype
    # so the arithmetic below never mixes precisions.
    y_true = K.cast(y_true, K.dtype(y_pred))

    label_class = y_true[..., :4]
    response_mask = K.expand_dims(y_true[..., 4])      # (batch, 7, 7, 1)
    label_box = y_true[..., 6:10]                      # (batch, 7, 7, 4)

    predict_class = y_pred[..., :4]
    predict_trust = y_pred[..., 4:6]                   # (batch, 7, 7, 2)
    predict_box = y_pred[..., 6:]                      # (batch, 7, 7, 8)

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    # Pixel-space centres and sizes; computed once and reused for both the
    # IoU matching and the coordinate loss.
    label_xy, label_wh = yolo_head(_label_box)         # (batch, 7, 7, 1, 2)
    predict_xy, predict_wh = yolo_head(_predict_box)   # (batch, 7, 7, 2, 2)

    # Extra axes let every predicted box be compared with the label box.
    label_xy_min, label_xy_max = xywh2minmax(
        K.expand_dims(label_xy, 3), K.expand_dims(label_wh, 3))
    predict_xy_min, predict_xy_max = xywh2minmax(
        K.expand_dims(predict_xy, 4), K.expand_dims(predict_wh, 4))

    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min, label_xy_max)
    best_ious = K.max(iou_scores, axis=4)               # (batch, 7, 7, 2)
    best_box = K.max(best_ious, axis=3, keepdims=True)  # (batch, 7, 7, 1)

    # 1 for the predictor with the highest IoU in each cell, else 0.
    box_mask = K.cast(best_ious >= best_box, K.dtype(best_ious))

    # Confidence: push the responsible box towards 1, all others towards 0
    # (the latter down-weighted by 0.5).
    no_object_loss = 0.5 * (1 - box_mask * response_mask) * K.square(0 - predict_trust)
    object_loss = box_mask * response_mask * K.square(1 - predict_trust)
    confidence_loss = K.sum(no_object_loss + object_loss)

    # Classification: only cells that contain an object contribute.
    class_loss = K.sum(response_mask * K.square(label_class - predict_class))

    # Coordinates: only the responsible box of object cells, weighted by 5;
    # sizes are compared through their square roots.
    box_mask = K.expand_dims(box_mask)                  # (batch, 7, 7, 2, 1)
    response_mask = K.expand_dims(response_mask)        # (batch, 7, 7, 1, 1)

    box_loss = 5 * box_mask * response_mask * K.square((label_xy - predict_xy) / 448)
    box_loss += 5 * box_mask * response_mask * K.square((K.sqrt(label_wh) - K.sqrt(predict_wh)) / 448)
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss

    return loss

### Model Architecture

In [4]:
input_height = 448
input_width = 448
cell_size = 7
num_classes = 4
boxes_per_cell = 2
# Channels per grid cell: class scores + (confidence + 4 box values) per box.
cell_depth = num_classes + 5 * boxes_per_cell

base_model = tf.keras.applications.VGG19(include_top=False, weights='imagenet', input_shape=(input_height, input_width, 3))

# Freeze the whole backbone; setting `trainable` on the model applies to all
# of its layers, so no per-layer loop is needed.
base_model.trainable = False

x = base_model.output
# Global pooling already yields a (batch, channels) tensor, so no Flatten is
# required before the dense head.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512,activation='relu')(x)
# Linear output on purpose: Yolo_Reshape applies softmax/sigmoid itself.
# A sigmoid here would be squashed a second time, pinning confidences and
# box values to roughly (0.5, 0.73).
x = tf.keras.layers.Dense(cell_size * cell_size * cell_depth)(x)
output = Yolo_Reshape((cell_size, cell_size, cell_depth))(x)
model = tf.keras.Model(inputs=base_model.input, outputs=output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


### Model Summary

In [5]:
# Print the layer-by-layer structure and parameter counts of the assembled model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 448, 448, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 448, 448, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 448, 448, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 224, 224, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 224, 224, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 224, 224, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 112, 112, 128)     0     

### Data Pipeline

In [None]:
# Load the pickled train/test file lists used by load_data().
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on files you created yourself.
with open('/content/drive/MyDrive/dataset/train_files.pkl','rb') as f:
    train_file = pickle.load(f)
with open('/content/drive/MyDrive/dataset/test_files.pkl','rb') as f:
    test_file = pickle.load(f)

# Directories that load_data() prefixes to each file stem
# ('<stem>.jpg' for images, '<stem>.txt' for labels).
image_path = '/content/drive/MyDrive/dataset/images/'
label_path = '/content/drive/MyDrive/dataset/labels/'
# Grid size, number of classes and boxes per cell used when building the
# (S, S, C + 5 * B) label tensor in read_label().
S = 7
C = 4
B = 2

def load_data(files):
  """Collect image/label path pairs for every sample that exists on disk.

  For each stem in ``files`` the paths ``image_path + stem + '.jpg'`` and
  ``label_path + stem + '.txt'`` are built.  Samples with a missing image or
  label are reported with ``'no file'`` and skipped.

  Existence is checked on the file system rather than by opening the label
  and decoding the image, which leaked one file handle per sample and made
  this scan very slow.

  Returns:
      ``(x, y)``: parallel lists of image paths and label paths.
  """
  # Local import: `os` is not among the notebook's top-level imports.
  import os

  x = []
  y = []
  for file in tqdm(files,total=len(files)):
    label_file_path = label_path+file+'.txt'
    image_file_path = image_path+file+'.jpg'
    if not (os.path.isfile(label_file_path) and os.path.isfile(image_file_path)):
      print('no file')
      continue
    x.append(image_file_path)
    y.append(label_file_path)
  return x,y

def read_image(path):
  """Load the image at ``path`` resized to the model input size, scaled by 1/255."""
  pixels = img_to_array(load_img(path,target_size=(input_height,input_width)))
  return pixels/255

def get_boxes(label_file):
    """Parse ``class x y w h`` lines from an open label file.

    Fields may be separated by any run of whitespace, and blank lines (for
    example a trailing newline at the end of the file) are skipped instead of
    raising on unpacking.

    Args:
        label_file: object with a ``readlines()`` method yielding text lines.

    Returns:
        List of ``[cls, x, y, w, h]`` with ``cls`` as int and the rest floats.

    Raises:
        ValueError: if a non-blank line does not have exactly five fields or
            a field cannot be converted.
    """
    boxes = []
    for line in label_file.readlines():
        fields = line.split()
        if not fields:
            continue
        cls, x, y, w, h = fields
        boxes.append([int(cls), float(x), float(y), float(w), float(h)])
    return boxes

def read_label(path):
    """Build the (S, S, C + 5 * B) label tensor for one label file.

    Per-cell channel layout:
        0:C     one-hot class
        4, 5    objectness flags (both set to 1 for an object cell)
        6:10    x_cell, y_cell, w, h
        10:     the same box repeated

    ``x_cell``/``y_cell`` are the centre offsets inside the cell; ``w``/``h``
    are stored as read from the file.  Only the first box that falls into a
    cell is kept.

    Args:
        path: path of a text file readable by ``get_boxes``.

    Returns:
        ``numpy.ndarray`` of shape (S, S, C + 5 * B).
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(path,'r',encoding='utf-8') as label_file:
        boxes = get_boxes(label_file)

    label_matrix = np.zeros((S, S, C + 5 * B))

    for c,x,y,w,h in boxes:
        # Clamp so a centre lying exactly on the right/bottom edge
        # (x or y == 1.0) maps to the last cell instead of index S.
        i = min(int(S * y), S - 1)
        j = min(int(S * x), S - 1)
        x_cell, y_cell = S * x - j, S * y - i

        if label_matrix[i, j, 4] == 0:
            # Set that there exists an object
            label_matrix[i, j, 4] = 1
            label_matrix[i, j, 5] = 1

            # Box coordinates
            label_matrix[i, j, 6:10] = x_cell,y_cell,w,h   #x,y,w,h
            label_matrix[i, j, 10:] = x_cell,y_cell,w,h   #x,y,w,h
            # Set one hot encoding for class_label
            label_matrix[i, j, c] = 1

    return label_matrix

def preprocess(x, y):
  """Map an (image path, label path) tensor pair to (image, label) tensors.

  The Python loaders run through ``tf.numpy_function``; static shapes are
  re-attached afterwards because that op does not carry them.
  """
  def _load_pair(image_bytes, label_bytes):
    # Paths arrive as bytes from TensorFlow.
    image_file = image_bytes.decode()
    label_file = label_bytes.decode()

    image = read_image(image_file)
    label = read_label(label_file)

    return image, label.astype('float64')

  images, masks = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.float64])

  images.set_shape([448, 448, 3])
  masks.set_shape([7, 7, 14])

  return images, masks

def tf_dataset(x, y, batch=70):
  """Build a batched, prefetching ``tf.data`` pipeline from path lists."""
  return (
      tf.data.Dataset.from_tensor_slices((x, y))
      .map(preprocess)
      .batch(batch)
      .prefetch(1)
  )

In [None]:
# Validation set: the first 30 entries of the test file list.
val_images, val_masks = load_data(test_file[:30])
val_dataset = tf_dataset(val_images, val_masks)

# Training set: entries 100..249 of the train file list (150 samples).
train_images, train_masks = load_data(train_file[100:250])
train_dataset = tf_dataset(train_images, train_masks)

100%|██████████| 30/30 [00:00<00:00, 98.54it/s]
100%|██████████| 150/150 [00:38<00:00,  3.87it/s]


### Training

In [None]:
# filepath = os.path.join('drive','MyDrive','yolo_checkpoint.h5')
# checkpoint = tf.keras.callbacks.ModelCheckpoint(
#     filepath,
#     save_weights_only=True,
#     save_freq="epoch",
# )

# Stage 1: train with plain SGD.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(loss=yolo_loss,optimizer = optimizer)
# No batch_size here: train_dataset is already batched by tf_dataset(), and
# Keras documents that batch_size must not be given for dataset inputs.
H = model.fit(train_dataset, epochs=100, validation_data=val_dataset)
model.save_weights('/content/drive/MyDrive/Deep Sort/yoloweights.h5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
# Stage 2: recompile with Adam and continue training the same model.
model.compile(loss=yolo_loss,optimizer = 'adam')
# No batch_size here: train_dataset is already batched by tf_dataset(), and
# Keras documents that batch_size must not be given for dataset inputs.
H = model.fit(train_dataset, epochs=101, validation_data=val_dataset)
model.save_weights('/content/drive/MyDrive/yoloweights2.h5')

Epoch 1/101
Epoch 2/101
Epoch 3/101
Epoch 4/101
Epoch 5/101
Epoch 6/101
Epoch 7/101
Epoch 8/101
Epoch 9/101
Epoch 10/101
Epoch 11/101
Epoch 12/101
Epoch 13/101
Epoch 14/101
Epoch 15/101
Epoch 16/101
Epoch 17/101
Epoch 18/101
Epoch 19/101
Epoch 20/101
Epoch 21/101
Epoch 22/101
Epoch 23/101
Epoch 24/101
Epoch 25/101
Epoch 26/101
Epoch 27/101
Epoch 28/101
Epoch 29/101
Epoch 30/101
Epoch 31/101
Epoch 32/101
Epoch 33/101
Epoch 34/101
Epoch 35/101
Epoch 36/101
Epoch 37/101
Epoch 38/101
Epoch 39/101
Epoch 40/101
Epoch 41/101
Epoch 42/101
Epoch 43/101
Epoch 44/101
Epoch 45/101
Epoch 46/101
Epoch 47/101
Epoch 48/101
Epoch 49/101
Epoch 50/101
Epoch 51/101
Epoch 52/101
Epoch 53/101
Epoch 54/101
Epoch 55/101
Epoch 56/101
Epoch 57/101
Epoch 58/101
Epoch 59/101
Epoch 60/101
Epoch 61/101
Epoch 62/101
Epoch 63/101
Epoch 64/101
Epoch 65/101
Epoch 66/101
Epoch 67/101
Epoch 68/101
Epoch 69/101
Epoch 70/101
Epoch 71/101
Epoch 72/101
Epoch 73/101
Epoch 74/101
Epoch 75/101
Epoch 76/101
Epoch 77/101
Epoch 78

In [None]:
# Mark every layer of the model as trainable.
# NOTE(review): Keras only picks up `trainable` changes at the next
# model.compile() call.
for layer in model.layers:
  layer.trainable = True

In [None]:
def preprocess_image(image_path):
  """Load one image at 448x448, scale it by 1/255 and add a leading batch axis."""
  pixels = img_to_array(load_img(image_path,target_size=(448,448)))
  return np.expand_dims(pixels/255,0)

### Decoding Predictions

In [None]:
def decode_pred(y_matrix,thresh=.5):
    """Extract detections from the first sample of a prediction batch.

    Per-cell channel layout: 0:4 class scores, 4:6 confidences of the two
    boxes, 6:10 first box, 10:14 second box.

    For every grid cell the box with the higher confidence is considered, and
    the cell yields a detection when that confidence exceeds ``thresh``.
    (Previously only the first box's confidence and coordinates were read, so
    detections carried by the second predictor were dropped.)  The grid size
    is taken from the array instead of being fixed at 7x7.

    Args:
        y_matrix: array-like of shape (batch, rows, cols, 14); only index 0
            of the batch axis is decoded.
        thresh: confidence a box must exceed to be reported.

    Returns:
        ``(classes, bboxes)``: parallel lists, in row-major cell order, of the
        arg-max class index and the 4 raw box values of each detection.
    """
    num_classes = 4
    boxes_per_cell = 2
    conf_start = num_classes
    box_start = conf_start + boxes_per_cell

    grid = np.asarray(y_matrix)[0]
    rows, cols = grid.shape[:2]

    classes = []
    bboxes = []
    for i in range(rows):
        for j in range(cols):
            cell = grid[i, j]
            confs = cell[conf_start:box_start]
            # argmax returns the first maximum, so ties keep the first box.
            best = int(np.argmax(confs))
            if confs[best] > thresh:
                classes.append(np.argmax(cell[:num_classes]))
                start = box_start + 4 * best
                bboxes.append(cell[start:start + 4])
    return classes,bboxes