In [2]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
from PIL import ImageFont, ImageDraw, Image
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor
from tensorflow.keras.models import load_model
from yad2k.models.keras_yolo import yolo_head
from yad2k.utils.utils import scale_boxes, read_classes, read_anchors, preprocess_image, draw_boxes, scale_boxes
# from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

%matplotlib inline

In [3]:
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=0.6):
    
    x = 10
    y = tf.constant(100)
    box_scores = box_class_probs * box_confidence
    box_classes = tf.math.argmax(box_scores, axis=-1)
    box_class_scores = tf.math.reduce_max(box_scores, axis=-1)
    filtering_mask = (box_class_scores > threshold)
    
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes,filtering_mask)
    return scores, boxes, classes

In [4]:
# !export TF_CPP_MIN_LOG_LEVEL=2

In [5]:
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [6]:
tf.random.set_seed(10)
box_confidence = tf.random.normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
# box_confidence[:, :, 2, 0].shape
boxes = tf.random.normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
box_class_probs = tf.random.normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)

scores, boxes, classes = yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=0.5)
classes.shape


2022-08-28 08:47:22.910689: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorShape([1789])

In [7]:
-6.796674 * 3.2204947

-21.888652594627803

In [8]:
def iou(box1, box2):
    
    (box1_x1, box1_y1, box1_x2, box1_y2) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2) = box2
    
    xi1 = max(box1_x1, box2_x1)
    yi1 = max(box1_y1, box2_y1)
    xi2 = min(box1_x2, box2_x2)
    yi2 = min(box1_y2, box2_y2)
    
    inter_width = max(0, yi2-yi1)
    inter_height = max(0, xi2-xi1)
    inter_area = inter_width * inter_height
    
    box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1)
    box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1)
    union_area = box1_area + box2_area - inter_area
    
    iou = inter_area / union_area
    return iou

In [9]:
box1 = (2, 1, 4, 3)
box2 = (1, 2, 3, 4)

print("iou for intersecting boxes = " + str(iou(box1, box2)))

iou for intersecting boxes = 0.14285714285714285


In [10]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    
    
    max_boxes_tensor = tf.Variable(max_boxes, dtype='int32')
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes, iou_threshold)
    scores = tf.gather(scores, nms_indices)
    boxes = tf.gather(boxes, nms_indices)
    classes = tf.gather(classes, nms_indices)
    
    return scores, boxes, classes

In [11]:
tf.random.set_seed(10)
scores = tf.random.normal([54,], mean=1, stddev=4, seed = 1)
# print(scores)
boxes = tf.random.normal([54, 4], mean=1, stddev=4, seed = 1)
classes = tf.random.normal([54,], mean=1, stddev=4, seed = 1)
scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)

# print(max_boxes_tensor)
print("scores = " + str(scores))
print("boxes = " + str(boxes))
# print("classes[2] = " + str(classes[2].numpy()))
# print("scores.shape = " + str(scores.numpy().shape))
# print("boxes.shape = " + str(boxes.numpy().shape))
# print("classes.shape = " + str(classes.numpy().shape))

scores = tf.Tensor(
[10.045049   8.779423   8.147684   7.2734113  6.5295115  4.758901
  4.75377    4.2408733  4.213396   3.9896295], shape=(10,), dtype=float32)
boxes = tf.Tensor(
[[ 6.8292465   4.5420675   6.376947    0.08290124]
 [ 2.3163939   3.367772   -4.257925   -0.6737312 ]
 [ 6.0797963   3.743308    1.3914018  -0.34089637]
 [ 1.8424486   1.5118144   0.67746973  0.02823198]
 [-5.2856283   6.826084   12.549065    7.8252673 ]
 [-0.5006654   1.6858735  -4.3660636   1.7608016 ]
 [-0.4836073  -1.5442374  -0.92690074  0.40863764]
 [ 3.4293325  -1.454927    5.490014   -0.57602644]
 [ 1.5415473   2.496262    6.9025564  -2.0676918 ]
 [ 8.448372    1.1546985   4.7752748   6.2906938 ]], shape=(10, 4), dtype=float32)


In [12]:
def yolo_boxes_to_corners(box_xy, box_wh):
    box_mins = box_xy - (box_wh / 2.)
    box_maxs = box_xy + (box_wh / 2.)
    return tf.keras.backend.concatenate([
        box_mins[..., 1:2], #y_min
        box_mins[..., 0:1], #x_min
        box_maxs[..., 1:2], #y_max
        box_mins[..., 0:1], #y_max
    ])

In [13]:
# c = np.array([5.3776083 , 0.77948886])
# b = np.array([5.3776083 , 0.77948886])
# a = yolo_boxes_to_corners(c, b)
# a


In [14]:
def yolo_eval(yolo_outputs, image_shape=(720, 1280), max_boxes=10, score_threshold=0.6, iou_threshold=.5):
    
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    scores, boxes, classes = yolo_filter_boxes(boxes, box_confidence, box_class_probs, score_threshold)
    boxes = scale_boxes(boxes, image_shape)
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold)
    
    return scores, boxes, classes

In [15]:
tf.random.set_seed(10)
yolo_outputs = (tf.random.normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
                tf.random.normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
                tf.random.normal([19, 19, 5, 1], mean=1, stddev=4, seed = 1),
                tf.random.normal([19, 19, 5, 80], mean=1, stddev=4, seed = 1))
scores, boxes, classes = yolo_eval(yolo_outputs)
print("scores[2] = " + str(scores[2].numpy()))
print("boxes[2] = " + str(boxes[2].numpy()))
print("classes[2] = " + str(classes[2].numpy()))
print("scores.shape = " + str(scores.numpy().shape))
print("boxes.shape = " + str(boxes.numpy().shape))
print("classes.shape = " + str(classes.numpy().shape))

scores[2] = 171.60194
boxes[2] = [-1240.3483 -3212.5881  -645.78   -3212.5881]
classes[2] = 16
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


In [16]:
from keras import backend as K
sess = K.get_session()

In [17]:
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
image_shape = (720., 1280.) 


In [18]:
yolo_model = load_model("model_data/yolo.h5", compile=False)

In [19]:
yolo_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d_1 (Conv2D)              (None, None, None,   864         ['input_1[0][0]']                
                                32)                                                               
                                                                                                  
 batch_normalization_1 (BatchNo  (None, None, None,   128        ['conv2d_1[0][0]']               
 rmalization)                   32)                                                         

                                                                                                  
 batch_normalization_8 (BatchNo  (None, None, None,   256        ['conv2d_8[0][0]']               
 rmalization)                   64)                                                               
                                                                                                  
 leaky_re_lu_8 (LeakyReLU)      (None, None, None,   0           ['batch_normalization_8[0][0]']  
                                64)                                                               
                                                                                                  
 conv2d_9 (Conv2D)              (None, None, None,   73728       ['leaky_re_lu_8[0][0]']          
                                128)                                                              
                                                                                                  
 batch_nor

                                256)                                                              
                                                                                                  
 batch_normalization_16 (BatchN  (None, None, None,   1024       ['conv2d_16[0][0]']              
 ormalization)                  256)                                                              
                                                                                                  
 leaky_re_lu_16 (LeakyReLU)     (None, None, None,   0           ['batch_normalization_16[0][0]'] 
                                256)                                                              
                                                                                                  
 add_6 (Add)                    (None, None, None,   0           ['add_5[0][0]',                  
                                256)                              'leaky_re_lu_16[0][0]']         
          

 conv2d_24 (Conv2D)             (None, None, None,   294912      ['leaky_re_lu_23[0][0]']         
                                256)                                                              
                                                                                                  
 batch_normalization_24 (BatchN  (None, None, None,   1024       ['conv2d_24[0][0]']              
 ormalization)                  256)                                                              
                                                                                                  
 leaky_re_lu_24 (LeakyReLU)     (None, None, None,   0           ['batch_normalization_24[0][0]'] 
                                256)                                                              
                                                                                                  
 add_10 (Add)                   (None, None, None,   0           ['add_9[0][0]',                  
          

                                                                                                  
 add_13 (Add)                   (None, None, None,   0           ['add_12[0][0]',                 
                                512)                              'leaky_re_lu_31[0][0]']         
                                                                                                  
 conv2d_32 (Conv2D)             (None, None, None,   131072      ['add_13[0][0]']                 
                                256)                                                              
                                                                                                  
 batch_normalization_32 (BatchN  (None, None, None,   1024       ['conv2d_32[0][0]']              
 ormalization)                  256)                                                              
                                                                                                  
 leaky_re_

                                512)                                                              
                                                                                                  
 add_17 (Add)                   (None, None, None,   0           ['add_16[0][0]',                 
                                512)                              'leaky_re_lu_39[0][0]']         
                                                                                                  
 conv2d_40 (Conv2D)             (None, None, None,   131072      ['add_17[0][0]']                 
                                256)                                                              
                                                                                                  
 batch_normalization_40 (BatchN  (None, None, None,   1024       ['conv2d_40[0][0]']              
 ormalization)                  256)                                                              
          

 batch_normalization_47 (BatchN  (None, None, None,   2048       ['conv2d_47[0][0]']              
 ormalization)                  512)                                                              
                                                                                                  
 leaky_re_lu_47 (LeakyReLU)     (None, None, None,   0           ['batch_normalization_47[0][0]'] 
                                512)                                                              
                                                                                                  
 conv2d_48 (Conv2D)             (None, None, None,   4718592     ['leaky_re_lu_47[0][0]']         
                                1024)                                                             
                                                                                                  
 batch_normalization_48 (BatchN  (None, None, None,   4096       ['conv2d_48[0][0]']              
 ormalizat

                                                                                                  
 leaky_re_lu_55 (LeakyReLU)     (None, None, None,   0           ['batch_normalization_55[0][0]'] 
                                512)                                                              
                                                                                                  
 conv2d_56 (Conv2D)             (None, None, None,   4718592     ['leaky_re_lu_55[0][0]']         
                                1024)                                                             
                                                                                                  
 batch_normalization_56 (BatchN  (None, None, None,   4096       ['conv2d_56[0][0]']              
 ormalization)                  1024)                                                             
                                                                                                  
 leaky_re_

                                128)                                                              
                                                                                                  
 batch_normalization_66 (BatchN  (None, None, None,   512        ['conv2d_68[0][0]']              
 ormalization)                  128)                                                              
                                                                                                  
 leaky_re_lu_66 (LeakyReLU)     (None, None, None,   0           ['batch_normalization_66[0][0]'] 
                                128)                                                              
                                                                                                  
 up_sampling2d_2 (UpSampling2D)  (None, None, None,   0          ['leaky_re_lu_66[0][0]']         
                                128)                                                              
          

 leaky_re_lu_72 (LeakyReLU)     (None, None, None,   0           ['batch_normalization_72[0][0]'] 
                                256)                                                              
                                                                                                  
 conv2d_59 (Conv2D)             (None, None, None,   261375      ['leaky_re_lu_58[0][0]']         
                                255)                                                              
                                                                                                  
 conv2d_67 (Conv2D)             (None, None, None,   130815      ['leaky_re_lu_65[0][0]']         
                                255)                                                              
                                                                                                  
 conv2d_75 (Conv2D)             (None, None, None,   65535       ['leaky_re_lu_72[0][0]']         
          

In [20]:
yolo_model.output[0]

<KerasTensor: shape=(None, None, None, 255) dtype=float32 (created by layer 'conv2d_59')>

In [21]:
yolo_outputs = yolo_head(yolo_model.output[0], anchors, len(class_names))

In [22]:
import numpy as np
a=np.array([[1, 3, 3],[1, 2, 2]])
a[:, :1]

array([[1],
       [1]])

In [23]:
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)

In [24]:
def predict(sess, image_file):
    image, image_data = preprocess_image('images/', image_file, model_image_size=(608, 608))
    out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})
    print('Found {} boxes for {}'.format(len(out_boxes), image_file))
    colors = generate_colors(class_names)
    draw_boxes(image, out_score, out_boxes, out_classes, class_names, colors)
    image.save(os.path.join("out", image_file))
    output_image = scipy.misc.imread(os.path.join("out", image_file))
    imshow(output_image)
    
    return out_scores, out_boxes, out_classes

Python *args
As in the above example we are not sure about the number of arguments that can be passed to a function. Python has *args which allow us to pass the variable number of non keyword arguments to function.

Python **kwargs
Python passes variable length non keyword argument to function using *args but we cannot use this to pass keyword argument. For this problem Python has got a solution called **kwargs, it allows us to pass the variable length of keyword arguments to the function.

In [25]:
out_scores, out_boxes, out_classes = predict(sess, "test.jpg")

TypeError: preprocess_image() got multiple values for argument 'model_image_size'