In [1]:
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
import numpy as np
import cv2

In [2]:
vgg = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

In [3]:
flatten = vgg.output
flatten = Flatten()(flatten)

bboxHead = Dense(128, activation="relu")(flatten)
bboxHead = Dense(64, activation="relu")(bboxHead)
bboxHead = Dense(32, activation="relu")(bboxHead)
bboxHead = Dense(4, activation="sigmoid", name="bounding_box")(bboxHead)

softmaxHead = Dense(512, activation="relu")(flatten)
softmaxHead = Dropout(0.5)(softmaxHead)
softmaxHead = Dense(512, activation="relu")(softmaxHead)
softmaxHead = Dropout(0.5)(softmaxHead)
softmaxHead = Dense(8, activation="softmax", name="class_label")(softmaxHead)

model = Model(inputs=vgg.input, outputs=(softmaxHead, bboxHead))

In [4]:
model.load_weights('model.weights.h5')

In [5]:
def draw_bbox(img, boxPreds, lbl):
    image = np.copy(img)
    (h, w) = image.shape[:2]

    # predict the bounding box of the object along with the class label
    (X, Y, dX, dY) = boxPreds[0]

    # scale the predicted bounding box coordinates based on the image
    # dimensions
    X = X * w
    Y = Y * h
    dX = dX * w
    dY = dY * h
    startX = int(X - dX/2)
    startY = int(Y - dY/2)
    endX = int(X + dX/2)
    endY = int(Y + dY/2)
    # draw the predicted bounding box and class label on the image
    y = startY - 10 if startY - 10 > 10 else startY + 10
    cv2.putText(image, lbl, (startX, y), cv2.FONT_HERSHEY_SIMPLEX,
        0.65, (0, 0, 0), 2)
    cv2.rectangle(image, (startX, startY), (endX, endY),
        (0, 0, 0), 2)

    return image

In [6]:
decode_predictions = {0:'zero', 1:'dislike', 2:'five', 3:'exactly', 4:'two', 5:'three', 6:'left', 7:'like'}

In [None]:
cap = cv2.VideoCapture(0)
while True:
    _, frame1 = cap.read()
    frame = cv2.resize(frame1, (224, 224)) / 255.0
    inputs = np.expand_dims(frame[...,::-1], axis=0)
    lbl, box = model.predict(inputs)
    lbl = decode_predictions[np.argmax(lbl)]
    img = draw_bbox(frame1, box, lbl)
    cv2.imshow('Webcam', img)
    if cv2.waitKey(1) == 13: #enter key
        break
cv2.destroyAllWindows()
cap.release()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
