In [1]:
from utils import detector_utils as detector_utils
import cv2
import tensorflow as tf
import cv2
from keras.models import load_model
import numpy as np
import os





Using TensorFlow backend.


In [2]:
model = load_model('emojinator.h5')
detection_graph, sess = detector_utils.load_inference_graph()


Instructions for updating:
If using Keras pass *_constraint arguments to layers.






In [3]:
def keras_predict(model, image):
    processed = keras_process_image(image)
    pred_probab = model.predict(processed)[0]
    pred_class = list(pred_probab).index(max(pred_probab))
    return max(pred_probab), pred_class


In [4]:
def keras_process_image(img):
    image_x = 50
    image_y = 50
    img = cv2.resize(img, (image_x, image_y))
    img = np.array(img, dtype=np.float32)
    img = np.reshape(img, (-1, image_x, image_y, 1))
    return img

In [5]:
def get_emojis():
    emojis_folder = 'hand_emo/'
    emojis = []
    for emoji in range(len(os.listdir(emojis_folder))):
        print(emoji)
        emojis.append(cv2.imread(emojis_folder + str(emoji) + '.png', -1))
    return emojis



In [6]:
def overlay(image, emoji, x, y, w, h):
    emoji = cv2.resize(emoji, (w, h))
    try:
        image[y:y + h, x:x + w] = blend_transparent(image[y:y + h, x:x + w], emoji)
    except:
        pass
    return image


In [7]:
def blend_transparent(face_img, overlay_t_img):
    # Split out the transparency mask from the colour info
    overlay_img = overlay_t_img[:, :, :3]  # Grab the BRG planes
    overlay_mask = overlay_t_img[:, :, 3:]  # And the alpha plane

    # Again calculate the inverse mask
    background_mask = 255 - overlay_mask

    # Turn the masks into three channel, so we can use them as weights
    overlay_mask = cv2.cvtColor(overlay_mask, cv2.COLOR_GRAY2BGR)
    background_mask = cv2.cvtColor(background_mask, cv2.COLOR_GRAY2BGR)

    # Create a masked out face image, and masked out overlay
    # We convert the images to floating point in range 0.0 - 1.0
    face_part = (face_img * (1 / 255.0)) * (background_mask * (1 / 255.0))
    overlay_part = (overlay_img * (1 / 255.0)) * (overlay_mask * (1 / 255.0))

    # And finally just add them together, and rescale it back to an 8bit integer image
    return np.uint8(cv2.addWeighted(face_part, 255.0, overlay_part, 255.0, 0.0))


In [8]:
def main():
    emojis = get_emojis()

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1080)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

    im_width, im_height = (cap.get(3), cap.get(4))
    # max number of hands we want to detect/track
    num_hands_detect = 1

    cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL)

    while True:
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        ret, image_np = cap.read()
        image_np = cv2.flip(image_np, 1)
        # image_np = cv2.flip(image_np, 1)
        try:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        except:
            print("Error converting to RGB")

        # Actual detection. Variable boxes contains the bounding box cordinates for hands detected,
        # while scores contains the confidence for each of these boxes.
        # Hint: If len(boxes) > 1 , you may assume you have found atleast one hand (within your score threshold)

        boxes, scores = detector_utils.detect_objects(image_np,
                                                      detection_graph, sess)

        # draw bounding boxes on frame
        img = detector_utils.draw_box_on_image(num_hands_detect, 0.4,
                                               scores, boxes, im_width, im_height,
                                               image_np)
        image_np=cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        mask2 = cv2.inRange(hsv, np.array([2, 50, 60]), np.array([25, 150, 255]))
        res = cv2.bitwise_and(img, img, mask=mask2)
        gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
        median = cv2.GaussianBlur(gray, (5, 5), 0)

        kernel_square = np.ones((5, 5), np.uint8)
        dilation = cv2.dilate(median, kernel_square, iterations=2)
        opening = cv2.morphologyEx(dilation, cv2.MORPH_CLOSE, kernel_square)
        ret, thresh = cv2.threshold(opening, 30, 255, cv2.THRESH_BINARY)

        newImage = cv2.resize(thresh, (50, 50))
        pred_probab, pred_class = keras_predict(model, newImage)
        print(pred_class, pred_probab)
        image_np = overlay(image_np, emojis[pred_class], 400, 300, 90, 90)

        cv2.imshow('Single-Threaded Detection',
                   image_np)
        cv2.imshow('img', img)

        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break


In [None]:
keras_predict(model, np.zeros((50, 50, 1), dtype=np.uint8))
if __name__ == '__main__':
    main()

0
1
2
3
4
5
6
7
8
9
10
11
12
6 1.0
6 1.0
9 1.0
11 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
11 1.0
6 1.0
11 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
11 1.0
6 1.0
6 1.0
6 1.0
6 1.0
6 1.0
11 1.0
6 1.0
6 1.0
6 1.0
6 1.0
11 1.0
11 1.0
9 1.0
4 1.0
4 1.0
4 1.0
6 1.0
6 1.0
6 1.0
6 1.0
2 1.0
6 1.0
6 1.0
4 0.99999547
4 0.9896018
4 1.0
6 1.0
6 1.0
3 1.0
3 1.0
3 1.0
3 1.0
3 1.0
3 1.0
11 0.1097384
9 1.0
11 0.1097384
3 0.9999993
11 0.1097384
11 0.1097384
11 0.1097384
11 0.1097384
11 0.1097384
11 0.1097384
3 1.0
3 1.0
3 1.0
1 1.0
1 1.0
3 1.0
3 1.0
3 1.0
11 1.0
9 1.0
9 1.0
9 1.0
4 1.0
4 1.0
4 1.0
4 1.0
4 1.0
4 1.0
4 1.0
4 1.0
4 1.0
9 1.0
9 1.0
4 1.0
4 1.0
9 1.0
9 1.0
9 1.0
9 1.0
9 1.0
9 1.0
5 1.0
5 1.0
5 1.0
5 1.0
7 1.0
11 1.0
11 1.0
11 1.0
11 1.0
5 1.0
5 1.0
11 1.0
11 1.0
3 1.0
9 1.0
3 1.0
3 1.0
11 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0
7 1.0