In [3]:
import sys # system functions (ie. exiting the program)
import os # operating system functions (ie. path building on Windows vs. MacOs)
import time # for time operations
import uuid # for generating unique file names
import math # math functions

from IPython.display import display as ipydisplay, Image, clear_output, HTML # for interacting with the notebook better

import numpy as np # matrix operations (ie. difference between two matricies)
import cv2 # (OpenCV) computer vision functions (ie. tracking)

import matplotlib.pyplot as plt # (optional) for plotting and showing images inline
%matplotlib inline

import json

import keras # high level api to tensorflow (or theano, CNTK, etc.) and useful image preprocessing
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.models import Sequential, load_model, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint

In [4]:
# Filesystem locations used to save the model, its training history and text output.
# All paths are relative to the notebook's working directory.

# Folder holding the image data
DATA_FOLDER = 'images'

# Folder that receives the trained model artefacts
MODEL_PATH = 'model'
MODEL_FILE = os.path.join(MODEL_PATH, 'hand_recog_model.hdf5')
MODEL_HISTORY = os.path.join(MODEL_PATH, 'model_history.txt')

# Text output file
OUTPUT_TEXT = os.path.join('output', 'outputtext.txt')

In [5]:
# Set up tracker.
def setup_tracker(ttype):
    """Create a new OpenCV tracker of the requested type.

    Parameters
    ----------
    ttype : str
        One of 'BOOSTING', 'KCF', 'MEDIANFLOW', 'CSRT' or 'MOSSE'.

    Returns
    -------
    A freshly constructed tracker object (not yet initialised with a frame).

    Raises
    ------
    KeyError
        If ``ttype`` is not one of the supported names.
    AttributeError
        If the installed OpenCV build does not provide the requested tracker.
    """
    # Map each supported name to the *name* of its cv2 factory. The factory is
    # looked up only for the requested type, so a build that lacks one
    # algorithm can still create the others.
    factory_names = {
        'BOOSTING': 'TrackerBoosting_create',
        'KCF': 'TrackerKCF_create',
        'MEDIANFLOW': 'TrackerMedianFlow_create',
        'CSRT': 'TrackerCSRT_create',
        'MOSSE': 'TrackerMOSSE_create',
    }
    factory_name = factory_names[ttype]  # unknown names raise KeyError

    # Look on the top-level module first, then fall back to cv2.legacy, where
    # newer opencv-contrib releases expose some of these trackers.
    factory = getattr(cv2, factory_name, None)
    if factory is None:
        factory = getattr(getattr(cv2, 'legacy', None), factory_name, None)
    if factory is None:
        raise AttributeError(
            "This OpenCV build does not provide {} (tracker type '{}')".format(factory_name, ttype)
        )
    return factory()

In [9]:
# Code for Data Collection
#
# Captures webcam frames, subtracts the background frame, thresholds the result
# and saves the region inside the bounding box as a labelled image.
#
# Keys (while an OpenCV window has focus):
#   ESC    stop recording
#   r / R  reset the background frame and bounding box, stop tracking
#   t / T  start tracking the current bounding box
#   s / S  save the current thresholded crop to DATA/POSE
classes = {
    0: 'A',
    1: 'B',
    2: 'C',
    3: 'D',
    4: 'E',
    5: 'F',
    6: 'G',
    7: 'H',
    8: 'I',
}

# Modify storage paths; images will be saved in the following path: DATA/POSE
POSE = 'I'
#DATA = os.path.join('training_data')
DATA = os.path.join('validation_data')

# The loop stops once more than this many images have been saved,
# to keep the per-pose dataset sizes similar
IMG_LIMIT = 50

# Key codes as seen after `cv2.waitKey(1) & 0xff`
KEY_ESC = 27
KEY_NONE = 255  # value produced when no key was pressed during the wait

# Begin video capture
video = cv2.VideoCapture(0)
if not video.isOpened():
    print("ERROR: Could not open webcam")
    sys.exit()

# Everything that uses the webcam runs inside try/finally so the capture
# device and the OpenCV windows are released even if the loop raises or
# sys.exit() is called.
try:
    # Read first frame
    success, frame = video.read()
    if not success:
        print("ERROR: Cannot read video")
        sys.exit()
    # Use the first frame as an initial background frame
    bg = frame.copy()

    # Kernel for image processing
    kernel = np.ones((2, 2), np.uint8)

    # Initialize tracker's position
    # Bounding box -> (TopLeftX, TopLeftY, Width, Height)
    bbox_initial = (60, 60, 170, 170)
    bbox = bbox_initial
    # Tracking status, -1 for not tracking, 0 for unsuccessful tracking, 1 for successful tracking
    tracking = -1
    tracker = None  # created when T is pressed

    # Text display positions
    positions = {
        'hand_pose': (15, 40),
        'fps': (15, 20)
    }

    # Image counter for convenience with naming the dataset.
    # NOTE: it restarts at 0 on every run of this cell, so files with the same
    # POSE/index from an earlier run are overwritten.
    img_count = 0

    # Begin video loop
    while True:
        # Read a new frame; check the result before touching `frame`
        success, frame = video.read()
        if not success:
            break
        display = frame.copy()

        # To limit and keep image dataset amounts similar
        if img_count > IMG_LIMIT:
            break

        # Start timer
        timer = cv2.getTickCount()

        # Process video feed from the webcam to be used for training/validation sets later

        # Get absolute difference between the background frame (bg) and the current frame
        diff = cv2.absdiff(bg, frame)
        mask = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)

        # Morphological transformation applied to the mask
        gradient = cv2.morphologyEx(mask, cv2.MORPH_GRADIENT, kernel)

        # Threshold applied to add a more defined outline
        th, thresh = cv2.threshold(gradient, 10, 255, cv2.THRESH_BINARY)

        # If tracking is active, update the tracker with the raw frame
        # (the same kind of image it was initialised with)
        if tracking != -1:
            tracking, bbox = tracker.update(frame)
            tracking = int(tracking)

        # Use numpy array indexing to crop the thresholded frame.
        # NOTE(review): this slice is a view of `thresh`, so the rectangle drawn
        # on `thresh` below can show up along the edges of the crop (and of
        # saved images) - confirm whether that is intended.
        hand_crop = thresh[int(bbox[1]):int(bbox[1] + bbox[3]), int(bbox[0]):int(bbox[0] + bbox[2])]

        # Draw bounding box
        p1 = (int(bbox[0]), int(bbox[1]))
        p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
        cv2.rectangle(thresh, p1, p2, (255, 0, 0), 2, 1)
        cv2.rectangle(display, p1, p2, (255, 0, 0), 2, 1)

        # Calculate frames per second (FPS)
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)
        # Display FPS and the current pose label
        fps_text = "FPS : " + str(int(fps))
        cv2.putText(thresh, fps_text, positions['fps'], cv2.FONT_HERSHEY_SIMPLEX, 0.65, (50, 170, 50), 2)
        cv2.putText(display, fps_text, positions['fps'], cv2.FONT_HERSHEY_SIMPLEX, 0.65, (50, 170, 50), 2)
        cv2.putText(display, "hand pose: {}".format(POSE), positions['hand_pose'], cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

        # Display result
        cv2.imshow("display", display)
        # Display diff
        cv2.imshow("diff", diff)
        # Display thresh
        cv2.imshow("thresh", thresh)
        # Display hand_crop; the crop is empty when the bounding box has no
        # area or lies outside the frame, in which case there is nothing to show
        if hand_crop.size > 0:
            cv2.imshow("hand_crop", hand_crop)

        key = cv2.waitKey(1) & 0xff

        if key == KEY_ESC:
            # ESC pressed then stop recording
            break
        elif key in (ord('r'), ord('R')):
            # Press R to reset the background frame and bounding box
            bg = frame.copy()
            bbox = bbox_initial
            tracking = -1
        elif key in (ord('t'), ord('T')):
            # T pressed
            # Initialize tracker on the current frame and bounding box
            tracker = setup_tracker('MEDIANFLOW')
            init_result = tracker.init(frame, bbox)
            # presumably some OpenCV versions return None from init(); treat
            # that as "tracking started" - TODO confirm against installed version
            tracking = 1 if init_result is None else int(init_result)
        elif key in (ord('s'), ord('S')):
            # S pressed then save the crop as DATA/POSE/<POSE>_<n>.jpg
            if hand_crop.size == 0:
                # Nothing to write for an empty crop
                print("There was an error in saving the image")
            else:
                save_dir = os.path.join(DATA, POSE)
                # Create the target folder on first use so imwrite can succeed
                if not os.path.isdir(save_dir):
                    os.makedirs(save_dir)
                fname = os.path.join(save_dir, "{}_{}.jpg".format(POSE, img_count + 1))
                status = cv2.imwrite(fname, hand_crop)
                if status:
                    # Only count images that were actually written
                    img_count += 1
                    print("Image saved!")
                else:
                    print("There was an error in saving the image")
        elif key != KEY_NONE:
            # Echo the code of any unhandled key
            print(key)
finally:
    cv2.destroyAllWindows()
    video.release()

In [8]:
# Standalone cleanup cell: releases the webcam capture object and closes all
# OpenCV windows (the same two calls that end the data-collection cell).
video.release()
cv2.destroyAllWindows()