# Dataset Generation for Model Training

First, we import the dependencies needed for image collection and processing.  
Make sure the notebook is run from the root directory of the project, because the image path used below is relative to it.

In [1]:
import cv2
import time
import os
import uuid
from IPython.display import clear_output
import random

In [2]:
# Destination folder for captured frames, relative to the current working
# directory; one sub-folder per gesture label is created underneath it.
imagePath = "Tensorflow/Workspace/images/capturedimages"

Now we define the list of gesture classes, _labels_, for which training data will be generated.  
The _nImages_ variable specifies how many images are captured for each gesture: the higher the value, the more variance you'll get in the final dataset. Be warned, however, that every image in the dataset must be labeled manually, so choose wisely. We will use data augmentation to generate a bigger dataset with more variance, hopefully improving the final model's generalization capabilities.  

In [9]:
# Gesture classes captured in this run. Further class names that were listed
# here but are currently disabled (presumably the remaining gestures of the
# project -- re-add them to the list to capture them):
#   'LeftA', 'LeftO', 'LeftV', 'LeftOpenHand', 'RIndexUp', 'RIndexDown',
#   'RIndexLeft', 'RIndexRight', 'RIndexFront', 'RThumbBack', 'RClearC'
labels = ["LeftL"]

# Number of frames grabbed from the camera for each gesture class.
nImages = 1

We define some data augmentation functions to produce multiple output images from a single input.

In [20]:
def vertical_shift(input_img, ratio=0.2):
    """Simulate a random vertical shift by cropping rows and resizing back.

    A shift factor is drawn uniformly from ``[-ratio, ratio]``. A positive
    factor removes that fraction of rows from the bottom of the image, a
    negative one removes it from the top. The crop is then stretched back to
    the original ``(height, width)`` with bicubic interpolation.

    Args:
        input_img: Image as a NumPy array, either ``(H, W)`` or ``(H, W, C)``.
            It is not modified.
        ratio: Maximum fraction of the height to crop away; must lie in
            ``[0, 1]``.

    Returns:
        A new array with the same height and width as ``input_img``. If
        ``ratio`` is out of range a message is printed and ``input_img``
        itself is returned unchanged.
    """
    if ratio > 1 or ratio < 0:
        print('Value should be less than 1 and greater than 0')
        return input_img
    h, w = input_img.shape[:2]
    shift = random.uniform(-ratio, ratio)
    to_shift = h * shift
    # Only the row axis is indexed so grayscale (2-D) images work as well.
    # At least one row is always kept: an empty crop would make cv2.resize fail.
    if shift > 0:
        cropped = input_img[:max(1, int(h - to_shift))]
    elif shift < 0:
        cropped = input_img[min(h - 1, int(-to_shift)):]
    else:
        cropped = input_img
    # `interpolation` must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not the interpolation flag.
    # cv2.resize allocates its own output, so no defensive copy is needed.
    return cv2.resize(cropped, (w, h), interpolation=cv2.INTER_CUBIC)

def horizontal_shift(input_img, ratio=0.2):
    """Simulate a random horizontal shift by cropping columns and resizing back.

    A shift factor is drawn uniformly from ``[-ratio, ratio]``. A positive
    factor removes that fraction of columns from the right of the image, a
    negative one removes it from the left. The crop is then stretched back to
    the original ``(height, width)`` with bicubic interpolation.

    Args:
        input_img: Image as a NumPy array, either ``(H, W)`` or ``(H, W, C)``.
            It is not modified.
        ratio: Maximum fraction of the width to crop away; must lie in
            ``[0, 1]``.

    Returns:
        A new array with the same height and width as ``input_img``. If
        ``ratio`` is out of range a message is printed and ``input_img``
        itself is returned unchanged.
    """
    if ratio > 1 or ratio < 0:
        print('Value should be less than 1 and greater than 0')
        return input_img
    h, w = input_img.shape[:2]
    shift = random.uniform(-ratio, ratio)
    to_shift = w * shift
    # Only the first two axes are indexed so grayscale (2-D) images work too.
    # At least one column is always kept: an empty crop would make
    # cv2.resize fail.
    if shift > 0:
        cropped = input_img[:, :max(1, int(w - to_shift))]
    elif shift < 0:
        cropped = input_img[:, min(w - 1, int(-to_shift)):]
    else:
        cropped = input_img
    # `interpolation` must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not the interpolation flag.
    # cv2.resize allocates its own output, so no defensive copy is needed.
    return cv2.resize(cropped, (w, h), interpolation=cv2.INTER_CUBIC)

def zoom(input_img, value=0.85):
    """Zoom into a random window of the image and resize it back.

    A scale factor is drawn uniformly from ``[value, 1]``; a window covering
    that fraction of the height and of the width is cut out at a random
    position and stretched back to the original ``(height, width)`` with
    bicubic interpolation.

    Args:
        input_img: Image as a NumPy array, either ``(H, W)`` or ``(H, W, C)``.
            It is not modified.
        value: Smallest allowed scale factor; must lie in ``[0, 1]``. Smaller
            values allow stronger zoom.

    Returns:
        A new array with the same height and width as ``input_img``. If
        ``value`` is out of range a message is printed and ``input_img``
        itself is returned unchanged.
    """
    if value > 1 or value < 0:
        print('Value for zoom should be less than 1 and greater than 0')
        return input_img
    h, w = input_img.shape[:2]
    scale = random.uniform(value, 1)
    # Keep the window at least one pixel wide/high: a tiny scale would
    # otherwise produce an empty crop and make cv2.resize fail.
    h_taken = max(1, int(scale * h))
    w_taken = max(1, int(scale * w))
    h_start = random.randint(0, h - h_taken)
    w_start = random.randint(0, w - w_taken)
    # Only the first two axes are indexed so grayscale (2-D) images work too.
    window = input_img[h_start:h_start + h_taken, w_start:w_start + w_taken]
    # `interpolation` must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not the interpolation flag.
    return cv2.resize(window, (w, h), interpolation=cv2.INTER_CUBIC)

The following code cell tells you which gesture to perform. Press Enter when you are ready; capturing then starts after a 5-second pause. The delay between consecutive frame captures is 2 seconds, but it can be freely adjusted to whatever you need.  

In [19]:
frame_capture_delay = 2 #seconds

# (filename tag, augmentation function) pairs applied to every captured frame.
augmentations = (
    ('vshift', vertical_shift),
    ('hshift', horizontal_shift),
    ('zoom', zoom),
)

try:
    for label in labels:
        # Portable, re-runnable replacement for the Windows-only `!mkdir`:
        # derives the folder from imagePath and tolerates an existing one.
        label_dir = os.path.join(imagePath, label)
        os.makedirs(label_dir, exist_ok=True)

        clear_output(wait=True)
        print('Collecting images for {}'.format(label))
        cap = cv2.VideoCapture(0)
        try:
            if not cap.isOpened():
                raise RuntimeError('Could not open camera 0')
            # Wait for Enter, then give the user 5 seconds to get in position.
            input()
            time.sleep(5)
            for imageNumber in range(nImages):
                ret, frame = cap.read()
                if not ret:
                    # A failed grab yields no frame; writing it would raise.
                    print('Could not read a frame from the camera, skipping {}'.format(label))
                    break
                # The same UUID ties the raw frame to its augmented variants.
                tmpUUID = str(uuid.uuid1())
                cv2.imwrite(os.path.join(label_dir, f'{label}_{tmpUUID}.jpg'), frame)
                print('Collected')
                cv2.imshow('frame', frame)
                for tag, augment in augmentations:
                    cv2.imwrite(os.path.join(label_dir, f'{label}_{tag}_{tmpUUID}.jpg'),
                                augment(input_img=frame))
                time.sleep(frame_capture_delay)
                # 'q' stops capturing the current label early.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera, even if capture or writing failed.
            cap.release()
finally:
    # Safe whether or not the preview window was ever created, unlike
    # destroyWindow('frame') on a window that does not exist.
    cv2.destroyAllWindows()

Collecting images for LeftL
Collected


At the end of this step, go into your _capturedimages_ directory.  
Move all the images out of the automatically generated per-class sub-directories and place them together directly in the _capturedimages_ directory: this will simplify the labeling procedure later.