### 1. Import Dependencies

In [1]:
# Import opencv
import cv2 

# Import uuid
import uuid

# Import Operating System
import os

# Import time
import time

# Import Numpy
import numpy as np


### 2. Define Images to Collect

In [2]:
# Hand gestures to collect; each entry is later used as a folder / file-name prefix
labels = [
    'thumbsup',
    'thumbsdown',
    'thankyou',
    'livelong',
]

# How many images are captured for every gesture in `labels`
number_imgs = 5

### 3. Setup Folders

In [3]:
# Root directory (relative to the notebook's working directory) that receives the collected images
IMAGES_PATH = os.path.join(
    'Tensorflow',
    'workspace',
    'images',
    'collectedimages',
)

In [4]:
# Create the root image directory and one sub-directory per gesture label.
#
# os.makedirs is used instead of the shell `mkdir` command so that the cell:
#   * behaves the same on every operating system (no os.name branching),
#   * creates any missing parent directories,
#   * copes with paths containing spaces or shell metacharacters,
#   * raises a Python exception if a directory really cannot be created, and
#   * can be re-run safely (exist_ok=True turns an existing directory into a no-op).
os.makedirs(IMAGES_PATH, exist_ok=True)

# Loop over the labels in the list 'labels'
for label in labels:
    # Each gesture gets its own folder: <IMAGES_PATH>/<label>
    path = os.path.join(IMAGES_PATH, label)
    os.makedirs(path, exist_ok=True)

### 4. Capture Images

#### 4.1.0 Display the camera feed (testing)

In [5]:
# Size (in pixels) every frame is resized to before it is displayed
frameWidth = 1024
frameHeight = 576

# Initialize a video capture object with the default camera (index 0)
cap = cv2.VideoCapture(0)

# Check if the camera was opened successfully.
# exit() is deliberately NOT called on failure: inside a notebook it shuts the
# kernel down and discards all state. The preview loop is simply skipped instead.
camera_ready = cap.isOpened()
if not camera_ready:
    print("Unable to open camera")

try:
    if camera_ready:
        # Request 60 frames per second from the camera driver
        # (this sets the frame rate, not the resolution)
        cap.set(cv2.CAP_PROP_FPS, 60)

    # Loop over the frames captured from the video capture object
    while camera_ready:
        # Read a single frame from the video capture object
        ret, img = cap.read()

        # Check if the frame was read successfully
        if not ret:
            print("Failed to read frame")
            break

        img = cv2.resize(img, (frameWidth, frameHeight))
        # Display the resulting frame in a window with the title "Result"
        cv2.imshow("Result", img)

        # Wait up to 125 ms for a key press (returns -1 when no key was pressed)
        key = cv2.waitKey(125)

        # Exit the loop if the 'q' key or the 'Esc' key is pressed
        if key == ord('q') or key == 27:  # 27 is the ASCII code for the 'Esc' key
            break
finally:
    # Always free the camera and close the OpenCV windows, even when the loop
    # raises or the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()


#### 4.1.1 Capture images manually

In [6]:
# Manual capture: shows a live, centre-cropped preview. Pressing 'r' starts an
# on-screen countdown; when it reaches its end a frame is grabbed, the user is
# asked for a label and the frame is saved under <IMAGES_PATH>/<label>/.
# Pressing 'q' or 'Esc' at any time ends the cell.

# Size (in pixels) every frame is resized to before cropping
frameWidth = 1024
frameHeight = 576

# Centred square crop taken from the resized frame
x = (frameWidth - frameHeight) // 2  # starting x-coordinate
y = 0  # starting y-coordinate (the crop spans the full frame height)
w = h = frameHeight  # width and height of the crop

# Size (in pixels) of the image written to disk
imgWidth = 320
imgHeight = 320

# Value the countdown starts from; it is drawn on the preview and decremented
# once per second. The same value is used for the first start and for restarts.
COUNTDOWN_START = 4
TIMER = COUNTDOWN_START

# Initialize a video capture object with the default camera (index 0)
cap = cv2.VideoCapture(0)

# exit() is deliberately NOT called on failure: inside a notebook it shuts the
# kernel down. The capture loop is simply skipped instead.
camera_ready = cap.isOpened()
if not camera_ready:
    print("Failed to open the camera.")

try:
    if camera_ready:
        # Request 60 frames per second from the camera driver
        cap.set(cv2.CAP_PROP_FPS, 60)

    # Loop over the frames captured from the video capture object
    while camera_ready:
        # Read a single frame; check `ret` BEFORE touching `img`, because a
        # failed read yields img=None and cv2.resize would raise on it.
        ret, img = cap.read()
        if not ret:
            print("Failed to retrieve a frame from the camera.")
            break
        img = cv2.resize(img, (frameWidth, frameHeight))

        # Crop the image
        crop_img = img[y:y+h, x:x+w]

        # Display the resulting frame in a window with the title "Result"
        cv2.imshow("Result", crop_img)

        # Wait up to 125 ms for a key press
        key = cv2.waitKey(125)

        # If the 'q' key or the 'esc' key is pressed, exit the loop
        if key == ord('q') or key == 27:
            break
        # Anything other than 'r' just keeps the preview running
        if key != ord('r'):
            continue

        # 'r' pressed: (re)arm the countdown. Resetting TIMER here is required,
        # otherwise it is still below zero after the first capture and every
        # later capture would skip the countdown entirely.
        TIMER = COUNTDOWN_START
        prev = time.time()
        while TIMER >= 0:
            ret, img = cap.read()
            if not ret:
                break
            img = cv2.resize(img, (frameWidth, frameHeight))
            # Crop the image
            crop_img = img[y:y+h, x:x+w]

            # Draw the remaining count on the preview
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(crop_img, str(TIMER),
                        (240, 300), font,
                        2, (255, 255, 255),
                        3, cv2.LINE_AA)
            cv2.imshow('Result', crop_img)

            # Check for key presses
            key = cv2.waitKey(125)
            if key == ord('q') or key == 27:
                break
            if key == ord('r'):
                # Restart the countdown if 'r' key is pressed
                TIMER = COUNTDOWN_START
                prev = time.time()

            # Decrease the counter once a full second has elapsed
            cur = time.time()
            if cur - prev >= 1:
                prev = cur
                TIMER = TIMER - 1
        else:
            # The countdown ran to completion (no break): grab the frame to save
            ret, img = cap.read()
            if ret:
                img = cv2.resize(img, (frameWidth, frameHeight))
                # Crop the image
                crop_img = img[y:y+h, x:x+w]

                # Show the captured frame for two seconds
                cv2.imshow('Result', crop_img)
                cv2.waitKey(2000)

                # Prompt the user to enter the label; surrounding whitespace
                # is removed so it cannot end up in the folder or file name
                while True:
                    label = input("Enter label or folder: ").strip()
                    if label:
                        break
                    print("Invalid label name. Please try again.")

                try:
                    # Make sure the target folder exists: cv2.imwrite does not
                    # create directories and only reports failure through its
                    # return value.
                    label_dir = os.path.join(IMAGES_PATH, label)
                    os.makedirs(label_dir, exist_ok=True)

                    # Generate unique image name using uuid
                    imgname = os.path.join(label_dir, label+'.'+'{}.jpg'.format(str(uuid.uuid1())))

                    # Resize the frame to the desired width and height and save it
                    saved_img = cv2.resize(crop_img, (imgWidth, imgHeight))
                    if cv2.imwrite(imgname, saved_img):
                        print(f"Image saved as {imgname}")
                    else:
                        print(f"Failed to save the image: could not write {imgname}")
                except Exception as e:
                    print(f"Failed to save the image: {str(e)}")

        # A read failed during the countdown or the final capture
        if not ret:
            print("Failed to retrieve a frame from the camera.")
            break
        # If the 'q' key or the 'esc' key was pressed during the countdown, exit the loop
        if key == ord('q') or key == 27:
            break
finally:
    # Always free the camera and close the OpenCV windows, even when the loop
    # raises or the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()


#### 4.2 Capture the images

In [6]:
# Automatic collection: for every entry in `labels`, wait until the user
# presses 'r', then capture `number_imgs` images. Each image is preceded by an
# on-screen countdown and saved under <IMAGES_PATH>/<label>/.
# Pressing 'q' or 'Esc' at any time ends the cell.

# Size (in pixels) every frame is resized to before cropping
frameWidth = 1024
frameHeight = 576

# Centred square crop taken from the resized frame
x = (frameWidth - frameHeight) // 2  # starting x-coordinate
y = 0  # starting y-coordinate (the crop spans the full frame height)
w = h = frameHeight  # width and height of the crop

# Size (in pixels) of the image written to disk
imgWidth = 320
imgHeight = 320

# Value the countdown starts from; it is drawn on the preview and decremented once per second
COUNTDOWN_START = 3

# Initialize a video capture object with the default camera (index 0)
cap = cv2.VideoCapture(0)

# exit() is deliberately NOT called on failure: inside a notebook it shuts the
# kernel down. The collection loop is simply skipped instead.
camera_ready = cap.isOpened()
if not camera_ready:
    print("Failed to open the camera.")

# Set to True as soon as the run must end (quit key, camera failure, no camera)
stop = not camera_ready

try:
    if camera_ready:
        # Request 60 frames per second from the camera driver
        cap.set(cv2.CAP_PROP_FPS, 60)

    # Loop over each label
    for label in labels:
        if stop:
            break

        print('Collecting images for {}'.format(label))

        # Prompt user to start image collection
        print("Press 'r' to start image collection for {}".format(label))

        # The on-screen prompt is the same for every frame of this label, so
        # its text and position are computed once, outside the frame loop.
        text = "Press 'r' to start image collection for {}".format(label)
        textSize, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        text_x = int((frameHeight - textSize[0]) / 2)  # centred horizontally in the square crop
        text_y = frameHeight - textSize[1]  # near the bottom edge of the crop

        # Show the live preview until the user presses 'r' (start) or quits
        while True:
            # Check `ret` BEFORE touching `img`: a failed read yields img=None
            # and cv2.resize would raise on it.
            ret, img = cap.read()
            if not ret:
                print("Cannot capture frame")
                stop = True
                break
            img = cv2.resize(img, (frameWidth, frameHeight))

            # Crop the image
            crop_img = img[y:y+h, x:x+w]

            # Display the current frame with the prompt
            cv2.putText(crop_img, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.imshow('Frame', crop_img)

            key = cv2.waitKey(1)
            if key == ord('r'):
                break
            if key == ord('q') or key == 27:
                stop = True
                break
        if stop:
            break

        try:
            # Loop over number of images to collect
            for imgnum in range(number_imgs):
                print('Collecting image number {}'.format(imgnum+1))

                # Arm the countdown for this image. `prev` must be reset here as
                # well: otherwise the 2 s preview of the previous image counts as
                # elapsed time and the first second is skipped immediately.
                TIMER = COUNTDOWN_START
                prev = time.time()

                while TIMER >= 0:
                    ret, img = cap.read()
                    if not ret:
                        print("Failed to retrieve a frame from the camera.")
                        stop = True
                        break
                    img = cv2.resize(img, (frameWidth, frameHeight))
                    # Crop the image
                    crop_img = img[y:y+h, x:x+w]

                    # Draw the remaining count on the preview
                    font = cv2.FONT_HERSHEY_DUPLEX
                    cv2.putText(crop_img, str(TIMER),
                                (240, 300), font,
                                4, (255, 255, 255),
                                4, cv2.LINE_AA)
                    cv2.imshow('Frame', crop_img)

                    # Check for key presses (wait up to 125 ms)
                    key = cv2.waitKey(125)
                    if key == ord('q') or key == 27:
                        stop = True
                        break
                    if key == ord('r'):
                        # Restart the countdown if 'r' key is pressed
                        TIMER = COUNTDOWN_START
                        prev = time.time()

                    # Decrease the counter once a full second has elapsed
                    cur = time.time()
                    if cur - prev >= 1:
                        prev = cur
                        TIMER = TIMER-1

                # The countdown was interrupted by a quit key or a camera failure
                if stop:
                    break

                # Countdown finished: grab the frame that will be saved
                ret, img = cap.read()
                if not ret:
                    print("Failed to retrieve a frame from the camera.")
                    stop = True
                    break
                img = cv2.resize(img, (frameWidth, frameHeight))
                # Crop the image
                crop_img = img[y:y+h, x:x+w]

                # Show the captured frame for two seconds
                cv2.imshow('Frame', crop_img)
                cv2.waitKey(2000)

                try:
                    # Make sure the target folder exists: cv2.imwrite does not
                    # create directories and only reports failure through its
                    # return value.
                    label_dir = os.path.join(IMAGES_PATH, label)
                    os.makedirs(label_dir, exist_ok=True)

                    # Generate unique image name using uuid
                    imgname = os.path.join(label_dir, label+'.'+'{}.jpg'.format(str(uuid.uuid1())))

                    # Resize the frame to the desired width and height and save it
                    saved_img = cv2.resize(crop_img, (imgWidth, imgHeight))
                    if cv2.imwrite(imgname, saved_img):
                        print(f"Image saved as {imgname}")
                    else:
                        print(f"Failed to save the image: could not write {imgname}")
                except Exception as e:
                    print(f"Failed to save the image: {str(e)}")

                # Give the user a chance to quit between two images
                key = cv2.waitKey(1)
                if key == ord('q') or key == 27:
                    stop = True
                    break
        except Exception as e:
            # Report unexpected errors for this label and move on to the next one
            print("Error:", e)
finally:
    # Always free the camera and close the OpenCV windows, even when the loop
    # raises or the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()

Collecting images for thumbsup
Press 'r' to start image collection for thumbsup
Collecting image number 1
Image saved as Tensorflow\workspace\images\collectedimages\thumbsup\thumbsup.f960a0c4-c620-11ed-a739-44af2861d338.jpg
Collecting image number 2
Image saved as Tensorflow\workspace\images\collectedimages\thumbsup\thumbsup.fc766b04-c620-11ed-b72d-44af2861d338.jpg
Collecting image number 3
Image saved as Tensorflow\workspace\images\collectedimages\thumbsup\thumbsup.ff8c5a87-c620-11ed-b752-44af2861d338.jpg
Collecting image number 4
Image saved as Tensorflow\workspace\images\collectedimages\thumbsup\thumbsup.02a22dec-c621-11ed-b6bd-44af2861d338.jpg
Collecting image number 5
Image saved as Tensorflow\workspace\images\collectedimages\thumbsup\thumbsup.05b86a96-c621-11ed-bcb3-44af2861d338.jpg
Collecting images for thumbsdown
Press 'r' to start image collection for thumbsdown
Collecting image number 1
Image saved as Tensorflow\workspace\images\collectedimages\thumbsdown\thumbsdown.0b63f3ef-

### 5. Image Labelling using labelImg

In [6]:
# Directory (relative to the notebook's working directory) that holds the labelImg checkout
LABELIMG_PATH = os.path.join(
    'Tensorflow',
    'labelimg',
)

In [7]:
if not os.path.exists(LABELIMG_PATH):
    !mkdir {LABELIMG_PATH}
    !git clone https://github.com/tzutalin/labelImg {LABELIMG_PATH}

In [8]:
# Build labelImg's resources inside its checkout; the command depends on the operating system.
if os.name == 'posix':
    # Linux or Mac: run the `qt5py3` make target from the labelImg directory
    !cd {LABELIMG_PATH} && make qt5py3
if os.name =='nt':
    # Windows: call pyrcc5 directly to generate libs/resources.py from resources.qrc
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [9]:
# Start labelImg by running labelImg.py from inside its checkout directory.
# NOTE(review): `python` is resolved by the shell, so it may not be the same
# interpreter as the notebook kernel - confirm the required packages are installed there.
!cd {LABELIMG_PATH} && python labelImg.py