In [42]:
import os
import shutil
import time

import cv2
import numpy as np

#### Setting up for data collection

Reference video showing how to perform each sign: [YouTube](https://www.youtube.com/watch?v=0FcwzMq4iWg)

In [43]:
# root folder that receives the raw recordings (one sub-folder per action)
DATASET_PATH = "../storage/datasets/raw"

# full vocabulary of signs the project knows about
ACTIONS = [
    "_",
    "hello",
    "thanks",
    "i-love-you",
    "I",
    "Yes",
    "No",
    "Help",
    "Please",
    "Want",
    "Eat",
    "More",
    "Bathroom",
    "Learn",
    "Sign",
]

# record only a subset in this session: entries 4..7 ("I", "Yes", "No", "Help")
ACTIONS = ACTIONS[4:8]

# how many clips to record per action, and how many frames each clip holds
videos_per_label = 60
frames_per_video = 60

In [44]:
# Reset the dataset root: any previously recorded data under DATASET_PATH is
# deleted, then the folder is created again from scratch.
try:
    shutil.rmtree(DATASET_PATH)
except FileNotFoundError:
    # first run: there is nothing to delete yet
    pass
except OSError as err:
    # stay best-effort (do not abort the notebook), but report the problem
    # instead of hiding it; KeyboardInterrupt is no longer swallowed either
    print(f"[WARNING] could not remove {DATASET_PATH}: {err}")

try:
    os.makedirs(DATASET_PATH)
except FileExistsError:
    # only reachable when the removal above failed
    print("Dataset folder exists, skipping creation.")

# create one sub-folder per action
for action in ACTIONS:
    try:
        os.makedirs(os.path.join(DATASET_PATH, action))
        print(f"[CREATED] {action}")
    except FileExistsError:
        print(f"[SKIPPED] {action}")

[CREATED] I
[CREATED] Yes
[CREATED] No
[CREATED] Help


##### Saving the landmarker data

In [45]:
def video_path(action, sequence):
    """Return the path of the ``.avi`` file for clip ``sequence`` of ``action``.

    The file is named ``<sequence>.avi`` and sits in the ``action`` sub-folder
    of ``DATASET_PATH``.
    """
    filename = f"{sequence}.avi"
    return os.path.join(DATASET_PATH, action, filename)

#### Recording the dataset

In [46]:
def display_starting_text(img):
    """Draw the "STARTING COLLECTION" banner onto ``img``.

    The text is rendered at pixel position (120, 200) with colour (0, 255, 0),
    font scale 1 and thickness 4. Nothing is returned; ``img`` itself is drawn on.
    """
    banner = "STARTING COLLECTION"
    origin = (120, 200)
    colour = (0, 255, 0)
    font_scale, thickness = 1, 4
    cv2.putText(
        img, banner, origin, cv2.FONT_HERSHEY_SIMPLEX,
        font_scale, colour, thickness, cv2.LINE_AA,
    )


def display_collecting_text(img, act, seq, pos=(15, 12)):
    """Draw the status line for the clip currently being recorded onto ``img``.

    Args:
        img: image to draw on.
        act: action/label name inserted into the message.
        seq: clip number inserted into the message.
        pos: pixel position passed to ``cv2.putText`` as the text origin.

    The text uses colour (0, 0, 255) in the image's own channel order,
    font scale 0.5 and thickness 1. Nothing is returned.
    """
    caption = f"Collecting frames for {act} Video Number {seq}"
    colour = (0, 0, 255)
    font_scale, thickness = 0.5, 1
    cv2.putText(
        img, caption, pos, cv2.FONT_HERSHEY_SIMPLEX,
        font_scale, colour, thickness, cv2.LINE_AA,
    )

In [47]:
# open the webcam (device index 0) and request 640x480 frames at 60 FPS
cap = cv2.VideoCapture(0)
for _prop, _value in (
    (cv2.CAP_PROP_FRAME_WIDTH, 640),
    (cv2.CAP_PROP_FRAME_HEIGHT, 480),
    (cv2.CAP_PROP_FPS, 60),
):
    cap.set(_prop, _value)

# codec identifier (XVID) handed to every VideoWriter
fourcc = cv2.VideoWriter_fourcc("X", "V", "I", "D")

# bookkeeping: wall-clock start, a frame counter, and the flag the recording
# loop raises when the user asks to quit
start_time = time.time()
frames = 0
is_quit = False

NOTE: It takes a total of 14 minutes or more to capture 60 videos for each of the 4 actions/labels.

In [48]:
# --- recording settings -------------------------------------------------------
WINDOW_NAME = "Detecting Sign Language"
FRAME_WIDTH, FRAME_HEIGHT = 640, 480  # size handed to VideoWriter and used for the pause screen
VIDEO_FPS = 60.0                      # frame rate stored in the video files
PAUSE_MS = 1500                       # pause before each clip so the user can get ready
MAX_FAILED_READS = 30                 # consecutive failed camera reads tolerated before aborting


def _quit_requested(delay_ms):
    """Wait up to ``delay_ms`` ms for a key press; return True if it was 'q'."""
    return cv2.waitKey(delay_ms) & 0xFF == ord("q")


def _show_pause_screen(action, sequence):
    """Show the black "starting collection" screen; return True if 'q' was pressed."""
    pause_image = np.zeros((FRAME_HEIGHT, FRAME_WIDTH, 3), dtype=np.uint8)
    display_starting_text(pause_image)
    display_collecting_text(pause_image, action, sequence)
    cv2.imshow(WINDOW_NAME, pause_image)
    return _quit_requested(PAUSE_MS)


def _record_clip(writer, action, sequence):
    """Write up to ``frames_per_video`` camera frames to ``writer``.

    A failed camera read is retried instead of being counted as a frame, so a
    finished clip always holds ``frames_per_video`` frames.

    Returns:
        True if collection should go on with the next clip, False if the user
        pressed 'q' or the camera failed ``MAX_FAILED_READS`` times in a row.
    """
    written = 0
    failed_reads = 0
    while written < frames_per_video:
        success, frame = cap.read()
        if not success:
            print("Ignoring empty camera frame")
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Camera stopped delivering frames, aborting collection.")
                return False
            continue
        failed_reads = 0

        # store the untouched frame
        writer.write(frame)
        written += 1

        # The overlay goes on a copy so the saved frame stays clean. It is drawn
        # directly on the BGR frame: the former BGR->RGB->BGR round trip cost two
        # conversions per frame and only swapped the overlay's colour channels,
        # so the status text now has the same colour as on the pause screen.
        preview = frame.copy()
        display_collecting_text(preview, action, sequence)
        cv2.imshow(WINDOW_NAME, preview)

        if _quit_requested(10):
            return False
    return True


is_quit = False

if not cap.isOpened():
    print("Camera is not opened, nothing was recorded.")
else:
    try:
        # loop through each action in the predefined ACTIONS list
        for action in ACTIONS:
            # loop through the number of video sequences per label
            for sequence in range(videos_per_label):
                # give the user time to prepare; 'q' pressed here quits as well
                if _show_pause_screen(action, sequence):
                    is_quit = True
                    break

                # one video file per sequence
                out = cv2.VideoWriter(
                    video_path(action, sequence),  # path for saving the video
                    fourcc,                        # codec used for compression
                    VIDEO_FPS,                     # frames per second
                    (FRAME_WIDTH, FRAME_HEIGHT),   # frame/image size (width, height)
                )
                try:
                    is_quit = not _record_clip(out, action, sequence)
                finally:
                    # close every file explicitly, not just the last one
                    out.release()

                # break out of the sequence loop
                if is_quit:
                    break

            # break out of the action loop
            if is_quit:
                break
    finally:
        # always free the camera and the window, even on an error or a
        # kernel interrupt
        cap.release()
        cv2.destroyAllWindows()


In [225]:
# Manual clean-up: frees the camera, the video writer and the preview window.
# Can be run on its own, e.g. after the recording cell was interrupted.
cap.release()
# `out` only exists once the recording loop has created at least one writer;
# without this guard a NameError here would also skip destroyAllWindows()
if "out" in globals():
    out.release()
cv2.destroyAllWindows()