# 1. Import and install needed dependencies

In [None]:
%pip install --user tensorflow==2.6.0 tensorflow-gpu==2.6.0 opencv-python mediapipe sklearn matplotlib

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
# Handles to the MediaPipe modules used by the helpers in this notebook.
mp_holistic, mp_drawing = (
    mp.solutions.holistic,       # Holistic model
    mp.solutions.drawing_utils,  # Drawing utilities
)

def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back the frame plus the model output.

    Args:
        image: Frame in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before being given to the model.
        model: Object exposing ``process(image)``.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the prediction.
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detection

def extract_keypoints(results):
    """Flatten the pose and hand landmarks of ``results`` into one 1-D array.

    Layout of the returned vector (258 values):
      * pose:       33 landmarks x (x, y, z, visibility) = 132 values
      * left hand:  21 landmarks x (x, y, z)             =  63 values
      * right hand: 21 landmarks x (x, y, z)             =  63 values

    Any group whose landmark attribute is falsy is replaced by zeros of the
    same length. Face landmarks are intentionally left out.
    """
    def _flat(landmark_list, fields, count):
        # Missing detection -> all-zero placeholder of the expected length.
        if not landmark_list:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, field) for field in fields]
                for point in landmark_list.landmark]
        return np.array(rows).flatten()

    hand_fields = ("x", "y", "z")
    pose_vec = _flat(results.pose_landmarks, ("x", "y", "z", "visibility"), 33)
    left_vec = _flat(results.left_hand_landmarks, hand_fields, 21)
    right_vec = _flat(results.right_hand_landmarks, hand_fields, 21)
    return np.concatenate([pose_vec, left_vec, right_vec])
    
def draw_styled_landmarks(image, results):
    """Draw the pose and both hand landmark sets of ``results`` onto ``image``.

    Each set is drawn with its own pair of colours; the first drawing spec
    uses circle radius 4 and the second radius 2, both with thickness 2.
    Nothing is returned. Face-mesh drawing is intentionally disabled.
    """
    # (landmarks, connections, colour of first spec, colour of second spec)
    layers = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         (80, 22, 10), (80, 44, 121)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         (245, 117, 66), (245, 66, 230)),
    )
    for landmarks, connections, first_color, second_color in layers:
        mp_drawing.draw_landmarks(
            image,
            landmarks,
            connections,
            mp_drawing.DrawingSpec(color=first_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=second_color, thickness=2, circle_radius=2),
        )
    

# 3. Video capture (used for collecting sign language data)

In [4]:
# Dataset collection script for the front webcam, one recording per run:
# every captured frame gets the holistic landmarks drawn on it and is written
# to `output_name`.
output_name = 'record_video.mp4'

# Index 0 selects the system's default camera. The constructor already tries
# to open the device, so open() is only a fallback when that attempt failed.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.open(0)

frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# get() returns 0 for properties the backend does not support; a writer
# created with 0 fps would not produce a usable file, so fall back to 30.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    fps = 30.0

out = cv2.VideoWriter(output_name, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        # Loop until the camera stops delivering frames or the user quits.
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # Make detection
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Append the annotated frame to the video file
            out.write(image)

            # Show the annotated three-channel image
            cv2.imshow('press q to break', image)

            # Quit on 'q' or Esc (keyboard must be in an English input mode).
            # Masking keeps only the low byte of the key code.
            if cv2.waitKey(1) & 0xFF in [ord('q'), 27]:
                break
finally:
    # Always free the window, the writer and the camera, even if a frame
    # raised, so the file is finalised and the device is not left locked.
    cv2.destroyAllWindows()
    out.release()
    cap.release()

print('视频已保存', output_name)

视频已保存 record_video.mp4


In [9]:
# 调用摄像头拍摄视频模板 （无关键点保存）
# 生成的视频文件名默认为output_video.mp4，帧处理函数process_frame()默认不进行任何处理
# 同济子豪兄 2021-7-11

# 导入opencv-python
import cv2
import time

# Per-frame processing hook. It currently does no processing, so captured
# frames reach the video file unchanged.
def process_frame(img):
    """Return ``img`` unchanged (placeholder for per-frame image processing)."""
    processed = img
    return processed

# Record webcam footage to `output_name`, passing every frame through
# process_frame() first (no keypoints are saved).
output_name = 'record_video_1.mp4'

# Index 0 selects the system's default camera. The constructor already tries
# to open the device, so open() is only a fallback when that attempt failed.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.open(0)

frame_size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# get() returns 0 for properties the backend does not support; a writer
# created with 0 fps would not produce a usable file, so fall back to 30.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    fps = 30.0

out = cv2.VideoWriter(output_name, fourcc, fps, (int(frame_size[0]), int(frame_size[1])))

try:
    # Loop until the camera stops delivering frames or the user quits.
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # Apply the per-frame processing hook
        frame = process_frame(frame)

        # Append the frame to the video file
        out.write(frame)

        # Show the processed three-channel image
        cv2.imshow('press q to break', frame)

        # Quit on 'q' or Esc (keyboard must be in an English input mode).
        # Masking keeps only the low byte of the key code.
        if cv2.waitKey(1) & 0xFF in [ord('q'), 27]:
            break
finally:
    # Always free the window, the writer and the camera, even if a frame
    # raised, so the file is finalised and the device is not left locked.
    cv2.destroyAllWindows()
    out.release()
    cap.release()

print('视频已保存', output_name)

视频已保存 record_video_1.mp4


# 4. Load the videos without landmarks and process them

In [7]:
import os
# Collect the frame count of every .mp4 in the dataset folder into fps_list
# (despite the "fps" in the names, no_fps is a number of frames).
fps_list = []
for root, dirs, files in os.walk(r"C:\deep-learning\HKMU\Extrat_keypoints\data\WLASL_train\book"):  # put the folder to scan here
    for file in files:
        if ('.mp4' in file):
            path = os.path.join(root, file)
            video = cv2.VideoCapture(path)
            try:
                # CAP_PROP_FRAME_COUNT is the named constant for property id 7.
                no_fps = video.get(cv2.CAP_PROP_FRAME_COUNT)
                video_fps = int(video.get(cv2.CAP_PROP_FPS))
            finally:
                # Release each handle; otherwise one stays open per video.
                video.release()
            fps_list.append(no_fps)
    print(fps_list)

[58.0, 31.0, 73.0, 18.0, 18.0, 28.0, 33.0, 69.0, 90.0, 99.0, 79.0, 93.0, 104.0, 75.0, 82.0, 98.0, 80.0, 93.0, 100.0, 80.0, 87.0, 77.0, 95.0, 75.0, 78.0, 85.0, 88.0, 86.0, 83.0, 63.0, 124.0, 74.0, 64.0, 33.0, 47.0, 96.0, 124.0, 64.0, 100.0, 121.0]


In [9]:
# Complete word list; the two subsets below are slices of it.
wlasl = [
    'book', 'drink', 'computer', 'before', 'chair', 'go', 'clothes', 'who', 'candy', 'cousin',
    'deaf', 'fine', 'help', 'no', 'thin', 'walk', 'year', 'yes', 'all', 'black',
    'cool', 'finish', 'hot', 'like', 'many', 'mother', 'now', 'orange', 'table', 'thanksgiving',
    'what', 'woman', 'bed', 'blue', 'bowling', 'can', 'dog', 'f', 'white', 'wrong',
    'accident', 'apple', 'bird', 'change', 'color', 'corn', 'cow', 'dance', 'dark', 'doctor',
    'eat', 'enjoy', 'forget', 'give', 'last', 'meet', 'pink', 'pizza', 'play', 'school',
    'secretary', 'short', 'time', 'want', 'work', 'africa', 'basketball', 'birthday', 'brown', 'but',
    'cheat', 'city', 'cook', 'decide', 'full', 'how', 'jacket', 'letter', 'medicine', 'need',
    'paint', 'paper', 'pull', 'purple', 'right', 'same', 'son', 'tell', 'thursday',
]
wlasl_10 = wlasl[:10]            # words 1-10
wlasl_40 = sorted(wlasl[10:50])  # words 11-50, in alphabetical order
print(wlasl_40)
print(len(wlasl_40))

['accident', 'all', 'apple', 'bed', 'bird', 'black', 'blue', 'bowling', 'can', 'change', 'color', 'cool', 'corn', 'cow', 'dance', 'dark', 'deaf', 'doctor', 'dog', 'f', 'fine', 'finish', 'help', 'hot', 'like', 'many', 'mother', 'no', 'now', 'orange', 'table', 'thanksgiving', 'thin', 'walk', 'what', 'white', 'woman', 'wrong', 'year', 'yes']
40


In [33]:
# Root folder of the keypoint dataset (exported NumPy arrays).
DATA_PATH = os.path.join('test')

# Number of videos per action.
no_sequences = 40

# Actions to detect. To add one, extend the word list used here; the names
# correspond to the folder names under data\WLASL_train.
actions = np.asarray(wlasl_10)

# Currently unused settings, kept for reference:
# sequence_length = 30   (frames per video)
# start_folder = 30

In [34]:
# For every action, create one output folder per file found in that action's
# raw-video folder, named by index 0 .. len(files) - 1.
for action in actions:
    for root, dirs, files in os.walk(r"C:\deep-learning\HKMU\fyp_LSTM\ActionDetectionforSignLanguage\data\test\{}".format(action)):
        for sequence in range(len(files)):
            # exist_ok=True tolerates folders left by an earlier run. Unlike
            # the former bare `except: pass`, real failures (permissions,
            # invalid path) are still raised.
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [None]:

# For every action, collect the frame count of each .mp4 into fps_list
# (despite the "fps" in the names, no_fps is a number of frames).
for action in actions:
    fps_list = []
    for root, dirs, files in os.walk(r"C:\deep-learning\HKMU\fyp_LSTM\ActionDetectionforSignLanguage\data\test\{}".format(action)):  # put the folder to scan here
        for file in files:
            if ('.mp4' in file):
                path = os.path.join(root, file)
                video = cv2.VideoCapture(path)
                try:
                    # CAP_PROP_FRAME_COUNT is the named constant for property id 7.
                    no_fps = video.get(cv2.CAP_PROP_FRAME_COUNT)
                finally:
                    # Release each handle; otherwise one stays open per video.
                    video.release()
                fps_list.append(no_fps)
        # len(files) can serve as no_sequences (videos per action). It must be
        # read inside this os.walk loop: afterwards `files` only holds the
        # listing of the last folder visited.
        print(action, "'s # of videos: ", len(files))
        print("The frames that each video contains: ", fps_list,'\n')

In [None]:
# Debug output: position of `file` within `files`, and `sequence`. All three
# are values left over from loops in previously executed cells.
print(files.index(file))
print(sequence)

In [None]:
# Extract the keypoints of every frame of every .mp4 under each action's folder
# and save them as <DATA_PATH>/<action>/<file index>/<frame number>.npy.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for action in actions:
        for root, dirs, files in os.walk(r"C:\deep-learning\HKMU\fyp_LSTM\ActionDetectionforSignLanguage\data\test\{}".format(action)):  # put the folder to scan here
            # enumerate() gives the file's position in the listing without
            # re-searching `files` for every frame.
            for file_index, file in enumerate(files):
                if '.mp4' not in file:
                    continue
                path = os.path.join(root, file)
                cap = cv2.VideoCapture(path)
                try:
                    # CAP_PROP_FRAME_COUNT is the named constant for property id 7.
                    no_fps = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                    sequence_dir = os.path.join(DATA_PATH, action, str(file_index))
                    for frame_num in range(no_fps):
                        ret, frame = cap.read()
                        if not ret:
                            # The reported frame count can exceed the frames
                            # that can actually be read; stop instead of
                            # passing an empty frame to the detector.
                            break
                        image, results = mediapipe_detection(frame, holistic)
                        draw_styled_landmarks(image, results)
                        keypoints = extract_keypoints(results)
                        # Do not rely on the folders having been created beforehand.
                        os.makedirs(sequence_dir, exist_ok=True)
                        npy_path = os.path.join(sequence_dir, str(frame_num))
                        np.save(npy_path, keypoints)
                finally:
                    # Release every capture, not only the last one opened.
                    cap.release()

cv2.destroyAllWindows()

In [4]:
def create_keypoints_data_folders(data_path, actions):
    """Create the folder skeleton for the keypoint dataset.

    For every action, ``<data_path>/<action>`` is created. Then, for every
    directory found while walking that folder, sub-folders
    ``<data_path>/<action>/0 .. n-1`` are created, where ``n`` is the number
    of files in the walked directory.

    NOTE(review): the walk runs over the output folder itself, so for a
    freshly created (empty) action folder no numbered folders are made.
    Presumably the raw-video folder was meant to be walked -- confirm.

    Args:
        data_path: Root folder of the keypoint dataset.
        actions: Iterable of action names (used as folder names).
    """
    for action in actions:
        action_path = os.path.join(data_path, action)
        # exist_ok=True replaces the check-then-create pair: one call, no gap
        # between the existence test and the creation.
        os.makedirs(action_path, exist_ok=True)
        for _root, _dirs, files in os.walk(action_path):
            for sequence in range(len(files)):
                os.makedirs(os.path.join(action_path, str(sequence)), exist_ok=True)
                    
def process_videos(base_path, actions, data_path):
    """Extract per-frame keypoints from the videos of each action.

    Every file under ``<base_path>/<action>`` whose name contains ``.mp4`` is
    read frame by frame. The keypoints of each frame are saved to
    ``<data_path>/<action>/<file index>/<frame number>.npy``, where the file
    index is the file's position in its directory listing.

    Args:
        base_path: Root folder of the raw videos, one sub-folder per action.
        actions: Iterable of action names.
        data_path: Root folder the keypoint arrays are written to.
    """
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for action in actions:
            for root, _dirs, files in os.walk(os.path.join(base_path, action)):
                # enumerate() gives the file's position in the listing without
                # re-searching `files` for every frame.
                for file_index, file in enumerate(files):
                    if '.mp4' not in file:
                        continue
                    cap = cv2.VideoCapture(os.path.join(root, file))
                    try:
                        # CAP_PROP_FRAME_COUNT is the named constant for property id 7.
                        frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                        sequence_dir = os.path.join(data_path, action, str(file_index))

                        for frame_num in range(frame_total):
                            ret, frame = cap.read()
                            if not ret:
                                # The reported frame count can exceed the
                                # frames that can actually be read; stop
                                # instead of passing an empty frame on.
                                break
                            image, results = mediapipe_detection(frame, holistic)
                            draw_styled_landmarks(image, results)
                            keypoints = extract_keypoints(results)

                            os.makedirs(sequence_dir, exist_ok=True)
                            np.save(os.path.join(sequence_dir, str(frame_num)), keypoints)
                    finally:
                        # Release the capture even when a frame raises.
                        cap.release()
                        cv2.destroyAllWindows()

In [5]:
# Locations of the raw video datasets (train / validation / test).
raw_train_path, raw_val_path, raw_test_path = (
    "../rawdata/train",
    "../rawdata/val",
    "../rawdata/test",
)

In [6]:
# Locations of the extracted keypoint datasets (train / validation / test).
DATA_PATH_TRAIN, DATA_PATH_VAL, DATA_PATH_TEST = (
    "keypoints/train",
    "keypoints/val",
    "keypoints/test",
)

In [14]:
# Build the keypoint folder skeleton for the train, validation and test splits.
for _keypoints_dir in (DATA_PATH_TRAIN, DATA_PATH_VAL, DATA_PATH_TEST):
    create_keypoints_data_folders(_keypoints_dir, wlasl_40)

In [7]:
# Extract keypoints for the train, validation and test splits, in that order.
for _raw_dir, _keypoints_dir in (
    (raw_train_path, DATA_PATH_TRAIN),
    (raw_val_path, DATA_PATH_VAL),
    (raw_test_path, DATA_PATH_TEST),
):
    process_videos(_raw_dir, wlasl_40, _keypoints_dir)