In [1]:
import mediapipe as mp
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

In [11]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_holistic = mp.solutions.holistic              # Holistic model class and the *_CONNECTIONS sets
mp_drawing = mp.solutions.drawing_utils          # draw_landmarks() and DrawingSpec
mp_drawing_styles = mp.solutions.drawing_styles  # not referenced by the cells visible in this notebook

In [12]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back a BGR frame plus the results.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)`` (a Holistic instance in
            this notebook).

    Returns:
        ``(bgr_image, results)`` where ``bgr_image`` is the frame after the
        BGR -> RGB -> BGR round trip and ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The frame is read-only only for the duration of the model call
    # (presumably so the model can use the buffer without copying it).
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections

In [13]:
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto ``image`` in place.

    Uses the default drawing style of ``mp_drawing.draw_landmarks``.

    Args:
        image: Image to draw on; modified in place.
        results: Holistic results providing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # (landmark list, connection set) pairs, drawn in this order.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [13]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with per-part colours.

    Every landmark set gets one colour for its points and another for its
    connections; thickness and circle radius are 1 throughout.

    Args:
        image: Image to draw on; modified in place.
        results: Holistic results providing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    def _spec(color):
        # All layers share thickness/radius; only the colour differs.
        return mp_drawing.DrawingSpec(color=color, thickness=1, circle_radius=1)

    # (landmark list, connection set, landmark colour, connection colour).
    # Channel values must stay within 0-255: the face connection colour used
    # to contain 256, which is not a valid 8-bit channel value.
    styled_layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, (80, 110, 10), (80, 255, 121)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 44, 121)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230)),
    )
    for landmark_list, connections, landmark_color, connection_color in styled_layers:
        mp_drawing.draw_landmarks(
            image,
            landmark_list,
            connections,
            _spec(landmark_color),
            _spec(connection_color),
        )

In [14]:
def extract_keypoints(results):
    """Flatten Holistic results into one feature vector.

    Layout of the returned vector (1662 values in total):
    33 pose landmarks x (x, y, z, visibility), then 468 face landmarks,
    21 left-hand landmarks and 21 right-hand landmarks x (x, y, z).
    A landmark set that is missing (falsy) is replaced by zeros of the same
    length.

    Args:
        results: Object with ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks``, ``right_hand_landmarks`` (each either
            falsy or exposing a ``landmark`` sequence) and
            ``segmentation_mask``.

    Returns:
        ``(keypoints, segmentation_mask)`` — the concatenated 1-D array and
        ``results.segmentation_mask`` passed through unchanged.
    """
    def _flatten(landmark_list, count, fields):
        # Missing detection -> zero vector with the length a detection would have.
        if not landmark_list:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in landmark_list.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, 33, xyz + ('visibility',)),
        _flatten(results.face_landmarks, 468, xyz),
        _flatten(results.left_hand_landmarks, 21, xyz),
        _flatten(results.right_hand_landmarks, 21, xyz),
    ]
    return np.concatenate(parts), results.segmentation_mask

In [16]:
def feature_extractor_image_sequences(video_path, key_points_path):
    """Extract Holistic keypoints from a folder of PNG frames.

    Every ``.png`` directly inside ``video_path`` is processed in sorted
    file-name order. Each frame is resized to a height of 300 pixels (the
    width is kept, so the aspect ratio changes), run through MediaPipe
    Holistic, and an annotated copy is written to
    ``<video_path>/tmp/annotated_image_<idx>.png``. The per-frame keypoint
    vectors are stacked and saved to ``<key_points_path>/kp.npy``.

    Args:
        video_path: Folder containing the frame images.
        key_points_path: Folder that receives ``kp.npy``; created if missing.

    Raises:
        Exception: If a frame cannot be read, or if keypoint extraction or
            writing the annotated image fails (the original error is chained).
    """
    # Sort so frames reach the tracker in a deterministic order; os.listdir
    # makes no ordering promise. Assumes file names sort chronologically
    # (e.g. zero-padded frame numbers) — TODO confirm for other datasets.
    image_files = sorted(
        os.path.join(video_path, filename)
        for filename in os.listdir(video_path)
        if filename.endswith('.png')
    )

    # cv2.imwrite does not create missing folders, so make the output one up front.
    annotated_dir = os.path.join(video_path, 'tmp')
    os.makedirs(annotated_dir, exist_ok=True)

    new_height = 300
    all_keypoints = []
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5,
                              static_image_mode=False, model_complexity=2) as holistic:
        for idx, image_file in enumerate(tqdm(image_files)):
            frame = cv2.imread(image_file)
            if frame is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise Exception(f"Could not read image {image_file}")
            # cv2.resize takes (width, height): keep the width, force the height.
            frame = cv2.resize(frame, (frame.shape[1], new_height))

            image, results = mediapipe_detection(frame, holistic)
            try:
                # extract_keypoints returns (keypoints, segmentation_mask);
                # only the keypoint vector belongs in the saved array.
                keypoints, _ = extract_keypoints(results)
                all_keypoints.append(keypoints)

                draw_styled_landmarks(image, results)
                image_path = os.path.join(annotated_dir, f'annotated_image_{idx}.png')
                if not cv2.imwrite(image_path, image):
                    raise Exception("Could not write image")
            except Exception as exc:
                raise Exception(f"Could not extract keypoints for frame {idx}") from exc

    # Stack to shape (num_frames, num_features) and save.
    all_keypoints = np.array(all_keypoints)
    os.makedirs(key_points_path, exist_ok=True)
    np.save(f'{key_points_path}/kp.npy', all_keypoints)

In [45]:
def feature_extractor_video(video_path, annotations_path):
    """Extract Holistic keypoints and segmentation masks from a video file.

    Outputs go to ``<annotations_path>/<video name without extension>/``:

    * ``<idx>.png`` — the segmentation mask of frame ``idx`` scaled to 0-255
      (an all-black mask when the model returned none for that frame);
    * ``kp.npy`` — array of the per-frame keypoint vectors;
    * ``pkp.npy`` — pose landmarks ``(x, y, z, visibility)`` of the most
      recently processed frame (debug artefact, overwritten every frame).

    A debug rendering of the pose skeleton on a white 720x1280 canvas is also
    written to ``fntest.png`` in the current working directory on every frame.

    Args:
        video_path: Path of the video to process.
        annotations_path: Root folder for the per-video output folder.

    Raises:
        Exception: If the video cannot be opened, or if extracting keypoints
            or writing a mask fails for a frame (original error is chained).
    """
    # os.path handles both separators on Windows and strips only the final
    # extension, so "a.b.mp4" maps to folder "a.b" rather than "a".
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    vid_folder = os.path.join(annotations_path, video_name)
    os.makedirs(vid_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        # Fail loudly instead of silently saving an empty kp.npy.
        raise Exception(f"Could not open video {video_path}")

    all_keypoints = []
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5,
                                  static_image_mode=False, model_complexity=2,
                                  enable_segmentation=True) as holistic:
            idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                _, results = mediapipe_detection(frame, holistic)

                # --- debug output: pose landmarks + skeleton on a white canvas ---
                # Guard on pose_landmarks (the attribute actually read) and fall
                # back to a (33, 4) zero array so the rows below stay indexable.
                if results.pose_landmarks:
                    pose_kp = np.array([[res.x, res.y, res.z, res.visibility]
                                        for res in results.pose_landmarks.landmark])
                else:
                    pose_kp = np.zeros((33, 4))
                np.save(f'{vid_folder}/pkp.npy', pose_kp)

                pose = mp.solutions.drawing_utils.landmark_pb2.NormalizedLandmarkList()
                for pkp in pose_kp:
                    pose.landmark.add(x=pkp[0], y=pkp[1], z=pkp[2], visibility=pkp[3])

                white_background = np.ones((720, 1280, 3), dtype=np.uint8) * 255
                mp_drawing.draw_landmarks(
                    white_background, pose, mp_holistic.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                    mp_drawing.DrawingSpec(color=(80, 255, 121), thickness=1, circle_radius=1),
                )
                cv2.imwrite('fntest.png', white_background)

                # --- per-frame keypoints and segmentation mask ---
                try:
                    keypoints, segmentation_mask = extract_keypoints(results)
                    all_keypoints.append(keypoints)

                    if segmentation_mask is None:
                        # No mask for this frame: write an empty one so mask
                        # file numbers stay aligned with kp.npy rows.
                        segmentation_mask = np.zeros(frame.shape[:2], dtype=np.float32)
                    # Scale the mask values to 0-255 for an 8-bit PNG.
                    segmentation_mask = (segmentation_mask * 255).astype(np.uint8)

                    segmentation_mask_path = os.path.join(vid_folder, f'{idx}.png')
                    if not cv2.imwrite(segmentation_mask_path, segmentation_mask):
                        raise Exception("Could not write image")
                except Exception as exc:
                    raise Exception(f"Could not extract keypoints for frame {idx}") from exc
                idx += 1
    finally:
        # Always free the capture handle, even when a frame fails.
        cap.release()

    # Stack to shape (num_frames, num_features) and save.
    all_keypoints = np.array(all_keypoints)
    np.save(f'{vid_folder}/kp.npy', all_keypoints)

In [8]:
# One PHOENIX-2014-T training sequence folder, and the "kp" sub-folder inside it
# that receives the extracted keypoints.
folder_path = "Dataset/PHOENIX-2014-T-release-v3/PHOENIX-2014-T/features/fullFrame-210x260px/train/01April_2010_Thursday_heute-6694"
key_points_path = f"{folder_path}/kp"

In [15]:
# Process every PNG frame in folder_path and save the stacked keypoints to key_points_path/kp.npy.
feature_extractor_image_sequences(folder_path, key_points_path)

100%|██████████| 53/53 [00:28<00:00,  1.83it/s]


In [46]:
# Run the video extractor on one raw clip; outputs land in a folder named
# after the video inside annotations_path.
# NOTE(review): these are absolute, machine-specific paths.
annotations_path = "F:/Datasets/annotations"
vid_path = "F:/Datasets/raw_videos/-_3bUhnn4PU_13-8-rgb_front.mp4"
feature_extractor_video(vid_path, annotations_path)

Traceback (most recent call last):
  File "_pydevd_bundle/pydevd_cython.pyx", line 1134, in _pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch
  File "_pydevd_bundle/pydevd_cython.pyx", line 311, in _pydevd_bundle.pydevd_cython.PyDBFrame.do_wait_suspend
  File "C:\Users\HP\AppData\Roaming\Python\Python310\site-packages\debugpy\_vendored\pydevd\pydevd.py", line 2062, in do_wait_suspend
    keep_suspended = self._do_wait_suspend(thread, frame, event, arg, suspend_type, from_this_thread, frames_tracker)
  File "C:\Users\HP\AppData\Roaming\Python\Python310\site-packages\debugpy\_vendored\pydevd\pydevd.py", line 2098, in _do_wait_suspend
    time.sleep(0.01)
KeyboardInterrupt


KeyboardInterrupt: 

In [3]:
# Read a saved keypoint array from the current working directory.
# NOTE(review): the extractor functions in this notebook save kp.npy inside
# their own output folders, so this relative path presumably relies on the
# working directory or a manual copy — confirm which file is meant.
kp = np.load('kp.npy')

In [4]:
# (frames, features): each row holds 33*4 + 468*3 + 21*3 + 21*3 = 1662 values.
kp.shape

(294, 1662)

In [30]:
# Blank white canvas: 720 rows x 1280 columns, 3 channels, 8-bit.
white_background = np.full((720, 1280, 3), 255, dtype=np.uint8)

In [37]:
# Four independent, initially empty landmark containers — one per body part —
# to be filled from the saved keypoints and handed to mp_drawing.draw_landmarks.
pose, face, lh, rh = (
    mp.solutions.drawing_utils.landmark_pb2.NormalizedLandmarkList()
    for _ in range(4)
)

In [20]:
# Rebuild landmark lists from the saved keypoint rows.
# Row layout (as written by extract_keypoints): 33 pose landmarks stored as
# (x, y, z, visibility), then 468 face, 21 left-hand and 21 right-hand
# landmarks stored as (x, y, z).

# Start from empty lists so re-running this cell does not append duplicates.
for landmark_list in (pose, face, lh, rh):
    landmark_list.ClearField('landmark')

# Offsets where the pose | face | left hand | right hand sections end.
section_ends = np.cumsum([33*4, 468*3, 21*3])

# NOTE(review): landmarks from every frame are appended to the same four
# lists, so they end up holding all frames back to back — confirm whether a
# single frame was intended.
for frame in kp:
    pose_flat, face_flat, lh_flat, rh_flat = np.split(frame, section_ends)
    pose_kp = pose_flat.reshape((33, 4))
    face_kp = face_flat.reshape((468, 3))
    lh_kp = lh_flat.reshape((21, 3))
    rh_kp = rh_flat.reshape((21, 3))

    for pkp in pose_kp:
        # Columns are x, y, z, visibility — in that order.
        pose.landmark.add(x=pkp[0], y=pkp[1], z=pkp[2], visibility=pkp[3])
    for fkp in face_kp:
        face.landmark.add(x=fkp[0], y=fkp[1], z=fkp[2])
    for lkp in lh_kp:
        lh.landmark.add(x=lkp[0], y=lkp[1], z=lkp[2])
    for rkp in rh_kp:
        rh.landmark.add(x=rkp[0], y=rkp[1], z=rkp[2])

In [28]:
# Draw the face mesh on the white canvas (modified in place). The pose and
# hand calls are kept commented out for switching layers on manually.
#mp_drawing.draw_landmarks(white_background, pose, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
mp_drawing.draw_landmarks(white_background, face, mp_holistic.FACEMESH_TESSELATION,
                            mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                            # Channel values must be 0-255; this used to contain 256.
                            mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)
                            )
# mp_drawing.draw_landmarks(white_background, lh, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
# mp_drawing.draw_landmarks(white_background, rh, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [29]:
# Save the canvas as test.png in the current working directory;
# cv2.imwrite's return value is shown as the cell output.
cv2.imwrite('test.png', white_background)

True