In [1]:
# loading required libraries
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

import warnings
warnings.filterwarnings('ignore')

In [2]:
# MediaPipe's holistic solution module: provides the `Holistic` model class and
# the connection constants (FACEMESH_CONTOURS, POSE_CONNECTIONS, HAND_CONNECTIONS)
# used further down in this notebook
mp_holistic = mp.solutions.holistic
# drawing helpers, used to render the detected landmarks onto a frame
mp_drawing = mp.solutions.drawing_utils

In [3]:
# define our custom function
def mediapipe_detection(frame, model):
    """Run ``model`` on a BGR frame and return ``(frame, results)``.

    Parameters
    ----------
    frame : BGR image as delivered by OpenCV (e.g. from ``VideoCapture.read``).
    model : object exposing ``process(image)``; in this notebook a
        ``mp_holistic.Holistic`` instance.

    Returns
    -------
    tuple
        The frame converted back to BGR, and whatever ``model.process`` returned.
    """
    # The model is fed an RGB image, so convert from OpenCV's BGR ordering first.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Flag the array read-only while the model runs, then restore it.
    # NOTE(review): presumably lets MediaPipe avoid a copy - confirm in its docs.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Back to BGR so the frame can be handed to OpenCV display/drawing code.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [4]:
def draw_landmarks(image, results):
    """Render the face, pose and both hand landmark sets from ``results`` on ``image``.

    Nothing is returned; callers keep using ``image`` after the call.
    """
    # (landmarks, connections) pairs, drawn in this order.
    landmark_sets = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),    # face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),     # pose
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),   # left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # right hand
    )
    for landmarks, connections in landmark_sets:
        mp_drawing.draw_landmarks(image, landmarks, connections)

In [5]:
# Live webcam preview: run the holistic model on every frame and show the
# annotated result until 'q' is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)

try:
    # lets set our holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
        # isOpened must be CALLED: the bare bound method is always truthy, which
        # turned this into an endless loop whenever the camera failed to open.
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                # No frame could be grabbed (camera unplugged / stream ended);
                # stop instead of spinning forever on failed reads.
                break

            # detection on video
            frame, results = mediapipe_detection(frame, holistic_model)
            draw_landmarks(frame, results)

            cv2.imshow("Frame", frame)

            # waitKey also pumps the GUI event loop; 'q' quits the preview
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

In [6]:
# Number of pose landmarks in the last frame processed by the webcam loop.
# Assumes a pose was detected in that frame: extract_keypoints below treats
# `results.pose_landmarks` as possibly falsy, in which case this line would fail.
len(results.pose_landmarks.landmark)

33

## Extracting key points

In [7]:
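# face = 468 landmarks (x, y, z each -> 1404 values)
# left, right hand = 21 landmarks each (x, y, z each -> 63 values per hand)
# pose = 33 landmarks (x, y, z, visibility each -> 132 values)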

In [8]:
def extract_keypoints(results):
    """Flatten the landmarks in ``results`` into one 1-D NumPy vector.

    The vector is the concatenation, in this order, of:
      * pose       - (x, y, z, visibility) per landmark, zeros(132) if missing
      * face       - (x, y, z) per landmark,             zeros(1404) if missing
      * left hand  - (x, y, z) per landmark,             zeros(63) if missing
      * right hand - (x, y, z) per landmark,             zeros(63) if missing

    A landmark group that is falsy (e.g. ``None``) is replaced by the zero
    block listed above so the layout of the vector stays the same.
    """
    xyz = ('x', 'y', 'z')

    def _flatten(landmark_list, fields, empty_size):
        # Missing group -> fixed-size zero placeholder.
        if not landmark_list:
            return np.zeros(empty_size)
        rows = [[getattr(point, name) for name in fields]
                for point in landmark_list.landmark]
        return np.array(rows).flatten()

    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 132),
        _flatten(results.face_landmarks, xyz, 1404),
        _flatten(results.left_hand_landmarks, xyz, 21 * 3),
        _flatten(results.right_hand_landmarks, xyz, 21 * 3),
    ]
    return np.concatenate(parts)

In [9]:
# Preview the flattened keypoint vector of the last processed frame.
# Note: `[:-10]` shows everything EXCEPT the last 10 values; use `[-10:]`
# if the intent is to look at only the last 10.
extract_keypoints(results)[:-10]

array([ 0.53896797,  0.47898901, -0.73038661, ...,  0.        ,
        0.        ,  0.        ])

In [10]:
# remember that each frame should now return 1662 values
# (132 pose + 1404 face + 63 left hand + 63 right hand)

## Collecting Key values from each video

In [11]:
# Survey the 'drink' videos: collect each video's frame count so the mean can
# guide the choice of how many frames to sample per video.
drink_dir = 'Dataset/drink/'
frames = []

for i in os.listdir(drink_dir):
    data = cv2.VideoCapture(os.path.join(drink_dir, i))
    try:
        frames.append(data.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # release the file handle/decoder; previously every capture was leaked
        data.release()

np.mean(frames)

# so on average each video is approx. 65 frames long;
# we will sample 30 frames per video

64.53333333333333

In [12]:
# we collect 30 frames (each stored as one np.array of keypoints) from every
# video, regardless of the video's length

# how many frames we want to collect from each video
sequence_length = 30

# for example, a 6 second video @25fps has 150 frames;
# to get 30 evenly spaced frames out of it the step is
# 150/30 = 5, i.e. we take every 5th frame (frames 0, 5, 10, ...)

In [47]:
# Build the output tree Extracted/<gloss>/<video name>/ that the extraction
# cell writes its .npy files into.
actions = ['go', 'thin', 'drink']

# makedirs(..., exist_ok=True) keeps this cell re-runnable: plain os.mkdir raised
# FileExistsError on a second run, and FileNotFoundError for any gloss folder
# present in 'Dataset' but missing from `actions`.
for action in actions:
    os.makedirs(os.path.join('Extracted', action), exist_ok=True)

for gloss in os.listdir('Dataset'):
    for each_vid in os.listdir(os.path.join('Dataset', gloss)):
        # one folder per video, named after the file without its extension
        filename = os.path.splitext(each_vid)[0]
        os.makedirs(os.path.join('Extracted', gloss, filename), exist_ok=True)

In [48]:
# For every video under Dataset/<gloss>/, sample `sequence_length` evenly spaced
# frames, run the holistic model on each and save the flattened keypoints to
# Extracted/<gloss>/<video name>/<frame index>.npy
for gloss in os.listdir('Dataset/'):
    for each_vid in os.listdir(os.path.join('Dataset', gloss)):

        vid_path = os.path.join('Dataset', gloss, each_vid)
        filename = os.path.splitext(each_vid)[0]

        # make sure the target folder exists even if the directory-creation
        # cell was not run (or the dataset changed since)
        output_dir = os.path.join('Extracted', gloss, filename)
        os.makedirs(output_dir, exist_ok=True)

        video_reader = cv2.VideoCapture(vid_path)
        saved_frames = 0

        try:
            if not video_reader.isOpened():
                # previously reported as "Extracted Successfully" with nothing saved
                print("{} - Could not be opened, skipped".format(vid_path))
                continue

            # get total frames of current video
            video_frames_count = video_reader.get(cv2.CAP_PROP_FRAME_COUNT)

            # calculate the interval after which frames will be considered
            skip_frames_window = max(int(video_frames_count/sequence_length), 1)

            # a fresh model per video
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
                for frame_counter in range(sequence_length):

                    # set the current frame position of the video
                    video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter*skip_frames_window)

                    ret, frame = video_reader.read()

                    if not ret:
                        # unreadable frame: skip it and try the next position
                        # (same as before; the old inner `break` was unreachable)
                        continue

                    # the annotated frame is not needed here, only the landmarks
                    _, results = mediapipe_detection(frame, holistic_model)

                    # export the extracted key points
                    keypoints = extract_keypoints(results)
                    saving_path = os.path.join(output_dir, str(frame_counter))
                    np.save(saving_path, keypoints)
                    saved_frames += 1
        finally:
            # always free the decoder, also on errors and on `continue`
            video_reader.release()

        if saved_frames == sequence_length:
            print("{} - Extracted Successfully".format(vid_path))
        else:
            # do not claim success when frames are missing for this video
            print("{} - Only {}/{} frames extracted".format(vid_path, saved_frames, sequence_length))

Dataset\drink\17709.mp4 - Extracted Successfully
Dataset\drink\17710.mp4 - Extracted Successfully
Dataset\drink\17711.mp4 - Extracted Successfully
Dataset\drink\17712.mp4 - Extracted Successfully
Dataset\drink\17713.mp4 - Extracted Successfully
Dataset\drink\17720.mp4 - Extracted Successfully
Dataset\drink\17721.mp4 - Extracted Successfully
Dataset\drink\17722.mp4 - Extracted Successfully
Dataset\drink\17723.mp4 - Extracted Successfully
Dataset\drink\17724.mp4 - Extracted Successfully
Dataset\drink\17733.mp4 - Extracted Successfully
Dataset\drink\17734.mp4 - Extracted Successfully
Dataset\drink\65539.mp4 - Extracted Successfully
Dataset\drink\65540.mp4 - Extracted Successfully
Dataset\drink\69302.mp4 - Extracted Successfully
Dataset\go\24940.mp4 - Extracted Successfully
Dataset\go\24941.mp4 - Extracted Successfully
Dataset\go\24943.mp4 - Extracted Successfully
Dataset\go\24946.mp4 - Extracted Successfully
Dataset\go\24947.mp4 - Extracted Successfully
Dataset\go\24952.mp4 - Extracted Su

In [58]:
# checking if all videos has been extracted

# Total number of files under Extracted/<gloss>/<video>/ - one per saved frame.
extracted_root = 'Extracted'

count = sum(
    len(os.listdir(os.path.join(extracted_root, gloss_dir, video_dir)))
    for gloss_dir in os.listdir(extracted_root)
    for video_dir in os.listdir(os.path.join(extracted_root, gloss_dir))
)
print(count)

1320
