In [31]:
# loading required libraries
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

import warnings
warnings.filterwarnings('ignore')

In [32]:
# MediaPipe holistic solution module: provides the `Holistic` model class and the
# landmark connection sets (face / pose / hands) used by the cells below
mp_holistic = mp.solutions.holistic
# drawing helpers used to render the detected landmarks onto a frame
mp_drawing = mp.solutions.drawing_utils

In [33]:
# run a MediaPipe model on a single OpenCV frame
def mediapipe_detection(frame, model):
    """Run `model` on one BGR frame and return the frame together with the detections.

    The frame is converted from BGR to RGB before being handed to
    ``model.process`` and converted back to BGR afterwards.

    Parameters
    ----------
    frame : array
        Image in BGR channel order (as delivered by OpenCV).
    model : object
        Any object exposing ``process(image)``.

    Returns
    -------
    tuple
        ``(bgr_frame, results)`` where ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # The image is marked read-only for the duration of the prediction only.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [34]:
def draw_landmarks(image, results):
    """Draw the face, pose, left-hand and right-hand landmarks of `results` onto `image`.

    Each landmark set is passed to ``mp_drawing.draw_landmarks`` together with
    its matching connection set, in the order face, pose, left hand, right hand.
    """
    overlays = (
        ("face_landmarks", mp_holistic.FACEMESH_CONTOURS),      # face connections
        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS),       # pose connections
        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS),  # left hand connections
        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS), # right hand connections
    )
    for attribute, connections in overlays:
        mp_drawing.draw_landmarks(image, getattr(results, attribute), connections)

In [60]:
# Live webcam preview: run holistic detection on every captured frame and show
# the annotated result until 'q' is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)

try:
    # lets set our holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
        # isOpened is a method and must be *called*; the bare attribute is always
        # truthy, which made the loop unable to stop when the camera failed to open
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                # no frame available (camera unplugged / stream ended): stop instead
                # of spinning forever without a window to receive the 'q' key
                break

            # detection on video
            frame, results = mediapipe_detection(frame, holistic_model)
            draw_landmarks(frame, results)

            cv2.imshow("Frame", frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # always free the camera and close the preview window, even after an error
    cap.release()
    cv2.destroyAllWindows()

In [10]:
# Number of pose landmarks in the `results` left over from the capture loop above.
# NOTE(review): presumably `pose_landmarks` is None when no pose was detected in the
# last frame (extract_keypoints guards for that), in which case this line raises — confirm.
len(results.pose_landmarks.landmark)

33

## Data Augmentation 
For example, mirroring each video horizontally (`cv2.flip` with flip code 1).

In [58]:
def augment_video(file_path):
    """Write a horizontally mirrored copy of a video next to the original.

    The copy is saved as ``<file_path without extension>_flipped.mp4`` using the
    frame rate and frame size reported by the source video.

    Parameters
    ----------
    file_path : str
        Path of the video to read.

    Raises
    ------
    IOError
        If OpenCV cannot open ``file_path``.
    """
    dst_path = os.path.splitext(file_path)[0] + '_flipped.mp4'

    cap = cv2.VideoCapture(file_path)
    out = None
    try:
        # isOpened must be called; the bare method object is always truthy
        if not cap.isOpened():
            raise IOError('Could not open video: {}'.format(file_path))

        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        fps = cap.get(cv2.CAP_PROP_FPS)
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(dst_path, fourcc, fps, size)

        while True:
            ret, frame = cap.read()

            # check the read result *before* touching the frame: at end of
            # stream `frame` is None and must not be passed to cv2.flip
            if not ret:
                break

            out.write(cv2.flip(frame, 1))  # flip code 1 = mirror around the vertical axis
            # No cv2.waitKey here: nothing is displayed, so the per-frame wait only
            # slowed the conversion down and the 'q' key could never be received.
    finally:
        # release both handles even if reading or writing fails
        if out is not None:
            out.release()
        cap.release()

In [61]:
#augment_video('Dataset/drink/17709.mp4')

In [71]:
# Create a mirrored copy of every original video of every gloss.
# Files that are themselves augmentation outputs ("*_flipped") are skipped so that
# re-running this cell does not mirror them back into duplicates of the originals.
for each_gloss in os.listdir('Dataset/'):
    gloss_dir = os.path.join('Dataset', each_gloss)

    # ignore stray files sitting next to the gloss folders
    if not os.path.isdir(gloss_dir):
        continue

    for each_vid in os.listdir(gloss_dir):
        if os.path.splitext(each_vid)[0].endswith('_flipped'):
            continue

        vid_path = os.path.join(gloss_dir, each_vid)
        augment_video(vid_path)
        print('{} - Augmented Successfully'.format(vid_path))

Dataset\drink\17709.mp4 - Augmented Successfully
Dataset\drink\17710.mp4 - Augmented Successfully
Dataset\drink\17711.mp4 - Augmented Successfully
Dataset\drink\17712.mp4 - Augmented Successfully
Dataset\drink\17713.mp4 - Augmented Successfully
Dataset\drink\17720.mp4 - Augmented Successfully
Dataset\drink\17721.mp4 - Augmented Successfully
Dataset\drink\17722.mp4 - Augmented Successfully
Dataset\drink\17723.mp4 - Augmented Successfully
Dataset\drink\17724.mp4 - Augmented Successfully
Dataset\drink\17733.mp4 - Augmented Successfully
Dataset\drink\17734.mp4 - Augmented Successfully
Dataset\drink\65539.mp4 - Augmented Successfully
Dataset\drink\65540.mp4 - Augmented Successfully
Dataset\drink\69302.mp4 - Augmented Successfully
Dataset\go\24940.mp4 - Augmented Successfully
Dataset\go\24941.mp4 - Augmented Successfully
Dataset\go\24943.mp4 - Augmented Successfully
Dataset\go\24946.mp4 - Augmented Successfully
Dataset\go\24947.mp4 - Augmented Successfully
Dataset\go\24952.mp4 - Augmented Su

## Extracting key points

In [72]:
# face = 468 landmarks (x, y, z each)
# left, right hand = 21 landmarks each (x, y, z each)
# pose = 33 landmarks (x, y, z, visibility each)

In [73]:
def extract_keypoints(results):
    """Flatten the holistic detection `results` into a single 1-D key-point vector.

    Layout of the returned array, in order:
      * pose       - (x, y, z, visibility) per landmark, 132 zeros if missing
      * face       - (x, y, z) per landmark, 1404 zeros if missing
      * left hand  - (x, y, z) per landmark, 63 zeros if missing
      * right hand - (x, y, z) per landmark, 63 zeros if missing
    """
    xyz = ("x", "y", "z")

    def _flatten(landmark_set, fields, fallback_size):
        # A missing (falsy) landmark set is replaced by zeros of the fallback size.
        if not landmark_set:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    pose = _flatten(results.pose_landmarks, xyz + ("visibility",), 132)
    face = _flatten(results.face_landmarks, xyz, 1404)
    left_hand = _flatten(results.left_hand_landmarks, xyz, 21 * 3)
    right_hand = _flatten(results.right_hand_landmarks, xyz, 21 * 3)

    return np.concatenate([pose, face, left_hand, right_hand])

In [74]:
# Preview the key-point vector built from the most recent `results`.
# NOTE(review): `[:-10]` selects everything *except* the last 10 values; if the
# intent was to peek at the last 10 values that would be `[-10:]` — confirm.
extract_keypoints(results)[:-10]

array([ 0.58764875,  0.38250107, -1.28204489, ...,  0.        ,
        0.        ,  0.        ])

In [75]:
# remember that each frame should now return 1662 values 

## Collecting Key values from each video

In [76]:
# Average number of frames per video in the 'drink' gloss, used to pick a
# sensible fixed sequence length.
frames = []

for i in os.listdir('Dataset/drink/'):
    data = cv2.VideoCapture(os.path.join('Dataset/drink/', i))
    try:
        frames.append(data.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # release the file handle; one capture is opened per video
        data.release()

np.mean(frames)

# so on average each video is approx 65 frames,
# we will get 30 frames per video

64.36666666666666

In [77]:
# We collect a fixed number of frames (the key points of each are later saved as
# one np.array) from every video, regardless of the video's length.

# how many frames we want to collect from each video
sequence_length = 30

# Example: a 6-second video @ 25 fps has 150 frames.
# To get 30 evenly spaced frames out of it: 150 / 30 = 5,
# i.e. we keep every 5th frame (frames 0, 5, 10, ...).

In [79]:
# Create the output tree Extracted/<gloss>/<video name>/ that the extraction
# step writes into. makedirs(..., exist_ok=True) keeps this cell re-runnable and
# also covers gloss folders that are present in Dataset but not listed in `actions`.
actions = ['drink', 'go', 'How are you', 'NTMU']

os.makedirs('Extracted', exist_ok=True)

for i in actions:
    os.makedirs(os.path.join('Extracted', i), exist_ok=True)

for gloss in os.listdir('Dataset'):
    for each_vid in os.listdir(os.path.join('Dataset', gloss)):
        filename = os.path.splitext(each_vid)[0]
        os.makedirs(os.path.join('Extracted', gloss, filename), exist_ok=True)

In [80]:
# Sample `sequence_length` evenly spaced frames from every video and save the
# holistic key points of each sampled frame as
# Extracted/<gloss>/<video name>/<frame index>.npy
for gloss in os.listdir('Dataset/'):
    for each_vid in os.listdir(os.path.join('Dataset', gloss)):

        vid_path = os.path.join('Dataset', gloss, each_vid)
        filename = os.path.splitext(each_vid)[0]

        # make sure the target folder exists so np.save cannot fail on a missing directory
        out_dir = os.path.join('Extracted', gloss, filename)
        os.makedirs(out_dir, exist_ok=True)

        video_reader = cv2.VideoCapture(vid_path)
        saved_frames = 0

        try:
            # get total frames of current video
            video_frames_count = video_reader.get(cv2.CAP_PROP_FRAME_COUNT)

            # calculate the interval after which frames will be considered
            skip_frames_window = max(int(video_frames_count/sequence_length), 1)

            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:
                for frame_counter in range(sequence_length):

                    # set the current frame position of the video
                    video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter*skip_frames_window)

                    ret, frame = video_reader.read()

                    # a position that cannot be read is skipped (as before); the
                    # shortfall is reported below instead of being hidden
                    if not ret:
                        continue

                    # only the detections are needed here; the frame is neither shown
                    # nor saved, so landmarks are not drawn onto it
                    frame, results = mediapipe_detection(frame, holistic_model)

                    # export the extracted key points (np.save appends ".npy")
                    keypoints = extract_keypoints(results)
                    saving_path = os.path.join(out_dir, str(frame_counter))
                    np.save(saving_path, keypoints)
                    saved_frames += 1
        finally:
            # release the video even if detection or saving raises
            video_reader.release()

        if saved_frames == sequence_length:
            print("{} - Extracted Successfully".format(vid_path))
        else:
            print("{} - Extracted {} of {} frames".format(vid_path, saved_frames, sequence_length))

Dataset\drink\17709.mp4 - Extracted Successfully
Dataset\drink\17709_flipped.mp4 - Extracted Successfully
Dataset\drink\17710.mp4 - Extracted Successfully
Dataset\drink\17710_flipped.mp4 - Extracted Successfully
Dataset\drink\17711.mp4 - Extracted Successfully
Dataset\drink\17711_flipped.mp4 - Extracted Successfully
Dataset\drink\17712.mp4 - Extracted Successfully
Dataset\drink\17712_flipped.mp4 - Extracted Successfully
Dataset\drink\17713.mp4 - Extracted Successfully
Dataset\drink\17713_flipped.mp4 - Extracted Successfully
Dataset\drink\17720.mp4 - Extracted Successfully
Dataset\drink\17720_flipped.mp4 - Extracted Successfully
Dataset\drink\17721.mp4 - Extracted Successfully
Dataset\drink\17721_flipped.mp4 - Extracted Successfully
Dataset\drink\17722.mp4 - Extracted Successfully
Dataset\drink\17722_flipped.mp4 - Extracted Successfully
Dataset\drink\17723.mp4 - Extracted Successfully
Dataset\drink\17723_flipped.mp4 - Extracted Successfully
Dataset\drink\17724.mp4 - Extracted Successful

In [89]:
# Check that every extracted video folder holds a complete sequence of
# `sequence_length` key-point files, and count the video folders.

count = 0  # number of files in the video folder inspected last
tt = 0     # total number of video folders

for i in os.listdir('Extracted'):
    for j in os.listdir(os.path.join('Extracted', i)):
        tt += 1
        count = len(os.listdir(os.path.join('Extracted', i, j)))

        # compare against the configured sequence length rather than a literal 30
        if count < sequence_length:
            print('{} has missing values'.format(j))
        else:
            print('{} - {}'.format(j, count))
print("- - - - - - - - - - - - - - - - - - - - - - - ")
print('Total files = ', tt)

17709 - 30
17709_flipped - 30
17710 - 30
17710_flipped - 30
17711 - 30
17711_flipped - 30
17712 - 30
17712_flipped - 30
17713 - 30
17713_flipped - 30
17720 - 30
17720_flipped - 30
17721 - 30
17721_flipped - 30
17722 - 30
17722_flipped - 30
17723 - 30
17723_flipped - 30
17733 - 30
17733_flipped - 30
17734 - 30
17734_flipped - 30
65539 - 30
65539_flipped - 30
65540 - 30
65540_flipped - 30
69302 - 30
69302_flipped - 30
24940 - 30
24940_flipped - 30
24941 - 30
24941_flipped - 30
24943 - 30
24943_flipped - 30
24946 - 30
24946_flipped - 30
24947 - 30
24947_flipped - 30
24952 - 30
24952_flipped - 30
24954 - 30
24954_flipped - 30
24955 - 30
24955_flipped - 30
24956 - 30
24956_flipped - 30
24960 - 30
24960_flipped - 30
24961 - 30
24961_flipped - 30
24962 - 30
24962_flipped - 30
24973 - 30
24973_flipped - 30
65824 - 30
65824_flipped - 30
69345 - 30
69345_flipped - 30
1287_edited - 30
1287_edited_flipped - 30
13417_edited - 30
13417_edited_flipped - 30
13693_edited - 30
13693_edited_flipped - 30
