credit: github/nicknochnack

In [2]:
# !pip install --user mediapipe

In [3]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# Keypoints using MP Holistic

In [4]:
# Short aliases for the two MediaPipe solution modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils   # helpers that draw landmarks onto a frame
mp_holistic = mp.solutions.holistic       # holistic model: face, pose and both hands

In [5]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return `(bgr_image, results)`.

    The frame is converted to RGB before being handed to `model.process`
    and converted back to BGR afterwards, so the returned image can be
    passed straight to the OpenCV drawing/display calls.

    Args:
        image: frame in BGR channel order (as delivered by `cv2.VideoCapture.read`).
        model: object exposing `process(rgb_image)`; the notebook passes a
            `mp_holistic.Holistic` instance.

    Returns:
        Tuple of the BGR image produced by the round trip and whatever
        `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The array is flagged read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [6]:
def draw_styled_landmarks(image, results):
    """Draw the face, pose and hand landmarks from `results` onto `image`.

    The image is modified in place and nothing is returned. Each landmark
    group gets its own pair of styles: one for the landmark points and one
    for the connections between them.

    Args:
        image: BGR frame to draw on.
        results: holistic detection results providing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and `right_hand_landmarks`.
    """
    spec = mp_drawing.DrawingSpec

    # (landmarks, connections, landmark style, connection style), drawn in this order.
    layers = (
        # Face. The connection colour used to be (80,256,121); 256 is outside the
        # 0-255 range of an 8-bit channel, so it is written as 255 here.
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         spec(color=(80,255,121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, landmark_style, connection_style in layers:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_style, connection_style)

In [107]:
# testing
# Live webcam preview with landmarks drawn on screen; the raw (un-annotated)
# frames are recorded to output.avi. Press 'q' in the preview window to stop.
cap = cv2.VideoCapture(0)


fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter('output.avi', fourcc, 30.0, (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                                    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

# try/finally so the camera, the writer and the window are released even when
# something inside the loop raises.
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop here instead of passing None to
                # cv2.cvtColor inside mediapipe_detection.
                print('Could not read a frame from the camera; stopping.')
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)
            out.write(frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    out.release()
    cap.release()
    cv2.destroyAllWindows()

In [8]:
# Manual cleanup: release the writer and the camera, then close the preview window.
# The cell above already does this when it finishes normally; presumably this cell
# is kept for runs that were interrupted part-way.
for handle in (out, cap):
    handle.release()
cv2.destroyAllWindows()

# Extract Keypoint Values

In [9]:
def extract_keypoints(results):
    """Flatten the landmarks in `results` into a single 1-D array.

    Groups are concatenated in the order pose, face, left hand, right hand.
    Pose landmarks contribute (x, y, z, visibility); the other groups
    contribute (x, y, z). A group with no detection is replaced by zeros
    sized for 33 pose, 468 face or 21 hand landmarks, so a result with no
    detections at all yields 1662 zeros.

    Args:
        results: object with `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`; each is either
            falsy or has a `.landmark` iterable.

    Returns:
        1-D numpy array of the concatenated values.
    """
    def _flatten(group, fields, expected_points):
        # The zero placeholder keeps this group's slot in the vector when
        # nothing was detected.
        if not group:
            return np.zeros(expected_points * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    return np.concatenate([
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ])

# Setup Folders for Collection

In [105]:
# Tag appended to the output folder names (presumably the person being recorded)
name = 'Mo'

# Action labels recorded in this run; alternative label sets are left commented out
# actions = np.array(['ineed', 'ambulance', 'where', 'street'])
# actions = np.array(['thankyou', 'work', 'bootcamp', 'in this'])
# actions = np.array(['i_need_ambulance', 'i_want', 'report', 'accedint'])
# actions = np.array(['thankyou_bootcamp'])
actions = np.array(['final_sentetnce'])

# Number of videos recorded per action
no_sequences = 30

# Number of frames in each video
sequence_length = 30

# Output folders: DATA_PATH receives the per-frame keypoint arrays (.npy),
# Video_PATH receives the recorded videos. Both names embed the action labels
# and the tag above.
DATA_PATH = os.path.join('MP_Data 15 {} {}'.format(" ".join(actions), name))
Video_PATH = os.path.join('MP_Video 15 {} {}'.format(" ".join(actions), name))

# Folder start
# start_folder = 30

In [106]:
# Create the output folder tree before recording starts.
# exist_ok=True makes re-running this cell harmless when the folders already
# exist, while real failures (e.g. no write permission) are still reported
# instead of being silently swallowed.
for action in actions:
    # One folder per video, holding that video's per-frame keypoint arrays
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

    # One folder per action for the recorded video files
    os.makedirs(os.path.join(Video_PATH, action), exist_ok=True)

# Collect Keypoint Values for Training and Testing

In [108]:
# Record `no_sequences` videos of `sequence_length` frames for every action.
# For each frame the keypoint vector is saved under DATA_PATH and the raw frame
# is appended to that sequence's video under Video_PATH.
# Press 'q' in the preview window to stop the whole collection.
cap = cv2.VideoCapture(0)


fourcc = cv2.VideoWriter_fourcc(*'MJPG')
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

# Set when 'q' is pressed or the camera stops delivering frames. Every loop level
# checks it, so stopping ends the collection instead of only the current video.
stop_requested = False

# try/finally so the camera and the window are released even if something raises.
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Loop through actions
        for action in actions:
            if stop_requested:
                break

            # Loop through sequences aka videos
            for sequence in range(no_sequences):
                if stop_requested:
                    break

                out = cv2.VideoWriter(Video_PATH + f'/{action}/{action} {sequence}.avi', fourcc, 30.0, frame_size)
                try:
                    # Loop through video length aka sequence length
                    for frame_num in range(sequence_length):

                        # Read feed
                        ret, frame = cap.read()
                        if not ret:
                            # No frame delivered; stop instead of passing None on
                            # to cv2.cvtColor inside mediapipe_detection.
                            print('Could not read a frame from the camera; stopping collection.')
                            stop_requested = True
                            break

                        # Make detections
                        image, results = mediapipe_detection(frame, holistic)

                        # Draw landmarks
                        draw_styled_landmarks(image, results)

                        # Apply wait logic: the first frame of every video carries an extra
                        # banner and is held on screen for 2 seconds.
                        if frame_num == 0:
                            cv2.putText(image, 'STARTING COLLECTION', (220,200),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                        cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (115,40),
                                    cv2.FONT_HERSHEY_COMPLEX, .6, (0, 0, 255), 1, cv2.LINE_AA)
                        # Show to screen
                        cv2.imshow('OpenCV Feed', image)
                        if frame_num == 0:
                            cv2.waitKey(2000)

                        # Export keypoints
                        keypoints = extract_keypoints(results)
                        npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                        np.save(npy_path, keypoints)

                        # Record the raw frame (without the overlay drawn on `image`)
                        out.write(frame)

                        # Stop the whole collection on 'q'
                        if cv2.waitKey(10) & 0xFF == ord('q'):
                            stop_requested = True
                            break
                finally:
                    # Close this sequence's video file whether or not it is complete
                    out.release()
finally:
    cap.release()
    cv2.destroyAllWindows()

In [72]:
# Manual cleanup: release the last video writer and the camera, then close the
# preview window. The collection cell above already does this when it finishes
# normally; presumably this cell is kept for runs that were interrupted part-way.
for handle in (out, cap):
    handle.release()
cv2.destroyAllWindows()

# MediaPipe on a video

In [41]:
# Run the holistic model over a recorded video and write an annotated copy
# (landmarks drawn on every frame) to output2.avi at the source frame rate.
video_path = './MP_Videos 2/ambulance/ambulance 20.avi'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file
    cap.release()
    raise IOError(f'Could not open video file: {video_path}')

fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter('output2.avi', fourcc, cap.get(cv2.CAP_PROP_FPS), (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                                                         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

# try/finally so both files are closed even if processing raises.
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Read frames sequentially until the reader reports no more frames. This
        # avoids relying on the reported frame count being exact and avoids a
        # seek before every read.
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Keypoints of the current frame; computed but not saved anywhere in this cell
            keypoints = extract_keypoints(results)

            out.write(image)
finally:
    out.release()
    cap.release()
    cv2.destroyAllWindows()

In [42]:
# Manual cleanup: close the output writer and the input video, then close any
# OpenCV windows. The cell above already does this when it finishes normally;
# presumably this cell is kept for runs that were interrupted part-way.
for handle in (out, cap):
    handle.release()
cv2.destroyAllWindows()

In [None]:
# Presentation

In [112]:
# testing
# Live webcam preview with landmarks; the frames, with a running frame counter
# drawn on them, are recorded to 'sentence 5.mp4'. Press 'q' to stop.
cap = cv2.VideoCapture(0)


# NOTE(review): the file has an .mp4 extension but the codec is MJPG - confirm the
# recording plays as intended on the target machine.
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter('sentence 5.mp4', fourcc, 30.0, (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                                    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

i = 0
# try/finally so the camera, the writer and the window are released even when
# something inside the loop raises.
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame delivered; stop before drawing on / converting None
                print('Could not read a frame from the camera; stopping.')
                break

            # Frame counter cycling 1..30, matching the 30-frame window used by the
            # commented-out prediction logic below (the previous counter ran 0..30,
            # i.e. 31 frames per cycle).
            i = i % 30 + 1

            # Drawn on `frame`, so the counter ends up in the recorded video as well
            cv2.putText(frame, str(i), (30,100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
#             print(i, end=' ')


            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic
#             keypoints = extract_keypoints(results)
#             sequence.append(keypoints)
#             sequence = sequence[-30:]

#             cv2.putText(image, str(len(sequence)), (30,100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

#             if len(sequence) == 30:
#                 sequence = []

            # Show to screen
            cv2.imshow('OpenCV Feed', image)
            out.write(frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    out.release()
    cap.release()
    cv2.destroyAllWindows()

In [110]:
# Manual cleanup: release the writer and the camera, then close the preview window.
# The cell above already does this when it finishes normally; presumably this cell
# is kept for runs that were interrupted part-way.
for handle in (out, cap):
    handle.release()
cv2.destroyAllWindows()