## Imports

In [1]:
import mediapipe as mp
import cv2
import numpy as np
from datetime import datetime

## Mediapipe utilities setup

In [2]:
mp_draw = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic
mp_facemesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands

## Live Webcam Feed

In [3]:
# Run this to check if webcam is working correctly - press Q to exit
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    cv2.imshow('Webcam Feed', frame)
    
    # Press 'Q' key to break
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()

## Face Mesh Overlay - Live Feed

In [4]:
cap = cv2.VideoCapture(0)
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)

# Initialize Face Mesh model
with mp_facemesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5, refine_landmarks=True) as face:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor (Format)
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detections
        results = face.process(image)
        
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        if results.multi_face_landmarks:
            
            for face_landmarks in results.multi_face_landmarks:
                mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks, 
                                          connections=mp_facemesh.FACEMESH_TESSELATION,
                                          landmark_drawing_spec=None,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_face_mesh_tesselation_style())
                
                mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                          connections=mp_facemesh.FACEMESH_CONTOURS,
                                          landmark_drawing_spec=None,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_face_mesh_contours_style())
                
                mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                          connections=mp_facemesh.FACEMESH_IRISES,
                                          landmark_drawing_spec=None,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_face_mesh_iris_connections_style())
        
        cv2.imshow('Face Mesh Feed', image)
    
        # Press 'Q' key to break
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()

Mediapipe stores the detected landmark data in an list (which we called results). You can retrieve the specific data by calling their respective names, e.g. results.multi_face_landmarks. You can find the names on the mediapipe github under the output sections.

https://google.github.io/mediapipe/solutions/face_mesh.html

As the data type is structured for multiple faces / hands, you have to use indices to get the data for a single object out of the list. When only a single face is detected, the output is a single item list.

The landmark data is stored as a 'landmark' list type. You can retrieve the coordinate points by calling landmark[index].x, y or z.

In [7]:
print(len(results.multi_face_landmarks[0].landmark))
print(results.multi_face_landmarks[0].landmark[0])
print(results.multi_face_landmarks[0].landmark[0].x)

478
x: 0.5470672845840454
y: 0.7167614698410034
z: -0.03574107587337494

0.5470672845840454


In [7]:
# Save output from latest frame to csv
# Might have to adjust output path if using Windows.
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer

for i in range(len(results.multi_face_landmarks)):
    face_output = [[lm.x, lm.y, lm.z] for lm in results.multi_face_landmarks[i].landmark]
    face_output = np.array(face_output)
    out = 'outputCSV/'+str(i)+'_FaceMeshLandmark_' + date + '.csv'
    np.savetxt(out, face_output, delimiter=',')

## Hand LandMarks Overlay - Live feed

In [55]:
cap = cv2.VideoCapture(0)
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)
node_spec = mp_draw.DrawingSpec(color=[0, 0, 255], thickness=2, circle_radius=2)

# Initialize Hand Tracking model
with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as hands:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor (Format)
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detections
        results = hands.process(image)
        
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        if results.multi_hand_landmarks:
            
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(image=image, landmark_list=hand_landmarks, 
                                          connections=mp_hands.HAND_CONNECTIONS,
                                          landmark_drawing_spec=node_spec,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_hand_connections_style())

        cv2.imshow('Hand Feed', image)
    
        # Press 'Q' key to break
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()

print(len(results.multi_hand_landmarks))

[landmark {
  x: 0.8968068361282349
  y: 1.0146245956420898
  z: -8.279963594759465e-07
}
landmark {
  x: 0.8315862417221069
  y: 0.9758496284484863
  z: 0.01681501232087612
}
landmark {
  x: 0.7972166538238525
  y: 0.898264467716217
  z: 0.016285087913274765
}
landmark {
  x: 0.7665612697601318
  y: 0.8454241752624512
  z: 0.00561297032982111
}
landmark {
  x: 0.7412075996398926
  y: 0.7989298701286316
  z: -0.004731133580207825
}
landmark {
  x: 0.885753333568573
  y: 0.775733232498169
  z: 0.026947569102048874
}
landmark {
  x: 0.8115337491035461
  y: 0.7107788324356079
  z: 0.0037142569199204445
}
landmark {
  x: 0.7544031143188477
  y: 0.7258462905883789
  z: -0.013496773317456245
}
landmark {
  x: 0.7216585874557495
  y: 0.7647827863693237
  z: -0.020610807463526726
}
landmark {
  x: 0.9026695489883423
  y: 0.7761173844337463
  z: 0.004147236235439777
}
landmark {
  x: 0.8080688118934631
  y: 0.7026156783103943
  z: -0.01287606917321682
}
landmark {
  x: 0.7474258542060852
  y: 0

[landmark {
  x: 0.7950757145881653
  y: 0.7533833980560303
  z: -2.439488753225305e-07
}
landmark {
  x: 0.8036684393882751
  y: 0.6996100544929504
  z: 0.02641146630048752
}
landmark {
  x: 0.7916319370269775
  y: 0.6560523509979248
  z: 0.03761348873376846
}
landmark {
  x: 0.773188054561615
  y: 0.6337231397628784
  z: 0.04138607904314995
}
landmark {
  x: 0.7541595101356506
  y: 0.6211019158363342
  z: 0.04508350417017937
}
landmark {
  x: 0.7994259595870972
  y: 0.63111412525177
  z: 0.02843562141060829
}
landmark {
  x: 0.7763242721557617
  y: 0.571455717086792
  z: 0.029182616621255875
}
landmark {
  x: 0.7502204775810242
  y: 0.5796210765838623
  z: 0.03224504739046097
}
landmark {
  x: 0.7420371770858765
  y: 0.5993043184280396
  z: 0.03555325046181679
}
landmark {
  x: 0.7844066619873047
  y: 0.6242755055427551
  z: 0.012511316686868668
}
landmark {
  x: 0.758682906627655
  y: 0.5533515810966492
  z: 0.012616856954991817
}
landmark {
  x: 0.7299677729606628
  y: 0.5656253099

[landmark {
  x: 0.8372852206230164
  y: 0.7852185964584351
  z: -4.4200388060744444e-07
}
landmark {
  x: 0.8307827115058899
  y: 0.7177638411521912
  z: 0.0651138424873352
}
landmark {
  x: 0.8081973791122437
  y: 0.6719655990600586
  z: 0.09511025249958038
}
landmark {
  x: 0.7853938937187195
  y: 0.6426332592964172
  z: 0.10899540036916733
}
landmark {
  x: 0.7650449872016907
  y: 0.6199243068695068
  z: 0.12106060236692429
}
landmark {
  x: 0.8108361959457397
  y: 0.6218295693397522
  z: 0.08518962562084198
}
landmark {
  x: 0.7731253504753113
  y: 0.5617263317108154
  z: 0.10033252090215683
}
landmark {
  x: 0.7407029271125793
  y: 0.5715587139129639
  z: 0.11002794653177261
}
landmark {
  x: 0.7295952439308167
  y: 0.5914114117622375
  z: 0.11754368990659714
}
landmark {
  x: 0.7952428460121155
  y: 0.6129297614097595
  z: 0.05317801982164383
}
landmark {
  x: 0.7450969219207764
  y: 0.5391849279403687
  z: 0.0681733712553978
}
landmark {
  x: 0.7093113660812378
  y: 0.557502686

[landmark {
  x: 0.8785816431045532
  y: 0.8415395617485046
  z: -7.331945539590379e-07
}
landmark {
  x: 0.8222658634185791
  y: 0.8455435037612915
  z: 0.019484657794237137
}
landmark {
  x: 0.7915577292442322
  y: 0.8112189173698425
  z: 0.0245366133749485
}
landmark {
  x: 0.7635707259178162
  y: 0.7919709086418152
  z: 0.020462488755583763
}
landmark {
  x: 0.7438080310821533
  y: 0.7730282545089722
  z: 0.015927091240882874
}
landmark {
  x: 0.840397834777832
  y: 0.6706104278564453
  z: 0.04055636748671532
}
landmark {
  x: 0.7836745381355286
  y: 0.6410583257675171
  z: 0.037480443716049194
}
landmark {
  x: 0.7520284652709961
  y: 0.6670775413513184
  z: 0.02975127100944519
}
landmark {
  x: 0.7431098222732544
  y: 0.7034889459609985
  z: 0.025562996044754982
}
landmark {
  x: 0.8417573571205139
  y: 0.6515900492668152
  z: 0.022845599800348282
}
landmark {
  x: 0.7718755602836609
  y: 0.6108705997467041
  z: 0.02495577000081539
}
landmark {
  x: 0.7345458269119263
  y: 0.6490

TypeError: object of type 'NoneType' has no len()

In [25]:
# Save output from latest frame to csv
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer

for i in range(len(results.multi_hand_landmarks)):
    hand_output = [[lm.x, lm.y, lm.z] for lm in results.multi_hand_landmarks[i].landmark]
    hand_output = np.array(hand_output)
#     print(hand_output)
    out = 'outputCSV/' + str(i) + '_handLandMarks_' + date + '.csv'
    np.savetxt(out, hand_output, delimiter=',')

In [9]:
# Break webcam feed if necessary
cap.release()
cv2.destroyAllWindows()

NameError: name 'cap' is not defined

## Video Footage Input

In [5]:
# Display input files
!ls ../videos

anim_face_hand_test.mp4  anim_hand_test.mp4
anim_face_test.mp4	 test_mediapipe.mp4


## Face Mesh Data extraction - Video

In [61]:
# Global variables
N_FACEMESH_LANDMARKS = 478

# Set input and output directory
dir_in = '../videos/'
out = 'test_mediapipe.mp4'

# Initialize capture and specifications
vid = cv2.VideoCapture(dir_in + 'anim_face_test.mp4')
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)

if (vid.isOpened() == False):
    print("Error reading file. \n")

# Output dimensions
frame_width = int(vid.get(3))
frame_height = int(vid.get(4))
n_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

# Codec issues
codec = cv2.VideoWriter_fourcc('M','J','P','G')
# codec = int(vid.get(cv2.CAP_PROP_FOURCC)) # mp4 linux incompatibilities

output = cv2.VideoWriter(filename=dir_in+out, fourcc=codec, fps=30, 
                         frameSize=(frame_width, frame_height), isColor=True)

# Define output array
m_size = n_frames * N_FACEMESH_LANDMARKS
output_matrix = np.empty((m_size, 3))

with mp_facemesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5, 
                          refine_landmarks=True) as face:
    frame_count = 0
    while vid.isOpened():
        ret, frame = vid.read()
        
        # Block to insert data
        index_start = frame_count * N_FACEMESH_LANDMARKS
        index_end = (frame_count + 1) * N_FACEMESH_LANDMARKS
        
        # While frames exist
        if ret == True:
            # Recolor (Format)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Detections
            results = face.process(image)
            
            # Store landmark data for one face
#             for i in range(len(results.multi_face_landmarks)): # multiple faces    
            face_output = [[lm.x, lm.y, lm.z] for lm in results.multi_face_landmarks[0].landmark]
            face_output = np.array(face_output) 
            output_matrix[index_start:index_end] = face_output

        
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            
            # Draw results
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks, 
                                              connections=mp_facemesh.FACEMESH_TESSELATION,
                                              landmark_drawing_spec=None,
                                              connection_drawing_spec=
                                              mp_drawing_styles.get_default_face_mesh_tesselation_style())

                    mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                              connections=mp_facemesh.FACEMESH_CONTOURS,
                                              landmark_drawing_spec=None,
                                              connection_drawing_spec=
                                              mp_drawing_styles.get_default_face_mesh_contours_style())

                    mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                              connections=mp_facemesh.FACEMESH_IRISES,
                                              landmark_drawing_spec=None,
                                              connection_drawing_spec=
                                              mp_drawing_styles.get_default_face_mesh_iris_connections_style())

            # Visualize and save output
            output.write(image)
            cv2.imshow('Frame', image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            frame_count += 1

        else:
            break
        
vid.release()
output.release()
cv2.destroyAllWindows()

# Save landmark data to CSV
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer
out = 'outputCSV/FULL_FaceMeshLandmarks_' + date + '.csv'
np.savetxt(out, output_matrix, delimiter=',')

## Hand landmarks example

In [67]:
# Global variables
N_HAND_LANDMARKS = 21

# Set input and output directory
dir_in = '../videos/'
out = 'test_mediapipe_hand.mp4'

# Initialize capture and specifications
vid = cv2.VideoCapture(dir_in + 'anim_hand_test.mp4')
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)
node_spec = mp_draw.DrawingSpec(color=[0, 0, 255], thickness=2, circle_radius=2)

if (vid.isOpened() == False):
    print("Error reading file. \n")

# Output dimensions
frame_width = int(vid.get(3))
frame_height = int(vid.get(4))
n_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

# Codec issues
codec = cv2.VideoWriter_fourcc('M','J','P','G')
# codec = int(vid.get(cv2.CAP_PROP_FOURCC)) # mp4 linux incompatibilities

output = cv2.VideoWriter(filename=dir_in+out, fourcc=codec, fps=30, 
                         frameSize=(frame_width, frame_height), isColor=True)

m_size = n_frames * N_HAND_LANDMARKS
output_matrix_left = np.empty((m_size, 3))
output_matrix_right = np.empty((m_size, 3))

with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as hands:
    frame_count = 0
    while vid.isOpened():
        ret, frame = vid.read()
        
        index_start = frame_count * N_HAND_LANDMARKS
        index_end = (frame_count + 1) * N_HAND_LANDMARKS
        
        # While frames exist
        if ret == True:
            # Recolor (Format)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Detections
            results = hands.process(image)
            
            # Store landmark data for two hands
            if results.multi_hand_landmarks:
#                 print(results.multi_handedness)
#                 for i in range(len(results.multi_hand_landmarks)): # multiple hands
#                     print(results.multi_hand_landmarks[i])
                hand_output = [[lm.x, lm.y, lm.z] for lm in results.multi_hand_landmarks[0].landmark]
                hand_output = np.array(hand_output) 
                output_matrix_right[index_start:index_end] = hand_output

        
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            
            # Draw results
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(image=image, landmark_list=hand_landmarks, 
                                          connections=mp_hands.HAND_CONNECTIONS,
                                          landmark_drawing_spec=node_spec,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_hand_connections_style())

            # Visualize and save output
            output.write(image)
            cv2.imshow('Frame', image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            frame_count += 1

        else:
            break
        
vid.release()
output.release()
cv2.destroyAllWindows()

# Save landmark data to CSV
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer
out = 'outputCSV/FULL_RIGHT_HandLandmarks_' + date + '.csv'
np.savetxt(out, output_matrix_right, delimiter=',')

196


In [15]:
# Exit openCV if necessary
vid.release()
cv2.destroyAllWindows()