## Imports

In [1]:
import mediapipe as mp
import cv2
import numpy as np
from datetime import datetime

## Mediapipe utilities setup

In [2]:
mp_draw = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic
mp_facemesh = mp.solutions.face_mesh
mp_hands = mp.solutions.hands

## Live Webcam Feed

In [3]:
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    cv2.imshow('Webcam Feed', frame)
    
    # Press 'Q' key to break
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()

## Face Mesh Overlay

In [4]:
cap = cv2.VideoCapture(0)
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)

# Initialize Face Mesh model
with mp_facemesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5, refine_landmarks=True) as face:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor (Format)
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detections
        results = face.process(image)
        
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        if results.multi_face_landmarks:
#             print(results.multi_face_landmarks)
            
            for face_landmarks in results.multi_face_landmarks:
#                 print("yep")
                mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks, 
                                          connections=mp_facemesh.FACEMESH_TESSELATION,
                                          landmark_drawing_spec=None,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_face_mesh_tesselation_style())
                
                mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                          connections=mp_facemesh.FACEMESH_CONTOURS,
                                          landmark_drawing_spec=None,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_face_mesh_contours_style())
                
                mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                          connections=mp_facemesh.FACEMESH_IRISES,
                                          landmark_drawing_spec=None,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_face_mesh_iris_connections_style())
        
        cv2.imshow('Face Mesh Feed', image)
    
        # Press 'Q' key to break
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()

In [29]:
print(len(results.multi_face_landmarks[0].landmark))

478


In [7]:
# Save output from latest frame to csv
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer

for i in range(len(results.multi_face_landmarks)):
    face_output = [[lm.x, lm.y, lm.z] for lm in results.multi_face_landmarks[i].landmark]
    face_output = np.array(face_output)
    out = 'outputCSV/'+str(i)+'_FaceMeshLandmarks_bro_' + date + '.csv'
    np.savetxt(out, face_output, delimiter=',')

In [33]:
# Save output to image
cv2.imshow('image', image)

## Hand LandMarks Overlay

In [24]:
cap = cv2.VideoCapture(0)
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)
node_spec = mp_draw.DrawingSpec(color=[0, 0, 255], thickness=2, circle_radius=2)

# Initialize Hand Tracking model
with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5, model_complexity=1) as hands:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor (Format)
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Detections
        results = hands.process(image)
        
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        if results.multi_hand_landmarks:
#             print(results.multi_hand_landmarks)
#             print('X')
            
            for hand_landmarks in results.multi_hand_landmarks:
# #                 print("yep")
                mp_draw.draw_landmarks(image=image, landmark_list=hand_landmarks, 
                                          connections=mp_hands.HAND_CONNECTIONS,
                                          landmark_drawing_spec=node_spec,
                                          connection_drawing_spec=
                                          mp_drawing_styles.get_default_hand_connections_style())

        cv2.imshow('Hand Feed', image)
    
        # Press 'Q' key to break
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()

print(results.multi_hand_landmarks)

[landmark {
  x: 0.7346022725105286
  y: 0.6931289434432983
  z: 4.656520502521744e-09
}
landmark {
  x: 0.6604050993919373
  y: 0.6600062847137451
  z: -0.00305660767480731
}
landmark {
  x: 0.6022018194198608
  y: 0.5863128304481506
  z: -0.009187880903482437
}
landmark {
  x: 0.5676661133766174
  y: 0.528475284576416
  z: -0.023864444345235825
}
landmark {
  x: 0.547491192817688
  y: 0.4697604775428772
  z: -0.03737421706318855
}
landmark {
  x: 0.6624128818511963
  y: 0.4880448281764984
  z: 0.01916721649467945
}
landmark {
  x: 0.6173908710479736
  y: 0.41898179054260254
  z: -0.012994122691452503
}
landmark {
  x: 0.5820684432983398
  y: 0.41567301750183105
  z: -0.03822685405611992
}
landmark {
  x: 0.5527299642562866
  y: 0.4336656630039215
  z: -0.04937809333205223
}
landmark {
  x: 0.6909274458885193
  y: 0.47017666697502136
  z: 0.0022037893068045378
}
landmark {
  x: 0.6617285013198853
  y: 0.3590472340583801
  z: -0.03121979720890522
}
landmark {
  x: 0.619350254535675
  y

In [25]:
# Save output from latest frame to csv
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer

for i in range(len(results.multi_hand_landmarks)):
    hand_output = [[lm.x, lm.y, lm.z] for lm in results.multi_hand_landmarks[i].landmark]
    hand_output = np.array(hand_output)
#     print(hand_output)
    out = 'outputCSV/' + str(i) + '_handLandMarks_' + date + '.csv'
    np.savetxt(out, hand_output, delimiter=',')

In [9]:
# Break webcam feed if necessary
cap.release()
cv2.destroyAllWindows()

NameError: name 'cap' is not defined

## Video Footage Input

In [5]:
# Display input files
!ls ../videos

anim_face_hand_test.mp4  anim_hand_test.mp4
anim_face_test.mp4	 test_mediapipe.mp4


In [53]:
# Global variables
N_FACEMESH_LANDMARKS = 478

# Set input and output directory
dir_in = '../videos/'
out = 'test_mediapipe.mp4'

# Initialize capture and specifications
vid = cv2.VideoCapture(dir_in + 'anim_face_test.mp4')
drawing_spec = mp_draw.DrawingSpec(thickness=1, circle_radius=1)

if (vid.isOpened() == False):
    print("Error reading file. \n")

# Output dimensions
frame_width = int(vid.get(3))
frame_height = int(vid.get(4))
n_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

# Codec issues
codec = cv2.VideoWriter_fourcc('M','J','P','G')
# codec = int(vid.get(cv2.CAP_PROP_FOURCC)) # mp4 linux incompatibilities

output = cv2.VideoWriter(filename=dir_in+out, fourcc=codec, fps=30, 
                         frameSize=(frame_width, frame_height), isColor=True)

m_size = n_frames * N_FACEMESH_LANDMARKS
output_matrix = np.empty((m_size, 3))

with mp_facemesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5, 
                          refine_landmarks=True) as face:
    frame_count = 0
    while vid.isOpened():
        ret, frame = vid.read()
        
        index_start = frame_count * N_FACEMESH_LANDMARKS
        index_end = (frame_count + 1) * N_FACEMESH_LANDMARKS
        
        # While frames exist
        if ret == True:
            # Recolor (Format)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Detections
            results = face.process(image)
            
            # Store landmark data for one face
#             for i in range(len(results.multi_face_landmarks)): # multiple faces    
            face_output = [[lm.x, lm.y, lm.z] for lm in results.multi_face_landmarks[0].landmark]
            face_output = np.array(face_output) 
            print(index_start, index_end)
            output_matrix[index_start:index_end] = face_output

        
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            
            # Draw results
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks, 
                                              connections=mp_facemesh.FACEMESH_TESSELATION,
                                              landmark_drawing_spec=None,
                                              connection_drawing_spec=
                                              mp_drawing_styles.get_default_face_mesh_tesselation_style())

                    mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                              connections=mp_facemesh.FACEMESH_CONTOURS,
                                              landmark_drawing_spec=None,
                                              connection_drawing_spec=
                                              mp_drawing_styles.get_default_face_mesh_contours_style())

                    mp_draw.draw_landmarks(image=image, landmark_list=face_landmarks,
                                              connections=mp_facemesh.FACEMESH_IRISES,
                                              landmark_drawing_spec=None,
                                              connection_drawing_spec=
                                              mp_drawing_styles.get_default_face_mesh_iris_connections_style())

            # Visualize and save output
            output.write(image)
            cv2.imshow('Frame', image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            frame_count += 1

        else:
            break
        
vid.release()
output.release()
cv2.destroyAllWindows()

# Save landmark data to CSV
date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer
out = 'outputCSV/FULL_FaceMeshLandmarks_' + date + '.csv'
np.savetxt(out, output_matrix, delimiter=',')

0 478
478 956
956 1434
1434 1912
1912 2390
2390 2868
2868 3346
3346 3824
3824 4302
4302 4780
4780 5258
5258 5736
5736 6214
6214 6692
6692 7170
7170 7648
7648 8126
8126 8604
8604 9082
9082 9560
9560 10038
10038 10516
10516 10994
10994 11472
11472 11950
11950 12428
12428 12906
12906 13384
13384 13862
13862 14340
14340 14818
14818 15296
15296 15774
15774 16252
16252 16730
16730 17208
17208 17686
17686 18164
18164 18642
18642 19120
19120 19598
19598 20076
20076 20554
20554 21032
21032 21510
21510 21988
21988 22466
22466 22944
22944 23422
23422 23900
23900 24378
24378 24856
24856 25334
25334 25812
25812 26290
26290 26768
26768 27246
27246 27724
27724 28202
28202 28680
28680 29158
29158 29636
29636 30114
30114 30592
30592 31070
31070 31548
31548 32026
32026 32504
32504 32982
32982 33460
33460 33938
33938 34416
34416 34894
34894 35372
35372 35850
35850 36328
36328 36806
36806 37284
37284 37762
37762 38240
38240 38718
38718 39196
39196 39674
39674 40152
40152 40630
40630 41108
41108 41586
4158

In [47]:
print(frame)

None


In [45]:
# Save output from latest frame to csv
# date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p") # unique identifyer

# for i in range(len(results.multi_face_landmarks)):
#     face_output = [[lm.x, lm.y, lm.z] for lm in results.multi_face_landmarks[i].landmark]
#     face_output = np.array(face_output)
#     out = 'outputCSV/'+str(i)+'_FaceMeshLandmarks_bro_' + date + '.csv'
#     np.savetxt(out, face_output, delimiter=',')

print(vid.get(cv2.CAP_PROP_FRAME_COUNT))

print(len(results.multi_face_landmarks[0].landmark))


# Get the N frames in video input

# Create empty array the size of (N * 478 (facemesh landmarks) x 3)

# Maybe append row containing the N-frames

# Fill in blocks of array with frame data

# export to CSV
N_FACE_LANDMARKS = 478

vid = cv2.VideoCapture(dir_in + 'test_mediapipe.mp4')
n_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
print('There are', n_frames, 'frames in the video.')

output_matrix = np.empty(((N_FACE_LANDMARKS * n_frames), 3))
print(np.shape(output_matrix))
vid.release()

print(output_matrix[:1])
output_matrix[0:2] = [[3, 4, 5], [2, 2, 0]]
print(output_matrix[:3])

0.0
478
There are 118 frames in the video.
(56404, 3)
[[4.67606871e-310 4.67606768e-310 4.67606871e-310]]
[[3.00000000e+00 4.00000000e+00 5.00000000e+00]
 [2.00000000e+00 2.00000000e+00 0.00000000e+00]
 [2.41696328e+35 1.65310612e+40 5.14291268e+25]]


In [15]:
# Exit openCV if necessary
vid.release()
cv2.destroyAllWindows()