<h1>1. Import and Install Dependencies</h1>

In [1]:
pip install tensorflow==2.14.0 opencv-python mediapipe scikit-learn matplotlib

Note: you may need to restart the kernel to use updated packages.


In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

<h1>2. Keypoints using MP Holistic</h1>

In [3]:
# Short aliases for the two MediaPipe solution modules used throughout the notebook.
mp_holistic = mp.solutions.holistic  # Holistic model (face, hands, and pose landmarks)
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities (used to render landmarks onto frames)


In [4]:
def mediapipe_detection(image, model):
    """Run ``model.process`` on an RGB version of a BGR image.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV capture).
        model: Object exposing ``process(image)``; in this notebook it is a
            ``mp_holistic.Holistic`` instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB for the model

    # Mark the buffer read-only while the model processes it, then restore.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)  # RGB -> BGR for display
    return bgr_frame, results

# OpenCV gives out a feed in BGR format, and MediaPipe detection needs RGB,
# so we convert BGR to RGB, set the image to non-writeable to save a bit of memory, and run the detection;
# then we set it back to writeable and convert it back to BGR.

In [5]:
def draw_landmarks(frame, results):
    """Draw face, pose and both hand landmark groups onto ``frame``.

    Args:
        frame: Image passed to ``mp_drawing.draw_landmarks`` as the drawing target.
        results: Detection results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # (landmark group, connection set) pairs, drawn in this order.
    landmark_groups = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),      # face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),       # pose
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS), # right hand
    )
    for landmarks, connections in landmark_groups:
        mp_drawing.draw_landmarks(frame, landmarks, connections)
    

In [6]:
def draw_styled_landmarks(frame, results):
    """Draw face, pose and hand landmarks onto ``frame`` with custom styling.

    Every group uses the same connection colour ``(0, 255, 128)``; the
    landmark colour, line thickness and circle radius vary per group.

    Args:
        frame: Image passed to ``mp_drawing.draw_landmarks`` as the drawing target.
        results: Detection results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    connection_color = (0, 255, 128)

    # (landmarks, connections, landmark colour, thickness, circle radius)
    styled_groups = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, (0, 0, 255), 1, 1),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, (0, 0, 255), 2, 4),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (255, 51, 51), 2, 4),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (255, 51, 51), 2, 4),
    )

    for landmarks, connections, point_color, thickness, radius in styled_groups:
        point_spec = mp_drawing.DrawingSpec(
            color=point_color, thickness=thickness, circle_radius=radius
        )
        line_spec = mp_drawing.DrawingSpec(
            color=connection_color, thickness=thickness, circle_radius=radius
        )
        mp_drawing.draw_landmarks(frame, landmarks, connections, point_spec, line_spec)

In [7]:
# Live webcam preview: run holistic detection on every frame and show the
# annotated image until 'q' is pressed or the camera stops delivering frames.
# Leaves `frame`, `image` and `results` from the last iteration in the
# notebook namespace for the cells below.
cv2.namedWindow("sign detection camera")
cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed (camera unplugged / busy / stream ended);
                # stop instead of handing an empty frame to cv2.cvtColor.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Show on screen
            cv2.imshow('sign detection camera', image)

            # Break gracefully when 'q' is pressed
            if cv2.waitKey(10) & 0xff == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1732020666.798594  259713 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1732020666.864129  259945 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732020666.878940  259945 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732020666.881075  259945 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732020666.881195  259946 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1732020666.881518  259950 inference_feedback_manager.cc:114] Feedback manager requires a mod

In [8]:
# plt.imshow(frame)

In [9]:
# draw_styled_landmarks(frame, results)

In [10]:
# plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

<h1>Extract Keypoints Values</h1>

In [11]:
results.face_landmarks

landmark {
  x: 0.50320673
  y: 0.619583428
  z: -0.0361103341
}
landmark {
  x: 0.507031202
  y: 0.548869133
  z: -0.0595944896
}
landmark {
  x: 0.505044937
  y: 0.572627604
  z: -0.0330460779
}
landmark {
  x: 0.498796195
  y: 0.478242248
  z: -0.0418761596
}
landmark {
  x: 0.507922
  y: 0.526689887
  z: -0.0625065193
}
landmark {
  x: 0.508535504
  y: 0.498324037
  z: -0.0568889827
}
landmark {
  x: 0.509415567
  y: 0.432722211
  z: -0.0231473222
}
landmark {
  x: 0.425983399
  y: 0.43118161
  z: 0.0183099713
}
landmark {
  x: 0.510790706
  y: 0.3829301
  z: -0.0131105734
}
landmark {
  x: 0.512055933
  y: 0.354584038
  z: -0.0139039801
}
landmark {
  x: 0.515235841
  y: 0.267216891
  z: 0.00367058511
}
landmark {
  x: 0.502732396
  y: 0.629459262
  z: -0.0348850302
}
landmark {
  x: 0.502195179
  y: 0.636968613
  z: -0.0311833452
}
landmark {
  x: 0.50188452
  y: 0.640169561
  z: -0.026364103
}
landmark {
  x: 0.501621366
  y: 0.641328335
  z: -0.0263138842
}
landmark {
  x: 0.50

In [12]:
# Flatten each landmark group of `results` into a 1-D vector. This is the
# vectorised form of looping over the landmarks and appending
# np.array([x, y, z, ...]) for each one. When a group was not detected,
# a zero vector of the matching length is used instead.

# Pose: 33 landmarks with x, y, z and visibility.
if results.pose_landmarks:
    pose = np.array(
        [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
    ).flatten()
else:
    pose = np.zeros(33 * 4)

# Left hand: 21 landmarks with x, y, z (visibility is not used for hands).
if results.left_hand_landmarks:
    lh = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
    ).flatten()
else:
    lh = np.zeros(21 * 3)

# Right hand: same layout as the left hand.
if results.right_hand_landmarks:
    rh = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
    ).flatten()
else:
    rh = np.zeros(21 * 3)

# Face: same x, y, z layout, but with 468 landmarks.
if results.face_landmarks:
    face = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.face_landmarks.landmark]
    ).flatten()
else:
    face = np.zeros(468 * 3)


In [13]:
pose

array([ 4.91934597e-01,  5.69648266e-01, -8.15049529e-01,  9.99959171e-01,
        5.26479244e-01,  4.70276684e-01, -7.69052088e-01,  9.99891818e-01,
        5.45857310e-01,  4.72908139e-01, -7.68495500e-01,  9.99903440e-01,
        5.64826131e-01,  4.77262974e-01, -7.68016815e-01,  9.99879420e-01,
        4.63754952e-01,  4.62671369e-01, -7.54907548e-01,  9.99903142e-01,
        4.41159397e-01,  4.60589767e-01, -7.53631234e-01,  9.99886334e-01,
        4.24200833e-01,  4.61076707e-01, -7.53845513e-01,  9.99851525e-01,
        5.96776307e-01,  5.06843328e-01, -4.60074723e-01,  9.99883771e-01,
        3.97531122e-01,  4.99700576e-01, -3.65188062e-01,  9.99926686e-01,
        5.24582028e-01,  6.71915829e-01, -6.98886037e-01,  9.99872923e-01,
        4.46594477e-01,  6.72222018e-01, -6.73170328e-01,  9.99903142e-01,
        7.43107200e-01,  9.41390157e-01, -3.25357944e-01,  9.97164607e-01,
        2.53447503e-01,  9.55788255e-01, -9.93335173e-02,  9.92301881e-01,
        9.95560288e-01,  

In [14]:
lh

array([ 8.12201917e-01,  5.81031859e-01, -5.77264359e-07,  7.36015737e-01,
        5.18393159e-01,  3.49531360e-02,  6.84232354e-01,  4.17587578e-01,
        4.51308042e-02,  6.46378398e-01,  3.50225717e-01,  4.55841236e-02,
        6.12510562e-01,  3.06026310e-01,  4.44122292e-02,  7.08329201e-01,
        2.75555998e-01,  2.49663834e-02,  6.15777135e-01,  2.04530269e-01,
        7.74654699e-03,  5.49433231e-01,  2.08065674e-01, -6.54380489e-03,
        5.02407134e-01,  2.26676866e-01, -1.40785566e-02,  7.16839373e-01,
        2.71351814e-01, -3.64405033e-03,  6.05340481e-01,  1.90580532e-01,
       -1.79946441e-02,  5.33059359e-01,  2.01899350e-01, -2.72031333e-02,
        4.84640330e-01,  2.31088042e-01, -3.26192677e-02,  7.13794231e-01,
        3.01109821e-01, -3.16684283e-02,  6.06719971e-01,  2.09810346e-01,
       -4.48964797e-02,  5.38195491e-01,  2.17670441e-01, -5.05560078e-02,
        4.90739286e-01,  2.40003929e-01, -5.31200990e-02,  6.99491978e-01,
        3.61231953e-01, -

In [15]:
rh

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [16]:
face

array([ 0.50320673,  0.61958343, -0.03611033, ...,  0.59483451,
        0.43156654,  0.02324907])

In [17]:
def extract_keypoints(results):
    """Flatten all landmark groups of ``results`` into one 1-D array.

    The output is the concatenation, in this order, of:

    * pose       - x, y, z, visibility per landmark (zeros of length 33*4 if missing)
    * face       - x, y, z per landmark (zeros of length 468*3 if missing)
    * left hand  - x, y, z per landmark (zeros of length 21*3 if missing)
    * right hand - x, y, z per landmark (zeros of length 21*3 if missing)

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable.

    Returns:
        A 1-D ``numpy`` array with the four flattened groups concatenated.
    """
    def _flatten_group(group, fields, fallback_count):
        # Missing group -> zero vector sized for the expected landmark count.
        if not group:
            return np.zeros(fallback_count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")

    pose = _flatten_group(results.pose_landmarks, xyz + ("visibility",), 33)
    face = _flatten_group(results.face_landmarks, xyz, 468)
    # Hands carry no visibility value here, only x, y, z.
    lh = _flatten_group(results.left_hand_landmarks, xyz, 21)
    rh = _flatten_group(results.right_hand_landmarks, xyz, 21)

    return np.concatenate([pose, face, lh, rh])


In [18]:
np.zeros(21*3).shape

(63,)