In [1]:
import  cv2         as cv
import  numpy       as np
import  mediapipe   as mp
import  os
import  time
from    matplotlib  import  pyplot  as plt




### Read Camera Data

In [3]:
# Short aliases for the MediaPipe solution modules used by the helpers below:
#   mp_h - Holistic solution: provides the Holistic model class and the
#          connection sets (face / hand / pose) passed to draw_landmarks
#   mp_d - drawing utilities: provides draw_landmarks for overlaying results
mp_h = mp.solutions.holistic
mp_d = mp.solutions.drawing_utils



def mediapipe_detection( image, model ):
    '''Run a MediaPipe model on one BGR frame.

    The frame is converted from BGR to RGB before being handed to
    `model.process`, then converted back to BGR for the caller.

    Args:
        image: frame in BGR channel order (as passed to cv.cvtColor).
        model: object exposing `process(rgb_image)`.

    Returns:
        Tuple `(bgr_image, results)`: the frame after the BGR -> RGB -> BGR
        round trip, and whatever `model.process` returned.
    '''
    rgb_frame = cv.cvtColor( image, cv.COLOR_BGR2RGB )

    # The array is marked read-only only for the duration of the model call.
    # NOTE(review): presumably this lets MediaPipe avoid a copy - confirm.
    rgb_frame.flags.writeable = False
    detections = model.process( rgb_frame )
    rgb_frame.flags.writeable = True

    bgr_frame = cv.cvtColor( rgb_frame, cv.COLOR_RGB2BGR )
    return bgr_frame, detections


def draw_skeleton( image, results ):
    '''Overlay each landmark group from `results` onto `image`.

    Calls `mp_d.draw_landmarks` once per group, in the order face, left hand,
    right hand, pose, each with its matching connection set from `mp_h`.
    Nothing is returned; `image` is passed to every draw call.
    '''
    overlays = (
        ( "face_landmarks",       mp_h.FACEMESH_CONTOURS ),
        ( "left_hand_landmarks",  mp_h.HAND_CONNECTIONS ),
        ( "right_hand_landmarks", mp_h.HAND_CONNECTIONS ),
        ( "pose_landmarks",       mp_h.POSE_CONNECTIONS ),
    )
    for attribute, connections in overlays:
        mp_d.draw_landmarks( image, getattr( results, attribute ), connections )

def _flatten_landmarks( landmark_list, n_landmarks ):
    '''Return one landmark group as a flat (n_landmarks * 3,) array of x, y, z.

    A falsy `landmark_list` (nothing detected) yields zeros of the same length
    a detected group would produce, so callers always get a fixed size.
    '''
    if not landmark_list:
        return np.zeros(n_landmarks * 3)
    return np.array([[point.x, point.y, point.z] for point in landmark_list.landmark]).flatten()


def transform_data( results ):
    '''Flatten holistic detection results into one fixed-length feature vector.

    Each landmark contributes (x, y, z). Groups that were not detected are
    replaced by zeros of the same length, so the output size does not depend
    on which body parts were visible in the frame.

    Args:
        results: object with `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks` attributes; each
            is either falsy or exposes a `.landmark` iterable of points with
            `x`, `y`, `z`.

    Returns:
        1-D numpy array laid out as [pose, face, left hand, right hand];
        with the landmark counts below that is 99 + 1404 + 63 + 63 = 1629
        values.
    '''
    # Landmark counts per group for MediaPipe Holistic (default face mesh).
    # The previous pose fallback was zeros(132) (= 33 * 4), which did not match
    # the 33 * 3 values produced when a pose is detected.
    pose    = _flatten_landmarks(results.pose_landmarks,       33)
    head    = _flatten_landmarks(results.face_landmarks,       468)
    left_h  = _flatten_landmarks(results.left_hand_landmarks,  21)
    right_h = _flatten_landmarks(results.right_hand_landmarks, 21)

    return np.concatenate([pose, head, left_h, right_h])


#TODO Replace with video files, to read from downloaded data

# 0: iPhone, 1: webcam
capture = cv.VideoCapture(1)

# Read camera: detect landmarks on every frame, draw them, and show the
# annotated frame until 'q' is pressed or the camera stops delivering frames.
try:
    with mp_h.Holistic(min_detection_confidence=0.7,  min_tracking_confidence=0.7) as holistic:
        while capture.isOpened():

            grabbed, frame = capture.read()
            if not grabbed:
                # read() signals failure through its first return value and
                # hands back no usable frame; passing that on would crash
                # inside cv.cvtColor, so stop cleanly instead.
                break

            image, results = mediapipe_detection( frame, holistic )

            draw_skeleton( image, results )

            data = transform_data( results )
            print(data)

            cv.imshow("ASL", image)
            if cv.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when the loop
    # is left through an exception or a kernel interrupt.
    capture.release()
    cv.destroyAllWindows()

I0000 00:00:1738427094.263411 4632215 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2
W0000 00:00:1738427094.367243 4647195 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738427094.385951 4647194 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738427094.390961 4647197 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738427094.391476 4647193 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738427094.391662 4647192 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support 

[ 0.40817562  0.68645096 -1.04954422 ...  0.          0.
  0.        ]
[ 0.40816143  0.68658286 -1.12921929 ...  0.          0.
  0.        ]
[ 0.40833142  0.68772829 -1.2542541  ...  0.          0.
  0.        ]
[ 0.40770492  0.68976188 -1.20236421 ...  0.          0.
  0.        ]
[ 0.40775698  0.69024169 -1.17156458 ...  0.          0.
  0.        ]
[ 0.40735668  0.69119829 -1.15772843 ...  0.          0.
  0.        ]
[ 0.40729868  0.69094419 -1.12485492 ...  0.          0.
  0.        ]
[ 0.4070864   0.69172198 -1.17515934 ...  0.          0.
  0.        ]
[ 0.40713057  0.69183654 -1.15686107 ...  0.          0.
  0.        ]
[ 0.40715823  0.69224906 -1.14557135 ...  0.          0.
  0.        ]
[ 0.40703329  0.6925227  -1.05527854 ...  0.          0.
  0.        ]
[ 0.40698248  0.69308633 -0.98989081 ...  0.          0.
  0.        ]
[ 0.40671021  0.69399917 -1.02943242 ...  0.          0.
  0.        ]
[ 0.40664309  0.69426453 -1.03056371 ...  0.          0.
  0.        ]
[ 0.40

2025-02-01 11:25:03.111 Python[45583:4632215] _TIPropertyValueIsValid called with 16 on nil context!
2025-02-01 11:25:03.111 Python[45583:4632215] imkxpc_getApplicationProperty:reply: called with incorrect property value 16, bailing.
2025-02-01 11:25:03.111 Python[45583:4632215] Text input context does not respond to _valueForTIProperty:


[ 0.40936521  0.71806282 -0.84297651 ...  0.          0.
  0.        ]
[ 0.40955302  0.7169866  -0.88089788 ...  0.          0.
  0.        ]
[ 0.40973645  0.71665686 -0.89791262 ...  0.          0.
  0.        ]
[ 0.4098973   0.71652097 -0.89692014 ...  0.          0.
  0.        ]
[ 0.40995684  0.71645129 -0.91343814 ...  0.          0.
  0.        ]
[ 0.410007    0.71643507 -0.91651213 ...  0.          0.
  0.        ]
[ 0.41003954  0.71631986 -0.90781033 ...  0.          0.
  0.        ]
[ 0.41007832  0.71617234 -0.89050537 ...  0.          0.
  0.        ]
[ 0.41010153  0.71610278 -0.86787868 ...  0.          0.
  0.        ]
[ 0.41012308  0.71598226 -0.85439879 ...  0.          0.
  0.        ]
[ 0.41012937  0.71577853 -0.83595449 ...  0.          0.
  0.        ]
[ 0.41008034  0.71541768 -0.8020767  ...  0.          0.
  0.        ]
[ 0.40975192  0.71518469 -0.78414989 ...  0.          0.
  0.        ]
[ 0.4092392   0.71472168 -0.77021307 ...  0.          0.
  0.        ]
[ 0.40

2025-02-01 11:25:19.727 Python[45583:4632215] _TIPropertyValueIsValid called with 16 on nil context!
2025-02-01 11:25:19.727 Python[45583:4632215] imkxpc_getApplicationProperty:reply: called with incorrect property value 16, bailing.
2025-02-01 11:25:19.727 Python[45583:4632215] Text input context does not respond to _valueForTIProperty:


[ 0.41995037  0.71140254 -1.27452862 ...  0.          0.
  0.        ]
[ 0.42685807  0.70225793 -1.3020041  ...  0.          0.
  0.        ]
[ 0.43232077  0.69676191 -1.31730998 ...  0.          0.
  0.        ]
[ 0.43458456  0.70177144 -1.32255411 ...  0.          0.
  0.        ]
[ 0.43717426  0.69936317 -1.31990743 ...  0.          0.
  0.        ]
[ 0.43841201  0.6977402  -1.27665341 ...  0.          0.
  0.        ]
[ 0.43829742  0.69924116 -1.30849433 ...  0.          0.
  0.        ]
[ 0.43768808  0.69977051 -1.3073864  ...  0.          0.
  0.        ]
[ 0.43753091  0.70135558 -1.3244648  ...  0.          0.
  0.        ]
[ 0.43709078  0.70139933 -1.31641173 ...  0.          0.
  0.        ]
[ 0.43694004  0.71189278 -1.20639002 ...  0.          0.
  0.        ]
[ 0.43844694  0.70492214 -1.31043851 ...  0.          0.
  0.        ]
[ 0.4384369   0.69663388 -1.30775118 ...  0.          0.
  0.        ]
[ 0.43847531  0.70052665 -1.32419205 ...  0.          0.
  0.        ]
[ 0.43