## mediapipe hand example

In [4]:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images:
IMAGE_FILES = []
with mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.5) as hands:
  for idx, file in enumerate(IMAGE_FILES):
    # Read an image, flip it around y-axis for correct handedness output (see
    # above).
    image = cv2.flip(cv2.imread(file), 1)
    # Convert the BGR image to RGB before processing.
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print handedness and draw hand landmarks on the image.
    print('Handedness:', results.multi_handedness)
    if not results.multi_hand_landmarks:
      continue
    image_height, image_width, _ = image.shape
    annotated_image = image.copy()
    for hand_landmarks in results.multi_hand_landmarks:
      print('hand_landmarks:', hand_landmarks)
      print(
          f'Index finger tip coordinates: (',
          f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
          f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
      )
      mp_drawing.draw_landmarks(
          annotated_image,
          hand_landmarks,
          mp_hands.HAND_CONNECTIONS,
          mp_drawing_styles.get_default_hand_landmarks_style(),
          mp_drawing_styles.get_default_hand_connections_style())
    cv2.imwrite(
        '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
    # Draw hand world landmarks.
    if not results.multi_hand_world_landmarks:
      continue
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      mp_drawing.plot_landmarks(
        hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

# For webcam input:
cap = cv2.VideoCapture(1)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()


## palm detection with cv dnn

In [5]:
import cv2
import time
import numpy as np

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def draw_rect(frame, regressor, classificator, stride, anchor_count, column, row, anchor, offset):
    index = (int(row * 128 / stride) + column) * anchor_count + anchor + offset

    score = sigmoid(regressor[index][0])
    if score < 0.5: return

    x, y, w, h = classificator[index][:4]

    x += (column + 0.5) * stride - w / 2
    y += (row    + 0.5) * stride - h / 2
    x = int(x)
    y = int(y)
    w = int(w)
    h = int(h)
    frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)

net = cv2.dnn.readNet('palm_detection.onnx')
outNames = net.getUnconnectedOutLayersNames()
print(outNames)
cap = cv2.VideoCapture(1)
while True:
    time_start = time.time()
    ret, frame = cap.read()

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, dsize=(128, 128))
    #tensor = (frame / 127.5 - 1.0).reshape((128, 128, 3)).transpose(2, 0, 1)

    tensor = (frame / 127.5 - 1.0).reshape((128, 128, 3))
    print(tensor.shape)
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    print(blob.shape)

    #blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), 1.0, (128, 128), (127.5, 127.5, 127.5), swapRB=True)
    net.setInput(blob)
    preds = net.forward(outNames)
    regressor = preds[0]
    classifier = preds[1]
    """
    print(regressor.shape)
    print(classifier.shape)
    print()
    """
    for y in range(16):
        for x in range(16):
            for a in range(2):
                draw_rect(frame, regressor[0], classifier[0], 8, 2, x, y, a, 0)

    for y in range(8):
        for x in range(8):
            for a in range(6):
                draw_rect(frame, regressor[0], classifier[0], 16, 6, x, y, a, 512)
    
    frame = cv2.resize(frame, dsize=(640, 480))

    if cv2.waitKey(1) == ord('q'):
        break
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    time_cur = time.time()
    cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
    cv2.imshow('Camera Streaming', frame)

cap.release()
cv2.destroyAllWindows()


('classificators', 'regressors')
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)


  


(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 12

In [5]:
import torch.onnx 
from blazepalm import BlazePalm

#Function to Convert to ONNX 
def Convert_ONNX(): 

    # set the model to inference mode 
    model.eval() 

    # Let's create a dummy input tensor  
    dummy_input = torch.randn(1, 3, 256, 256, requires_grad=True)  

    # Export the model   
    torch.onnx.export(model,         # model being run 
         dummy_input,       # model input (or a tuple for multiple inputs) 
         "ImageClassifier.onnx",       # where to save the model  
         export_params=True,  # store the trained parameter weights inside the model file 
         opset_version=12,    # the ONNX version to export the model to 
         do_constant_folding=True,  # whether to execute constant folding for optimization 
         input_names = ['modelInput'],   # the model's input names 
         output_names = ['regressors', 'classificators'], # the model's output names 
         dynamic_axes={'modelInput' : {0 : 'batch_size'},    # variable length axes 
                                'modelOutput' : {0 : 'batch_size'}}) 
    print(" ") 
    print('Model has been converted to ONNX') 


if __name__ == "__main__": 

    # Let's build our model 
    #train(5) 
    #print('Finished Training') 

    # Test which classes performed well 
    #testAccuracy() 

    # Let's load the model we just created and test the accuracy per label 
    model = BlazePalm() 
    path = "blazepalm.pth" 
    model.load_state_dict(torch.load(path)) 
    print(model)
    # Test with batch of images 
    #testBatch() 
    # Test how the classes performed 
    #testClassess() 
 
    # Conversion to ONNX 
    Convert_ONNX() 


BlazePalm(
  (backbone1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): BlazeBlock(
      (convs): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
        (1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
      )
      (act): ReLU(inplace=True)
    )
    (3): BlazeBlock(
      (convs): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
        (1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
      )
      (act): ReLU(inplace=True)
    )
    (4): BlazeBlock(
      (convs): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
        (1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
      )
      (act): ReLU(inplace=True)
    )
    (5): BlazeBlock(
      (convs): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3



 
Model has been converted to ONNX


In [14]:
import cv2
import time
import numpy as np
import traceback


def get_color_filtered_boxes(image):
    # 이미지를 HSV 색 공간으로 변환
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # 살색 영역을 마스크로 만들기
    skin_mask = cv2.inRange(hsv_image, lower_skin, upper_skin)
    # 모폴로지 연산을 위한 구조 요소 생성
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    # 모폴로지 열림 연산 적용
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel)

    # 마스크를 이용하여 살색 영역 추출
    skin_image = cv2.bitwise_and(image, image, mask=skin_mask)

    # 살색 영역에 대한 바운딩 박스 추출
    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]
        
    # 크기가 작은 박스와 큰 박스 제거
    color_boxes = []
    for (x, y, w, h) in bounding_boxes:
        if w * h > 100 * 100:
            # 약간 박스 더크게
            #color_boxes.append((x - 30, y - 30, w + 60, h + 60))
            center_x = int((x + x + w) / 2)
            center_y = int((y + y + h) / 2)

            large = 0
            if w > h:
                large = w                
            if w < h:                
                large = h
            
            large = int(large * 0.7)
            color_boxes.append((center_x - large, center_y - large, 2 * large,  2 * large))


    return color_boxes


def landmark_inference(img, cbox_ratio_width, cbox_ratio_height):
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
    tensor = img / 127.5 - 1.0
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward(outNames)
    #lms = preds[2].reshape(21, 3) 
    lms = preds[0].reshape(21, 3) 

    lms_ratio = []
    for lm in lms:
        """
        x = lm[0] * cbox_ratio_width * lm_infer_width
        y = lm[1] * cbox_ratio_height * lm_infer_height
        z = lm[2] * cbox_ratio_height * lm_infer_height
        """
        x = lm[0] * cbox_ratio_width 
        y = lm[1] * cbox_ratio_height 
        z = lm[2] * cbox_ratio_height


        lms_ratio.append([x, y, z])
    return lms_ratio

def draw_circle(image, color_box, lms):
    x, y, w, h = color_box

    # 이미지에 원 그리기
    for point in lms:
        pt_x, pt_y, _ = point
        cv2.circle(image, (int(x + pt_x), int(y + pt_y)), 5, (0, 0, 255), -1) # 빨간색 원 그리기




# 살색 영역의 범위 지정 (HSV 색 공간)
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

img_width = 640
img_height = 480
lm_infer_width = 224
lm_infer_height = 224

#net = cv2.dnn.readNet('blazehand.onnx')
net = cv2.dnn.readNet('hand_landmark.onnx')

outNames = net.getUnconnectedOutLayersNames()
print(outNames)
cap = cv2.VideoCapture(1)

while True:
    time_start = time.time()
    try:
        ret, frame = cap.read()
        frame = cv2.resize(frame, (img_width, img_height))

        skin_image = frame.copy()
        # 크기가 작은 박스와 큰 박스 제거
        color_boxes = get_color_filtered_boxes(skin_image)

        # 바운딩 박스를 이미지에 그리기
        for (x, y, w, h) in color_boxes:
            cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


        for idx, color_box in enumerate(color_boxes):
            x, y, w, h = color_box
            cbox_ratio_width = w / lm_infer_width
            cbox_ratio_height = h / lm_infer_height

            roi = frame[y:y+h, x:x+w]
            lms = landmark_inference(roi, cbox_ratio_width, cbox_ratio_height)
            
            draw_circle(frame, color_box, lms)

        if cv2.waitKey(1) == ord('q'):
            break
        time_cur = time.time()
        cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
        
        #frame = cv2.resize(frame, (320, 240))
        cv2.imshow('Camera Streaming', frame)
        cv2.imshow('Skin Extraction', skin_image)
    except Exception as e:
        traceback.print_exc()

cap.release()
cv2.destroyAllWindows()





('Identity', 'Identity_1', 'Identity_2')


Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 119, in <module>
    lms = landmark_inference(roi, cbox_ratio_width, cbox_ratio_height)
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 48, in landmark_inference
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'

Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 119, in <module>
    lms = landmark_inference(roi, cbox_ratio_width, cbox_ratio_height)
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 48, in landmark_inference
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\s

In [15]:
import cv2
import time
import numpy as np
import traceback


def get_color_filtered_boxes(image):
    # 이미지를 HSV 색 공간으로 변환
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # 살색 영역을 마스크로 만들기
    skin_mask = cv2.inRange(hsv_image, lower_skin, upper_skin)
    # 모폴로지 연산을 위한 구조 요소 생성
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    # 모폴로지 열림 연산 적용
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel)

    # 마스크를 이용하여 살색 영역 추출
    skin_image = cv2.bitwise_and(image, image, mask=skin_mask)

    # 살색 영역에 대한 바운딩 박스 추출
    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]
        
    # 크기가 작은 박스와 큰 박스 제거
    color_boxes = []
    for (x, y, w, h) in bounding_boxes:
        if w * h > 100 * 100:
            # 약간 박스 더크게
            #color_boxes.append((x - 30, y - 30, w + 60, h + 60))
            center_x = int((x + x + w) / 2)
            center_y = int((y + y + h) / 2)

            large = 0
            if w > h:
                large = w                
            if w < h:                
                large = h
            
            large = int(large * 0.7)
            color_boxes.append((center_x - large, center_y - large, 2 * large,  2 * large))


    return color_boxes


def landmark_inference(img, cbox_ratio_width, cbox_ratio_height):
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
    tensor = img / 127.5 - 1.0
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward(outNames)
    #lms = preds[2].reshape(21, 3) 
    lms = preds[0].reshape(21, 3) 

    lms_ratio = []
    for lm in lms:
        """
        x = lm[0] * cbox_ratio_width * lm_infer_width
        y = lm[1] * cbox_ratio_height * lm_infer_height
        z = lm[2] * cbox_ratio_height * lm_infer_height
        """
        x = lm[0] * cbox_ratio_width 
        y = lm[1] * cbox_ratio_height 
        z = lm[2] * cbox_ratio_height


        lms_ratio.append([x, y, z])
    return lms_ratio

def draw_circle(image, color_box, lms):
    x, y, w, h = color_box

    # 이미지에 원 그리기
    for point in lms:
        pt_x, pt_y, _ = point
        cv2.circle(image, (int(x + pt_x), int(y + pt_y)), 5, (0, 0, 255), -1) # 빨간색 원 그리기




# 살색 영역의 범위 지정 (HSV 색 공간)
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

img_width = 640
img_height = 480
lm_infer_width = 224
lm_infer_height = 224

#net = cv2.dnn.readNet('blazehand.onnx')
net = cv2.dnn.readNet('hand_landmark.onnx')

outNames = net.getUnconnectedOutLayersNames()
print(outNames)
cap = cv2.VideoCapture(1)

while True:
    time_start = time.time()
    try:
        ret, frame = cap.read()
        frame = cv2.resize(frame, (img_width, img_height))

        skin_image = frame.copy()
        # 크기가 작은 박스와 큰 박스 제거
        color_boxes = get_color_filtered_boxes(skin_image)

        # 바운딩 박스를 이미지에 그리기
        for (x, y, w, h) in color_boxes:
            cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


        for idx, color_box in enumerate(color_boxes):
            x, y, w, h = color_box
            cbox_ratio_width = w / lm_infer_width
            cbox_ratio_height = h / lm_infer_height

            roi = frame[y:y+h, x:x+w]
            lms = landmark_inference(roi, cbox_ratio_width, cbox_ratio_height)
            
            draw_circle(frame, color_box, lms)

        if cv2.waitKey(1) == ord('q'):
            break
        time_cur = time.time()
        cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
        
        #frame = cv2.resize(frame, (320, 240))
        cv2.imshow('Camera Streaming', frame)
        cv2.imshow('Skin Extraction', skin_image)
    except Exception as e:
        traceback.print_exc()

cap.release()
cv2.destroyAllWindows()





('Identity', 'Identity_1', 'Identity_2')


Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 119, in <module>
    lms = landmark_inference(roi, cbox_ratio_width, cbox_ratio_height)
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 48, in landmark_inference
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'

Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 119, in <module>
    lms = landmark_inference(roi, cbox_ratio_width, cbox_ratio_height)
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_13964\586881377.py", line 48, in landmark_inference
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\s

In [19]:
import cv2
import time
import numpy as np
import traceback


def get_color_filtered_boxes(image):
    # 이미지를 HSV 색 공간으로 변환
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # 살색 영역을 마스크로 만들기
    skin_mask = cv2.inRange(hsv_image, lower_skin, upper_skin)
    # 모폴로지 연산을 위한 구조 요소 생성
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    # 모폴로지 열림 연산 적용
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel)

    # 마스크를 이용하여 살색 영역 추출
    skin_image = cv2.bitwise_and(image, image, mask=skin_mask)

    # 살색 영역에 대한 바운딩 박스 추출
    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]
        
    # 크기가 작은 박스와 큰 박스 제거
    color_boxes = []
    for (x, y, w, h) in bounding_boxes:
        if w * h > 100 * 100:
            # 약간 박스 더크게
            #color_boxes.append((x - 30, y - 30, w + 60, h + 60))
            center_x = int((x + x + w) / 2)
            center_y = int((y + y + h) / 2)

            large = 0
            if w > h:
                large = w                
            if w < h:                
                large = h
            
            large = int(large * 0.7)
            color_boxes.append((center_x - large, center_y - large, 2 * large,  2 * large))


    return color_boxes


def landmark_inference(img):
    img = cv2.resize(img, (lm_infer_width, lm_infer_height))
    tensor = img / 127.5 - 1.0
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward(outNames)

    return preds


# 살색 영역의 범위 지정 (HSV 색 공간)
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

img_width = 640
img_height = 480
lm_infer_width = 256
lm_infer_height = 256

net = cv2.dnn.readNet('blazehand.onnx')
#net = cv2.dnn.readNet('hand_landmark.onnx')

outNames = net.getUnconnectedOutLayersNames()
print(outNames)


cap = cv2.VideoCapture(1)

while True:
    time_start = time.time()
    try:
        roi = None
        ret, frame = cap.read()
        frame = cv2.resize(frame, (img_width, img_height))

        skin_image = frame.copy()
        # 크기가 작은 박스와 큰 박스 제거
        color_boxes = get_color_filtered_boxes(skin_image)

        # 바운딩 박스를 이미지에 그리기
        for (x, y, w, h) in color_boxes:
            cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


        for idx, color_box in enumerate(color_boxes):
            x, y, w, h = color_box
            cbox_ratio_width = w / lm_infer_width
            cbox_ratio_height = h / lm_infer_height

            roi = frame[y:y+h, x:x+w]
            preds = landmark_inference(roi)
        


        if cv2.waitKey(1) == ord('q'):
            break
        time_cur = time.time()
        cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
        
        #frame = cv2.resize(frame, (320, 240))
        if roi is not None:
            cv2.imshow('roi', roi)
        cv2.imshow('Camera Streaming', frame)
        cv2.imshow('Skin Extraction', skin_image)
    except Exception as e:
        traceback.print_exc()

cap.release()
cv2.destroyAllWindows()





('hand_flag', 'handedness', 'landmarks')


In [20]:
preds

(array([[0.9993316]], dtype=float32),
 array([[0.48108682]], dtype=float32),
 array([[[ 4.50226784e-01,  7.85057008e-01,  3.31874180e-05],
         [ 5.83694041e-01,  7.39253998e-01, -3.06623820e-02],
         [ 6.73868120e-01,  6.48970187e-01, -5.44039942e-02],
         [ 7.34243929e-01,  5.86161733e-01, -7.42683411e-02],
         [ 8.15915108e-01,  5.45038402e-01, -9.72389579e-02],
         [ 5.52618921e-01,  5.03809810e-01, -5.85338026e-02],
         [ 5.81535757e-01,  3.64380479e-01, -9.31414962e-02],
         [ 6.10450625e-01,  2.98195392e-01, -1.16928555e-01],
         [ 6.40476525e-01,  2.46485025e-01, -1.34737328e-01],
         [ 4.64370787e-01,  5.14175415e-01, -6.42016307e-02],
         [ 4.25285399e-01,  3.73875260e-01, -9.84646976e-02],
         [ 4.21652466e-01,  3.00596625e-01, -1.24569692e-01],
         [ 4.21233892e-01,  2.48289600e-01, -1.46726280e-01],
         [ 3.91082466e-01,  5.56109786e-01, -7.33227208e-02],
         [ 3.17008317e-01,  4.44480956e-01, -1.03735864

In [2]:
## blazehand by color filtering

In [7]:
import cv2
import time
import numpy as np
import traceback


def get_color_filtered_boxes(image):
    # 이미지를 HSV 색 공간으로 변환
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # 살색 영역을 마스크로 만들기
    skin_mask = cv2.inRange(hsv_image, lower_skin, upper_skin)
    # 모폴로지 연산을 위한 구조 요소 생성
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    # 모폴로지 열림 연산 적용
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel)

    # 마스크를 이용하여 살색 영역 추출
    skin_image = cv2.bitwise_and(image, image, mask=skin_mask)

    # 살색 영역에 대한 바운딩 박스 추출
    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]
        
    # 크기가 작은 박스와 큰 박스 제거
    color_boxes = []
    for (x, y, w, h) in bounding_boxes:
        if w * h > 100 * 100:
            # 약간 박스 더크게
            #color_boxes.append((x - 30, y - 30, w + 60, h + 60))
            center_x = int((x + x + w) / 2)
            center_y = int((y + y + h) / 2)

            large = 0
            if w > h:
                large = w                
            if w < h:                
                large = h
            
            large = int(large * 0.7)
            color_boxes.append((center_x - large, center_y - large, 2 * large,  2 * large))


    return color_boxes


def landmark_inference(img):
    #tensor = img / 127.5 - 1.0
    tensor = img / 256
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward(outNames)

    return preds

def denormalize_landmarks(landmarks):
    landmarks[:,:,:2] *= 256
    return landmarks


def draw_landmarks(img, points, connections=[], color=(0, 255, 0), size=2):
    points = points[:,:2]
    for point in points:
        x, y = point
        x, y = int(x), int(y)
        cv2.circle(img, (x, y), size, color, thickness=size)

    for connection in connections:
        x0, y0 = points[connection[0]]
        x1, y1 = points[connection[1]]
        x0, y0 = int(x0), int(y0)
        x1, y1 = int(x1), int(y1)
        cv2.line(img, (x0, y0), (x1, y1), (0,0,0), size)




HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (5, 6), (6, 7), (7, 8),
    (9, 10), (10, 11), (11, 12),
    (13, 14), (14, 15), (15, 16),
    (17, 18), (18, 19), (19, 20),
    (0, 5), (5, 9), (9, 13), (13, 17), (0, 17)
]






# 살색 영역의 범위 지정 (HSV 색 공간)
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

img_width = 640
img_height = 480
lm_infer_width = 256
lm_infer_height = 256

net = cv2.dnn.readNet('blazehand.onnx')
#net = cv2.dnn.readNet('hand_landmark.onnx')

outNames = net.getUnconnectedOutLayersNames()
print(outNames)


cap = cv2.VideoCapture(1)

while True:
    time_start = time.time()
    try:
        roi = None
        ret, frame = cap.read()
        frame = cv2.resize(frame, (img_width, img_height))

        skin_image = frame.copy()
        # 크기가 작은 박스와 큰 박스 제거
        color_boxes = get_color_filtered_boxes(skin_image)

        # 바운딩 박스를 이미지에 그리기
        for (x, y, w, h) in color_boxes:
            cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


        for idx, color_box in enumerate(color_boxes):
            x, y, w, h = color_box
            cbox_ratio_width = w / lm_infer_width
            cbox_ratio_height = h / lm_infer_height

            roi = frame[y:y+h, x:x+w]
            roi = cv2.resize(roi, (lm_infer_width, lm_infer_height))
            lm_input = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)


            preds = landmark_inference(lm_input)

            landmarks = preds[2]
            flag = preds[0]
            denorm_landmarks = denormalize_landmarks(landmarks)

            for i in range(len(flag)):
                landmark, flag = denorm_landmarks[i], flag[i]
                
                if flag>.5:
                    draw_landmarks(roi, landmark[:,:2], HAND_CONNECTIONS, size=2)




        if cv2.waitKey(1) == ord('q'):
            break
        time_cur = time.time()
        cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
        
        #frame = cv2.resize(frame, (320, 240))
        if roi is not None:
            cv2.imshow('roi', roi)
        cv2.imshow('Camera Streaming', frame)
        cv2.imshow('Skin Extraction', skin_image)
    except Exception as e:
        traceback.print_exc()

cap.release()
cv2.destroyAllWindows()





('hand_flag', 'handedness', 'landmarks')


Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_16948\293099564.py", line 132, in <module>
    roi = cv2.resize(roi, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'

Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_16948\293099564.py", line 132, in <module>
    roi = cv2.resize(roi, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'

Traceback (most recent call last):
  File "C:\Users\addinedu\AppData\Local\Temp\ipykernel_16948\293099564.py", line 132, in <module>
    roi = cv2.resize(roi, (lm_infer_width, lm_infer_height))
cv2.error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modul

## palm detection

In [17]:
import cv2
import time
import numpy as np

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def draw_rect(frame, regressor, classificator, stride, anchor_count, column, row, anchor, offset):
    index = (int(row * 128 / stride) + column) * anchor_count + anchor + offset

    score = sigmoid(regressor[index][0])
    if score < 0.5: return

    x, y, w, h = classificator[index][:4]

    x += (column + 0.5) * stride - w / 2
    y += (row    + 0.5) * stride - h / 2
    x = int(x)
    y = int(y)
    w = int(w)
    h = int(h)
    frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)

net = cv2.dnn.readNet('palm_detection.onnx')
outNames = net.getUnconnectedOutLayersNames()
print(outNames)
cap = cv2.VideoCapture(1)
while True:
    time_start = time.time()
    ret, frame = cap.read()

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, dsize=(128, 128))
    #tensor = (frame / 127.5 - 1.0).reshape((128, 128, 3)).transpose(2, 0, 1)

    tensor = (frame / 127.5 - 1.0).reshape((128, 128, 3))
    print(tensor.shape)
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    print(blob.shape)

    #blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), 1.0, (128, 128), (127.5, 127.5, 127.5), swapRB=True)
    net.setInput(blob)
    preds = net.forward(outNames)
    regressor = preds[0]
    classifier = preds[1]
    """
    print(regressor.shape)
    print(classifier.shape)
    print()
    """
    for y in range(16):
        for x in range(16):
            for a in range(2):
                draw_rect(frame, regressor[0], classifier[0], 8, 2, x, y, a, 0)

    for y in range(8):
        for x in range(8):
            for a in range(6):
                draw_rect(frame, regressor[0], classifier[0], 16, 6, x, y, a, 512)
    
    frame = cv2.resize(frame, dsize=(640, 480))

    if cv2.waitKey(1) == ord('q'):
        break
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    time_cur = time.time()
    cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
    cv2.imshow('Camera Streaming', frame)

cap.release()
cv2.destroyAllWindows()


('classificators', 'regressors')
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)


  


(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 128, 3)
(1, 3, 128, 128)
(128, 12

## palm detection

In [5]:
import cv2
import numpy as np

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def draw_rect(frame, regressor, classificator, stride, anchor_count, column, row, anchor, offset):
    index = (int(row * 128 / stride) + column) * anchor_count + anchor + offset

    score = sigmoid(regressor[index][0])
    if score < 0.5: return

    x, y, w, h = classificator[index][:4]

    x += (column + 0.5) * stride - w / 2
    y += (row    + 0.5) * stride - h / 2
    x = int(x)
    y = int(y)
    w = int(w)
    h = int(h)
    frame = cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)

net = cv2.dnn.readNet('palm_detection.onnx')
outNames = net.getUnconnectedOutLayersNames()
print(outNames)



# 카메라 기본 세팅
cap = cv2.VideoCapture(0)
#cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
#cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# 128 x 128 크기로 패딩 붙이기
def pad_resize(frame):
    # 이미지 높이, 너비 구하기
    height, width = frame.shape[:2]

    # 이미지를 가로, 세로 중 큰 쪽을 기준으로 정사각형으로 만들기
    square_size = max(height, width)
    pad_bottom = square_size - height
    pad_right = square_size - width

    # 이미지 가운데에 검은색 패딩 붙이기
    pad_top = pad_bottom // 2
    pad_left = pad_right // 2
    padded = cv2.copyMakeBorder(frame, pad_top, pad_bottom - pad_top, pad_left, pad_right - pad_left, cv2.BORDER_CONSTANT, 0)

    # 128 x 128 크기로 리사이즈하기
    resized = cv2.resize(padded, (128, 128))

    return resized

while True:
    # 카메라 영상 받아오기
    ret, frame = cap.read()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # 128 x 128 크기로 패딩 붙이기
    resized = pad_resize(frame)
    #resized = cv2.resize(frame, (128, 128))


    #tensor = (resized / 128).reshape((128, 128, 3))

    tensor = (resized / 127.5 - 1.0).reshape((128, 128, 3))
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward(outNames)
    regressor = preds[0]
    classifier = preds[1]

    for y in range(16):
        for x in range(16):
            for a in range(2):
                draw_rect(resized, regressor[0], classifier[0], 8, 2, x, y, a, 0)

    for y in range(8):
        for x in range(8):
            for a in range(6):
                draw_rect(resized, regressor[0], classifier[0], 16, 6, x, y, a, 512)
    
    frame = cv2.resize(resized, dsize=(640, 640))



    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)


    # 영상 출력하기
    cv2.imshow('frame', frame)

    # ESC 키 입력 시 종료하기
    if cv2.waitKey(1) == 27:
        break

# 메모리 해제하기
cap.release()
cv2.destroyAllWindows()


('classificators', 'regressors')


  """


In [5]:
import cv2
import time
import numpy as np
import traceback


def get_color_filtered_boxes(image):
    # 이미지를 HSV 색 공간으로 변환
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # 살색 영역을 마스크로 만들기
    skin_mask = cv2.inRange(hsv_image, lower_skin, upper_skin)
    # 모폴로지 연산을 위한 구조 요소 생성
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    # 모폴로지 열림 연산 적용
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel)

    # 마스크를 이용하여 살색 영역 추출
    skin_image = cv2.bitwise_and(image, image, mask=skin_mask)

    # 살색 영역에 대한 바운딩 박스 추출
    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]
        
    # 크기가 작은 박스와 큰 박스 제거
    color_boxes = []
    for (x, y, w, h) in bounding_boxes:
        if w * h > 100 * 100:
            # 약간 박스 더크게
            #color_boxes.append((x - 30, y - 30, w + 60, h + 60))
            center_x = int((x + x + w) / 2)
            center_y = int((y + y + h) / 2)

            large = 0
            if w > h:
                large = w                
            if w < h:                
                large = h
            
            large = int(large * 0.7)
            color_boxes.append((center_x - large, center_y - large, 2 * large,  2 * large))


    return color_boxes


def landmark_inference(img):
    #tensor = img / 127.5 - 1.0
    tensor = img / 256
    blob = cv2.dnn.blobFromImage(tensor.astype(np.float32), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward(outNames)

    return preds

def denormalize_landmarks(landmarks):
    landmarks[:,:,:2] *= 256
    return landmarks


def draw_landmarks(img, points, connections=[], color=(0, 255, 0), size=2):
    points = points[:,:2]
    for point in points:
        x, y = point
        x, y = int(x), int(y)
        cv2.circle(img, (x, y), size, color, thickness=size)

    for connection in connections:
        x0, y0 = points[connection[0]]
        x1, y1 = points[connection[1]]
        x0, y0 = int(x0), int(y0)
        x1, y1 = int(x1), int(y1)
        cv2.line(img, (x0, y0), (x1, y1), (0,0,0), size)




HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (5, 6), (6, 7), (7, 8),
    (9, 10), (10, 11), (11, 12),
    (13, 14), (14, 15), (15, 16),
    (17, 18), (18, 19), (19, 20),
    (0, 5), (5, 9), (9, 13), (13, 17), (0, 17)
]






# 살색 영역의 범위 지정 (HSV 색 공간)
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

img_width = 640
img_height = 480
lm_infer_width = 256
lm_infer_height = 256

net = cv2.dnn.readNet('blazehand.onnx')
#net = cv2.dnn.readNet('hand_landmark.onnx')

outNames = net.getUnconnectedOutLayersNames()
print(outNames)


cap = cv2.VideoCapture(1)

while True:
    time_start = time.time()
    try:
        roi = None
        ret, frame = cap.read()
        frame = cv2.resize(frame, (img_width, img_height))

        skin_image = frame.copy()
        # 크기가 작은 박스와 큰 박스 제거
        color_boxes = get_color_filtered_boxes(skin_image)

        # 바운딩 박스를 이미지에 그리기
        for (x, y, w, h) in color_boxes:
            cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


        for idx, color_box in enumerate(color_boxes):
            x, y, w, h = color_box
            cbox_ratio_width = w / lm_infer_width
            cbox_ratio_height = h / lm_infer_height

            roi = frame[y:y+h, x:x+w]
            roi = cv2.resize(roi, (lm_infer_width, lm_infer_height))
            lm_input = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)


            preds = landmark_inference(lm_input)

            landmarks = preds[2]
            flag = preds[0]
            denorm_landmarks = denormalize_landmarks(landmarks)

            for i in range(len(flag)):
                landmark, flag = denorm_landmarks[i], flag[i]
                
                if flag>.5:
                    draw_landmarks(roi, landmark[:,:2], HAND_CONNECTIONS, size=2)




        if cv2.waitKey(1) == ord('q'):
            break
        time_cur = time.time()
        cv2.putText(frame, f"time spend: {time_cur - time_start}", (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (125, 125, 125), 2)
        
        #frame = cv2.resize(frame, (320, 240))
        if roi is not None:
            cv2.imshow('roi', roi)
        cv2.imshow('Camera Streaming', frame)
        cv2.imshow('Skin Extraction', skin_image)
    except Exception as e:
        traceback.print_exc()

cap.release()
cv2.destroyAllWindows()





('hand_flag', 'handedness', 'landmarks')
