In [20]:
import cv2
import torch
import pandas
import numpy as np

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

WARNING_TXT_ORG = (30,30)
DERECTION_ORG = (430,450)

COLOR_RED = (0,0,255)
COLOR_GREEN = (0,255,0)
COLOR_BLUE = (255,0,0)


# 그리기
def draw_landmarks_on_image(tools_dect, hand_dect, frame):
    annotated_image = np.copy(frame)
    
    boxes = tools_dect.xyxyn[0].numpy()
    filtered_boxes = boxes[ boxes[:,4]>0.6 ] # 인식률이 60%이상만 마스킹
    
    # tool 박스 그리기
    H, W = annotated_image.shape
    for i in range(len(filtered_boxes)):
        x1 = int(filtered_boxes[i,0] * W)
        y1 = int(filtered_boxes[i,1] * H)
        x2 = int(filtered_boxes[i,2] * W)
        y2 = int(filtered_boxes[i,3] * H)
        
        box_cls = filtered_boxes[i,5]
        if box_cls == 0:
            draw_ractangle(annotated_image, (x1,y1), (x2,y2), (0,0,0))
        elif box_cls == 1:
            draw_ractangle(annotated_image, (x1,y1), (x2,y2), COLOR_BLUE)
        elif box_cls == 2:
            draw_ractangle(annotated_image, (x1,y1), (x2,y2), (255,255,0))

    # 손 랜드마크 그리기
    landmarks_list = hand_dect.hand_landmarks
    for landmark in landmarks_list:
        landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        for lm in landmark: 
            landmarks_proto.landmark.extend([landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)])
            
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style()
        )

    return annotated_image

# 겹칩 확인
# xyxy: x, y, 너비, 높이
# aknife : 0; blade: 1; handle: 2
def detection_box(tools_dect, hand_dect, frame):
    boxes = tools_dect.xyxyn[0].numpy()
    
    # boolean 마스킹
    # 인식률이 50% 이상, 손잡이인 경우만 가져옴
    conf_mask = boxes[:,4] > 0.5
    blade_mask = boxes[:,5] == 1
    handle_mask = boxes[:,5] == 2

    blade_xy = boxes[conf_mask & blade_mask]
    handle_xy = boxes[conf_mask & handle_mask]

    # 이전 프레임 좌표 저장
    global handle_pixel, blade_pixel
    H, W = annotated_image.shape
    if handle_xy.shape[0] > 0:
        handle_pixel = save_pixel(handle_xy, H, W)
        
    if blade_xy.shape[0] > 0:
        blade_pixel = save_pixel(blade_xy, H, W)

    # 손이 겹쳤는지 확인
    annotated_image = np.copy(frame)
    hand_list = hand_dect.hand_landmarks
    
    flag_blade = False
    flag_handle = False
    for hand in hand_list:
        middle_x = (int(hand[0].x*W) + int(hand[9].x*W)) // 2
        middle_y = (int(hand[0].y*H) + int(hand[9].y*H)) // 2
        cv2.circle(annotated_image, (middle_x, middle_y), 5, COLOR_BLUE, -1, cv2.LINE_AA)
        
        # 손잡이 확인
        for handle in handle_pixel:
            box_middle_x = (handle[2] - handle[0]) // 2
            box_middle_y = (handle[1] - handle[3]) // 2
            
            if handle[0] <= middle_x <= handle[2] and handle[1] <= middle_y <= handle[3]:
                flag_handle = True
                break
                
            if middle_x < box_middle_x:
                if middle_y < box_middle_y:
                    draw_text(annotated_image, "Right-Up", DERECTION_ORG, COLOR_BLUE)
                else:
                    draw_text(annotated_image, "Right-Down", DERECTION_ORG, COLOR_BLUE)
            elif middle_x > box_middle_x:
                if middle_y < box_middle_y:
                    draw_text(annotated_image, "Left-Up", DERECTION_ORG, COLOR_BLUE)
                else:
                    draw_text(annotated_image, "Left-Down", DERECTION_ORG, COLOR_BLUE)
            
        
        for lm in hand:
            lm_x = int(lm.x * W)
            lm_y = int(lm.y * H)
            
            # 날 확인
            for blade in blade_pixel:
                if blade[0] <= lm_x <= blade[2] and blade[1] <= lm_y <= blade[3]:
                    flag_blade = True
                    break
            

    if flag_blade:
        if flag_handle:
            draw_text(annotated_image, "DETECTED", WARNING_TXT_ORG, COLOR_GREEN)
        else:
            draw_text(annotated_image, "DANGER", WARNING_TXT_ORG, COLOR_RED)
    elif flag_handle:
        draw_text(annotated_image, "DETECTED", WARNING_TXT_ORG, COLOR_GREEN)
                    
    return annotated_image

def save_pixel(boxes, H, W):
    lr = []
    for i in range(len(boxes)):
        x1 = int(boxes[i, 0] * W)
        y1 = int(boxes[i, 1] * H)
        x2 = int(boxes[i, 2] * W)
        y2 = int(boxes[i, 3] * H)
        lr.append([x1,y1,x2,y2])
            
    return lr

def check_inside(box, x, y):
    if box[0] <= x <= box[2] and box[1] <= y <= box[3]:
        return True

def draw_ractangle(frame, xy1, xy2, color):
    cv2.rectangle(frame, xy1, xy2, color, 2)

def draw_text(frame, text, org, color):
    cv2.putText(frame, text, org, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3, cv2.LINE_AA)
            

# 손 랜드마크 모델 가져오기
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
hand_detector = vision.HandLandmarker.create_from_options(options)
# 칼 구분 모델 가져오기
tools_detector = torch.load("knife_total.pt", map_location="cpu", weights_only=False)["model"].float().eval()

# 캠 열기
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("웹 캠을 열 수 없습니다")
    exit()

# 실행부
while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_bgr = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_bgr)

    # 모델 결과 가져오기
    tensor_frame = torch.from_numpy(frame)
    tools_result = tools_detector(tensor_frame)
    hand_result = hand_detector.detect(mp_image)
    
    drawing_image = draw_landmarks_on_image(tools_result, hand_result, frame_bgr)
    annotated_image = detection_box(tools_result, hand_result, drawing_image)

    cv2.imshow("hand_land", annotated_image)

    # q키를 누르면 종료
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
    
# 종료 시 자원 해제
cap.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

RuntimeError: Given groups=1, weight of size [32, 3, 6, 6], expected input[1, 480, 640, 3] to have 3 channels, but got 480 channels instead