# In [None]:
# Added preprocessing and postprocessing
import cv2, torch, pandas, numpy as np, threading, queue, ncnn, sounddevice as sd, scipy.io.wavfile

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

# Side length (pixels) of the square letterboxed input fed to the tool detector.
IMG_SIZE = 320
# Minimum class score a candidate must reach to be kept by tools_inference.
CONF_TH = 0.7
# Number of frames skipped between two inference passes in the main loop.
FRAME_SKIP = 2
# Default IoU threshold used by nms; boxes overlapping more than this are dropped.
IOU_TH = 0.25

# Text anchor (x, y) passed to draw_text for the DETECTED / DANGER status label.
WARNING_TXT_ORG = (30,30)
# Text anchor (x, y) passed to draw_text for the direction hint label.
DERECTION_ORG = (430,450)

# Colour tuples passed to OpenCV drawing calls (channel order is B, G, R).
COLOR_RED = (0,0,255)
COLOR_GREEN = (0,255,0)
COLOR_BLUE = (255,0,0)

# Most recent handle / blade boxes ([x1, y1, x2, y2] lists). detection_box
# overwrites them whenever a frame contains that class and otherwise reuses
# the previous values.
handle_pixel = []
blade_pixel = []

class Sound(threading.Thread):
    """Thread that emits a single short warning beep.

    Relies on the standard-library ``winsound`` module, which is only
    available on Windows; on other platforms ``run`` raises ``ImportError``.
    """

    def run(self):
        """Play a 500 Hz beep lasting 100 ms."""
        # ``winsound`` is not imported at file level and exists only on
        # Windows, so it is imported here instead of failing with NameError.
        import winsound

        winsound.Beep(frequency=500, duration=100)

# 그리기
def draw_landmarks_on_image(tools_dect, hand_dect, frame):
    """Return a copy of ``frame`` with tool boxes and hand landmarks drawn.

    Args:
        tools_dect: Iterable of ``((x1, y1, x2, y2), score, class_id)``
            entries; only the box corners are used here.
        hand_dect: Hand detection result exposing ``hand_landmarks``, a
            sequence holding one list of landmarks (``x``/``y``/``z``) per hand.
        frame: Image to annotate. It is copied, never modified.

    Returns:
        The annotated copy of ``frame``.
    """
    canvas = np.copy(frame)

    # Tool bounding boxes, 2 px outline.
    for box, _score, _class_id in tools_dect:
        left, top, right, bottom = box
        cv2.rectangle(canvas, (left, top), (right, bottom), (0, 0, 255), 2)

    # Hand skeletons: the drawing helper expects a NormalizedLandmarkList,
    # so each hand's landmarks are repacked into one before drawing.
    for hand_points in hand_dect.hand_landmarks:
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in hand_points
        ])
        solutions.drawing_utils.draw_landmarks(
            canvas,
            proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
        )

    return canvas

# Overlap check
def detection_box(tools_dect, hand_dect, frame):
    """Check whether a hand overlaps the handle or the blade and annotate it.

    Boxes with class id 0 are treated as blades and class id 1 as handles.
    When the current detections contain a class, the matching module-level
    list (``blade_pixel`` / ``handle_pixel``) is replaced; otherwise the boxes
    remembered from an earlier call are reused.

    Args:
        tools_dect: Iterable of ``(box, score, class_id)`` detections.
        hand_dect: Hand detection result exposing ``hand_landmarks``.
        frame: Image to annotate. It is copied, never modified.

    Returns:
        A copy of ``frame`` with hand/handle centre dots, an optional
        direction hint and an optional DETECTED / DANGER label.
    """
    global handle_pixel, blade_pixel

    blade_boxes = [det[0] for det in tools_dect if det[2] == 0]
    handle_boxes = [det[0] for det in tools_dect if det[2] == 1]

    # Remember the latest boxes so frames without a detection still have one.
    if handle_boxes:
        handle_pixel = save_pixel(handle_boxes)
    if blade_boxes:
        blade_pixel = save_pixel(blade_boxes)

    canvas = np.copy(frame)
    hands = hand_dect.hand_landmarks
    height, width, _ = canvas.shape

    # Hint text keyed by (hand is left of handle centre, hand is above it).
    direction_labels = {
        (True, True): "Right-Up",
        (True, False): "Right-Down",
        (False, True): "Left-Up",
        (False, False): "Left-Down",
    }

    touching_blade = False
    holding_handle = False
    for hand in hands:
        # Hand reference point: midpoint of landmarks 0 and 9
        # (presumably wrist and middle-finger base in MediaPipe's hand model).
        palm_x = (int(hand[0].x * width) + int(hand[9].x * width)) // 2
        palm_y = (int(hand[0].y * height) + int(hand[9].y * height)) // 2
        cv2.circle(canvas, (palm_x, palm_y), 5, COLOR_BLUE, -1, cv2.LINE_AA)

        # Handle check: stop at the first handle box containing the palm.
        for handle in handle_pixel:
            center_x = (handle[0] + handle[2]) // 2
            center_y = (handle[1] + handle[3]) // 2
            cv2.circle(canvas, (center_x, center_y), 5, (255, 255, 0), -1, cv2.LINE_AA)

            if check_inside(handle, palm_x, palm_y):
                holding_handle = True
                break

            if palm_x < center_x:
                hand_is_left = True
            elif palm_x > center_x:
                hand_is_left = False
            else:
                # Horizontally aligned with the handle centre: no hint drawn.
                continue
            label = direction_labels[(hand_is_left, palm_y < center_y)]
            draw_text(canvas, label, DERECTION_ORG, COLOR_BLUE)

        # Blade check: any landmark of this hand inside any blade box.
        for point in hand:
            point_x = int(point.x * width)
            point_y = int(point.y * height)
            if any(check_inside(blade, point_x, point_y) for blade in blade_pixel):
                touching_blade = True

    # Holding the handle wins over touching the blade.
    if holding_handle:
        draw_text(canvas, "DETECTED", WARNING_TXT_ORG, COLOR_GREEN)
    elif touching_blade:
        # NOTE(review): called on every call that reaches this branch; confirm
        # whether repeated sd.play calls restart the clip each time.
        sd.play(danger_data, danger_samplerate)
        draw_text(canvas, "DANGER", WARNING_TXT_ORG, COLOR_RED)

    return canvas

def save_pixel(boxes):
    """Copy each 4-value box in ``boxes`` into a new ``[x1, y1, x2, y2]`` list.

    Args:
        boxes: Iterable of 4-item sequences.

    Returns:
        A new list of new 4-item lists, in the same order as ``boxes``.
    """
    return [[left, top, right, bottom] for left, top, right, bottom in boxes]

def check_inside(box, x, y):
    """Tell whether point ``(x, y)`` lies inside ``box``, edges included.

    Args:
        box: Sequence ``[x1, y1, x2, y2]``.
        x: Point x coordinate.
        y: Point y coordinate.

    Returns:
        ``True`` when ``x1 <= x <= x2`` and ``y1 <= y <= y2``, else ``False``.
    """
    if not (box[0] <= x <= box[2]):
        return False
    return bool(box[1] <= y <= box[3])

def draw_text(image, text, org, color):
    """Draw ``text`` onto ``image`` in place at anchor ``org``.

    Uses the Hershey Simplex font at scale 1, thickness 3, anti-aliased.

    Args:
        image: Image array that is drawn on directly.
        text: String to render.
        org: ``(x, y)`` anchor passed to ``cv2.putText``.
        color: Colour tuple passed to ``cv2.putText``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale = 1
    thickness = 3
    cv2.putText(image, text, org, font, scale, color, thickness, cv2.LINE_AA)

def warnning_sound(sound_queue):
    """Worker loop that turns the danger alarm on and off from queue messages.

    Messages read from ``sound_queue``:
        ``"danger"``: start looping the danger clip, unless already playing.
        ``"stop"``: stop the clip if it is playing.
        ``"quit"``: stop the clip if needed and leave the loop.
        anything else: ignored.

    The previous implementation re-entered itself with an unchanged state on
    ``"danger"``, which recursed until ``RecursionError`` and killed the
    thread. The alarm state is now tracked in a flag and the queue is
    re-read on every iteration.

    Args:
        sound_queue: ``queue.Queue`` carrying the state strings above.
    """
    alarm_on = False

    while True:
        try:
            # Short timeout keeps the loop responsive without busy-waiting.
            state = sound_queue.get(timeout=0.1)
        except queue.Empty:
            continue

        if state == "quit":
            break

        if state == "danger" and not alarm_on:
            # loop=True repeats the clip until sd.stop() is called, so
            # repeated "danger" messages do not restart playback.
            sd.play(danger_data, danger_samplerate, loop=True)
            alarm_on = True
        elif state == "stop" and alarm_on:
            sd.stop()
            alarm_on = False

    # Do not leave the alarm looping after the worker exits.
    if alarm_on:
        sd.stop()
            
    

# 전처리
def letterbox(img, new=IMG_SIZE, color=(114,114,114)):
    """Resize ``img`` to fit a ``new`` x ``new`` square, padding the remainder.

    The aspect ratio is kept; the resized image is centred on a 3-channel
    ``uint8`` canvas filled with ``color``.

    Args:
        img: Source image array (height and width are its first two axes).
        new: Side length of the square output.
        color: Fill value for the padding.

    Returns:
        Tuple ``(canvas, scale, left_pad, top_pad)``: the padded image, the
        resize factor applied, and the offsets of the resized image inside
        the canvas.
    """
    src_h, src_w = img.shape[:2]
    scale = min(new / src_h, new / src_w)
    out_h = int(round(src_h * scale))
    out_w = int(round(src_w * scale))
    scaled = cv2.resize(img, (out_w, out_h), interpolation=cv2.INTER_AREA)

    # Centre the resized image; any odd leftover pixel goes to the far side.
    pad_top = (new - out_h) // 2
    pad_left = (new - out_w) // 2
    canvas = np.full((new, new, 3), color, dtype=np.uint8)
    canvas[pad_top:pad_top + out_h, pad_left:pad_left + out_w] = scaled

    return canvas, scale, pad_left, pad_top

def nms(dets, iou_th=IOU_TH):
    """Greedy non-maximum suppression over a list of detections.

    Detections are visited from highest to lowest score; each kept detection
    removes every remaining one whose IoU with it exceeds ``iou_th``. Class
    ids are not consulted, so boxes of different classes can suppress one
    another.

    Args:
        dets: List of ``([x1, y1, x2, y2], score, cls_id)`` tuples.
        iou_th: IoU above which a lower-scored box is discarded.

    Returns:
        The surviving entries of ``dets``, ordered by descending score.
    """
    if not dets:
        return []

    boxes = np.array([det[0] for det in dets], dtype=np.float32)
    scores = np.array([det[1] for det in dets], dtype=np.float32)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    remaining = scores.argsort()[::-1]
    selected = []
    while remaining.size > 0:
        best, others = remaining[0], remaining[1:]
        selected.append(best)
        if others.size == 0:
            break

        # Intersection rectangle between the best box and every other box.
        top_left = np.maximum(boxes[best, :2], boxes[others, :2])
        bottom_right = np.minimum(boxes[best, 2:], boxes[others, 2:])
        overlap = np.maximum(0.0, bottom_right - top_left)
        inter = overlap[:, 0] * overlap[:, 1]

        # The small epsilon guards against division by zero for empty boxes.
        iou = inter / (areas[best] + areas[others] - inter + 1e-6)
        remaining = others[iou <= iou_th]

    return [dets[idx] for idx in selected]

def tools_inference(frame):
    """Run the ncnn tool detector on ``frame`` and return filtered boxes.

    The frame is letterboxed to ``IMG_SIZE``, converted to RGB, scaled by
    1/255 and fed to the network input ``in0``. Output ``out0`` is read as a
    2-D array with one column per candidate: rows 0-3 are ``cx, cy, w, h``
    and rows 4-6 are the three class scores.

    Args:
        frame: BGR image array of shape ``(H, W, C)``.

    Returns:
        List of ``([x1, y1, x2, y2], score, class_id)`` tuples in the
        coordinates of ``frame``, after confidence filtering and NMS. Empty
        when no candidate reaches ``CONF_TH``.
    """
    frame_h, frame_w, _ = frame.shape
    padded, scale, pad_left, pad_top = letterbox(frame, IMG_SIZE)
    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)

    blob = ncnn.Mat.from_pixels(rgb, ncnn.Mat.PixelType.PIXEL_RGB, IMG_SIZE, IMG_SIZE)
    blob.substract_mean_normalize([0, 0, 0], [1 / 255.0, 1 / 255.0, 1 / 255.0])

    # Inference
    extractor = net.create_extractor()
    extractor.input("in0", blob)
    _, output = extractor.extract("out0")

    raw = output.numpy()
    _, num_candidates = raw.shape
    candidates = raw.T  # (N, D): one row per candidate

    class_scores = candidates[:, 4:7]  # (N, 3)
    best_class = np.argmax(class_scores, axis=1)
    best_score = class_scores[np.arange(num_candidates), best_class]

    # Confidence filter
    confident = best_score >= CONF_TH
    if not np.any(confident):
        return []
    cx, cy, box_w, box_h = (candidates[confident, col] for col in range(4))
    kept_scores = best_score[confident]
    kept_classes = best_class[confident]

    def to_frame(values, pad, limit):
        """Undo the letterbox offset and scale, then clip to the frame."""
        return np.clip((values - pad) / scale, 0, limit)

    # cxcywh -> xyxy in letterbox space, then back to frame coordinates.
    half_w = box_w / 2
    half_h = box_h / 2
    x1 = to_frame(cx - half_w, pad_left, frame_w)
    y1 = to_frame(cy - half_h, pad_top, frame_h)
    x2 = to_frame(cx + half_w, pad_left, frame_w)
    y2 = to_frame(cy + half_h, pad_top, frame_h)

    detections = [
        ([int(left), int(top), int(right), int(bottom)], float(score), int(cls_id))
        for left, top, right, bottom, score, cls_id
        in zip(x1, y1, x2, y2, kept_scores, kept_classes)
    ]
    return nms(detections, IOU_TH)


# ===================================================
# Load the hand landmark model (tracks up to two hands).
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
hand_detector = vision.HandLandmarker.create_from_options(options)
# ===================================================
# Load the knife detection model (ncnn net) used by tools_inference.
net = ncnn.Net()
net.opt.num_threads = 4
net.opt.use_fp16_storage = True
net.opt.use_fp16_arithmetic = True

net.load_param("knife_total_320.ncnn.param")
net.load_model("knife_total_320.ncnn.bin")
# ===================================================

# Background worker for the warning sound: warnning_sound reads state strings
# from warnning_queue. The thread is a daemon so it does not block exit.
warnning_queue = queue.Queue()
# NOTE(review): lock is created here but not used in the visible code.
lock = threading.Lock()
warnning_thread = threading.Thread(target=warnning_sound, args=(warnning_queue,))
warnning_thread.daemon = True
warnning_thread.start()

# Open the camera (device index 0); stop if it cannot be opened.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("웹 캠을 열 수 없습니다")
    exit()

# Frame counter used by the main loop for frame skipping.
fid = 0
# Danger clip (sample rate and samples) played through sounddevice.
danger_samplerate, danger_data = scipy.io.wavfile.read('danger.wav')

# Main loop: grab frames, run detection on a subset of them, draw and show
# the annotated result until the stream ends or "q" is pressed.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Frame skipping: run both detectors on the first frame and then on
        # every (FRAME_SKIP + 1)-th frame; frames in between reuse the last
        # results. Written so that FRAME_SKIP == 0 runs inference every frame.
        fid += 1
        if (fid - 1) % (FRAME_SKIP + 1) == 0:
            tools_result = tools_inference(frame)

            # The hand detector is given an RGB image; build it only on
            # frames where detection actually runs.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
            hand_result = hand_detector.detect(mp_image)

        # Draw results
        sound_state = "stay"
        drawing_image = draw_landmarks_on_image(tools_result, hand_result, frame)
        annotated_image = detection_box(tools_result, hand_result, drawing_image)

        cv2.imshow("hand_land", annotated_image)

        # Press "q" to quit
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release resources even if a frame raised. warnning_sound leaves its
    # loop when it reads "quit", so the worker does not keep polling.
    warnning_queue.put("quit")
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)