In [22]:
import cv2, torch, pandas, numpy as np, threading, queue, ncnn, sounddevice as sd, scipy.io.wavfile

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

# --- Detection settings ---------------------------------------------------
IMG_SIZE = 320     # side of the square letterboxed image fed to the ncnn net
CONF_TH = 0.70     # minimum class score for a candidate box to be kept
FRAME_SKIP = 2     # frames skipped between two detector runs in show_cv2
IOU_TH = 0.25      # IoU above which NMS drops the lower-scored box

# --- Text anchors (passed as `org` to cv2.putText) -------------------------
WARNING_TXT_ORG = (30, 30)
DERECTION_ORG = (430, 450)   # spelling kept: the name is referenced elsewhere

# --- Drawing colors, in OpenCV's BGR channel order -------------------------
COLOR_RED = (0, 0, 255)
COLOR_GREEN = (0, 255, 0)
COLOR_BLUE = (255, 0, 0)

# --- Last known boxes, overwritten by CheckBox.__init__ --------------------
# Each entry is an [x1, y1, x2, y2] list in frame pixels.
HANDLE_PIXEL, BLADE_PIXEL = [], []

In [23]:
class CV2_CAM:
    """Base holder for a single camera frame.

    Attributes:
        annotated_image: private copy of ``frame``; subclasses draw on this so
            the caller's array is never modified.
        H, W, C: height, width and channel count taken from ``frame.shape``.
    """

    def __init__(self, frame):
        # Unpacking into three names requires a 3-D (H, W, C) array.
        height, width, channels = frame.shape
        self.H = height
        self.W = width
        self.C = channels
        self.annotated_image = np.copy(frame)

In [24]:
class Detector(CV2_CAM):
    """Runs both detectors on one frame: MediaPipe hands and the ncnn tool net.

    The constructor performs the inference immediately. It relies on the
    module-level ``hand_detector`` and ``net`` objects having been created
    before an instance is built.
    """

    def __init__(self, frame):
        super().__init__(frame)
        # MediaPipe is given an SRGB image, so convert from OpenCV's BGR order.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

        self.hand_result = hand_detector.detect(mp_image).hand_landmarks
        self.tools_result = self.tools_inference()

    def get_result(self):
        """Return ``(tools_result, hand_result)`` as computed by the constructor."""
        return self.tools_result, self.hand_result

    def tools_inference(self):
        """Detect tools on the stored frame with the ncnn network.

        Returns:
            A list of ``([x1, y1, x2, y2], score, class_id)`` tuples in pixel
            coordinates of the original frame, kept by the ``CONF_TH`` filter
            and by NMS. An empty list when no candidate reaches ``CONF_TH``.
        """
        padded, scale, pad_left, pad_top = self.letterbox(IMG_SIZE)
        padded_rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)

        net_input = ncnn.Mat.from_pixels(
            padded_rgb, ncnn.Mat.PixelType.PIXEL_RGB, IMG_SIZE, IMG_SIZE
        )
        # Zero mean and a 1/255 factor on every channel.
        net_input.substract_mean_normalize([0, 0, 0], [1 / 255.0, 1 / 255.0, 1 / 255.0])

        # Run inference
        extractor = net.create_extractor()
        extractor.input("in0", net_input)
        _, output = extractor.extract("out0")

        raw = output.numpy()
        _, num_candidates = raw.shape
        candidates = raw.T  # (N, D): one candidate per row

        # Columns 0-3 are read as cx, cy, w, h; columns 4-6 as three class scores.
        class_scores = candidates[:, 4:7]
        class_ids = np.argmax(class_scores, axis=1)
        best_scores = class_scores[np.arange(num_candidates), class_ids]

        # Keep only candidates whose best class score reaches the threshold
        confident = best_scores >= CONF_TH
        if not np.any(confident):
            return []
        selected = candidates[confident]
        best_scores = best_scores[confident]
        class_ids = class_ids[confident]
        cx, cy, box_w, box_h = (selected[:, col] for col in range(4))

        # cxcywh -> xyxy in letterbox space, then undo the padding and scaling
        # to get back to original-frame coordinates, then clip to the frame.
        half_w, half_h = box_w / 2, box_h / 2
        left = np.clip((cx - half_w - pad_left) / scale, 0, self.W)
        top = np.clip((cy - half_h - pad_top) / scale, 0, self.H)
        right = np.clip((cx + half_w - pad_left) / scale, 0, self.W)
        bottom = np.clip((cy + half_h - pad_top) / scale, 0, self.H)

        dets = [
            ([int(a), int(b), int(c), int(d)], float(score), int(cls_id))
            for a, b, c, d, score, cls_id in zip(
                left, top, right, bottom, best_scores, class_ids
            )
        ]
        return self.nms(dets, IOU_TH)

    # Pre-processing
    def letterbox(self, new, color=(114,114,114)):
        """Resize the stored frame to fit a ``new`` x ``new`` canvas, keeping aspect ratio.

        Args:
            new: side length of the square output canvas.
            color: fill value for the padding around the resized image.

        Returns:
            ``(canvas, r, left, top)``: the padded image, the scale factor that
            was applied, and the left/top padding in pixels.
        """
        scale = min(new / self.H, new / self.W)
        new_h = int(round(self.H * scale))
        new_w = int(round(self.W * scale))
        shrunk = cv2.resize(self.annotated_image, (new_w, new_h), interpolation=cv2.INTER_AREA)

        # Center the resized image on a uniformly colored square.
        pad_top = (new - new_h) // 2
        pad_left = (new - new_w) // 2
        canvas = np.full((new, new, 3), color, dtype=np.uint8)
        canvas[pad_top:pad_top + new_h, pad_left:pad_left + new_w] = shrunk
        return canvas, scale, pad_left, pad_top

    def nms(self, dets, iou_th):
        """Greedy non-maximum suppression.

        Args:
            dets: ``[([x1, y1, x2, y2], score, cls_id), ...]``. When called from
                ``tools_inference`` the boxes are already in original-frame
                coordinates.
            iou_th: a box is dropped when its IoU with an already kept,
                higher-scored box exceeds this value.

        Returns:
            The surviving entries of ``dets``, highest score first.
        """
        # NOTE(review): cls_id is never read here, so boxes of different
        # classes suppress each other — confirm that this is intended.
        if not dets:
            return []
        boxes = np.array([entry[0] for entry in dets], dtype=np.float32)
        scores = np.array([entry[1] for entry in dets], dtype=np.float32)
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        ranking = scores.argsort()[::-1]
        survivors = []
        while ranking.size > 0:
            best, others = ranking[0], ranking[1:]
            survivors.append(best)
            if others.size == 0:
                break
            # Intersection rectangle between the best box and every other one
            ix1 = np.maximum(boxes[best, 0], boxes[others, 0])
            iy1 = np.maximum(boxes[best, 1], boxes[others, 1])
            ix2 = np.minimum(boxes[best, 2], boxes[others, 2])
            iy2 = np.minimum(boxes[best, 3], boxes[others, 3])
            inter = np.maximum(0.0, ix2 - ix1) * np.maximum(0.0, iy2 - iy1)
            # The small epsilon avoids division by zero for degenerate boxes.
            iou = inter / (areas[best] + areas[others] - inter + 1e-6)
            ranking = others[iou <= iou_th]
        return [dets[idx] for idx in survivors]

In [25]:
class Draw(CV2_CAM):
    """Renders tool boxes and hand landmarks onto a copy of the frame."""

    def __init__(self, frame, tools_result, hand_result):
        super().__init__(frame)
        self.hand_result = hand_result
        self.tools_result = tools_result

    def draw_on_cv(self):
        """Draw every tool box and every hand skeleton; return the annotated image."""
        canvas = self.annotated_image

        # Tool bounding boxes
        for box, _score, _cls_id in self.tools_result:
            left, top, right, bottom = box
            cv2.rectangle(canvas, (left, top), (right, bottom), COLOR_RED, 2)

        # Hand landmarks: the drawing helper is given a NormalizedLandmarkList
        # proto, so each hand's landmarks are repacked into one first.
        for hand in self.hand_result:
            hand_proto = landmark_pb2.NormalizedLandmarkList()
            hand_proto.landmark.extend([
                landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
                for point in hand
            ])
            solutions.drawing_utils.draw_landmarks(
                canvas,
                hand_proto,
                solutions.hands.HAND_CONNECTIONS,
                solutions.drawing_styles.get_default_hand_landmarks_style()
            )

        return canvas

    def draw_circle(self, center, color):
        """Draw a filled, anti-aliased dot of radius 5 at ``center``."""
        cv2.circle(self.annotated_image, center, 5, color, thickness=-1, lineType=cv2.LINE_AA)

In [26]:
class CheckBox(CV2_CAM):
    """Checks whether a hand is on the knife handle and/or touching the blade.

    Boxes with class id 0 are stored as blade boxes and boxes with class id 1
    as handle boxes, in the module-level ``BLADE_PIXEL`` / ``HANDLE_PIXEL``
    lists. A list is only overwritten when the current ``tools_result``
    contains that class, so the last seen boxes stay in effect on frames where
    the class is not detected.
    """

    def __init__(self, frame, tools_result, hand_result):
        global HANDLE_PIXEL, BLADE_PIXEL

        super().__init__(frame)
        self.flag = self.flag_init()
        self.hand_result = hand_result

        blade_xy = [tools[0] for tools in tools_result if tools[2] == 0]
        handle_xy = [tools[0] for tools in tools_result if tools[2] == 1]

        if handle_xy:
            HANDLE_PIXEL = self.save_pixel(handle_xy)
        if blade_xy:
            BLADE_PIXEL = self.save_pixel(blade_xy)

    def flag_init(self):
        """Return a fresh flag dict with both checks cleared."""
        return {"handle": False, "blade": False}

    def save_pixel(self, boxes):
        """Return the boxes as a new list of ``[x1, y1, x2, y2]`` lists."""
        return [[x1, y1, x2, y2] for x1, y1, x2, y2 in boxes]

    # ===========================================================
    def detect_box(self):
        """Evaluate every detected hand and draw the resulting status text.

        Returns:
            The annotated image with markers, direction hint and status text.
        """
        for hand in self.hand_result:
            self.check_handle(hand)

            # Any landmark of the hand inside a blade box counts as contact.
            for lm in hand:
                self.check_blade(lm)

        self.check_flag()
        return self.annotated_image

    def check_handle(self, hand):
        """Set the handle flag if the hand is on a handle, else draw a hint.

        The hand position is landmark index 9 (the middle-finger MCP joint in
        MediaPipe's hand model) scaled to pixels. All handle boxes are tested
        before any hint is drawn, so a hint never appears for a hand that is
        already on one of the handles, and at most one hint per hand is drawn
        (toward the nearest handle center) instead of several stacked texts.
        """
        middle_x = int(hand[9].x * self.W)
        middle_y = int(hand[9].y * self.H)
        cv2.circle(self.annotated_image, (middle_x, middle_y), 5, COLOR_BLUE, -1, cv2.LINE_AA)

        nearest = None  # (squared distance, center_x, center_y)
        for handle in HANDLE_PIXEL:
            handle_middle_x = (handle[0] + handle[2]) // 2
            handle_middle_y = (handle[1] + handle[3]) // 2
            cv2.circle(self.annotated_image, (handle_middle_x, handle_middle_y), 5, (255,255,0), -1, cv2.LINE_AA)

            if self.check_inside(handle, middle_x, middle_y):
                self.flag["handle"] = True
                return

            distance = (handle_middle_x - middle_x) ** 2 + (handle_middle_y - middle_y) ** 2
            if nearest is None or distance < nearest[0]:
                nearest = (distance, handle_middle_x, handle_middle_y)

        if nearest is None:
            return
        _, target_x, target_y = nearest

        if middle_x < target_x:
            horizontal = "Right"
        elif middle_x > target_x:
            horizontal = "Left"
        else:
            # Equal x produces no hint, as before.
            return
        # NOTE(review): "Up" is chosen when the hand's y is smaller than the
        # handle's, i.e. the hand is higher in the image — confirm that this
        # matches the intended guidance for the camera mounting.
        vertical = "Up" if middle_y < target_y else "Down"
        self.draw_text(f"{horizontal}-{vertical}", DERECTION_ORG, COLOR_BLUE)

    def check_blade(self, lm):
        """Set the blade flag if landmark ``lm`` lies inside any blade box."""
        lm_x = int(lm.x * self.W)
        lm_y = int(lm.y * self.H)

        # Blade check
        if any(self.check_inside(blade, lm_x, lm_y) for blade in BLADE_PIXEL):
            self.flag["blade"] = True

    def check_flag(self):
        """Draw "DETECTED" when the handle is held, "DANGER" for blade-only contact."""
        if self.flag["handle"]:
            self.draw_text("DETECTED", WARNING_TXT_ORG, COLOR_GREEN)
        elif self.flag["blade"]:
            self.draw_text("DANGER", WARNING_TXT_ORG, COLOR_RED)

    # ===========================================================
    def check_inside(self, box, x, y):
        """Return True when ``(x, y)`` lies within ``box`` (edges inclusive)."""
        return box[0] <= x <= box[2] and box[1] <= y <= box[3]

    def draw_text(self, text, org, color):
        """Write ``text`` on the annotated image at ``org`` in ``color``."""
        cv2.putText(self.annotated_image, text, org, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3, cv2.LINE_AA)

In [27]:
def show_cv2(cap):
    """Read frames from ``cap``, annotate them and show them until 'q' or end of stream.

    The detector runs on the first frame of every ``FRAME_SKIP + 1`` frames,
    counted with the module-level ``FID``; the frames in between reuse the most
    recent results.

    Args:
        cap: an object whose ``read()`` returns ``(ok, frame)``, such as
            ``cv2.VideoCapture``.
    """
    global FID

    # Start from empty results so drawing works even when the first frame of
    # this call is not a detection frame (e.g. FID was not reset to 0).
    tools_result, hand_result = [], []
    # Written so that FRAME_SKIP = 0 means "detect on every frame"; the former
    # `FID % (FRAME_SKIP + 1) == 1` test could never be true in that case.
    period = max(FRAME_SKIP, 0) + 1

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Frame skipping
        FID += 1
        if (FID - 1) % period == 0:
            tools_result, hand_result = Detector(frame).get_result()

        # Draw the results
        drawing_image = Draw(frame, tools_result, hand_result).draw_on_cv()
        # Check for handle / blade contact
        annotated_image = CheckBox(drawing_image, tools_result, hand_result).detect_box()

        cv2.imshow("hand_land", annotated_image)

        # Quit when the q key is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

# Load the hand-landmark model (MediaPipe Tasks), configured for up to two hands.
# `hand_detector` is read as a module-level global by Detector.__init__.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
hand_detector = vision.HandLandmarker.create_from_options(options)
# ===================================================
# Load the knife-part detection model (ncnn net).
# `net` is read as a module-level global by Detector.tools_inference.
net = ncnn.Net()
net.opt.num_threads = 4
net.opt.use_fp16_storage = True
net.opt.use_fp16_arithmetic = True

# NOTE(review): the results of load_param / load_model are not checked, so a
# missing model file presumably only surfaces later, at inference — confirm.
net.load_param("knife_total_320.ncnn.param")
net.load_model("knife_total_320.ncnn.bin")
# ===================================================
# Open the camera
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("웹 캠을 열 수 없습니다")
    cap.release()
    # Raise SystemExit instead of calling exit(): inside a Jupyter kernel
    # exit() shuts the kernel down, discarding the models loaded above.
    raise SystemExit(1)

# show_cv2 counts frames in this module-level counter; reset it for each run.
FID = 0
try:
    show_cv2(cap)
finally:
    # Release resources on exit, even when the display loop raised.
    cap.release()
    cv2.destroyAllWindows()
    # Presumably gives the GUI backend one more event-loop tick so the window
    # actually closes.
    cv2.waitKey(1)

-1