# In [None]:  (Jupyter cell prompt left over from the notebook export)
import cv2, torch, pandas, numpy as np, threading, queue, ncnn, sounddevice as sd, scipy.io.wavfile
import RPi.GPIO as GPIO

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

# Side length (pixels) of the square, letterboxed image fed to the tool detector.
IMG_SIZE = 320
# Minimum class score a tool candidate needs to be kept.
CONF_TH = 0.7
# The detectors run once every FRAME_SKIP + 1 frames (see show_cv2).
FRAME_SKIP = 2
# IoU above which a lower-scored box is suppressed during NMS.
IOU_TH = 0.25

# Text anchor points (x, y); not referenced by the code visible in this file.
WARNING_TXT_ORG = (30,30)
DERECTION_ORG = (430,450)

# Colours as (B, G, R) tuples, the channel order OpenCV drawing calls use.
COLOR_RED = (0,0,255)
COLOR_GREEN = (0,255,0)
COLOR_BLUE = (255,0,0)

# GPIO pins driven as LED outputs (BCM numbering, see GPIO.setmode below).
LED_NUMBER = [2,3,4,14,15,18,17,27,22]
# Dead zone (pixels) around a target centre inside which no direction flag is raised.
DETECT_PADDING = 30
# Selectable tool names, indexed by the global TARGET.
TOOLS_NAME = ["Knife", "Fork", "Ladle", "Plate"]
# Module-level caches of boxes ([x1, y1, x2, y2]) written by CheckBox:
# TARGET_PIXEL for the selected tool, BLADE_PIXEL for class-0 detections.
TARGET_PIXEL = []
BLADE_PIXEL = []

class CV2_CAM:
    """Base helper holding a private, drawable copy of one camera frame.

    Attributes:
        annotated_image: copy of the input frame that subclasses draw onto.
        H, W, C: the three entries of ``frame.shape``, in that order.
    """

    def __init__(self, frame):
        height, width, channels = frame.shape
        self.H = height
        self.W = width
        self.C = channels
        # Work on a copy so the caller's frame is never modified by drawing.
        self.annotated_image = np.copy(frame)

    def draw_text(self, text, org, color):
        """Render ``text`` onto the annotated image with its anchor at ``org``."""
        font_face = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        cv2.putText(
            self.annotated_image,
            text,
            org,
            font_face,
            font_scale,
            color,
            thickness=3,
            lineType=cv2.LINE_AA,
        )

class Detector(CV2_CAM):
    """Run hand-landmark and kitchen-tool detection on a single frame.

    Both models are evaluated in the constructor; the results are then
    available through ``get_result``. The class relies on the module-level
    ``hand_detector`` (MediaPipe HandLandmarker) and ``net`` (ncnn.Net).
    """

    def __init__(self, frame):
        super().__init__(frame)
        # The frame is converted BGR -> RGB before being wrapped as an SRGB image.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

        self.hand_result = hand_detector.detect(mp_image).hand_landmarks
        self.tools_result = self.tools_inference()

    def get_result(self):
        """Return ``(tools_result, hand_result)`` computed in the constructor."""
        return self.tools_result, self.hand_result

    def tools_inference(self):
        """Detect tools in the frame with the ncnn network.

        Returns:
            A list of ``([x1, y1, x2, y2], score, cls_id)`` tuples in
            original-frame pixel coordinates, after confidence filtering and
            NMS. Empty when no candidate reaches ``CONF_TH``.
        """
        img_lbx, r, lpad, tpad = self.letterbox(IMG_SIZE)
        img_rgb = cv2.cvtColor(img_lbx, cv2.COLOR_BGR2RGB)

        input_mat = ncnn.Mat.from_pixels(img_rgb, ncnn.Mat.PixelType.PIXEL_RGB, IMG_SIZE, IMG_SIZE)
        # Zero mean, 1/255 scale: maps pixel values into [0, 1].
        input_mat.substract_mean_normalize([0,0,0], [1/255.0, 1/255.0, 1/255.0])

        # Run inference.
        ex = net.create_extractor()
        ex.input("in0", input_mat)
        _, result = ex.extract("out0")

        arr = result.numpy()
        _, num_candidates = arr.shape
        A = arr.T  # (N, D): one row per candidate
        cx, cy, w, h = A[:, 0], A[:, 1], A[:, 2], A[:, 3]

        # NOTE(review): only three class channels are read here, so cls_id is
        # always 0..2, while CheckBox also looks for ids 3 and 4 - confirm
        # against the exported model's class count.
        cls_scores = A[:, 4:7]              # (N, 3)
        cls_ids = np.argmax(cls_scores, axis=1)
        scores = cls_scores[np.arange(num_candidates), cls_ids]

        # Keep only confident candidates.
        keep = scores >= CONF_TH
        if not np.any(keep):
            return []
        cx = cx[keep]; cy = cy[keep]; w = w[keep]; h = h[keep]
        scores = scores[keep]; cls_ids = cls_ids[keep]

        # cxcywh -> xyxy (still in letterboxed IMG_SIZE coordinates)
        x1 = cx - w/2; y1 = cy - h/2
        x2 = cx + w/2; y2 = cy + h/2

        # Undo the letterbox padding and scale to get original-frame coordinates.
        x1 = (x1 - lpad) / r; y1 = (y1 - tpad) / r
        x2 = (x2 - lpad) / r; y2 = (y2 - tpad) / r

        # Clip all four edges to the frame. The x1/y1 clip had been swallowed
        # by a comment line, leaving top-left corners unclipped.
        x1 = np.clip(x1, 0, self.W); y1 = np.clip(y1, 0, self.H)
        x2 = np.clip(x2, 0, self.W); y2 = np.clip(y2, 0, self.H)

        dets = [
            ([int(bx1), int(by1), int(bx2), int(by2)], float(score), int(cls_id))
            for bx1, by1, bx2, by2, score, cls_id in zip(x1, y1, x2, y2, scores, cls_ids)
        ]
        return self.nms(dets, IOU_TH)

    # Pre-processing
    def letterbox(self, new, color=(114,114,114)):
        """Resize the frame to fit a ``new`` x ``new`` canvas, keeping aspect ratio.

        Returns:
            ``(canvas, r, left, top)``: the padded image, the scale factor
            applied to the frame, and the left/top padding in pixels.
        """
        r = min(new / self.H, new / self.W)
        nh, nw = int(round(self.H * r)), int(round(self.W * r))
        resized = cv2.resize(self.annotated_image, (nw, nh), interpolation=cv2.INTER_AREA)
        canvas = np.full((new, new, 3), color, dtype=np.uint8)
        top = (new - nh) // 2
        left = (new - nw) // 2
        canvas[top:top+nh, left:left+nw] = resized
        return canvas, r, left, top

    def nms(self, dets, iou_th):
        """Greedy non-maximum suppression across all classes.

        Args:
            dets: ``[([x1, y1, x2, y2], score, cls_id), ...]`` as built by
                ``tools_inference`` (original-frame coordinates).
            iou_th: boxes whose IoU with a kept box exceeds this are dropped.

        Returns:
            The surviving entries of ``dets``, highest score first.
        """
        if not dets:
            return []
        boxes = np.array([d[0] for d in dets], dtype=np.float32)
        scores = np.array([d[1] for d in dets], dtype=np.float32)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            if order.size == 1:
                break
            rest = order[1:]
            xx1 = np.maximum(boxes[i,0], boxes[rest,0])
            yy1 = np.maximum(boxes[i,1], boxes[rest,1])
            xx2 = np.minimum(boxes[i,2], boxes[rest,2])
            yy2 = np.minimum(boxes[i,3], boxes[rest,3])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            area_i = (boxes[i,2] - boxes[i,0]) * (boxes[i,3] - boxes[i,1])
            area_r = (boxes[rest,2] - boxes[rest,0]) * (boxes[rest,3] - boxes[rest,1])
            # The epsilon avoids 0/0 for degenerate (zero-area) boxes.
            iou = inter / (area_i + area_r - inter + 1e-6)
            order = rest[iou <= iou_th]
        return [dets[k] for k in keep]

class Draw(CV2_CAM):
    """Overlay tool boxes, hand landmarks and the selected tool name on a frame."""

    def __init__(self, frame, tools_result, hand_result):
        super().__init__(frame)
        self.hand_result = hand_result
        self.tools_result = tools_result

    def draw_box(self, result):
        """Draw one rectangle per ``([x1, y1, x2, y2], score, cls_id)`` entry."""
        for box, _score, _cls_id in result:
            left, top, right, bottom = box
            cv2.rectangle(self.annotated_image, (left, top), (right, bottom), (0,0,255), 2)

    def draw_on_cv(self):
        """Draw every overlay and return the annotated image."""
        # Tool bounding boxes.
        self.draw_box(self.tools_result)

        # Hand landmarks: repackage each hand as the protobuf list that the
        # MediaPipe drawing helper is given.
        for hand in self.hand_result:
            proto = landmark_pb2.NormalizedLandmarkList()
            proto.landmark.extend([
                landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
                for point in hand
            ])
            solutions.drawing_utils.draw_landmarks(
                self.annotated_image,
                proto,
                solutions.hands.HAND_CONNECTIONS,
                solutions.drawing_styles.get_default_hand_landmarks_style(),
            )

        # Name of the currently selected tool.
        selected_name = TOOLS_NAME[TARGET]
        self.draw_text(selected_name, (30,65), (255,255,0))

        return self.annotated_image

class CheckBox(CV2_CAM):
    """Compare the hand position with detected tools and drive the LEDs.

    On construction the module-level caches ``TARGET_PIXEL`` (boxes of the
    selected tool) and ``BLADE_PIXEL`` (boxes of class 0) are refreshed when
    the current detections contain such boxes; otherwise the previous values
    are kept. ``detect_box`` then raises direction/blade flags and switches
    the GPIO outputs accordingly.
    """

    # Detector class id looked up for each selectable tool name.
    # NOTE(review): the last entry was spelled "Pork" and therefore never
    # matched; "Plate" is what TOOLS_NAME contains. Detector currently emits
    # ids 0..2 only, so ids 3 and 4 cannot match - confirm the class layout.
    _TARGET_CLASS_ID = {"Knife": 2, "Fork": 1, "Ladle": 3, "Plate": 4}
    _DEFAULT_CLASS_ID = 2

    # LED pin lit for each (horizontal, vertical) direction combination.
    _DIRECTION_LED = {
        ("Left", "Up"): 2, ("Left", None): 3, ("Left", "Down"): 4,
        (None, "Up"): 14, (None, "Down"): 18,
        ("Right", "Up"): 17, ("Right", None): 27, ("Right", "Down"): 22,
    }
    _BLADE_LED = 15

    def __init__(self, frame, tools_result, hand_result):
        super().__init__(frame)
        self.flag = self.flag_init()
        self.hand_result = hand_result
        self.setting_target(tools_result)

        global BLADE_PIXEL
        blade_boxes = [box for box, _, cls_id in tools_result if cls_id == 0]
        if blade_boxes:
            BLADE_PIXEL = self.save_pixel(blade_boxes)

    def setting_target(self, tools_result):
        """Cache the boxes of the currently selected tool in ``TARGET_PIXEL``."""
        # Without this declaration the assignment below only created a local,
        # so the module-level cache was never updated.
        global TARGET_PIXEL
        class_id = self._TARGET_CLASS_ID.get(TOOLS_NAME[TARGET], self._DEFAULT_CLASS_ID)

        target_boxes = [box for box, _, cls_id in tools_result if cls_id == class_id]
        if target_boxes:
            TARGET_PIXEL = self.save_pixel(target_boxes)

    def flag_init(self):
        """Return a fresh flag dictionary with every flag cleared."""
        # NOTE(review): nothing in this class ever sets "handle" to True;
        # check_flag still honours it in case that is added.
        return {
            "handle": False,
            "blade": False,
            "Left": False,
            "Right": False,
            "Up": False,
            "Down": False,
        }

    def save_pixel(self, boxes):
        """Return ``boxes`` as a new list of ``[x1, y1, x2, y2]`` lists."""
        return [[x1, y1, x2, y2] for x1, y1, x2, y2 in boxes]

    # ===========================================================
    def detect_box(self):
        """Evaluate every detected hand, update the LEDs, return the image."""
        for hand in self.hand_result:
            self.check_handle(hand)

            for lm in hand:
                self.check_blade(lm)

        self.check_flag()
        return self.annotated_image

    def check_handle(self, hand):
        """Raise direction flags from landmark 9 relative to each target centre."""
        middle_x = int(hand[9].x * self.W)
        middle_y = int(hand[9].y * self.H)
        cv2.circle(self.annotated_image, (middle_x, middle_y), 5, COLOR_BLUE, -1, cv2.LINE_AA)

        for target in TARGET_PIXEL:
            # The loop body previously referenced undefined names
            # (handle, handle_middle_x, handle_middle_y).
            target_middle_x = (target[0] + target[2]) // 2
            target_middle_y = (target[1] + target[3]) // 2
            cv2.circle(self.annotated_image, (target_middle_x, target_middle_y), 5, (255,255,0), -1, cv2.LINE_AA)

            if middle_x < target_middle_x - DETECT_PADDING:
                self.flag["Left"] = True
            elif middle_x > target_middle_x + DETECT_PADDING:
                self.flag["Right"] = True

            if middle_y < target_middle_y - DETECT_PADDING:
                self.flag["Up"] = True
            elif middle_y > target_middle_y + DETECT_PADDING:
                self.flag["Down"] = True

    def check_blade(self, lm):
        """Set the blade flag when landmark ``lm`` lies inside a cached blade box."""
        lm_x = int(lm.x * self.W)
        lm_y = int(lm.y * self.H)

        if any(self.check_inside(blade, lm_x, lm_y) for blade in BLADE_PIXEL):
            self.flag["blade"] = True

    def check_flag(self):
        """Switch the LEDs to reflect the current flags."""
        self.All_LED(False)

        # Left wins over Right and Up over Down when both are raised.
        if self.flag["Left"]:
            horizontal = "Left"
        elif self.flag["Right"]:
            horizontal = "Right"
        else:
            horizontal = None
        if self.flag["Up"]:
            vertical = "Up"
        elif self.flag["Down"]:
            vertical = "Down"
        else:
            vertical = None

        pin = self._DIRECTION_LED.get((horizontal, vertical))
        if pin is not None:
            GPIO.output(pin, True)

        # "handle" lights every LED; otherwise "blade" lights its own LED.
        if self.flag["handle"]:
            self.All_LED(True)
        elif self.flag["blade"]:
            GPIO.output(self._BLADE_LED, True)

    # ===========================================================
    def check_inside(self, box, x, y):
        """Return True when ``(x, y)`` lies inside ``box`` (edges inclusive)."""
        return box[0] <= x <= box[2] and box[1] <= y <= box[3]

    def All_LED(self, boolean):
        """Drive every pin in ``LED_NUMBER`` to ``boolean``."""
        for led in LED_NUMBER:
            GPIO.output(led, boolean)

def button_callback(channel):
    """GPIO edge callback: select the next tool in ``TOOLS_NAME``.

    Advances the global ``TARGET`` by one, wrapping to 0 past the last entry
    (an out-of-range value also resets to 0), and prints the new index.
    The cached ``TARGET_PIXEL`` boxes are cleared because they belong to the
    previously selected tool.

    Args:
        channel: the GPIO channel that fired; unused.
    """
    global TARGET, TARGET_PIXEL
    following = TARGET + 1
    TARGET = following if following < len(TOOLS_NAME) else 0
    TARGET_PIXEL = []
    print(TARGET)

def show_cv2(cap):
    """Capture, detect, annotate and display frames until the stream ends.

    The detectors run once every ``FRAME_SKIP + 1`` frames; frames in between
    reuse the latest results. The loop stops when ``cap.read()`` fails or the
    user presses ``q`` in the preview window.

    Args:
        cap: an opened capture object providing ``read()``.
    """
    global FID
    # Start with empty results so the drawing code is safe even when the
    # first frame does not fall on a detection step.
    tools_result, hand_result = [], []
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Frame skipping: detect on frames 1, 1 + (FRAME_SKIP + 1), ...
        # Written so that FRAME_SKIP = 0 means "detect on every frame".
        FID += 1
        if (FID - 1) % (FRAME_SKIP + 1) == 0:
            tools_result, hand_result = Detector(frame).get_result()

        # Draw the detection results.
        drawing_image = Draw(frame, tools_result, hand_result).draw_on_cv()
        # Evaluate hand/tool positions and update the LEDs.
        annotated_image = CheckBox(drawing_image, tools_result, hand_result).detect_box()

        cv2.imshow("hand_land", annotated_image)

        # Quit when the q key is pressed.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

# Load the hand-landmark model.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
hand_detector = vision.HandLandmarker.create_from_options(options)
# ===================================================
# Load the tool-classification model (ncnn network).
net = ncnn.Net()
net.opt.num_threads = 4
net.opt.use_fp16_storage = True
net.opt.use_fp16_arithmetic = True

net.load_param("kitchen_tools_best02_100.ncnn.param")
net.load_model("kitchen_tools_best02_100.ncnn.bin")
# ===================================================
# Open the camera.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("??罹좎쓣 ?????놁뒿?덈떎")
    exit()
# ===================================================
# Loop state is created before the button interrupt is armed, so that
# button_callback never runs while TARGET is still undefined.
FID = 0
TARGET = 0

GPIO.setmode(GPIO.BCM)
try:
    for led in LED_NUMBER:
        GPIO.setup(led,GPIO.OUT)
    GPIO.setup(21, GPIO.IN, pull_up_down=GPIO.PUD_DOWN)
    GPIO.add_event_detect(21, GPIO.RISING, callback=button_callback, bouncetime=500)
    # ===================================================
    show_cv2(cap)
finally:
    # Release resources on exit, including when the loop raised.
    cap.release()
    cv2.destroyAllWindows()
    # Presumably gives the GUI backend one more event cycle to close the window.
    cv2.waitKey(1)
    # Return every configured pin to its default state.
    GPIO.cleanup()