In [2]:
import os
import time
import tkinter as tk
from collections import Counter
from tkinter import Label, Button

import cv2
import mediapipe as mp
import numpy as np
import paho.mqtt.client as mqtt
from keras.models import load_model
from PIL import Image, ImageTk

In [None]:
# MQTT configuration.
# Credentials are read from the environment so they never have to be written
# into (or commented out of) the notebook; a fresh kernel can run this cell
# as long as both variables are exported.
MQTT_USERNAME = os.environ.get("MQTT_USERNAME")
MQTT_PASSWORD = os.environ.get("MQTT_PASSWORD")
if not MQTT_USERNAME or not MQTT_PASSWORD:
    raise RuntimeError(
        "Set the MQTT_USERNAME and MQTT_PASSWORD environment variables "
        "before running this cell."
    )
MQTT_TOPIC = MQTT_USERNAME + "/feeds/esp32-deeplearing"
MQTT_BROKER = "io.adafruit.com"

# Initialise the MQTT client and connect to the broker.
client = mqtt.Client(protocol=mqtt.MQTTv311)
client.username_pw_set(MQTT_USERNAME, MQTT_PASSWORD)
client.connect(MQTT_BROKER, 1883, 60)
# Run paho's network loop in a background thread. Without it no keepalive
# pings are sent, so the broker drops the connection once the 60 s keepalive
# expires and later publishes are lost.
client.loop_start()

  client = mqtt.Client(protocol=mqtt.MQTTv311)


<MQTTErrorCode.MQTT_ERR_SUCCESS: 0>

In [4]:
# Paths to the trained model and its label file.
MODEL_PATH = "../LargeData/models/keras_Model.h5"
LABELS_PATH = "../LargeData/models/labels.txt"

# Load the model and the class labels (one label per line).
model = load_model(MODEL_PATH, compile=False)
# Use a context manager so the label file handle is closed right away.
with open(LABELS_PATH, "r") as labels_file:
    class_names = labels_file.readlines()

# Initialise the MediaPipe Hands module (video mode, at most one hand).
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)

# Open the default camera.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Raise instead of calling exit(): exit() would shut the notebook kernel
    # down, whereas an exception stops this cell and reports the problem.
    cap.release()
    raise RuntimeError("無法讀取攝影機影像，請檢查設備或設備 ID")

# Shared state used by the GUI callbacks.
flag = 0  # 0: idle, 1: classifying
start_time = None  # time.time() at which the current classification window began
results = []  # (label, confidence) pairs collected during the current window

In [5]:
def extract_landmarks(frame):
    """
    Run MediaPipe Hands on a BGR frame and collect the landmark coordinates.

    Returns a tuple ``(features, hand_landmarks)``:
    ``features`` is a flat NumPy array holding x, y, z for every landmark of
    every detected hand, and ``hand_landmarks`` is MediaPipe's
    ``multi_hand_landmarks`` result. When no hand is found, both are ``None``.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    detection = hands.process(rgb_frame)
    detected_hands = detection.multi_hand_landmarks
    if not detected_hands:
        return None, None

    coords = [
        [point.x, point.y, point.z]
        for hand in detected_hands
        for point in hand.landmark
    ]
    return np.array(coords).flatten(), detected_hands

In [6]:
def classify_frame(frame):
    """
    Classify a single frame with the loaded model.

    Returns ``(label, confidence, hand_landmarks)``. When no hand is detected
    the result is ``("No Hand Detected", 0.0, None)``.
    """
    features, hand_landmarks = extract_landmarks(frame)
    if features is None:
        return "No Hand Detected", 0.0, None

    # The model is fed a batch, so add a leading batch dimension.
    scores = model.predict(np.expand_dims(features, axis=0))
    best = np.argmax(scores)
    return class_names[best].strip(), scores[0][best], hand_landmarks

In [7]:
def select_best_result(results):
    """
    Pick the most frequent label among the collected classification results.

    ``results`` is a sequence of entries whose first item is the label and
    whose second item is the confidence. Returns ``(label, mean_confidence)``
    where the mean is taken over the entries carrying the winning label, or
    ``(None, None)`` when ``results`` is empty.
    """
    if not results:
        return None, None

    # Count how often each label was predicted.
    tally = Counter()
    for entry in results:
        tally[entry[0]] += 1
    winner = tally.most_common(1)[0][0]

    # Average the confidence of the winning label only.
    total = 0
    matches = 0
    for entry in results:
        if entry[0] == winner:
            total += entry[1]
            matches += 1
    return winner, total / matches

In [8]:
def update_video():
    """
    Read one camera frame, optionally classify it, show it and re-schedule.

    While ``flag`` is 1 every frame is classified and the predictions are
    collected in ``results``. Once three seconds have elapsed since the window
    started, the most common label is published to ``MQTT_TOPIC``, shown in
    ``label_info`` and printed, and the state is reset to idle.

    The function always re-arms itself with ``root.after`` so that a single
    failed camera read does not freeze the video.
    """
    global flag, start_time, results
    ret, frame = cap.read()
    if not ret:
        # A dropped frame must not end the update loop; retry on the next tick.
        root.after(10, update_video)
        return

    if flag == 1:
        if start_time is None:
            # First frame of a new classification window.
            start_time = time.time()
            results = []

        elapsed_time = time.time() - start_time
        label, confidence, hand_landmarks = classify_frame(frame)
        if label != "No Hand Detected":
            results.append((label, confidence))

        # Overlay the live prediction on the frame.
        cv2.putText(frame, f"Label: {label}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Confidence: {confidence:.2f}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Draw the hand landmarks and their connections.
        if hand_landmarks:
            for landmarks in hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2)
                )

        # After three seconds, pick the best result and publish it over MQTT.
        if elapsed_time >= 3:
            best_label, avg_confidence = select_best_result(results)
            if best_label:
                client.publish(MQTT_TOPIC, best_label)
                info_text = f"已傳送到 MQTT: {best_label}\n準確度: {avg_confidence:.2f}"
                label_info.config(text=info_text)
                print(info_text)
            flag = 0
            start_time = None
            results = []

    # Show the frame on the canvas.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(frame_rgb)
    imgtk = ImageTk.PhotoImage(image=img)
    # Reuse one canvas image item instead of creating a new one per frame;
    # otherwise the canvas accumulates an item for every frame ever shown.
    if getattr(canvas, "image_item", None) is None:
        canvas.image_item = canvas.create_image(0, 0, anchor=tk.NW, image=imgtk)
    else:
        canvas.itemconfig(canvas.image_item, image=imgtk)
    # Keep a reference so the PhotoImage is not garbage-collected.
    canvas.imgtk = imgtk

    root.after(10, update_video)

In [9]:
# 按鈕功能
def start_translation():
    """Switch to classification mode and announce it in the GUI and on stdout."""
    global flag

    flag = 1
    label_info.configure(text="開始翻譯...")
    print("開始翻譯")

def quit_program(event=None):
    """
    Close the GUI and release the camera.

    ``event`` is accepted (and ignored) so the function can be used both as a
    button command and as a key-binding callback.
    """
    try:
        root.destroy()
    finally:
        # Release the camera even if destroying the window raises
        # (e.g. the window has already been destroyed).
        cap.release()
        cv2.destroyAllWindows()

In [10]:
# GUI initialisation.
root = tk.Tk()
root.title("手語辨識介面")

# Canvas on which the camera frames are drawn.
canvas = tk.Canvas(root, width=640, height=480, bg="black")
canvas.grid(row=0, column=0, rowspan=10)

# Keyboard shortcuts.
root.bind("s", lambda event: start_translation())
root.bind("q", quit_program)
# Route the window's close button through quit_program as well, so the camera
# is released no matter how the window is closed.
root.protocol("WM_DELETE_WINDOW", quit_program)

# Information display area.
label_info = Label(root, text="資訊顯示區域", font=("Helvetica", 12), fg="blue",
                   bd=2, relief="solid", width=20, height=2)
label_info.grid(row=0, column=1, rowspan=6, sticky="NSEW")

btn_start = Button(root, text="開始翻譯 (S)", command=start_translation, width=15, height=2)
btn_start.grid(row=6, column=1, rowspan=2, sticky="NSEW")

btn_quit = Button(root, text="退出 (Q)", command=quit_program, width=15, height=2)
btn_quit.grid(row=8, column=1, rowspan=2, sticky="NSEW")

# Start the periodic frame update and enter the Tk event loop.
root.after(10, update_video)
root.mainloop()

開始翻譯
已傳送到 MQTT: Y
準確度: 1.00
開始翻譯
已傳送到 MQTT: G
準確度: 1.00
開始翻譯
已傳送到 MQTT: M
準確度: 0.80
開始翻譯
已傳送到 MQTT: A
準確度: 0.87
