In [1]:
import cv2
import mediapipe as mp
import numpy as np
from time import sleep
from pynput.keyboard import Controller, Key

In [2]:
# 设置显示分辨率
class CameraDisplay:
    def __init__(self, camera, width, height):
        self.camera = camera
        self.width = width
        self.height = height

    def set_resolution(self):
        self.camera.set(3, self.width)
        self.camera.set(4, self.height)

# 定义显示键盘的内容
KEYBOARD_LAYOUT = [
    ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "-", "+"],           
    ["Q", "W", "E", "R", "T", "Y", "U", "I", "O", "P", "[", "]"],           
    ["A", "S", "D", "F", "G", "H", "J", "K", "L", ";", "ENTER"],           
    ["Z", "X", "C", "V", "B", "N", "M", ",", ".", "/", "BACK"]
]

keyboard = Controller()

# 设置半透明效果
def apply_transparency(image, start_pos, end_pos, blue=0, green=255, red=0, alpha=0.8):    
    colors = [blue, green, red]    
    for i in range(3):        
        image[start_pos[1]:end_pos[1], start_pos[0]:end_pos[0], i] = (
            image[start_pos[1]:end_pos[1], start_pos[0]:end_pos[0], i] * alpha + 
            colors[i] * (1-alpha)
        )    
    return image

# 绘制键盘
def draw_keyboard(image, key_start=(50, 50), key_end=(120, 120), text_pos=(62, 107), text_color=(255, 255, 255)):
    for row_idx, row in enumerate(KEYBOARD_LAYOUT):
        for col_idx, key in enumerate(row):
            # 特殊处理回车键和退格键
            is_special_key = (row_idx == 2 and col_idx == 10) or (row_idx == 3 and col_idx == 10)
            
            start_x = key_start[0] + 90 * col_idx
            start_y = key_start[1] + 90 * row_idx
            end_x = key_end[0] + (90 * (col_idx + 1) if is_special_key else 90 * col_idx)
            end_y = key_end[1] + 90 * row_idx
            
            image = apply_transparency(
                image, 
                (start_x, start_y),
                (end_x, end_y),
                alpha=0.7
            )
            
            font_scale = 2 if is_special_key else 3
            cv2.putText(
                image, key,
                (text_pos[0] + 90 * col_idx, text_pos[1] + 90 * row_idx),
                fontFace=cv2.FONT_HERSHEY_PLAIN,
                fontScale=font_scale,
                color=text_color,
                thickness=2
            )
                
# 检测是否按下按键
def is_key_pressed(finger_base, thumb_tip, thumb_base):
    threshold_distance = np.linalg.norm((thumb_tip - finger_base), ord=2)
    current_distance = np.linalg.norm((thumb_base - finger_base), ord=2)
    
    is_pressed = current_distance > threshold_distance
    if is_pressed:
        print(current_distance)
    
    return is_pressed

# 返回按键的值并输出打印
def handle_key_press(image, finger_pos, is_pressed):
    x_pos, y_pos = finger_pos[0], finger_pos[1]
    for row_idx, row in enumerate(KEYBOARD_LAYOUT):
        for col_idx, key in enumerate(row):
            key_x_start = 50 + 90 * col_idx
            key_x_end = 120 + 90 * col_idx
            key_y_start = 50 + 90 * row_idx
            key_y_end = 120 + 90 * row_idx
            
            # 判断手指是否移动到某个按键的区域内
            if key_x_start < x_pos < key_x_end and key_y_start < y_pos < key_y_end:
                is_enter = row_idx == 2 and col_idx == 10
                is_backspace = row_idx == 3 and col_idx == 10
                
                # 处理特殊键位
                if is_enter or is_backspace:
                    key_width = 90 * 2
                    apply_transparency(
                        image,
                        start_pos=(key_x_start, key_y_start),
                        end_pos=(key_x_start + key_width, key_y_end),
                        alpha=0
                    )
                    
                    if is_pressed:
                        key_to_press = Key.enter if is_enter else Key.backspace
                        keyboard.press(key_to_press)
                        cv2.putText(
                            image, key,
                            (60 + 90 * col_idx, 107 + 90 * row_idx),
                            fontFace=cv2.FONT_HERSHEY_PLAIN,
                            fontScale=3,
                            color=(255, 0, 0),
                            thickness=3
                        )
                        keyboard.release(key_to_press)
                        sleep(0.1)
                else:
                    # 处理普通按键
                    apply_transparency(
                        image,
                        start_pos=(key_x_start, key_y_start),
                        end_pos=(key_x_end, key_y_end),
                        alpha=0
                    )
                    if is_pressed:
                        keyboard.press(key)
                        cv2.putText(
                            image, key,
                            (60 + 90 * col_idx, 107 + 90 * row_idx),
                            fontFace=cv2.FONT_HERSHEY_PLAIN,
                            fontScale=3,
                            color=(255, 0, 0),
                            thickness=3
                        )
                        keyboard.release(key)
                        sleep(0.1)

def main():
    camera = cv2.VideoCapture(0)
    landmarks_array = np.arange(42).reshape(21, -1)  # 改为更明确的名称

    if not camera.isOpened():
        print("Cannot open camera")
        exit()
    
    display = CameraDisplay(camera, width=1280, height=720)
    display.set_resolution()

    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands()
    mp_draw = mp.solutions.drawing_utils
    landmark_style = mp_draw.DrawingSpec(color=(255, 0, 0), thickness=int(10))
    connection_style = mp_draw.DrawingSpec(color=(0, 255, 0), thickness=int(5))

    while True:
        success, frame = camera.read()
        if not success:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # 翻转图像
        frame = cv2.flip(frame, 1)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hand_detection = hands.process(frame_rgb)
        draw_keyboard(frame)
        
        if hand_detection.multi_hand_landmarks:
            for hand_idx, hand_landmarks in enumerate(hand_detection.multi_hand_landmarks):
                mp_draw.draw_landmarks(
                    frame, 
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    landmark_drawing_spec=landmark_style,
                    connection_drawing_spec=connection_style
                )
                
                # 更新坐标数组
                for landmark_idx, landmark in enumerate(hand_landmarks.landmark):
                    x_pos = int(landmark.x * display.width)                    
                    y_pos = int(landmark.y * display.height)                    
                    landmarks_array[landmark_idx] = [x_pos, y_pos]  # 使用新的数组名

                is_pressed = is_key_pressed(
                    landmarks_array[11],  # 手指基部
                    landmarks_array[4],   # 拇指尖
                    landmarks_array[3]    # 拇指基部
                )
                handle_key_press(frame, landmarks_array[8], is_pressed)
                
        cv2.imshow('Virtual Keyboard', frame)
        if cv2.waitKey(1) == ord('q'):            
            break
            
    camera.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()

204.44314613114327
168.81350656863924
161.14899937635357
186.10212250267324
185.31055015837603
189.16923639957952
188.2153022471871
187.62729012593024
193.89172236070317
197.89138435010253
198.48677537810926
193.75241933973368
201.64572894063488
199.7223072167954
186.33571852975479
184.67809832245945
115.3819743287486
111.01801655587259
107.18675291284833
117.88977903109328
115.10430052782563
123.96773773849388
138.97481786280562
151.90786681406595
148.48905683584903
150.136604464068
149.40214188558343
142.72000560538106
135.4400236266961
158.21820375671064
150.88074761214565
214.5250568115529
214.03737991294884
198.27506146764904
148.48568954616468
145.01379244747721
146.8945199794737
163.1349134918703
154.5736070614903
152.6106156202772
184.22811946062956
166.97305171793442
176.2980430974774
178.01123560045303
170.79227148791014
176.7823520603796
175.1142484208524
176.23280057923384
174.9771413642365
171.40011668607463
151.43315356948756
141.01418368376991
115.35163631262454
140.6164

In [None]:
q