In [6]:
import torch
from mmpose.apis import MMPoseInferencer
import cv2
import time
import datetime
import os
import warnings
warnings.simplefilter(action='ignore')

def is_finger_up(finger_tip, knuckle):
    """Return True when ``finger_tip`` is strictly smaller than ``knuckle``.

    Callers pass the second component of two keypoints; a smaller value is
    treated as the finger being raised.
    """
    return knuckle > finger_tip

def detect_fingers(keypoints):
    """Classify the four non-thumb fingers as raised or not.

    For finger number ``n`` (1 = index ... 4 = pinky) the keypoint at index
    ``4 * n + 3`` is compared against the keypoint at index ``4 * n + 1``
    using ``is_finger_up`` on the second component of each keypoint.

    NOTE(review): with a 21-point hand layout, indices 7/11/15/19 would be
    the joints just below the fingertips rather than the tips themselves --
    confirm this is intended.

    Args:
        keypoints: Indexable sequence of keypoints; element ``[1]`` of each
            keypoint is read.

    Returns:
        Dict mapping 'index', 'middle', 'ring' and 'pinky' to a boolean.
        A finger whose upper keypoint index is out of range maps to False.
    """
    states = {}
    for number, name in enumerate(('index', 'middle', 'ring', 'pinky'), start=1):
        lower = number * 4 + 1
        upper = lower + 2
        if upper >= len(keypoints):
            # Not enough keypoints to evaluate this finger.
            states[name] = False
            continue
        states[name] = is_finger_up(keypoints[upper][1], keypoints[lower][1])
    return states

def is_fist(finger_states):
    """Return True when no finger in ``finger_states`` is marked as raised.

    An empty mapping counts as a fist.
    """
    return not any(finger_states.values())

def draw_text(frame, text, start_time, position=(5, 50), font=cv2.FONT_HERSHEY_SIMPLEX, font_scale=0.6, color=(0, 0, 0), thickness=1):
    """Overlay ``text`` on ``frame`` while the message is still fresh.

    The text is drawn only if at most 2 seconds have passed since
    ``start_time`` (a ``time.time()`` timestamp); afterwards the call is a
    no-op. ``frame`` is modified in place by ``cv2.putText``.
    """
    elapsed = time.time() - start_time
    still_visible = elapsed <= 2
    if not still_visible:
        return
    cv2.putText(frame, text, position, font, font_scale, color, thickness, cv2.LINE_AA)

# Build the hand-keypoint inferencer from a named RTMPose hand config and a
# local checkpoint file.
# NOTE(review): the checkpoint path is machine-specific; adjust it when running
# on another computer.
inferencer = MMPoseInferencer(
    pose2d='rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256',
    pose2d_weights=r'C:\Users\eog30\anaconda3\envs\mmpose-env\Lib\site-packages\mmpose\.mim\configs\hand_2d_keypoint\rtmpose\coco_wholebody_hand\rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth',
    # Prefer the first GPU, but fall back to the CPU so the script still
    # starts on machines without a usable CUDA device.
    device='cuda:0' if torch.cuda.is_available() else 'cpu'
)

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

# Display / control flags toggled by gestures.
off = False
black_and_white = night_vision = False
shot = False

# Gesture state: a fist must be seen first, then held, before a finger-count
# pose is accepted. Timestamps are filled in with time.time() when needed.
fist_detected = pose_detected = False
fist_start_time = pose_start_time = None

# On-screen message and the time it was issued.
message, message_start_time = "", 0

# Folder that snapshots are written to.
save_directory = r"C:\Users\eog30\Desktop\CV\mmpose\Images"

# Resizable preview window.
cv2.namedWindow('Webcam Feed', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Webcam Feed', 1920, 1080)

# Main loop: read a frame, run hand-pose inference, advance the gesture state
# machine, apply the active display filter, optionally save a snapshot, then
# show the frame and check for exit.
#
# Gesture protocol, as implemented below:
#   1. A fist (no finger up) must be seen; holding it for 2 s arms the pose.
#   2. Once armed, and at least 2 s after arming, the first non-fist frame is
#      read as a command based on how many fingers are up:
#        1 -> take a snapshot after 2 s, 2 -> toggle grayscale,
#        3 -> toggle night vision,       4 -> exit.
while True:
    ret, frame = cap.read()
    if not ret or frame is None:
        # The camera delivered nothing; running inference on a missing frame
        # would raise, so leave the loop and let the cleanup code run.
        print("Failed to read a frame from the camera.")
        break

    for result in inferencer(frame):
        if not isinstance(result, dict):
            continue
        predictions = result.get('predictions')
        # predictions is indexed as [0][0] below; when either level is empty
        # no hand is available, so skip gesture handling for this frame
        # instead of reusing stale (or not yet defined) finger states.
        if predictions is None or len(predictions) == 0 or len(predictions[0]) == 0:
            continue

        keypoints = predictions[0][0]['keypoints']
        for keypoint in keypoints:
            kp_x, kp_y = keypoint
            cv2.circle(frame, (int(kp_x), int(kp_y)), 5, (0, 255, 0), -1)
        finger_states = detect_fingers(keypoints)

        if is_fist(finger_states):
            if not fist_detected:
                fist_detected = True
                fist_start_time = time.time()
            elif not pose_detected and time.time() - fist_start_time >= 2:
                # Fist held long enough: arm the pose recognition.
                pose_detected = True
                pose_start_time = time.time()
        elif fist_detected and pose_detected:
            if time.time() - pose_start_time >= 2:
                num_fingers_up = sum(finger_states.values())
                prefix = f"Detected {num_fingers_up} fingers up."
                if num_fingers_up == 1:
                    message = "Detected 1 finger up. The image will be taken after 2 seconds."
                    message_start_time = time.time()
                    shot = True
                elif num_fingers_up == 2:
                    night_vision = False
                    black_and_white = not black_and_white
                    message = f"{prefix} Gray scale mode on." if black_and_white else f"{prefix} Back to original."
                    message_start_time = time.time()
                elif num_fingers_up == 3:
                    black_and_white = False
                    night_vision = not night_vision
                    # Report the state of the mode that was actually toggled.
                    message = f"{prefix} Night vision mode on." if night_vision else f"{prefix} Back to original."
                    message_start_time = time.time()
                elif num_fingers_up == 4:
                    message = f"{prefix} Exiting..."
                    message_start_time = time.time()
                    off = True
                # The command has been consumed; require a new fist next time.
                pose_detected = False
                fist_detected = False
        else:
            # Open hand without a completed fist phase: start over.
            fist_detected = False
            pose_detected = False

    key = cv2.waitKey(1) & 0xFF

    # The gesture handlers keep the two modes mutually exclusive, so at most
    # one filter is applied per frame.
    if black_and_white:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    elif night_vision:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        equalized = cv2.equalizeHist(gray)
        frame = cv2.applyColorMap(equalized, cv2.COLORMAP_HSV)

    if shot and time.time() - message_start_time > 2:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        os.makedirs(save_directory, exist_ok=True)
        file_path = os.path.join(save_directory, f"{timestamp}.jpg")
        # `frame` already carries the active filter, so it is written as is.
        # Converting it again would fail on the single-channel grayscale
        # image and would apply the night-vision colour map twice.
        saved = cv2.imwrite(file_path, frame)
        message = "Image saved successfully." if saved else "Failed to save the image."
        message_start_time = time.time()
        shot = False

    if message:
        draw_text(frame, message, message_start_time)

    cv2.imshow('Webcam Feed', frame)

    # A gesture-triggered exit waits until its message has been shown for
    # 2 s; pressing 'q' quits immediately, even while a message is visible.
    message_visible = time.time() - message_start_time < 2
    if key == ord('q') or (off and not message_visible):
        print("Exiting...")
        break

# Release the camera and close the preview window once the loop has ended.
cap.release()
cv2.destroyAllWindows()

Loads checkpoint by local backend from path: C:\Users\eog30\anaconda3\envs\mmpose-env\Lib\site-packages\mmpose\.mim\configs\hand_2d_keypoint\rtmpose\coco_wholebody_hand\rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth
Loads checkpoint by http backend from path: https://download.openmmlab.com/mmpose/mmdet_pretrained/ssdlite_mobilenetv2_scratch_600e_onehand-4f9f8686_20220523.pth


Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Exiting...
