In [1]:
import cv2
import mediapipe as mp
import numpy as np

# Initialize Mediapipe Pose and Hands modules
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
pose = mp_pose.Pose()
hands = mp_hands.Hands()

def detect_action(landmarks):
    if not landmarks:
        return "Unknown"

    left_wrist = landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value]
    left_shoulder = landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value]
    right_wrist = landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value]
    right_shoulder = landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
    left_ankle = landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value]
    right_ankle = landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE.value]
    left_knee = landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value]
    right_knee = landmarks[mp_pose.PoseLandmark.RIGHT_KNEE.value]
    left_hip = landmarks[mp_pose.PoseLandmark.LEFT_HIP.value]
    right_hip = landmarks[mp_pose.PoseLandmark.RIGHT_HIP.value]
    left_elbow = landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value]
    right_elbow = landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value]

    # Action detection with adjusted thresholds
    if left_wrist.y < left_shoulder.y - 0.1:
        return "Left Hand Raised"
    
    if right_wrist.y < right_shoulder.y - 0.1:
        return "Right Hand Raised"
    
    if left_wrist.y < left_elbow.y - 0.1 and left_wrist.y < left_shoulder.y - 0.1:
        return "Left Arm Stretching"

    if right_wrist.y < right_elbow.y - 0.1 and right_wrist.y < right_shoulder.y - 0.1:
        return "Right Arm Stretching"

    if (left_ankle.y > left_knee.y + 0.05 and right_ankle.y > right_knee.y + 0.05 and 
        left_wrist.y > left_shoulder.y and right_wrist.y > right_shoulder.y):
        return "Standing"

    if left_knee.y > left_hip.y + 0.05 and right_knee.y > right_hip.y + 0.05:
        return "Sitting"

    # Detect T-Pose
    if (abs(left_wrist.x - left_shoulder.x) > 0.2 and 
        abs(right_wrist.x - right_shoulder.x) > 0.2 and
        abs(left_wrist.y - right_wrist.y) < 0.1):
        return "T-Pose"

    # Detect Easy Pose (relaxed arms)
    if (abs(left_wrist.y - left_shoulder.y) < 0.1 and
        abs(right_wrist.y - right_shoulder.y) < 0.1):
        return "Easy Pose"

    return "Unknown"

def detect_gesture(hand_landmarks):
    if not hand_landmarks:
        return "Unknown Gesture"

    thumb_tip = hand_landmarks[mp_hands.HandLandmark.THUMB_TIP]
    index_tip = hand_landmarks[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    middle_tip = hand_landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
    ring_tip = hand_landmarks[mp_hands.HandLandmark.RING_FINGER_TIP]
    pinky_tip = hand_landmarks[mp_hands.HandLandmark.PINKY_TIP]

    thumb_ip = hand_landmarks[mp_hands.HandLandmark.THUMB_IP]
    index_dip = hand_landmarks[mp_hands.HandLandmark.INDEX_FINGER_DIP]
    middle_dip = hand_landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_DIP]
    ring_dip = hand_landmarks[mp_hands.HandLandmark.RING_FINGER_DIP]
    pinky_dip = hand_landmarks[mp_hands.HandLandmark.PINKY_DIP]

    # Calculate distances between landmarks for gesture detection
    thumb_index_dist = np.linalg.norm(np.array([thumb_tip.x, thumb_tip.y]) - np.array([index_tip.x, index_tip.y]))
    thumb_middle_dist = np.linalg.norm(np.array([thumb_tip.x, thumb_tip.y]) - np.array([middle_tip.x, middle_tip.y]))

    # Gesture 1: Thumbs Up
    if thumb_tip.y < thumb_ip.y and index_tip.y > index_dip.y:
        return "Thumbs Up"

    # Gesture 2: OK
    # Set a threshold distance for the "OK" gesture
    threshold = 0.05

    # Detect "OK" Gesture
    if thumb_index_dist < threshold and thumb_middle_dist > 0.1:
        return "OK"

    # Gesture 3: Fist
    if (thumb_tip.y > thumb_ip.y and index_tip.y > index_dip.y and 
        middle_tip.y > middle_dip.y and ring_tip.y > ring_dip.y and 
        pinky_tip.y > pinky_dip.y):
        return "Fist"

    # Gesture 4: Good/Nice (All Fingers Straight Up)
    if (thumb_tip.y < thumb_ip.y and index_tip.y < index_dip.y and 
        middle_tip.y < middle_dip.y and ring_tip.y < ring_dip.y and 
        pinky_tip.y < pinky_dip.y):
        return "Good/Nice"

    # Gesture 5: Peace Sign (Index and Middle Fingers Up, Others Down)
    if (index_tip.y < index_dip.y and middle_tip.y < middle_dip.y and 
        ring_tip.y > ring_dip.y and pinky_tip.y > pinky_dip.y):
        return "Peace Sign"

    return "Unknown"

def process_image(img, mode):
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    action = "Unknown"
    gesture = "Unknown Gesture"

    if mode == 'actions':
        # Perform pose detection
        pose_result = pose.process(img_rgb)
        if pose_result.pose_landmarks:
            landmarks = pose_result.pose_landmarks.landmark
            action = detect_action(landmarks)

            # Draw pose landmarks
            mp.solutions.drawing_utils.draw_landmarks(img, pose_result.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        
        # Display the action on the image
        cv2.putText(img, f'Action: {action}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    elif mode == 'gestures':
        # Perform hand detection
        hand_result = hands.process(img_rgb)
        if hand_result.multi_hand_landmarks:
            for hand_landmarks in hand_result.multi_hand_landmarks:
                gesture = detect_gesture(hand_landmarks.landmark)

                # Draw hand landmarks
                mp.solutions.drawing_utils.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        
        # Display the gesture on the image
        cv2.putText(img, f'Gesture: {gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return img

def capture_image(mode):
    # Open webcam
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        raise RuntimeError("Could not open video device")

    # Set the video frame width and height
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    print("Press 'c' to capture an image and 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture image")
            break

        # Process the frame based on the selected mode
        processed_frame = process_image(frame, mode)

        # Display the processed webcam feed
        cv2.imshow('Webcam Feed', processed_frame)

        key = cv2.waitKey(1)
        if key == ord('c'):  # Press 'c' to capture the image
            filename = 'captured_image.jpg'
            cv2.imwrite(filename, processed_frame)
            print(f"Image captured and saved as '{filename}'")
            break
        elif key == ord('q'):  # Press 'q' to quit
            break

    cap.release()
    cv2.destroyAllWindows()

    return filename

def main():
    # Ask the user to choose the mode
    mode = input("Choose mode: 'actions' or 'gestures': ").strip().lower()

    if mode not in ['actions', 'gestures']:
        print("Invalid mode selected. Please choose 'actions' or 'gestures'.")
        return

    # Capture and process the photo based on the selected mode
    filename = capture_image(mode)

    # Display the final captured and processed image
    img = cv2.imread(filename)
    cv2.imshow('Processed Image', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


Choose mode: 'actions' or 'gestures':  gestures


Press 'c' to capture an image and 'q' to quit




UnboundLocalError: cannot access local variable 'filename' where it is not associated with a value