In [1]:
pip install mediapipe

Collecting mediapipeNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Using cached mediapipe-0.10.18-cp39-cp39-win_amd64.whl.metadata (9.9 kB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached opencv_contrib_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Using cached sounddevice-0.5.1-py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting CFFI>=1.0 (from sounddevice>=0.4.4->mediapipe)
  Using cached cffi-1.17.1-cp39-cp39-win_amd64.whl.metadata (1.6 kB)
Using cached mediapipe-0.10.18-cp39-cp39-win_amd64.whl (50.9 MB)
Using cached sounddevice-0.5.1-py3-none-win_amd64.whl (363 kB)
Using cached opencv_contrib_python-4.10.0.84-cp37-abi3-win_amd64.whl (45.5 MB)
Using cached cffi-1.17.1-cp39-cp39-win_amd64.whl (181 kB)
Installing collected packages: opencv-contrib-python, CFFI, sounddevice, mediapipe
Successfully installed CFFI-1.17.1 mediapipe-0.10.18 opencv-contrib-python-4.10.0.84 sounddevice-0.5.1


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os

# Live preview of a cropped webcam region shown side by side as
# (raw | hand-landmark overlay).
#   SPACE : start saving the next `max_frames` combined frames to `output_dir`
#   ESC   : quit

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Region of interest inside the camera frame, in pixels.
crop_x, crop_y, crop_width, crop_height = 450, 150, 800, 800

# Saving state: `save_frames` is switched on by SPACE and switched off again
# once `max_frames` images have been written. Change `max_frames` to alter the
# size of one burst.
save_frames = False
frame_count = 0
max_frames = 100

output_dir = "saved_frames"
os.makedirs(output_dir, exist_ok=True)

hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        print("Could not open webcam (device 0).")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # NumPy slicing clamps the far edge, but if the crop origin lies outside
        # the frame the slice is empty and cv2.cvtColor would raise a cryptic
        # assertion error; stop with a readable message instead.
        cropped_frame = frame[crop_y:crop_y + crop_height, crop_x:crop_x + crop_width]
        if cropped_frame.size == 0:
            print(
                f"Crop origin ({crop_x}, {crop_y}) is outside the "
                f"{frame.shape[1]}x{frame.shape[0]} camera frame; adjust the crop settings."
            )
            break

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        frame_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        # Draw on a copy so the raw crop stays untouched for the left half.
        annotated_frame = cropped_frame.copy()
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(annotated_frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # np.hstack builds a new array, so no extra copy of the raw crop is needed.
        combined_frame = np.hstack((cropped_frame, annotated_frame))

        cv2.imshow("Original and Annotated Preview", combined_frame)

        if save_frames:
            frame_filename = os.path.join(output_dir, f"{frame_count:04d}.png")
            cv2.imwrite(frame_filename, combined_frame)
            frame_count += 1
            if frame_count >= max_frames:
                save_frames = False
                print("Process complete")

        key = cv2.waitKey(1) & 0xFF
        if key == ord(' '):
            # (Re)start a burst; files from a previous burst are overwritten.
            save_frames = True
            frame_count = 0
            print(f"Started saving {max_frames} frames...")
        elif key == 27:  # ESC
            break
finally:
    # Always free the camera, the MediaPipe graph and the preview window,
    # even if an exception interrupts the loop.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import time
import csv
import os

# Record hand-landmark samples from the webcam.
# While at least one hand is detected, at most one sample per
# `sample_interval` seconds is taken. For every detected hand a sample consists of
#   * the (mirrored) camera frame          -> hand_images/hand_<n>.png
#   * the landmarks drawn on a black image -> hand_points/points_<n>.png
#   * one CSV row: gesture label, image name, "x,y" pixel coordinates per landmark
# Press 'q' in the preview window to quit.

# Label written to the "Gesture" column of every row recorded in this run.
gesture_label = "gesture"
# Minimum time between two recorded samples, in seconds.
sample_interval = 0.5
# Stop after this many consecutive failed camera reads instead of spinning forever.
max_failed_reads = 30

# Initialize Mediapipe Hand solutions
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Create directories for storing data
hand_images_dir = "hand_images"
points_dir = "hand_points"
os.makedirs(hand_images_dir, exist_ok=True)
os.makedirs(points_dir, exist_ok=True)

csv_filename = "hand_coordinates_with_gesture.csv"

# Column names; the landmark columns are filled in the order in which
# MediaPipe lists the landmarks of a hand.
header = ["Gesture", "Image Name", "T0", "T1", "T2", "T3", "Thumb", "I3", "I2", "I1", "Index",
          "M3", "M2", "M1", "Middle", "R3", "R2", "R1", "Ring", "L3", "L2", "L1", "Little"]
point_names = header[2:]

# Write the header only if the CSV is missing or empty, so that re-running the
# cell (e.g. with another gesture label) appends to the data already collected
# instead of wiping it.
if not os.path.exists(csv_filename) or os.path.getsize(csv_filename) == 0:
    with open(csv_filename, mode='w', newline='') as file:
        csv.writer(file).writerow(header)

# Continue numbering after images left by earlier runs so that existing CSV
# rows keep pointing at the image they were recorded with.
frame_counter = 1
while (os.path.exists(os.path.join(hand_images_dir, f"hand_{frame_counter}.png"))
       or os.path.exists(os.path.join(points_dir, f"points_{frame_counter}.png"))):
    frame_counter += 1

hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7)

# Open the webcam
cap = cv2.VideoCapture(0)

last_time = time.time()
failed_reads = 0

try:
    if not cap.isOpened():
        print("Could not open webcam (device 0).")

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                print("Camera stopped delivering frames; stopping.")
                break
            print("Ignoring empty frame.")
            continue
        failed_reads = 0

        # Flip the frame horizontally and convert the BGR image to RGB
        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame with Mediapipe Hands
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            current_time = time.time()
            # Record a sample only if enough time has passed since the last one
            if current_time - last_time >= sample_interval:
                last_time = current_time
                frame_height, frame_width = frame.shape[:2]

                for hand_landmarks in results.multi_hand_landmarks:
                    # Black image of the same size/type as the frame for the points
                    black_canvas = frame.copy()
                    black_canvas[:] = 0

                    all_points_coordinates = []
                    for landmark in hand_landmarks.landmark:
                        # Landmarks are normalized; scale them to pixel coordinates
                        x = int(landmark.x * frame_width)
                        y = int(landmark.y * frame_height)
                        all_points_coordinates.append((x, y))
                        cv2.circle(black_canvas, (x, y), 2, (0, 255, 0), -1)

                    # `frame` carries no overlay yet at this point (the overlay is
                    # drawn after this loop), so every saved image is the raw frame
                    # even when two hands are recorded from the same frame.
                    hand_image_name = os.path.join(hand_images_dir, f"hand_{frame_counter}.png")
                    point_image_name = os.path.join(points_dir, f"points_{frame_counter}.png")
                    cv2.imwrite(hand_image_name, frame)
                    cv2.imwrite(point_image_name, black_canvas)

                    # One "x,y" cell per landmark column; pad with empty cells if
                    # fewer landmarks than columns were returned.
                    coordinate_cells = [f"{x},{y}" for x, y in all_points_coordinates[:len(point_names)]]
                    coordinate_cells += [""] * (len(point_names) - len(coordinate_cells))
                    row_data = [gesture_label, hand_image_name] + coordinate_cells

                    # Append immediately so an interrupted session keeps its rows
                    with open(csv_filename, mode='a', newline='') as file:
                        csv.writer(file).writerow(row_data)

                    frame_counter += 1

                    print("Gesture, Image Name, and All Points' Coordinates:", row_data)

            # Overlay for the live preview only: drawn on every frame with a
            # detection (not just on sampled frames) and after saving.
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # Display the frame without point names
        cv2.imshow("Hand Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
            break
finally:
    # Release the webcam, the MediaPipe graph and the OpenCV windows even if
    # the loop is interrupted by an exception.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()


gesture,  Image Name, and All Points' Coordinates: ['click', 'hand_images\\hand_1.png', '319,359', '300,356', '283,332', '286,310', '296,297', '291,299', '288,270', '287,249', '287,233', '309,299', '307,284', '305,305', '303,324', '326,304', '324,292', '319,314', '317,330', '342,313', '339,301', '332,317', '329,329']
gesture,  Image Name, and All Points' Coordinates: ['click', 'hand_images\\hand_2.png', '315,324', '292,310', '279,287', '280,266', '292,252', '289,257', '287,230', '287,214', '288,200', '303,259', '302,236', '295,248', '290,261', '318,264', '313,250', '305,269', '302,285', '332,271', '326,257', '319,270', '314,282']
gesture,  Image Name, and All Points' Coordinates: ['click', 'hand_images\\hand_3.png', '313,324', '291,311', '277,288', '279,266', '292,253', '288,258', '287,229', '288,213', '289,198', '303,260', '303,240', '296,254', '291,268', '318,265', '314,253', '306,273', '301,289', '332,273', '327,261', '318,275', '313,287']
gesture,  Image Name, and All Points' Coord