In [None]:
pip install opencv-python

In [None]:
pip install --user mediapipe

In [31]:
import cv2
import mediapipe as mp
import time
import os
import threading
import json

In [32]:
# Overlay the status caption, the progress counter and the hand landmarks on a frame
def display_info(frame, text, landmarks, sample_count, num_samples_per_expression):
    """Draw two lines of status text and one dot per landmark onto ``frame``.

    The frame is modified in place and nothing is returned.

    Args:
        frame: Image to draw on; ``frame.shape[0]`` is used as its height and
            ``frame.shape[1]`` as its width.
        text: Caption drawn at pixel position (50, 50).
        landmarks: Iterable of objects exposing ``x`` and ``y`` attributes.
            These are scaled by the frame width/height, so they are presumably
            normalized to the 0..1 range -- confirm against the caller.
        sample_count: Number shown before the slash in the progress line.
        num_samples_per_expression: Number shown after the slash.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    green = (0, 255, 0)
    red = (0, 0, 255)
    progress = f"Sample {sample_count}/{num_samples_per_expression}"

    # Both text lines share font, scale, colour and thickness; only the
    # content and the anchor point differ.
    for caption, origin in ((text, (50, 50)), (progress, (50, 100))):
        cv2.putText(frame, caption, origin, font, 1, green, 2)

    # Convert each landmark to pixel coordinates and mark it with a filled dot.
    for point in landmarks:
        center = (int(point.x * frame.shape[1]), int(point.y * frame.shape[0]))
        cv2.circle(frame, center, 5, red, -1)

In [33]:
# Function to collect data for each expression
def collect_data(expressions=None, num_samples_per_expression=300, output_dir="data", camera_index=0):
    """Capture webcam frames and hand landmarks for a sequence of expressions.

    For every frame in which MediaPipe Hands reports at least one hand, each
    detected hand is stored as one sample: a PNG of the frame plus a JSON file
    holding ``{"hand_landmarks": [[x, y, z], ...]}``. Samples are written to
    ``<output_dir>/<expression>/<n>.png`` and ``<n>.json`` with ``n`` starting
    at 1. Once ``num_samples_per_expression`` samples exist for the current
    expression, collection moves on to the next one. A preview window shows
    the annotated frame; pressing ``q`` in that window stops early.

    The PNG is written from a copy of the frame taken *before* the overlay is
    drawn, so the stored images do not contain the label text or the landmark
    dots. When two hands are detected in one frame, both samples share the
    same image.

    Args:
        expressions: Iterable of expression names (used as directory names).
            Defaults to ``["hello", "thank_you", "sorry", "goodbye"]``.
        num_samples_per_expression: Samples to record per expression.
        output_dir: Root directory for the per-expression sub-directories.
        camera_index: Device index passed to ``cv2.VideoCapture``.

    Returns:
        None. Progress is reported via ``print`` and the preview window.
    """
    if expressions is None:
        expressions = ["hello", "thank_you", "sorry", "goodbye"]
    expressions = list(expressions)
    if not expressions or num_samples_per_expression <= 0:
        print("Nothing to collect.")
        return

    # Requested capture resolution; the driver may not honour it exactly.
    frame_width, frame_height = 640, 480  # Adjust as needed

    for expression in expressions:
        os.makedirs(f"{output_dir}/{expression}", exist_ok=True)

    # Start capturing video from the camera
    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            print("Could not open the camera; no data collected.")
            return

        cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

        expression_count = 0
        sample_count = 0
        current_expression = expressions[expression_count]

        # The context manager closes the MediaPipe graph even if the loop raises
        # (for example on a kernel interrupt).
        with mp.solutions.hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5) as hands:
            while expression_count < len(expressions):
                ret, frame = cap.read()
                if not ret:
                    break

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Detect hand landmarks
                results = hands.process(frame_rgb)

                if results.multi_hand_landmarks:
                    # Keep an un-annotated copy: display_info draws on `frame`
                    # in place, and the overlay must not end up in the dataset.
                    clean_frame = frame.copy()

                    for hand_landmarks in results.multi_hand_landmarks:
                        display_info(frame, f"Collecting data for: {current_expression}", hand_landmarks.landmark, sample_count, num_samples_per_expression)

                        # Save the clean frame as an image and the hand landmarks as JSON
                        sample_path = f"{output_dir}/{current_expression}/{sample_count + 1}"
                        cv2.imwrite(f"{sample_path}.png", clean_frame)
                        landmark_data = {
                            'hand_landmarks': [(landmark.x, landmark.y, landmark.z) for landmark in hand_landmarks.landmark]
                        }
                        with open(f"{sample_path}.json", 'w') as f:
                            json.dump(landmark_data, f)

                        sample_count += 1
                        if sample_count >= num_samples_per_expression:
                            expression_count += 1
                            if expression_count >= len(expressions):
                                # Last expression finished; the while condition
                                # ends the outer loop after this frame is shown.
                                break
                            current_expression = expressions[expression_count]
                            sample_count = 0

                cv2.imshow("Data Collection", frame)

                if cv2.waitKey(1) & 0xFF == ord("q"):  # Exit when 'q' key is pressed
                    break

        if expression_count >= len(expressions):
            print("Data collection completed.")
        else:
            print("Data collection stopped before all samples were collected.")
    finally:
        # Release the camera and close OpenCV windows, even on error or interrupt
        cap.release()
        cv2.destroyAllWindows()

In [34]:
# Start the interactive capture session; press 'q' in the "Data Collection" window to stop early.
collect_data()

Data collection completed.
