In [1]:
import cv2

cap = cv2.VideoCapture(0)  # Windows DirectShow

if not cap.isOpened():
    print("❌ Error: Could not open camera!")
    exit()

ret, frame = cap.read()
if not ret:
    print("❌ Error: Camera opened but failed to capture frame!")
else:
    print("✅ Camera is working! Frame captured.")

cap.release()


✅ Camera is working! Frame captured.


In [2]:
import mediapipe as mp
import numpy as np

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True,
                       max_num_hands=1,
                       min_detection_confidence=0.6)

def extract_landmarks(image_path):
    """
    Extract hand landmarks from the given image.

    :param image_path: Path to the image file.
    :return: A NumPy array of landmarks.
    """
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


    results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        # Extract landmarks for the first hand detected
        landmarks = results.multi_hand_landmarks[0].landmark
        # Convert landmarks to a NumPy array
        landmarks_array = np.array([(lm.x, lm.y, lm.z) for lm in landmarks])
        return landmarks_array
    return None

landmarks = extract_landmarks("hand.jpg")
print(landmarks.shape)

(21, 3)


In [3]:
import pandas as pd

# Read the CSV file
asl_data = pd.read_csv('../asl_dataset/A.csv')

# Display the first few rows of the dataframe
print(asl_data.head())

   A  0.6689752340316772  0.5654740333557129  -5.30485010585835e-07  \
0  A            0.668611            0.565372          -5.277825e-07   
1  A            0.669489            0.566128          -5.282753e-07   
2  A            0.671489            0.568260          -5.294892e-07   
3  A            0.671059            0.567165          -5.322947e-07   
4  A            0.672368            0.568899          -5.233154e-07   

   0.6109158396720886  0.5331746339797974  -0.015173280611634254  \
0            0.610713            0.532992              -0.015437   
1            0.611115            0.534201              -0.015478   
2            0.613247            0.536090              -0.015382   
3            0.612615            0.534281              -0.014610   
4            0.614054            0.535379              -0.015593   

   0.5732476711273193  0.45701250433921814  -0.018423033878207207  ...  \
0            0.572790             0.457128              -0.018826  ...   
1            0.5

In [3]:
import cv2
import mediapipe as mp
import numpy as np

# Initialize Mediapipe hand detector
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_draw = mp.solutions.drawing_utils

# Capture video from webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert image to RGB for Mediapipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            point_cloud = []  # Store landmarks as point cloud
            
            for lm in hand_landmarks.landmark:
                point_cloud.append([lm.x, lm.y, lm.z])  # Store XYZ coordinates

            point_cloud = np.array(point_cloud)  # Convert to NumPy array

            # Draw hand landmarks
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    cv2.imshow("Hand Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


In [12]:
import open3d as o3d
import numpy as np

# Define hand landmark connections based on Mediapipe's landmark indexing
hand_connections = [
    (0, 1), (1, 2), (2, 3), (3, 4),  # Thumb
    (0, 5), (5, 6), (6, 7), (7, 8),  # Index Finger
    (0, 9), (9, 10), (10, 11), (11, 12),  # Middle Finger
    (0, 13), (13, 14), (14, 15), (15, 16),  # Ring Finger
    (0, 17), (17, 18), (18, 19), (19, 20),  # Pinky Finger
]

def visualize_hand_point_cloud(points):
    """Visualizes the hand point cloud with landmark connections."""
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)

    # Create a LineSet to connect the hand landmarks
    lines = o3d.geometry.LineSet()
    lines.points = o3d.utility.Vector3dVector(points)
    lines.lines = o3d.utility.Vector2iVector(hand_connections)

    # Color settings for visualization
    pcd.paint_uniform_color([1, 0, 0])  # Red points
    lines.paint_uniform_color([0, 0, 1])  # Blue lines

    # Render the visualization
    o3d.visualization.draw_geometries([pcd, lines], window_name="Hand Point Cloud with Connections")

# Call the visualization function
visualize_hand_point_cloud(point_cloud)


In [13]:
import torch

def normalize_point_cloud(points):
    """Aligns hand point cloud to a standard coordinate system."""
    points = torch.tensor(points, dtype=torch.float32)

    # Step 1: Center the point cloud (translate wrist to origin)
    wrist = points[0].clone()  # Clone to avoid memory issues
    points = points.clone() - wrist  # Create a new tensor instead of modifying in-place

    # Step 2: Compute the hand orientation using PCA
    cov_matrix = torch.mm(points.T, points)
    eigvals, eigvecs = torch.linalg.eigh(cov_matrix)

    # Step 3: Rotate hand for consistency
    points = torch.mm(points, eigvecs)  

    return points.numpy()

# Normalize the extracted point cloud
normalized_pc = normalize_point_cloud(point_cloud)
visualize_hand_point_cloud(normalized_pc)
