In [None]:
import cv2
import os
import sys
import numpy as np
from data import *
from preprocess import *

In [None]:
# %run data.ipynb
# %run preprocess.ipynb

In [None]:

def capture_frames(save_dir='captured_frames/2', num_frames=100, interval_ms=100,
                   camera_index=0, frame_size=(640, 480)):
    """Capture frames from a camera at a fixed interval and save them as JPEGs.

    With the default arguments this records up to 100 frames, one every
    0.1 seconds, from camera 0 into ``captured_frames/2`` as ``frame_<n>.jpg``.
    Each frame is also shown in a preview window named ``frame``; pressing
    ``q`` in that window stops the capture early.

    Args:
        save_dir: Directory the frames are written to (created if missing).
        num_frames: Maximum number of frames to save.
        interval_ms: Delay between two frames, in milliseconds.
        camera_index: Device index handed to ``cv2.VideoCapture``.
        frame_size: Requested ``(width, height)`` of the capture; the driver
            may not honour it.

    Returns:
        None. The frames are written to ``save_dir`` as a side effect.
    """
    os.makedirs(save_dir, exist_ok=True)

    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            print(f'Could not open camera {camera_index}; no frames captured.')
            return

        width, height = frame_size
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

        # waitKey(0) blocks until a key is pressed, so never wait less than 1 ms
        delay = max(1, int(interval_ms))

        for frame_num in range(num_frames):
            ret, frame = cap.read()
            if not ret:
                # Camera stopped delivering frames
                break

            # Live preview of what is being recorded
            cv2.imshow('frame', frame)

            # The wait both paces the capture and lets the window process events
            if cv2.waitKey(delay) & 0xFF == ord('q'):
                break

            filename = os.path.join(save_dir, f'frame_{frame_num}.jpg')
            if not cv2.imwrite(filename, frame):
                # imwrite signals failure through its return value, not an exception
                print(f'Failed to write {filename}; stopping capture.')
                break
    finally:
        # Always free the device and close the preview, even on error/interrupt
        cap.release()
        cv2.destroyAllWindows()


# capture_frames()


In [None]:
# Root folder of the dataset to load
path = 'captured_frames/'
# path = 'Miniset/'

images, labels = load_data(path, grayscale=False, rgb=False)

# Prefer ndarray.nbytes: sys.getsizeof() only reports the small object header
# when the array does not own its buffer (e.g. it is a view), which would make
# the printed size misleading. Fall back to getsizeof for non-array containers.
# NOTE(review): assumes load_data returns a NumPy array - confirm in data module.
size_in_bytes = getattr(images, 'nbytes', sys.getsizeof(images))

print(images.shape)
print("Images_Size = ", convert_bytes(size_in_bytes))

show_random_dataset(images, labels)

In [None]:

# Pass the loaded images through the project's preprocessing helper
preprocess_images = get_preprocessed_set(images)

# Report the shape of the result, then preview it next to the labels
new_shape = preprocess_images.shape
print("New_Shape = ", new_shape)

show_random_dataset(preprocess_images, labels)

In [None]:
# Skin detection: live preview of the pixels that fall inside an HSV skin range.
# Press 'q' in the preview window to stop.
# Author's observations:
#   * Decent performance
#   * Detecting fans as well

# Lower and upper bounds of the skin color in HSV color space
skin_lower = np.array([0, 20, 70], dtype=np.uint8)
skin_upper = np.array([20, 255, 255], dtype=np.uint8)

# Open the first available camera device
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Without this message the cell would just finish without showing anything
        print('Could not open camera 0; skipping skin detection.')

    # Request a 640x480 capture
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended)
            break

        # Threshold in HSV, then keep only the pixels selected by the mask
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, skin_lower, skin_upper)
        skin = cv2.bitwise_and(frame, frame, mask=mask)

        cv2.imshow('skin', skin)

        # Mask to the low byte so the key comparison is platform independent
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even on error or kernel interrupt
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Hand detection using skin segmentation and contour-based hand detection.
# Press 'q' in the preview window to stop.
# Author's observations:
#   * Decent performance on both
#   * Detecting fans as well
#   * Combines face as well if hands get close to the face

# Lower and upper bounds of the skin color in HSV color space
skin_lower = np.array([0, 20, 70], dtype=np.uint8)
skin_upper = np.array([20, 255, 255], dtype=np.uint8)

# Structuring element for the morphological clean-up; it never changes, so it
# is built once instead of on every frame
kernel_size = 5
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))

# Open the first available camera device
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Without this message the cell would just finish without showing anything
        print('Could not open camera 0; skipping hand detection.')

    # Request a 640x480 capture
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended)
            break

        # Binary mask of the pixels inside the HSV skin range
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, skin_lower, skin_upper)

        # Opening = one erosion followed by one dilation: removes small specks
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # findContours returns (contours, hierarchy) or (image, contours,
        # hierarchy) depending on the OpenCV version; contours is always
        # second from the end
        contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Box the largest contour, which is assumed to be the hand
        if len(contours) > 0:
            max_contour = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(max_contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow('frame', frame)

        # Mask to the low byte so the key comparison is platform independent
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even on error or kernel interrupt
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Another approach to detect hands while avoiding faces (using a Haar cascade).
# Press 'q' in the preview window to stop.
# Author's observations:
#   * Detecting both hands
#   * Detecting face when hands not available or hands too close to face
#   * Detecting fans

# Load the pre-trained frontal face detector
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # CascadeClassifier does not raise on a missing file; fail here with a clear
    # message instead of inside the capture loop
    raise RuntimeError('Failed to load haarcascade_frontalface_default.xml')

# Lower and upper bounds of the skin color in HSV color space
skin_lower = np.array([0, 20, 70], dtype=np.uint8)
skin_upper = np.array([20, 255, 255], dtype=np.uint8)

# Structuring element for the morphological clean-up; it never changes, so it
# is built once instead of on every frame
kernel_size = 5
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))

# Open the first available camera device
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Without this message the cell would just finish without showing anything
        print('Could not open camera 0; skipping hand detection.')

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended)
            break

        # Binary mask of the pixels inside the HSV skin range
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, skin_lower, skin_upper)

        # Opening = one erosion followed by one dilation: removes small specks
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # Detect faces on the grayscale frame and blank them out of the mask so
        # they cannot be picked as the largest skin region
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for (fx, fy, fw, fh) in faces:
            mask[fy:fy + fh, fx:fx + fw] = 0

        # findContours returns (contours, hierarchy) or (image, contours,
        # hierarchy) depending on the OpenCV version; contours is always
        # second from the end
        contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Box the largest remaining contour, which is assumed to be the hand
        if len(contours) > 0:
            max_contour = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(max_contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow('frame', frame)

        # Mask to the low byte so the key comparison is platform independent
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even on error or kernel interrupt
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Hand detection using mediapipe hand tracking module.
# Press 'q' in the preview window to stop.

# This cell uses `mp`, so the import has to be active for the cell to run on a
# fresh kernel
import mediapipe as mp

mp_hands = mp.solutions.hands

# Create a video capture object
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Without this message the cell would just finish without showing anything
        print('Could not open camera 0; skipping hand detection.')

    # The context manager closes the Mediapipe graph when the loop ends
    with mp_hands.Hands(
        max_num_hands=2,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as hands:
        while True:
            ret, frame = cap.read()
            if not ret:
                # A failed read yields no image; converting it would raise
                break

            # Mediapipe expects RGB input. Keep the BGR frame for drawing and
            # display, because cv2.imshow interprets images as BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)

            # Draw a filled circle on every detected hand landmark
            if results.multi_hand_landmarks:
                height, width = frame.shape[:2]
                for hand_landmarks in results.multi_hand_landmarks:
                    for lm in hand_landmarks.landmark:
                        # Landmark x/y are scaled by the frame size to get pixels
                        center = (int(lm.x * width), int(lm.y * height))
                        # (255, 0, 0) is blue in OpenCV's BGR channel order
                        cv2.circle(frame, center, radius=5, color=(255, 0, 0), thickness=-1)

            cv2.imshow('Hand Detection', frame)

            # Wait for a key press and check if the 'q' key was pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close the window even on error or kernel interrupt
    cap.release()
    cv2.destroyAllWindows()
