In [2]:
import cv2
import os

# Function to collect data and save images
def collect_data(output_dir, num_samples):
    # Create output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cap = cv2.VideoCapture(0)  # Webcam

    count = 0
    while count < num_samples:
        ret, frame = cap.read()
        cv2.imshow('Press the button and press "s" to save', frame)
        
        # Save frame when 's' is pressed
        if cv2.waitKey(1) & 0xFF == ord('s'):
            filename = os.path.join(output_dir, f'image_{count}.jpg')
            cv2.imwrite(filename, frame)
            print(f'Saved: {filename}')
            count += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

# Set the output directory and number of samples to collect
output_directory = 'data/button_press_images'
num_samples = 100

# Collect data
collect_data(output_directory, num_samples)


Saved: data/button_press_images\image_0.jpg
Saved: data/button_press_images\image_1.jpg
Saved: data/button_press_images\image_2.jpg
Saved: data/button_press_images\image_3.jpg
Saved: data/button_press_images\image_4.jpg
Saved: data/button_press_images\image_5.jpg
Saved: data/button_press_images\image_6.jpg
Saved: data/button_press_images\image_7.jpg
Saved: data/button_press_images\image_8.jpg
Saved: data/button_press_images\image_9.jpg
Saved: data/button_press_images\image_10.jpg
Saved: data/button_press_images\image_11.jpg
Saved: data/button_press_images\image_12.jpg
Saved: data/button_press_images\image_13.jpg
Saved: data/button_press_images\image_14.jpg
Saved: data/button_press_images\image_15.jpg
Saved: data/button_press_images\image_16.jpg
Saved: data/button_press_images\image_17.jpg
Saved: data/button_press_images\image_18.jpg
Saved: data/button_press_images\image_19.jpg
Saved: data/button_press_images\image_20.jpg
Saved: data/button_press_images\image_21.jpg
Saved: data/button_p

In [4]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model

# Load MobileNetV2 pre-trained on ImageNet without top classification layer
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

# Add custom classification head
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

# Combine base model with custom classification head
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display model summary
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1us/step


In [5]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Directory containing all collected images
data_dir = 'data/button_press_images'

# Directories for train, validation, and test data
train_dir = 'data/train'
val_dir = 'data/validation'
test_dir = 'data/test'

# Create directories if they don't exist
for directory in [train_dir, val_dir, test_dir]:
    if not os.path.exists(directory):
        os.makedirs(directory)

# Get a list of all image filenames
images = os.listdir(data_dir)

# Split data into train and test sets (80% train, 20% test)
train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)

# Split train set into train and validation sets (80% train, 20% validation)
train_images, val_images = train_test_split(train_images, test_size=0.2, random_state=42)

# Move images to respective directories
for img in train_images:
    src = os.path.join(data_dir, img)
    dst = os.path.join(train_dir, img)
    shutil.move(src, dst)

for img in val_images:
    src = os.path.join(data_dir, img)
    dst = os.path.join(val_dir, img)
    shutil.move(src, dst)

for img in test_images:
    src = os.path.join(data_dir, img)
    dst = os.path.join(test_dir, img)
    shutil.move(src, dst)

# Print number of images in each split
print(f'Train images: {len(os.listdir(train_dir))}')
print(f'Validation images: {len(os.listdir(val_dir))}')
print(f'Test images: {len(os.listdir(test_dir))}')

Train images: 64
Validation images: 16
Test images: 20


In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Set directories for train, validation, and test data
train_dir = 'data/train'
val_dir = 'data/validation'
test_dir = 'data/test'

# Data generators with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Flow data from directories
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

# Print number of images found in each directory
print(f'Train images found: {len(train_generator.filenames)}')
print(f'Validation images found: {len(val_generator.filenames)}')
print(f'Test images found: {len(test_generator.filenames)}')



Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.
Train images found: 0
Validation images found: 0
Test images found: 0


In [15]:



import cv2
import mediapipe as mp
import math

# Initialize MediaPipe hand solution
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7, min_tracking_confidence=0.5)

# Initialize variables for hand shaking detection
prev_angle = None  # Previous angle of the pen
pen_detected = False  # Flag to track pen detection
pen_endpoints = [(0, 0), (0, 0)]  # Pen start and end points
nose_coords = (0, 0)  # Nose coordinates
click_count = 0  # Number of pen clicks
is_pen_down = False  # Flag to track if pen is down
shake_count=0

# Initialize MediaPipe face solution
mp_face = mp.solutions.face_mesh
face_mesh = mp_face.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
draw_face = mp.solutions.drawing_utils

# Custom face landmark connections
FACE_CONNECTIONS = [
    (10, 234), (234, 127), (127, 162), (162, 21), (21, 54), (54, 103),
    (103, 67), (67, 109), (109, 10), (10, 152), (152, 234), (152, 454),
    (454, 234), (152, 332), (332, 454), (454, 361), (361, 332), (234, 454)
]

# Initialize variables for vertical and horizontal shaking detection
vertical_shake_threshold = 20  # Threshold for vertical position change to detect shaking
horizontal_shake_threshold = 20  # Threshold for horizontal position change to detect shaking
vertical_shake_detected = False  # Flag to track vertical shaking detection
horizontal_shake_detected = False  # Flag to track horizontal shaking detection
prev_thumb_x, prev_thumb_y = 0, 0  # Previous thumb position
prev_index_x, prev_index_y = 0, 0  # Previous index finger position

# Start capturing video from the webcam
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        continue

    # Flip the frame horizontally for a natural viewing experience
    frame = cv2.flip(frame, 1)

    # Convert the BGR image to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Process the frame and detect hands
    results_hands = hands.process(rgb_frame)

    # Process the frame and detect facial landmarks
    results_face = face_mesh.process(rgb_frame)

    # Draw the detected face landmarks and connections on the frame
    if results_face.multi_face_landmarks:
        for face_landmarks in results_face.multi_face_landmarks:
            draw_face.draw_landmarks(frame, face_landmarks, mp_face.FACEMESH_CONTOURS)

            # Get the nose coordinates using the correct index for the nose tip
            nose_landmark = face_landmarks.landmark[1]  # Use the index directly
            nose_x, nose_y = int(nose_landmark.x * frame.shape[1]), int(nose_landmark.y * frame.shape[0])
            nose_coords = (nose_x, nose_y)

    # Draw the custom hand landmarks on the frame
    if results_hands.multi_hand_landmarks:
        for hand_landmarks in results_hands.multi_hand_landmarks:
            # Get landmarks for thumb tip and index finger tip
            thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
            index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            thumb_x, thumb_y = int(thumb_tip.x * frame.shape[1]), int(thumb_tip.y * frame.shape[0])

            # Calculate the distance between nose and thumb tip (pen's tip)
            distance = math.hypot(thumb_x - nose_x, thumb_y - nose_y) / 10  # Divide by 10 for better visualization

            # Calculate the angle of the pen with respect to the vertical line at 90 degrees
            pen_dx = index_tip.x - thumb_tip.x
            pen_dy = index_tip.y - thumb_tip.y
            angle_rad = math.atan2(pen_dy, pen_dx)
            angle_with_vertical = 90 - math.degrees(angle_rad)  # Subtract from 90 to get the angle with respect to vertical

            # Check if the pen is green (assuming RGB color values)
            if frame[thumb_y, thumb_x, 1] > frame[thumb_y, thumb_x, 0] and frame[thumb_y, thumb_x, 1] > frame[thumb_y, thumb_x, 2]:
                if not pen_detected:  # Pen is detected for the first time
                    print("Pen detected!")
                    cv2.putText(frame, 'Pen detected!', (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    pen_detected = False
                                # Print the distance and angle
                    distance_text = f"Distance: {distance:.2f} pixels"
                    angle_text = f"Angle: {angle_with_vertical:.2f} degrees"
                    click_text = f"Clicks: {click_count}"
                    shake_text=f"Shake: {shake_count}"
                    cv2.putText(frame, distance_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                    cv2.putText(frame, angle_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                    cv2.putText(frame, click_text, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                    cv2.putText(frame, shake_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            # Calculate position changes for vertical and horizontal shaking detection
                    if prev_thumb_x != 0 and prev_thumb_y != 0 and prev_index_x != 0 and prev_index_y != 0:
                           vertical_change = abs(thumb_y - prev_thumb_y)
                           horizontal_change = abs(thumb_x - prev_thumb_x)

                # Detect vertical shake
                    if vertical_change > vertical_shake_threshold:
                        vertical_shake_detected = True
                    else:
                        vertical_shake_detected = False

                # Detect horizontal shake
                    if horizontal_change > horizontal_shake_threshold:
                        horizontal_shake_detected = True
                        shake_count+=1
                    else:
                        horizontal_shake_detected = False

                        prev_thumb_x, prev_thumb_y = thumb_x, thumb_y
                        prev_index_x, prev_index_y = int(index_tip.x * frame.shape[1]), int(index_tip.y * frame.shape[0])

            # Visualize shaking on the frame
                    if vertical_shake_detected:
                        cv2.putText(frame, 'VERTICAL SHAKE DETECTED', (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                        shake_count+=1
                    if horizontal_shake_detected:
                        cv2.putText(frame, 'HORIZONTAL SHAKE DETECTED', (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # Display theframe
 
                    

                # Check if the pen is clicked (angle change from downward to upward)
                if angle_with_vertical < 45 and is_pen_down:
                    click_count += 1  # Increment click count
                    is_pen_down = False  # Pen is lifted after click

        else:
            pen_detected = False
            cv2.putText(frame, 'Pen Not detected!', (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    cv2.imshow('Hand Shake Detection', frame)
    pen_endpoints[0] = (thumb_x, thumb_y)
    is_pen_down = True  # Pen is down initially  

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and destroy any OpenCV windows
cap.release()
cv2.destroyAllWindows()

ModuleNotFoundError: No module named 'mediapipe'

In [13]:
!pip install mediapipe

