In [1]:
import cv2
import os

# Source folders: still images are read from "Hand", video clips from "Handvid".
IMG_DIR = "Hand"
VID_DIR = "Handvid"

# Decode every directory entry with OpenCV and keep only the ones that could
# be read (cv2.imread returns None for anything it cannot decode).
decoded = (cv2.imread(os.path.join(IMG_DIR, entry)) for entry in os.listdir(IMG_DIR))
images = [picture for picture in decoded if picture is not None]

# Collect every frame of every clip into one flat list.
videos = []
for clip_name in os.listdir(VID_DIR):
    cap = cv2.VideoCapture(os.path.join(VID_DIR, clip_name))
    ret, frame = cap.read()
    while ret:
        # Keep the frame only when the decoder actually returned pixel data.
        if frame is not None:
            videos.append(frame)
        ret, frame = cap.read()
    cap.release()

print(f"Loaded {len(videos)} frames")
print(f"Loaded {len(images)} images")

# Placeholder target size (replace with actual values)
width, height = 224, 224

Loaded 1269 frames
Loaded 21 images


In [3]:
import cv2
import os

# Folder holding the still images.
IMG_DIR = "Hand"

# Read each directory entry; entries OpenCV cannot decode come back as None
# and are skipped.
images = []
for img_name in os.listdir(IMG_DIR):
    image = cv2.imread(os.path.join(IMG_DIR, img_name))
    if image is None:
        continue
    images.append(image)

# Placeholder target size (replace with actual values)
width, height = 224, 224

# Define preprocess_images function
def preprocess_images(images, width, height):
    """Resize every image to ``(width, height)`` and return them as a new list.

    Args:
        images: Iterable of images accepted by ``cv2.resize``.
        width: Target width passed to ``cv2.resize``.
        height: Target height passed to ``cv2.resize``.

    Returns:
        A list with one resized image per input image, in input order.
    """
    target_size = (width, height)
    return [cv2.resize(picture, target_size) for picture in images]

# Resize every loaded image to the (width, height) set earlier in this cell.
processed_images = preprocess_images(images, width, height)


In [4]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model

# ResNet50 with ImageNet weights and no classification head, taking
# 224x224 3-channel inputs; used only to extract features.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
# Model whose output is the output of the base model's last layer.
feature_extractor = Model(
    inputs=base_model.input,
    outputs=base_model.layers[-1].output,
)

# Function to extract image features using the pre-trained ResNet50 model
def extract_image_features(image_path):
    """Return a flat ResNet50 feature vector for the image at ``image_path``.

    The file is read with OpenCV, resized to 224x224, expanded to a batch of
    one, passed through ``preprocess_input`` and then through the module-level
    ``feature_extractor``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array of features, or ``None`` (after printing a message)
        when OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to read image: {image_path}")
        return None
    # NOTE(review): cv2.imread yields BGR pixels, while the Keras ResNet
    # preprocess_input documentation describes an RGB input -- confirm whether
    # a BGR->RGB conversion is wanted. It would change every feature value, so
    # any previously saved classifier would have to be retrained.
    batch = np.expand_dims(cv2.resize(image, (224, 224)), axis=0)
    batch = preprocess_input(batch)
    # verbose=0 silences the Keras progress bar that would otherwise be
    # printed once per image and flood the notebook output.
    features = feature_extractor.predict(batch, verbose=0)
    return features.flatten()

# Function to extract video features using the pre-trained ResNet50 model
def extract_video_features(video_path):
    """Return one flat feature vector summarising a whole video.

    Every frame is resized to 224x224, preprocessed and passed through the
    module-level ``feature_extractor``; the per-frame outputs are averaged
    over frames (``np.mean(..., axis=0)``) and the result is flattened.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.

    Returns:
        A 1-D NumPy array, or ``None`` (after printing a message) when the
        video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Unable to open video: {video_path}")
        return None
    video_features = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            batch = np.expand_dims(cv2.resize(frame, (224, 224)), axis=0)
            batch = preprocess_input(batch)
            # verbose=0: avoid printing one progress bar per frame.
            video_features.append(feature_extractor.predict(batch, verbose=0))
    finally:
        # Release the capture even if resizing or prediction raises.
        cap.release()
    if not video_features:
        print(f"No frames found in video: {video_path}")
        return None
    return np.mean(video_features, axis=0).flatten()

# Feature vectors for the still images in "Hand" (only .jpg/.jpeg/.png names).
image_dir = "Hand"
image_files = [
    os.path.join(image_dir, file)
    for file in os.listdir(image_dir)
    if file.endswith(('.jpg', '.jpeg', '.png'))
]
image_features_list = [
    vector
    for vector in map(extract_image_features, image_files)
    if vector is not None
]
image_labels = ['image'] * len(image_features_list)  # Label for images

# Feature vectors for the clips in "Handvid" (only .mp4/.avi names).
video_dir = "Handvid"
video_files = [
    os.path.join(video_dir, file)
    for file in os.listdir(video_dir)
    if file.endswith(('.mp4', '.avi'))
]
video_features_list = [
    vector
    for vector in map(extract_video_features, video_files)
    if vector is not None
]
video_labels = ['video'] * len(video_features_list)  # Label for videos

# Images first, then videos, with labels in the same order.
all_features = image_features_list + video_features_list
all_labels = image_labels + video_labels

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    all_labels,
    test_size=0.2,
    random_state=42,
)

# Linear-kernel SVM fitted on the training part and scored on the held-out part.
classifier = SVC(kernel='linear')
classifier.fit(X_train, y_train)

accuracy = classifier.score(X_test, y_test)
print("Accuracy:", accuracy)





Accuracy: 1.0


In [1]:
import cv2

# Initialize the webcam
cap = cv2.VideoCapture(0)

# Stop the cell if the camera is unavailable. An exception is raised instead
# of calling exit(): inside a Jupyter kernel exit() requests a kernel shutdown
# (discarding notebook state) rather than simply ending this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open webcam")

# Initialize the background subtractor
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

try:
    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame")
            break

        # Foreground mask from background subtraction, thresholded at 200 to
        # obtain a binary image.
        fg_mask = bg_subtractor.apply(frame)
        _, thresh = cv2.threshold(fg_mask, 200, 255, cv2.THRESH_BINARY)

        # Outer contours of the foreground regions.
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Draw a green bounding box around every contour.
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        cv2.imshow("Hand Detection", frame)

        # Press 'q' to quit
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including when the cell
    # is interrupted or an OpenCV call raises.
    cap.release()
    cv2.destroyAllWindows()


In [2]:
import cv2
import numpy as np

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Initialize the webcam
cap = cv2.VideoCapture(0)

# Stop the cell if the camera is unavailable. Raising is used instead of
# exit(), which inside a Jupyter kernel requests a kernel shutdown rather than
# simply ending this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open webcam")

# Initialize the background subtractor
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

try:
    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()

        # Leave the loop when the camera stops delivering frames; otherwise
        # the loop would keep spinning with nothing to show.
        if not ret:
            print("Error: Could not read frame")
            break

        # Foreground mask from background subtraction, thresholded at 200 to
        # obtain a binary image.
        fg_mask = bg_subtractor.apply(frame)
        _, thresh = cv2.threshold(fg_mask, 200, 255, cv2.THRESH_BINARY)

        # Outer contours of the foreground regions.
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Draw a bounding box per contour and remember each box centre.
        centroids = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            centroids.append((x + w // 2, y + h // 2))

        # Exactly two regions are treated as two hands.
        if len(centroids) == 2:
            distance = calculate_distance(centroids[0], centroids[1])
            cv2.putText(frame, f"Distance: {distance:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # Below 100 pixels counts as "zoom out", otherwise "zoom in"
            # (adjust threshold as needed).
            if distance < 100:
                cv2.putText(frame, "Zoom Out Gesture", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            else:
                cv2.putText(frame, "Zoom In Gesture", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv2.imshow("Hand Detection", frame)

        # Press 'q' to quit
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including on interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [6]:
import cv2
import numpy as np

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Function to zoom in on an image
def zoom_in(image, zoom_factor=1.1):
    """Return ``image`` resized by ``zoom_factor`` (default 1.1).

    Both sides are multiplied by the factor and truncated to integers before
    being handed to ``cv2.resize`` as ``(width, height)``.
    """
    rows, cols = image.shape[:2]
    scaled_size = (int(cols * zoom_factor), int(rows * zoom_factor))
    return cv2.resize(image, scaled_size)

# Function to zoom out on an image
def zoom_out(image, zoom_factor=0.9):
    """Return ``image`` resized by ``zoom_factor`` (default 0.9).

    Both sides are multiplied by the factor and truncated to integers. Each
    side is kept at a minimum of one pixel: repeated shrinking would otherwise
    reach a size of 0, which ``cv2.resize`` rejects.

    Args:
        image: Array-like image exposing ``shape`` as (height, width, ...).
        zoom_factor: Multiplier applied to both sides.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    height, width = image.shape[:2]
    new_height = max(1, int(height * zoom_factor))
    new_width = max(1, int(width * zoom_factor))
    return cv2.resize(image, (new_width, new_height))

# Initialize the webcam
cap = cv2.VideoCapture(0)

# Stop the cell if the camera is unavailable. Raising is used instead of
# exit(), which inside a Jupyter kernel requests a kernel shutdown rather than
# simply ending this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open webcam")

# Initialize the background subtractor
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

# Grab one frame to act as the picture being zoomed (for demonstration)
ret, sample_image = cap.read()
if not ret:
    cap.release()
    raise RuntimeError("Error: Could not read image from webcam")

# Limits (in pixels) for the sides of the zoomed picture. The zoom is applied
# to the already-zoomed picture on every qualifying frame, so without limits
# it would grow without bound or shrink to nothing within seconds.
MIN_SIDE = 32
MAX_SIDE = 4096

try:
    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()

        # Leave the loop when the camera stops delivering frames.
        if not ret:
            print("Error: Could not read frame")
            break

        # Foreground mask from background subtraction, thresholded at 200 to
        # obtain a binary image.
        fg_mask = bg_subtractor.apply(frame)
        _, thresh = cv2.threshold(fg_mask, 200, 255, cv2.THRESH_BINARY)

        # Outer contours of the foreground regions.
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Draw a bounding box per contour and remember each box centre.
        centroids = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            centroids.append((x + w // 2, y + h // 2))

        # Exactly two regions are treated as two hands.
        if len(centroids) == 2:
            distance = calculate_distance(centroids[0], centroids[1])
            img_h, img_w = sample_image.shape[:2]

            # Below 100 pixels zooms out, otherwise zooms in (adjust threshold
            # as needed). 0.9 and 1.1 are the default factors of zoom_out and
            # zoom_in; a step is skipped when it would cross the size limits.
            if distance < 100:
                if min(img_h, img_w) * 0.9 >= MIN_SIDE:
                    sample_image = zoom_out(sample_image)
            elif max(img_h, img_w) * 1.1 <= MAX_SIDE:
                sample_image = zoom_in(sample_image)

        # Display the sample image with zooming
        cv2.imshow("Zoomed Image", sample_image)

        # Display the frame with hand bounding boxes
        cv2.imshow("Hand Detection", frame)

        # Press 'q' to quit
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including on interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [2]:
import cv2
import mediapipe as mp

# Initialize the webcam first so nothing else is created if it is unavailable.
cap = cv2.VideoCapture(0)

# Raising is used instead of exit(), which inside a Jupyter kernel requests a
# kernel shutdown rather than simply ending this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open webcam")

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

try:
    while True:
        # Read a frame from the webcam
        ret, frame = cap.read()

        # Leave the loop when the camera stops delivering frames; otherwise
        # the loop would keep spinning with nothing to show.
        if not ret:
            print("Error: Could not read frame")
            break

        # Convert the BGR image to RGB before handing it to MediaPipe Hands.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        # Draw a filled green dot on every landmark of every detected hand.
        # Landmark coordinates are scaled by the frame width and height to get
        # pixel positions.
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for landmark in hand_landmarks.landmark:
                    x, y = int(landmark.x * frame.shape[1]), int(landmark.y * frame.shape[0])
                    cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)

        cv2.imshow("Hand Detection", frame)

        # Press 'q' to quit
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera, the windows and the MediaPipe detector.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


In [3]:
import cv2
import mediapipe as mp
import numpy as np

# Create a MediaPipe Hands detector with its default settings.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

# Gesture state used by the capture loop later in this cell.
prev_distance = None  # distance measured on the previous detection; None until a hand has been seen
zoom_factor = 1.0  # Initial zoom factor

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Open the default webcam.
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the BGR frame to RGB and run MediaPipe Hands on it.
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detected = results.multi_hand_landmarks

    if detected:
        # Only the first detected hand is used.
        hand_landmarks = detected[0].landmark
        frame_h, frame_w = frame.shape[0], frame.shape[1]

        # Landmark 4 is used as the thumb and landmark 8 as the index finger;
        # their coordinates are scaled to pixel positions.
        thumb_mark, index_mark = hand_landmarks[4], hand_landmarks[8]
        thumb = (int(thumb_mark.x * frame_w), int(thumb_mark.y * frame_h))
        index_finger = (int(index_mark.x * frame_w), int(index_mark.y * frame_h))

        distance = calculate_distance(thumb, index_finger)

        # Move the zoom factor by the change in distance, scaled by 1/150
        # (adjust scaling factor as needed).
        if prev_distance is not None:
            zoom_factor = zoom_factor + (distance - prev_distance) / 150
        prev_distance = distance

        # Blue dots at the thumb and index finger positions.
        for fingertip in (thumb, index_finger):
            cv2.circle(frame, fingertip, 5, (255, 0, 0), -1)

    cv2.imshow('Hand Gesture Zoom', frame)

    # Leave the loop when 'q' is pressed.
    pressed = cv2.waitKey(1) & 0xFF
    if pressed == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


In [2]:
import cv2
import mediapipe as mp
import numpy as np

# Create a MediaPipe Hands detector limited to one hand, with detection and
# tracking confidence thresholds of 0.5.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Gesture state used by the capture loop later in this cell.
prev_distance = None  # distance measured on the previous detection; None until a hand has been seen
zoom_factor = 1.0  # Initial zoom factor

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Open webcam
cap = cv2.VideoCapture(0)

# Bounds for the zoom factor. The factor is multiplied by 1.05 or 0.95 on
# every frame in which the distance changed, so without bounds it compounds
# until the resized frame is enormous or has a size of zero.
MIN_ZOOM = 0.2
MAX_ZOOM = 4.0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR image to RGB and run MediaPipe Hands on it.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            # Only the first detected hand is used.
            hand_landmarks = results.multi_hand_landmarks[0].landmark

            # Landmark 4 is used as the thumb and landmark 8 as the index
            # finger; their coordinates are scaled to pixel positions.
            thumb = (int(hand_landmarks[4].x * frame.shape[1]), int(hand_landmarks[4].y * frame.shape[0]))
            index_finger = (int(hand_landmarks[8].x * frame.shape[1]), int(hand_landmarks[8].y * frame.shape[0]))

            distance = calculate_distance(thumb, index_finger)

            if prev_distance is not None:
                delta_distance = distance - prev_distance
                if delta_distance > 0:
                    # Zoom-in gesture: distance increased since the last detection.
                    zoom_factor *= 1.05
                elif delta_distance < 0:
                    # Zoom-out gesture: distance decreased since the last detection.
                    zoom_factor *= 0.95
                # Keep the factor inside the supported range.
                zoom_factor = min(max(zoom_factor, MIN_ZOOM), MAX_ZOOM)

            prev_distance = distance

            # Blue dots at the thumb and index finger positions.
            cv2.circle(frame, thumb, 5, (255, 0, 0), -1)
            cv2.circle(frame, index_finger, 5, (255, 0, 0), -1)

        # Resize the frame based on zoom factor
        resized_frame = cv2.resize(frame, None, fx=zoom_factor, fy=zoom_factor, interpolation=cv2.INTER_LINEAR)

        cv2.imshow('Hand Gesture Zoom', resized_frame)

        # Break the loop when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including on interrupt.
    cap.release()
    cv2.destroyAllWindows()


KeyboardInterrupt: 

In [18]:
import cv2
import mediapipe as mp
import numpy as np

# Create a MediaPipe Hands detector with its default settings.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

# Gesture state used by the capture loop later in this cell.
prev_distance = None  # distance measured on the previous detection; None until a hand has been seen
zoom_factor = 1.0  # Initial zoom factor

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Function to resize image based on zoom factor
def resize_image(image, zoom_factor):
    """Return ``image`` scaled by ``zoom_factor``.

    A non-positive factor is treated as 1 (no scaling). Each side of the
    result is kept at a minimum of one pixel, because a small positive factor
    would otherwise truncate to a size of 0, which ``cv2.resize`` rejects.

    Args:
        image: Array-like image exposing ``shape`` as (height, width, ...).
        zoom_factor: Multiplier applied to both sides.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    if zoom_factor <= 0:
        zoom_factor = 1
    new_width = max(1, int(image.shape[1] * zoom_factor))
    new_height = max(1, int(image.shape[0] * zoom_factor))
    return cv2.resize(image, (new_width, new_height))

# Open webcam
cap = cv2.VideoCapture(0)

# Load the image
image_path = "akash.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None for a missing or unreadable file; fail here with
    # a clear message instead of crashing later inside resize_image.
    cap.release()
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Explicit "nothing to show yet" marker. Looking the name up in locals() would
# also find a stale image left in the notebook namespace by an earlier run.
image_resized = None

# Bounds for the zoom factor so it cannot drift to zero/negative or grow
# without limit.
MIN_ZOOM = 0.1
MAX_ZOOM = 5.0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally for a later selfie-view display
        frame = cv2.flip(frame, 1)

        # Convert the BGR image to RGB and run MediaPipe Hands on it.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            # Only the first detected hand is used.
            hand_landmarks = results.multi_hand_landmarks[0].landmark

            # Landmark 4 is used as the thumb and landmark 8 as the index
            # finger; their coordinates are scaled to pixel positions.
            thumb = (int(hand_landmarks[4].x * frame.shape[1]), int(hand_landmarks[4].y * frame.shape[0]))
            index_finger = (int(hand_landmarks[8].x * frame.shape[1]), int(hand_landmarks[8].y * frame.shape[0]))

            distance = calculate_distance(thumb, index_finger)

            # Move the zoom factor by the change in distance, scaled by 1/150
            # (adjust scaling factor as needed), then clamp it.
            if prev_distance is not None:
                zoom_factor += (distance - prev_distance) / 150
                zoom_factor = min(max(zoom_factor, MIN_ZOOM), MAX_ZOOM)
                image_resized = resize_image(image, zoom_factor)

            prev_distance = distance

            # Blue dots at the thumb and index finger positions.
            cv2.circle(frame, thumb, 5, (255, 0, 0), -1)
            cv2.circle(frame, index_finger, 5, (255, 0, 0), -1)

        # Display the webcam feed
        cv2.imshow('Hand Gesture Zoom', frame)

        # Display the zoomed image once one has been produced.
        if image_resized is not None:
            cv2.imshow('Image', image_resized)

        # Break the loop when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including on interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [19]:
import cv2
import numpy as np
# TODO: load your trained gesture-recognition model here, for example:
# model = load_model('your_model.h5')  # Example, use appropriate loading function

# Gesture state placeholders; nothing in this cell updates them yet.
prev_distance = None
zoom_factor = 1.0  # Initial zoom factor

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Open the default webcam.
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, captured = cap.read()
    if not ret:
        break

    # Mirror the frame horizontally for a selfie-view display.
    frame = cv2.flip(captured, 1)

    # Placeholder pipeline, still to be implemented:
    #   1. run the gesture model on the frame
    # predictions = model.predict(frame)  # Example, replace with your inference code
    #   2. turn predictions into recognized hand gestures
    #   3. adjust the zoom factor from the recognized gestures
    #   4. resize the image according to the zoom factor

    cv2.imshow('Hand Gesture Zoom', frame)

    # Leave the loop when 'q' is pressed.
    pressed = cv2.waitKey(1) & 0xFF
    if pressed == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np

# Load your trained machine learning model
# Replace this with your actual model loading code
def load_model():
    """Placeholder model loader: always returns ``None`` until implemented."""
    placeholder_model = None
    return placeholder_model

# load_model() is still a placeholder that returns None, so model is None here.
model = load_model()

# Gesture state
prev_distance = None  # not read or updated anywhere else in this cell
zoom_factor = 1.0  # Initial zoom factor

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the coordinates at index 0 and index 1 of each point are used.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# Function to adjust zoom factor based on hand gestures
def adjust_zoom(predictions):
    """Placeholder hook for mapping model predictions to a zoom factor.

    ``predictions`` is currently ignored: the module-level ``zoom_factor`` is
    returned unchanged. Replace the body with real gesture-recognition logic.
    """
    current_zoom = zoom_factor
    return current_zoom

# Open the default webcam.
cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, captured = cap.read()
    if not ret:
        break

    # Mirror the frame horizontally for a selfie-view display.
    frame = cv2.flip(captured, 1)

    # Placeholder inference step: no model is called yet, so predictions
    # stays None. Replace with your actual inference code, e.g.
    # predictions = model.predict(frame)  # Example, replace with your inference code
    predictions = None

    # Only a real prediction may change the zoom factor.
    zoom_factor = zoom_factor if predictions is None else adjust_zoom(predictions)

    # Scale the frame by the current zoom factor and show it.
    frame_resized = cv2.resize(
        frame,
        None,
        fx=zoom_factor,
        fy=zoom_factor,
        interpolation=cv2.INTER_LINEAR,
    )
    cv2.imshow('Hand Gesture Zoom', frame_resized)

    # Leave the loop when 'q' is pressed.
    pressed = cv2.waitKey(1) & 0xFF
    if pressed == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


In [23]:
import os

import cv2
import joblib
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model

# Initialize the ResNet50 model for feature extraction:
# ImageNet weights, no classification head (include_top=False), 224x224x3 input.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Wrap it so that predict() returns the output of the base model's last layer.
feature_extractor = Model(inputs=base_model.input, outputs=base_model.layers[-1].output)

# Function to extract image features using the pre-trained ResNet50 model
def extract_image_features(image_path):
    """Return a flat ResNet50 feature vector for the image at ``image_path``.

    The file is read with OpenCV, resized to 224x224, expanded to a batch of
    one, passed through ``preprocess_input`` and then through the module-level
    ``feature_extractor``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array of features, or ``None`` (after printing a message)
        when OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to read image: {image_path}")
        return None
    # NOTE(review): cv2.imread yields BGR pixels, while the Keras ResNet
    # preprocess_input documentation describes an RGB input -- confirm whether
    # a BGR->RGB conversion is wanted. It would change every feature value, so
    # any previously saved classifier would have to be retrained.
    batch = np.expand_dims(cv2.resize(image, (224, 224)), axis=0)
    batch = preprocess_input(batch)
    # verbose=0 silences the Keras progress bar that would otherwise be
    # printed once per image and flood the notebook output.
    features = feature_extractor.predict(batch, verbose=0)
    return features.flatten()

# Function to extract video features using the pre-trained ResNet50 model
def extract_video_features(video_path):
    """Return one flat feature vector summarising a whole video.

    Every frame is resized to 224x224, preprocessed and passed through the
    module-level ``feature_extractor``; the per-frame outputs are averaged
    over frames (``np.mean(..., axis=0)``) and the result is flattened.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.

    Returns:
        A 1-D NumPy array, or ``None`` (after printing a message) when the
        video cannot be opened or yields no frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Unable to open video: {video_path}")
        return None
    video_features = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            batch = np.expand_dims(cv2.resize(frame, (224, 224)), axis=0)
            batch = preprocess_input(batch)
            # verbose=0: avoid printing one progress bar per frame.
            video_features.append(feature_extractor.predict(batch, verbose=0))
    finally:
        # Release the capture even if resizing or prediction raises.
        cap.release()
    if not video_features:
        print(f"No frames found in video: {video_path}")
        return None
    return np.mean(video_features, axis=0).flatten()

# Process images in the "Hand" directory. Extensions are compared
# case-insensitively so that files such as "IMG.JPG" are not silently skipped.
image_dir = "Hand"
image_files = [
    os.path.join(image_dir, file)
    for file in os.listdir(image_dir)
    if file.lower().endswith(('.jpg', '.jpeg', '.png'))
]
image_features_list = []
image_labels = []
for image_file in image_files:
    image_features = extract_image_features(image_file)
    if image_features is not None:
        image_features_list.append(image_features)
        image_labels.append('image')  # Label for images

# Process videos in the "Handvid" directory (same case-insensitive matching).
video_dir = "Handvid"
video_files = [
    os.path.join(video_dir, file)
    for file in os.listdir(video_dir)
    if file.lower().endswith(('.mp4', '.avi'))
]
video_features_list = []
video_labels = []
for video_file in video_files:
    video_features = extract_video_features(video_file)
    if video_features is not None:
        video_features_list.append(video_features)
        video_labels.append('video')  # Label for videos

# The classifier needs samples of both classes; fail early with a clear
# message instead of an opaque error from the split or the fit.
if not image_features_list or not video_features_list:
    raise ValueError(
        f"Need at least one usable image in '{image_dir}' and one usable video "
        f"in '{video_dir}' to train the classifier."
    )

# Combine image and video features and labels (images first, then videos).
all_features = image_features_list + video_features_list
all_labels = image_labels + video_labels

# 80/20 split with a fixed seed.
# NOTE(review): the two labels only record whether a sample came from an image
# file or a video file -- confirm that this is the intended training target.
X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size=0.2, random_state=42)

# Train a Support Vector Machine classifier
classifier = SVC(kernel='linear')
classifier.fit(X_train, y_train)

# Save the trained classifier to the current directory
model_path = "svm_classifier.pkl"  # Define the path for the model file
joblib.dump(classifier, model_path)

# Evaluate the classifier on the held-out split
accuracy = classifier.score(X_test, y_test)
print("Accuracy:", accuracy)


Accuracy: 1.0


In [7]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
import joblib

# ResNet50 with ImageNet weights and no classification head, taking
# 224x224 3-channel inputs; used only to extract features.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
# Model whose output is the output of the base model's last layer.
feature_extractor = Model(
    inputs=base_model.input,
    outputs=base_model.layers[-1].output,
)

# Function to extract image features using the pre-trained ResNet50 model
def extract_image_features(image_path):
    """Return a flat ResNet50 feature vector for the image at ``image_path``.

    The file is read with OpenCV, resized to 224x224, expanded to a batch of
    one, passed through ``preprocess_input`` and then through the module-level
    ``feature_extractor``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 1-D NumPy array of features, or ``None`` (after printing a message)
        when OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to read image: {image_path}")
        return None
    # NOTE(review): cv2.imread yields BGR pixels, while the Keras ResNet
    # preprocess_input documentation describes an RGB input -- confirm whether
    # a BGR->RGB conversion is wanted. It would change every feature value, so
    # any previously saved classifier would have to be retrained.
    batch = np.expand_dims(cv2.resize(image, (224, 224)), axis=0)
    batch = preprocess_input(batch)
    # verbose=0 silences the Keras progress bar that would otherwise be
    # printed once per image and flood the notebook output.
    features = feature_extractor.predict(batch, verbose=0)
    return features.flatten()

# Load the saved SVM classifier model
model_filename = "svm_classifier.pkl"
if not os.path.exists(model_filename):
    # Raise instead of print + exit(): inside a Jupyter kernel exit() requests
    # a kernel shutdown rather than simply ending this cell, whereas an
    # exception stops the cell here and keeps the kernel alive.
    raise FileNotFoundError(
        f"Model file {model_filename} not found. Please train the model first."
    )
# joblib.load unpickles the file, so only load model files you trust.
classifier = joblib.load(model_filename)
print(f"Model loaded from {model_filename}")

# Function to predict the label of new data
def predict_label(features):
    """Classify one feature vector with the loaded ``classifier``.

    The vector is wrapped in a one-element list for ``classifier.predict`` and
    the single resulting label is returned.
    """
    predictions = classifier.predict([features])
    return predictions[0]

# Extract a feature vector for every .jpg/.jpeg/.png file in "Hand".
image_dir = "Hand"
image_features = []
for file in os.listdir(image_dir):
    # Skip anything that does not carry a supported image extension.
    if not file.endswith(('.jpg', '.jpeg', '.png')):
        continue
    image_path = os.path.join(image_dir, file)
    features = extract_image_features(image_path)
    # Unreadable images come back as None and are left out.
    if features is None:
        continue
    image_features.append(features)



Model loaded from svm_classifier.pkl


In [10]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
import joblib

# The ResNet50 model is created lazily by extract_features() on its first call.
base_model = None  # stays None until extract_features() builds the model

# Function to extract image features using the pre-trained ResNet50 model
def extract_image_features(image_dir):
    """Return feature vectors for every supported image in ``image_dir``.

    Files whose names end in .jpg, .jpeg or .png are read with OpenCV, resized
    to 224x224, batched, preprocessed and passed to ``extract_features``.
    Files that cannot be read, or for which no features come back, are
    reported with a printed message and skipped.

    Args:
        image_dir: Directory to scan (not recursive).

    Returns:
        A list of feature vectors, one per successfully processed image.
    """
    image_features_list = []
    for file in os.listdir(image_dir):
        if not file.endswith(('.jpg', '.jpeg', '.png')):
            continue
        image_path = os.path.join(image_dir, file)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue
        batch = np.expand_dims(cv2.resize(image, (224, 224)), axis=0)
        image_features = extract_features(preprocess_input(batch))
        if image_features is None:
            print(f"Unable to extract features from image: {image_path}")
            continue
        image_features_list.append(image_features)
    return image_features_list

# Function to extract features from an image.
# The Keras model wrapping ``base_model`` is cached here so that it is built
# once rather than on every call.
_feature_extractor = None


def extract_features(image):
    """Return the flattened ResNet50 features for a preprocessed image batch.

    On first use the ResNet50 base model (ImageNet weights, no top,
    224x224x3 input) is created and stored in the module-level ``base_model``;
    the wrapping feature-extractor model is cached as well.

    Args:
        image: Batch already prepared for the network -- presumably shape
            (1, 224, 224, 3) as produced by the caller; confirm if reused.

    Returns:
        A 1-D NumPy array with the flattened model output.
    """
    global base_model, _feature_extractor
    if base_model is None:
        # Imported here because this cell's import list does not bring
        # ResNet50 into scope; without it a fresh kernel raises NameError.
        from tensorflow.keras.applications import ResNet50

        base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
        _feature_extractor = None
    if _feature_extractor is None:
        _feature_extractor = Model(inputs=base_model.input, outputs=base_model.layers[-1].output)
    # verbose=0: avoid printing one Keras progress bar per image.
    features = _feature_extractor.predict(image, verbose=0)
    return features.flatten()

# Load the saved SVM classifier model
model_filename = "svm_classifier.pkl"
if not os.path.exists(model_filename):
    # Raise instead of print + exit(): inside a Jupyter kernel exit() requests
    # a kernel shutdown rather than simply ending this cell, whereas an
    # exception stops the cell here and keeps the kernel alive.
    raise FileNotFoundError(
        f"Model file {model_filename} not found. Please train the model first."
    )
# joblib.load unpickles the file, so only load model files you trust.
classifier = joblib.load(model_filename)
print(f"Model loaded from {model_filename}")

# Function to predict the label of new data
def predict_label(features):
    """Classify one feature vector with the loaded ``classifier``.

    The vector is wrapped in a one-element list for ``classifier.predict`` and
    the single resulting label is returned.
    """
    predictions = classifier.predict([features])
    return predictions[0]

# Example usage: classify every image found in a directory.
image_dir = "Hand"  # Change this to the path of your image directory
image_features = extract_image_features(image_dir)
if not image_features:
    print("No images found in the directory:", image_dir)
else:
    # One prediction line per extracted feature vector.
    for features in image_features:
        print("Predicted label:", predict_label(features))


Model loaded from svm_classifier.pkl
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image
Predicted label: image


In [39]:
import cv2
import numpy as np
import pickle

# Load the trained SVM classifier from the .pkl file.
# NOTE(review): the training cell in this notebook saves "svm_classifier.pkl"
# with joblib.dump, whereas this cell opens 'svm_model.pkl' with pickle --
# confirm that this is the intended file and loader.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with open('svm_model.pkl', 'rb') as f:
    svm_classifier = pickle.load(f)

# Size, in pixels, that extract_features() resizes each detected region to
# (placeholder values; replace with actual values).
width = 224
height = 224

# Function to detect hands in a frame
def detect_hands(frame):
    """Return bounding boxes of large colour-matched regions in ``frame``.

    Placeholder detector using a simple colour-based approach: the frame is
    converted to HSV, masked to a fixed HSV range, and every external contour
    with an area above 1000 contributes its bounding rectangle.

    Args:
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2HSV``).

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per accepted contour.
    """
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_hsv = np.array([0, 40, 60])
    upper_hsv = np.array([20, 150, 255])
    mask = cv2.inRange(hsv, lower_hsv, upper_hsv)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Area threshold of 1000 filters out small regions (adjust as needed).
    return [cv2.boundingRect(cnt) for cnt in contours if cv2.contourArea(cnt) > 1000]

# Function to extract features from the detected hand
def extract_features(hand, image=None):
    """Return a fixed-length pixel feature vector for one detected region.

    The region is cropped from ``image``, resized to the module-level
    ``(width, height)``, flattened, and then zero-padded or truncated to
    exactly 100352 values.

    Args:
        hand: Bounding box as ``(x, y, w, h)``.
        image: Frame to crop from. When omitted, the module-level ``frame``
            set by the capture loop is used, which keeps existing
            ``extract_features(hand)`` calls working.

    Returns:
        A 1-D NumPy array of length 100352.
    """
    # NOTE(review): 100352 equals 7 * 7 * 2048, which looks like the length of
    # the ResNet50 feature vectors used elsewhere in this notebook, whereas the
    # values produced here are raw resized pixels -- confirm that the loaded
    # classifier was really trained on this kind of input.
    expected_length = 100352

    if image is None:
        image = frame  # notebook-global current frame

    x, y, w, h = hand
    roi = image[y:y+h, x:x+w]
    resized_roi = cv2.resize(roi, (width, height))
    feature_vector = resized_roi.flatten()

    # Pad with zeros or truncate so the classifier always sees the same length.
    current_length = len(feature_vector)
    if current_length < expected_length:
        feature_vector = np.pad(feature_vector, (0, expected_length - current_length))
    elif current_length > expected_length:
        feature_vector = feature_vector[:expected_length]

    return feature_vector

# Function to recognize gestures using the trained SVM classifier
def recognize_gesture(feature_vector):
    """Return the label the loaded ``svm_classifier`` predicts for one vector.

    The vector is wrapped in a one-element list for ``predict`` and the single
    resulting label is returned.
    """
    predictions = svm_classifier.predict([feature_vector])
    return predictions[0]

# Capture video stream from the device's camera
cap = cv2.VideoCapture(0)

# Stop the cell if the camera is unavailable. Raising is used instead of
# exit(), which inside a Jupyter kernel requests a kernel shutdown rather than
# simply ending this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open camera.")

try:
    while True:
        # Read a frame from the video stream. The name `frame` matters:
        # extract_features() reads the module-level `frame` when it is not
        # given an image explicitly.
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to receive frame from camera.")
            break

        # Detect hands in the frame
        hands = detect_hands(frame)

        # Extract features from each detected region and classify it.
        for hand in hands:
            x, y, w, h = hand  # Unpack the coordinates of the bounding box
            feature_vector = extract_features(hand)
            gesture_label = recognize_gesture(feature_vector)

            # Draw the label just above the region. str() keeps putText
            # working when the classifier returns non-string labels.
            cv2.putText(frame, str(gesture_label), (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2, cv2.LINE_AA)

        # Display the video feed
        cv2.imshow('Hand Gesture Recognition', frame)

        # Check for key press to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, including on interrupt.
    cap.release()
    cv2.destroyAllWindows()
