In [3]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
import keras # you can import keras as well
print("Keras version:", keras.__version__) # This will print the Keras version bundled with TF


TensorFlow version: 2.13.1
Keras version: 2.13.1


In [None]:
import tensorflow as tf
import cv2
import numpy as np
label=['closedFist', 'multiFingerBend', 'openPalm', 'singleFingerBend']

# Load the model
model = tf.keras.models.load_model('gesture_recogniser.keras')

# Webcam setup
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Cannot open webcam")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break

    # Preprocess the frame (same as in your single image code):
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Color conversion is important
    img = cv2.resize(img, (256, 256))
    img = np.expand_dims(img, axis=0)
    img = img / 255.0

    # Make predictions:
    predictions = model.predict(img)
    predicted_label = np.argmax(predictions, axis=1)[0]
    predicted_label = label[predicted_label]  # Use your label mapping
    probability = np.max(predictions, axis=1)[0]

    # Display results on the frame:
    label_text = f"Label: {predicted_label} | Prob: {probability:.2f}"

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5  # Adjust as needed
    font_thickness = 2
    text_size = cv2.getTextSize(label_text, font, font_scale, font_thickness)[0]

    text_x = 10  # X position
    text_y = 30  # Y position

    cv2.putText(frame, label_text, (text_x, text_y), font, font_scale, (0, 255, 0), font_thickness, cv2.LINE_AA) # Green color

    cv2.imshow('Webcam Detection', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

In [22]:
import tensorflow as tf
import cv2
import numpy as np
label=['closedFist', 'multiFingerBend', 'openPalm', 'singleFingerBend']

# Function to preprocess the image (you may need to adjust based on your model's requirements)
def preprocess_image(image_path, target_size=(256, 256)):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, target_size)
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    image = image / 255.0  # Normalize image if required
    return image

# Prediction function
def predict_single_image(image_path, model):
    image = preprocess_image(image_path)
    predictions = model.predict(image)
    print(predictions)
    predicted_label = np.argmax(predictions, axis=1)[0]
    predicted_label=label[predicted_label]
    probability = np.max(predictions, axis=1)[0]
    return predicted_label, probability

# Load the model
model = tf.keras.models.load_model('gesture_recogniser.keras')

# Image path
image_path = "close_fist.jpeg"  # Replace with your image path

# Get prediction
predicted_label, probability = predict_single_image(image_path, model)

# Read the image again to display it
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Define the label and probability to display
label_text = f"Label: {predicted_label} | Prob: {probability:.2f}"

# Set font and size
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.3  # Adjust font scale to make it fit
font_thickness = 1
text_size = cv2.getTextSize(label_text, font, font_scale, font_thickness)[0]

# Calculate text position (top-left corner)
text_x = 1  # X position
text_y = 30  # Y position (ensure it's not too close to the edge)

# Add the label and probability to the image
cv2.putText(image, label_text, (text_x, text_y), font, font_scale, (255, 0, 0), font_thickness, cv2.LINE_AA)

# Display the image with the label
cv2.imshow("res",image)
cv2.waitKey(0)
cv2.destroyAllWindows()



[[3.92240928e-21 1.00829735e-11 9.99998450e-01 1.58998296e-06]]


In [None]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO

# Load YOLO model
model = YOLO("yolov8x.pt")

# Open video file
cap = cv2.VideoCapture("/home/suraj/PlayGround/myprograms/240335.mp4")

# Define target classes for detection
target_classes = ['car', 'bus', 'truck', 'person']

# Polygon points for ROI
pts = []
roi_set = False  # Flag to check if ROI is set

count = 0
number_of_photos = 3

# Function to draw ROI polygon
def draw_polygon(event, x, y, flags, param):
    global pts, roi_set
    if event == cv2.EVENT_LBUTTONDOWN:
        print(x, y)
        pts.append((x, y))
    elif event == cv2.EVENT_RBUTTONDOWN:  # Right-click to clear polygon
        pts = []
        roi_set = False  # Reset ROI flag
    if len(pts) >= 4 and not roi_set:  # Once polygon is complete
        roi_set = True

# Function to check if a point is inside the polygon
def inside_polygon(point, polygon):
    return cv2.pointPolygonTest(np.array(polygon, np.int32), (point[0], point[1]), False) >= 0

# Register mouse callback
cv2.namedWindow('Video')
cv2.setMouseCallback('Video', draw_polygon)

# Preprocess function for YOLO model
def preprocess(img):
    height, width = img.shape[:2]
    ratio = height / width
    return cv2.resize(img, (640, int(640 * ratio)))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_detected = frame.copy()
    frame = preprocess(frame)
    
    # Run YOLO detection
    results = model(frame)

    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0])
            name = model.names[cls]

            if name in target_classes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                center_x = (x1 + x2) // 2
                center_y = (y1 + y2) // 2

                # Draw bounding box and labels
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 3)
                cv2.putText(frame, name, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
                cv2.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1)

                # Draw polygon ROI if set
                if roi_set:
                    frame_copy = frame.copy()
                    cv2.fillPoly(frame_copy, [np.array(pts)], (0, 255, 0))
                    frame = cv2.addWeighted(frame_copy, 0.2, frame, 0.8, 0)

                    # Check if person is inside polygon
                    if inside_polygon((center_x, center_y), pts) and name in target_classes:
                        mask = np.zeros_like(frame_detected)
                        points = np.array([[x1, y1], [x1, y2], [x2, y2], [x2, y1]])
                        mask = cv2.fillPoly(mask, [points], (255, 255, 255))
                        frame_detected = cv2.bitwise_and(frame_detected, mask)

                        # # Save detected image
                        # if count < number_of_photos:
                        #     cv2.imwrite(f"Detected Photos/detected_{count}.jpg", frame_detected)
                        #     count += 1

                        # Display warning text
                        cv2.putText(frame, "Target", (center_x, center_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                        cv2.putText(frame, "Person Detected!", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 3)

    cv2.imshow("Video", frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):  # Press 'q' to exit
        break
    elif key == ord('c'):  # Press 'c' to clear the polygon
        pts.clear()
        roi_set = False

cap.release()
cv2.destroyAllWindows()


### With time and movement detection

In [11]:
import cv2
import torch
import numpy as np
import time
from ultralytics import YOLO

# Load YOLO model
model = YOLO("yolov8n.pt")

# Open video file
cap = cv2.VideoCapture(0)

# Define target classes for detection
target_classes = ['car', 'bus', 'truck', 'person']

# Polygon points for ROI
pts = []
roi_set = False  # Flag to check if ROI is set
count = 0
number_of_photos = 3

# Tracking person position and time
person_in_roi = False
last_position = None
time_in_roi = 0  # In seconds
movement_threshold = 100  # Threshold for movement (in pixels)
time_threshold = 10  # Time threshold for alert (in seconds)

# Function to draw ROI polygon
def draw_polygon(event, x, y, flags, param):
    global pts, roi_set
    if event == cv2.EVENT_LBUTTONDOWN:
        print(x, y)
        pts.append((x, y))
    elif event == cv2.EVENT_RBUTTONDOWN:  # Right-click to clear polygon
        pts = []
        roi_set = False  # Reset ROI flag
    if len(pts) >= 4 and not roi_set:  # Once polygon is complete
        roi_set = True

# Function to check if a point is inside the polygon
def inside_polygon(point, polygon):
    return cv2.pointPolygonTest(np.array(polygon, np.int32), (point[0], point[1]), False) >= 0

# Register mouse callback
cv2.namedWindow('Video')
cv2.setMouseCallback('Video', draw_polygon)

# Preprocess function for YOLO model
def preprocess(img):
    height, width = img.shape[:2]
    ratio = height / width
    return cv2.resize(img, (640, int(640 * ratio)))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_detected = frame.copy()
    frame = preprocess(frame)
    
    # Run YOLO detection
    results = model(frame)

    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0])
            name = model.names[cls]

            if name in target_classes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                center_x = (x1 + x2) // 2
                center_y = (y1 + y2) // 2

                # Draw bounding box and labels
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 3)
                cv2.putText(frame, name, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
                cv2.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1)

                # Draw polygon ROI if set
                if roi_set:
                    frame_copy = frame.copy()
                    cv2.fillPoly(frame_copy, [np.array(pts)], (0, 255, 0))
                    frame = cv2.addWeighted(frame_copy, 0.2, frame, 0.8, 0)

                    # Check if person is inside polygon
                    if inside_polygon((center_x, center_y), pts) and name == 'person':
                        mask = np.zeros_like(frame_detected)
                        points = np.array([[x1, y1], [x1, y2], [x2, y2], [x2, y1]])
                        mask = cv2.fillPoly(mask, [points], (255, 255, 255))
                        frame_detected = cv2.bitwise_and(frame_detected, mask)

                        # Save detected image
                        if count < number_of_photos:
                            cv2.imwrite(f"Detected Photos/detected_{count}.jpg", frame_detected)
                            count += 1

                        # Display warning text
                        cv2.putText(frame, "Target", (center_x, center_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                        cv2.putText(frame, "Person Detected!", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 3)

                        # Check if the person has been in the ROI without moving
                        if last_position is None:
                            last_position = (center_x, center_y)
                            time_in_roi = time.time()  # Start timer when person enters ROI
                        else:
                            # Check if person is moving
                            distance = np.linalg.norm(np.array([center_x, center_y]) - np.array(last_position))
                            if distance < movement_threshold:  # If the movement is minimal
                                # Calculate how long the person has been stationary in the ROI
                                if time.time() - time_in_roi >= time_threshold:
                                    cv2.putText(frame, "ALERT: Person not moving!", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                            else:
                                # If moving, reset the timer
                                time_in_roi = time.time()

                            last_position = (center_x, center_y)  # Update last position

    cv2.imshow("Video", frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):  # Press 'q' to exit
        break
    elif key == ord('c'):  # Press 'c' to clear the polygon
        pts.clear()
        roi_set = False
        last_position = None
        time_in_roi = 0

cap.release()
cv2.destroyAllWindows()



0: 480x640 2 persons, 136.6ms
Speed: 5.6ms preprocess, 136.6ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 150.1ms
Speed: 6.7ms preprocess, 150.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 177.6ms
Speed: 6.1ms preprocess, 177.6ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 253.1ms
Speed: 3.9ms preprocess, 253.1ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 134.3ms
Speed: 6.5ms preprocess, 134.3ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 110.9ms
Speed: 3.3ms preprocess, 110.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 89.9ms
Speed: 2.3ms preprocess, 89.9ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 86.5ms
Speed: 2.4ms preprocess, 86.5ms inference, 1.5ms postprocess per image 