In [1]:
#floodfill segmentation camera
import cv2
import mediapipe as mp
import numpy as np

# Fingertip-seeded flood-fill segmentation on a live camera feed.
# MediaPipe locates the index fingertip; the pixel under it seeds a flood fill
# whose mask cuts the touched region out of the frame.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(3)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Flood fill parameters (identical for every frame, so defined once)
flood_fill_color = (255, 255, 255)  # required positionally; FLOODFILL_MASK_ONLY leaves the image untouched
lower_diff = (10, 10, 10)
upper_diff = (10, 10, 10)

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # Skip frames the camera failed to deliver and try again
            continue

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        h, w, _ = frame.shape
        if results.multi_hand_landmarks:
            # Segment on an unannotated copy: draw_landmarks paints a dot exactly
            # on the fingertip, so seeding on the annotated frame would flood-fill
            # the overlay instead of the object underneath it.
            clean_frame = frame.copy()

            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger tip (landmark 8) in pixel coordinates
                tip = hand_landmarks.landmark[8]
                x, y = int(tip.x * w), int(tip.y * h)

                # Ensure seed point is within image bounds
                x = max(0, min(x, w - 1))
                y = max(0, min(y, h - 1))
                seed_point = (x, y)

                # floodFill requires a mask 2 pixels larger than the image in each dimension
                mask = np.zeros((h + 2, w + 2), np.uint8)

                # Only the mask is written; connected pixels within the tolerance become non-zero
                cv2.floodFill(
                    clean_frame,
                    mask,
                    seed_point,
                    flood_fill_color,
                    lower_diff,
                    upper_diff,
                    flags=cv2.FLOODFILL_MASK_ONLY,
                )

                # Keep only the filled pixels; the slice removes the 1-pixel mask border
                segmented = cv2.bitwise_and(clean_frame, clean_frame, mask=mask[1:-1, 1:-1])

                cv2.imshow("Segmented Object", segmented)

        cv2.imshow("Hand Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and windows, even if the loop raised or the
    # kernel was interrupted, so the next cell can reopen the device.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


#### Canny - mobile SAM - point prompt

In [None]:

import cv2
import mediapipe as mp
import numpy as np

from ultralytics import SAM


# Fingertip-pointed segmentation: Canny edge preview around the pointer plus a
# SAM point prompt placed at the centre of the pointer's region of interest.
seg_model = SAM("mobile_sam.pt")

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(2)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Weight of the newest pointer sample in the exponential smoothing below
SMOOTHING_FACTOR = 0.3
# The segmentation model runs once every this many processed hands (1 = always)
SEGMENT_EVERY_N = 1
# Last smoothed pointer position per hand, keyed by the hand's index in the
# detection results so two hands do not share (and corrupt) one state slot
last_positions = {}
count = 0

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            continue

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger start and tip landmarks (primary pointer)
                start_idx, end_idx = 5, 8
                start = np.array([hand_landmarks.landmark[start_idx].x, hand_landmarks.landmark[start_idx].y])
                end = np.array([hand_landmarks.landmark[end_idx].x, hand_landmarks.landmark[end_idx].y])

                # Unit direction of the finger; extend the tip a little along it
                vector = end - start
                length = np.linalg.norm(vector)
                if length == 0:
                    # Coincident landmarks: normalising would yield NaN and int() would raise
                    continue
                extended_tip = end + (vector / length) * 0.1  # Extend by a small factor

                # Convert normalised coordinates to pixel coordinates
                extended_tip_pixel = (int(extended_tip[0] * w), int(extended_tip[1] * h))

                # Exponential smoothing against this hand's previous position
                previous = last_positions.get(hand_idx, extended_tip_pixel)
                smoothed_tip = (
                    int(previous[0] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[0] * SMOOTHING_FACTOR),
                    int(previous[1] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[1] * SMOOTHING_FACTOR),
                )
                last_positions[hand_idx] = smoothed_tip

                # Small region around the pointer, clipped to the frame
                x, y = smoothed_tip
                roi_size = 50
                x1, y1 = max(0, x - roi_size), max(0, y - roi_size)
                x2, y2 = min(w, x + roi_size), min(h, y + roi_size)
                if x2 <= x1 or y2 <= y1:
                    # Pointer extends beyond the frame: the slice would be empty
                    # (or wrap around through a negative end index)
                    continue
                roi = frame[y1:y2, x1:x2]

                # Edge detection on the grayscale ROI
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                edges = cv2.Canny(gray, 50, 150)

                # Outline the detected edges in green, drawing into the frame through the ROI view
                contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                for cnt in contours:
                    cv2.drawContours(frame[y1:y2, x1:x2], [cnt], -1, (0, 255, 0), 2)

                # Draw rectangle around detected region
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                count += 1
                if count % SEGMENT_EVERY_N == 0:
                    # Prompt the model with the centre of the ROI
                    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                    seg_frame = seg_model.predict(frame, points=[cx, cy])
                    cv2.imshow("Segmented Object", seg_frame[0].plot())

        cv2.imshow("Edge-Based Segmentation", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


#### Canny - mobile SAM - box prompt

In [None]:

import cv2
import mediapipe as mp
import numpy as np

from ultralytics import SAM


# Fingertip-pointed segmentation: Canny edge preview around the pointer plus a
# SAM box prompt covering the pointer's region of interest.
seg_model = SAM("mobile_sam.pt")

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(2)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Weight of the newest pointer sample in the exponential smoothing below
SMOOTHING_FACTOR = 0.3
# Last smoothed pointer position per hand, keyed by the hand's index in the
# detection results so two hands do not share one state slot
last_positions = {}

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            continue
        frame = cv2.resize(frame, (640, 360))
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger start and tip landmarks (primary pointer)
                start_idx, end_idx = 5, 8
                start = np.array([hand_landmarks.landmark[start_idx].x, hand_landmarks.landmark[start_idx].y])
                end = np.array([hand_landmarks.landmark[end_idx].x, hand_landmarks.landmark[end_idx].y])

                # Unit direction of the finger; extend the tip a little along it
                vector = end - start
                length = np.linalg.norm(vector)
                if length == 0:
                    # Coincident landmarks: normalising would yield NaN and int() would raise
                    continue
                extended_tip = end + (vector / length) * 0.1  # Extend by a small factor

                # Convert normalised coordinates to pixel coordinates
                extended_tip_pixel = (int(extended_tip[0] * w), int(extended_tip[1] * h))

                # Exponential smoothing against this hand's previous position
                previous = last_positions.get(hand_idx, extended_tip_pixel)
                smoothed_tip = (
                    int(previous[0] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[0] * SMOOTHING_FACTOR),
                    int(previous[1] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[1] * SMOOTHING_FACTOR),
                )
                last_positions[hand_idx] = smoothed_tip

                # Small region around the pointer, clipped to the frame
                x, y = smoothed_tip
                roi_size = 50
                x1, y1 = max(0, x - roi_size), max(0, y - roi_size)
                x2, y2 = min(w, x + roi_size), min(h, y + roi_size)
                if x2 <= x1 or y2 <= y1:
                    # Pointer extends beyond the frame: the slice would be empty
                    # (or wrap around through a negative end index)
                    continue
                roi = frame[y1:y2, x1:x2]

                # Edge detection on the grayscale ROI
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                edges = cv2.Canny(gray, 50, 150)

                # Outline the detected edges in green, drawing into the frame through the ROI view
                contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                for cnt in contours:
                    cv2.drawContours(frame[y1:y2, x1:x2], [cnt], -1, (0, 255, 0), 2)

                # Draw rectangle around detected region
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Prompt the model with the ROI rectangle as a bounding box
                seg_frame = seg_model.predict(frame, bboxes=[x1, y1, x2, y2])
                cv2.imshow("Segmented Object", seg_frame[0].plot())

        # NOTE: this cell opens its only window once a hand has been segmented;
        # per the OpenCV docs waitKey needs an active HighGUI window to receive
        # keys, so 'q' is expected to work only after that first detection.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


#### Canny - FastSAM - point prompt

In [59]:

import cv2
import mediapipe as mp
import numpy as np
import os
import os

from ultralytics import FastSAM


# Fingertip-pointed segmentation: Canny edge preview around the pointer plus a
# FastSAM point prompt at the centre of the pointer's region of interest.
seg_model = FastSAM("FastSAM-s.pt")

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(2)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Weight of the newest pointer sample in the exponential smoothing below
SMOOTHING_FACTOR = 0.3
# Last smoothed pointer position per hand, keyed by the hand's index in the
# detection results so two hands do not share one state slot
last_positions = {}

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            continue
        frame = cv2.resize(frame, (640, 360))
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger start and tip landmarks (primary pointer)
                start_idx, end_idx = 5, 8
                start = np.array([hand_landmarks.landmark[start_idx].x, hand_landmarks.landmark[start_idx].y])
                end = np.array([hand_landmarks.landmark[end_idx].x, hand_landmarks.landmark[end_idx].y])

                # Unit direction of the finger; extend the tip a little along it
                vector = end - start
                length = np.linalg.norm(vector)
                if length == 0:
                    # Coincident landmarks: normalising would yield NaN and int() would raise
                    continue
                extended_tip = end + (vector / length) * 0.1  # Extend by a small factor

                # Convert normalised coordinates to pixel coordinates
                extended_tip_pixel = (int(extended_tip[0] * w), int(extended_tip[1] * h))

                # Exponential smoothing against this hand's previous position
                previous = last_positions.get(hand_idx, extended_tip_pixel)
                smoothed_tip = (
                    int(previous[0] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[0] * SMOOTHING_FACTOR),
                    int(previous[1] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[1] * SMOOTHING_FACTOR),
                )
                last_positions[hand_idx] = smoothed_tip

                # Small region around the pointer, clipped to the frame
                x, y = smoothed_tip
                roi_size = 20
                x1, y1 = max(0, x - roi_size), max(0, y - roi_size)
                x2, y2 = min(w, x + roi_size), min(h, y + roi_size)
                if x2 <= x1 or y2 <= y1:
                    # Pointer extends beyond the frame: the slice would be empty
                    # (or wrap around through a negative end index)
                    continue
                roi = frame[y1:y2, x1:x2]

                # Edge detection on the grayscale ROI
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                edges = cv2.Canny(gray, 50, 150)

                # Draw the edge preview on a copy so the frame handed to the
                # segmentation model carries no contour or rectangle overlay
                canny_frame = frame.copy()
                contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                for cnt in contours:
                    cv2.drawContours(canny_frame[y1:y2, x1:x2], [cnt], -1, (0, 255, 0), 2)  # Green boundary

                # Draw rectangle around detected region
                cv2.rectangle(canny_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Prompt point: centre of the clipped ROI
                cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

                # Mild blur before segmentation
                blurred_frame = cv2.GaussianBlur(frame, (5, 5), 0)
                seg_frame = seg_model.predict(blurred_frame, points=[cx, cy])[0]

                # Upscale the rendered result for display
                seg_frame_plot = cv2.resize(seg_frame.plot(), (1920, 1080))
                cv2.imshow("Segmented Object", seg_frame_plot)
                cv2.imshow("Canny", canny_frame)

        # NOTE: windows are only created once a hand has been processed; per the
        # OpenCV docs waitKey needs an active HighGUI window to receive keys,
        # so 'q' is expected to work only after that first detection.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1742638121.854453    8491 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742638121.872936   12398 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 550.120), renderer: NVIDIA GeForce GTX 1650 Ti/PCIe/SSE2
W0000 00:00:1742638121.899734   12383 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742638121.918895   12388 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.



0: 384x640 2 objects, 42.9ms
Speed: 3.1ms preprocess, 42.9ms inference, 180.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 object, 18.3ms
Speed: 1.3ms preprocess, 18.3ms inference, 6.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 objects, 17.9ms
Speed: 1.1ms preprocess, 17.9ms inference, 5.0ms postprocess per image at shape (1, 3, 384, 640)


QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to tar


0: 384x640 1 object, 16.3ms
Speed: 1.2ms preprocess, 16.3ms inference, 5.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 object, 14.4ms
Speed: 0.9ms preprocess, 14.4ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 14.7ms
Speed: 1.2ms preprocess, 14.7ms inference, 6.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 16.0ms
Speed: 1.8ms preprocess, 16.0ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 15.6ms
Speed: 2.0ms preprocess, 15.6ms inference, 9.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 object, 15.2ms
Speed: 1.0ms preprocess, 15.2ms inference, 8.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 object, 15.1ms
Speed: 1.4ms preprocess, 15.1ms inference, 6.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 object, 14.2ms
Speed: 2.5ms preprocess, 14.2ms inference, 8.6ms postprocess per ima

#### Canny roi segment

In [None]:
#Canny roi segment
import cv2
import mediapipe as mp
import numpy as np

# Canny-based segmentation of the region the index finger points at:
# edges inside the pointer ROI are closed morphologically, and every large
# enough contour is filled with a random colour and blended over the frame.

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(2)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Weight of the newest pointer sample in the exponential smoothing below
SMOOTHING_FACTOR = 0.3
# Last smoothed pointer position per hand, keyed by the hand's index in the
# detection results so two hands do not share one state slot
last_positions = {}

# Structuring element used to close gaps in the edge map (constant per frame)
kernel = np.ones((3, 3), np.uint8)

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            continue
        frame = cv2.resize(frame, (640, 360))
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger start and tip landmarks
                start_idx, end_idx = 5, 8
                start = np.array([hand_landmarks.landmark[start_idx].x, hand_landmarks.landmark[start_idx].y])
                end = np.array([hand_landmarks.landmark[end_idx].x, hand_landmarks.landmark[end_idx].y])

                # Unit direction of the finger; extend the tip along it
                vector = end - start
                length = np.linalg.norm(vector)
                if length == 0:
                    # Coincident landmarks: normalising would yield NaN and int() would raise
                    continue
                extended_tip = end + (vector / length) * 0.15

                # Convert normalised coordinates to pixel coordinates
                extended_tip_pixel = (int(extended_tip[0] * w), int(extended_tip[1] * h))

                # Exponential smoothing against this hand's previous position
                previous = last_positions.get(hand_idx, extended_tip_pixel)
                smoothed_tip = (
                    int(previous[0] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[0] * SMOOTHING_FACTOR),
                    int(previous[1] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[1] * SMOOTHING_FACTOR),
                )
                last_positions[hand_idx] = smoothed_tip

                # Region around the pointer, clipped to the frame
                x, y = smoothed_tip
                roi_size = 35
                x1, y1 = max(0, x - roi_size), max(0, y - roi_size)
                x2, y2 = min(w, x + roi_size), min(h, y + roi_size)
                if x2 <= x1 or y2 <= y1:
                    # Pointer extends beyond the frame: the slice would be empty
                    # (or wrap around through a negative end index)
                    continue
                roi = frame[y1:y2, x1:x2]

                # Blurred grayscale ROI -> Canny edges
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                blurred = cv2.GaussianBlur(gray, (5, 5), 0)
                edges = cv2.Canny(blurred, 50, 150)

                # Close gaps in edges
                closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)

                # Find contours
                contours, _ = cv2.findContours(closed_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                # Blank colour mask with the same shape as the ROI
                mask = np.zeros_like(roi, dtype=np.uint8)

                segment_count = 0
                for cnt in contours:
                    if cv2.contourArea(cnt) < 1000:  # Ignore small contours
                        continue
                    color = np.random.randint(0, 255, (3,), dtype=np.uint8).tolist()
                    cv2.drawContours(mask, [cnt], -1, color, thickness=cv2.FILLED)
                    segment_count += 1

                # Overlay segmented objects onto the original frame
                frame[y1:y2, x1:x2] = cv2.addWeighted(frame[y1:y2, x1:x2], 0.3, mask, 0.7, 0)

                # Draw rectangle around detected region, labelled with the number
                # of segments actually drawn (contours skipped as too small are
                # not counted)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'{segment_count}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Upscale for display
        frame = cv2.resize(frame, (1920, 1080))
        cv2.imshow("Segmented Objects", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    hands.close()
    cap.release()
    cv2.destroyAllWindows()

#### Canny - otsu - adaptive smoothing

In [None]:
import cv2
import mediapipe as mp
import numpy as np

# Canny segmentation of the pointed-at region with CLAHE contrast enhancement,
# Otsu-derived Canny thresholds and distance-adaptive pointer smoothing.

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(2)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Smoothing weight used for a hand's first sample, before a jump distance exists
SMOOTHING_FACTOR = 0.3
# Last smoothed pointer position per hand, keyed by the hand's index in the
# detection results so two hands do not share one state slot
last_positions = {}

# Loop-invariant helpers: contrast equaliser and the edge-closing kernel
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
kernel = np.ones((3, 3), np.uint8)

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            continue
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger start and tip landmarks
                start_idx, end_idx = 5, 8
                start = np.array([hand_landmarks.landmark[start_idx].x, hand_landmarks.landmark[start_idx].y])
                end = np.array([hand_landmarks.landmark[end_idx].x, hand_landmarks.landmark[end_idx].y])

                # Unit direction of the finger; extend the tip along it
                vector = end - start
                length = np.linalg.norm(vector)
                if length == 0:
                    # Coincident landmarks: normalising would yield NaN and int() would raise
                    continue
                extended_tip = end + (vector / length) * 0.15

                # Convert normalised coordinates to pixel coordinates
                extended_tip_pixel = (int(extended_tip[0] * w), int(extended_tip[1] * h))

                # Adaptive smoothing: the weight of the new sample shrinks as the
                # jump grows (0.7 for jumps up to 15 px, down to 0.2 from 40 px),
                # i.e. fast movements are smoothed more heavily.
                previous = last_positions.get(hand_idx)
                if previous is None:
                    previous = extended_tip_pixel
                    dynamic_smoothing = SMOOTHING_FACTOR
                else:
                    dist = np.linalg.norm(np.array(extended_tip_pixel) - np.array(previous))
                    dynamic_smoothing = max(0.2, min(0.7, 1 - dist / 50))

                # Apply smoothing
                smoothed_tip = (
                    int(previous[0] * (1 - dynamic_smoothing) + extended_tip_pixel[0] * dynamic_smoothing),
                    int(previous[1] * (1 - dynamic_smoothing) + extended_tip_pixel[1] * dynamic_smoothing),
                )
                last_positions[hand_idx] = smoothed_tip

                # Region around the pointer, clipped to the frame
                x, y = smoothed_tip
                roi_size = 100
                x1, y1 = max(0, x - roi_size), max(0, y - roi_size)
                x2, y2 = min(w, x + roi_size), min(h, y + roi_size)
                if x2 <= x1 or y2 <= y1:
                    # Pointer extends beyond the frame: the slice would be empty
                    # (or wrap around through a negative end index)
                    continue
                roi = frame[y1:y2, x1:x2]

                # Enhance contrast: CLAHE on the L channel of the LAB representation
                lab = cv2.cvtColor(roi, cv2.COLOR_BGR2LAB)
                l, a, b = cv2.split(lab)
                l = clahe.apply(l)
                enhanced_roi = cv2.cvtColor(cv2.merge((l, a, b)), cv2.COLOR_LAB2BGR)

                # Convert ROI to grayscale and apply Gaussian blur
                gray = cv2.cvtColor(enhanced_roi, cv2.COLOR_BGR2GRAY)
                blurred = cv2.GaussianBlur(gray, (5, 5), 0)

                # Derive the Canny thresholds from Otsu's threshold of the ROI
                otsu_thresh, _ = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
                lower_thresh = max(30, 0.5 * otsu_thresh)  # Ensure it doesn't go too low
                upper_thresh = min(255, 1.5 * otsu_thresh)  # Ensure it doesn't go too high
                edges = cv2.Canny(blurred, int(lower_thresh), int(upper_thresh))

                # Close gaps in edges using morphological operations
                closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)

                # Find contours
                contours, _ = cv2.findContours(closed_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                # Blank mask with the same shape as the ROI
                mask = np.zeros_like(roi, dtype=np.uint8)

                valid_contours = []
                for cnt in contours:
                    area = cv2.contourArea(cnt)
                    perimeter = cv2.arcLength(cnt, True)
                    if area < 200:  # Ignore small contours
                        continue
                    # Check convexity to avoid fragmented segments
                    if cv2.isContourConvex(cnt) or perimeter > 20:
                        valid_contours.append(cnt)

                # Fill every valid contour in white
                for cnt in valid_contours:
                    cv2.drawContours(mask, [cnt], -1, (255, 255, 255), thickness=cv2.FILLED)
                cv2.imshow("mask", mask)

                # Overlay segmented objects onto the original frame
                frame[y1:y2, x1:x2] = cv2.addWeighted(frame[y1:y2, x1:x2], 0.3, mask, 0.7, 0)

                # Draw rectangle around detected region
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Display contour count (number of segmented objects)
                contour_count = len(valid_contours)
                cv2.putText(frame, f'{contour_count}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        cv2.imshow("Segmented Objects", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


In [58]:
import cv2
import numpy as np

# Whole-frame contour segmentation: Canny edges -> external contours ->
# each sufficiently large, closed contour is filled with a random colour.
cap = cv2.VideoCapture(2)

# NOTE: a 1x1 structuring element makes the dilation below an identity
# operation; use a larger size to actually thicken or bridge edges.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 1))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        edges = cv2.Canny(blurred, 50, 150)

        dilated = cv2.dilate(edges, kernel)
        contours, _ = cv2.findContours(dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        mask = np.zeros_like(frame)

        for cnt in contours:
            if cv2.contourArea(cnt) < 100:  # Ignore small noise
                continue

            # Treat the contour as closed when its first and last points lie
            # within 10 px of each other; skip it otherwise
            is_closed = np.linalg.norm(cnt[0] - cnt[-1]) < 10
            if not is_closed:
                continue

            color = np.random.randint(0, 255, (3,), dtype=np.uint8).tolist()
            cv2.drawContours(mask, [cnt], -1, color, thickness=cv2.FILLED)

        # Show mostly the colour mask with a faint trace of the camera image
        segmented = cv2.addWeighted(frame, 0.1, mask, 0.9, 0)

        cv2.imshow("Segmented Objects", segmented)
        cv2.imshow("Edges", dilated)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    cap.release()
    cv2.destroyAllWindows()


QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to target thread (0x5697b55928e0)

QObject::moveToThread: Current thread (0x5697b55928e0) is not the object's thread (0x5697b51141f0).
Cannot move to tar

In [None]:
#grabcut segmentation
import cv2
import mediapipe as mp
import numpy as np

# GrabCut segmentation of the region the index finger points at: the pointer
# ROI is split into foreground and background, and the foreground outline is
# drawn on the live frame.

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(2)
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Weight of the newest pointer sample in the exponential smoothing below
SMOOTHING_FACTOR = 0.3
# Last smoothed pointer position per hand, keyed by the hand's index in the
# detection results so two hands do not share one state slot
last_positions = {}

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            continue

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape

            for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Index finger start and tip landmarks (primary pointer)
                start_idx, end_idx = 5, 8
                start = np.array([hand_landmarks.landmark[start_idx].x, hand_landmarks.landmark[start_idx].y])
                end = np.array([hand_landmarks.landmark[end_idx].x, hand_landmarks.landmark[end_idx].y])

                # Unit direction of the finger; extend the tip a little along it
                vector = end - start
                length = np.linalg.norm(vector)
                if length == 0:
                    # Coincident landmarks: normalising would yield NaN and int() would raise
                    continue
                extended_tip = end + (vector / length) * 0.1  # Extend by a small factor

                # Convert normalised coordinates to pixel coordinates
                extended_tip_pixel = (int(extended_tip[0] * w), int(extended_tip[1] * h))

                # Exponential smoothing against this hand's previous position
                previous = last_positions.get(hand_idx, extended_tip_pixel)
                smoothed_tip = (
                    int(previous[0] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[0] * SMOOTHING_FACTOR),
                    int(previous[1] * (1 - SMOOTHING_FACTOR) + extended_tip_pixel[1] * SMOOTHING_FACTOR),
                )
                last_positions[hand_idx] = smoothed_tip

                x, y = smoothed_tip

                # Define a small ROI around the fingertip, clipped to the frame
                roi_size = 50
                x1, y1 = max(0, x - roi_size), max(0, y - roi_size)
                x2, y2 = min(w, x + roi_size), min(h, y + roi_size)

                # Skip GrabCut if ROI is too small. Checked on the coordinates
                # before slicing: an off-frame pointer can make x2/y2 negative,
                # which as a slice end would wrap around to a huge region.
                if x2 - x1 < 5 or y2 - y1 < 5:
                    continue
                roi = frame[y1:y2, x1:x2]

                # GrabCut Segmentation, initialised from a rectangle inset by 2 px
                mask = np.zeros(roi.shape[:2], np.uint8)
                bgdModel = np.zeros((1, 65), np.float64)
                fgdModel = np.zeros((1, 65), np.float64)
                rect = (2, 2, roi.shape[1] - 4, roi.shape[0] - 4)  # Avoid edges
                cv2.grabCut(roi, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

                # Binary foreground mask: labels 0 and 2 (background / probable
                # background) become 0, everything else 1
                mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')

                # Take contours directly from the foreground mask so that dark
                # foreground pixels are not lost to an intensity threshold
                contours, _ = cv2.findContours(mask2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                # Draw contour around the object, into the frame through the ROI view
                for cnt in contours:
                    cv2.drawContours(frame[y1:y2, x1:x2], [cnt], -1, (0, 0, 255), 2)  # Red boundary

                # Draw a green rectangle around the ROI
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.imshow("Object Segmentation", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and windows even when the loop raises or is interrupted
    hands.close()
    cap.release()
    cv2.destroyAllWindows()