Live camera code

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from ultralytics import YOLO

def image_on_hand(extracted_region):
    """Show the webcam feed with a thumbnail of ``extracted_region`` pinned to each detected hand.

    The region is resized to 30x30 pixels and pasted into the mirrored camera
    frame with its top-left corner at the ring-finger MCP joint (MediaPipe
    hand landmark 13) of every detected hand. The loop ends when Esc or ``q``
    is pressed, or when the camera repeatedly fails to deliver a frame.

    Args:
        extracted_region: Non-empty image array to overlay; it is pasted
            directly into the camera frame, so it must have the same channel
            layout as the frames.

    Raises:
        ValueError: If ``extracted_region`` is ``None`` or contains no pixels.
    """
    if extracted_region is None or extracted_region.size == 0:
        raise ValueError("extracted_region must be a non-empty image")

    # Give up after this many consecutive failed reads instead of spinning
    # forever in a loop that never polls the keyboard.
    max_failed_reads = 100

    mp_hands = mp.solutions.hands
    anchor = mp_hands.HandLandmark.RING_FINGER_MCP  # landmark index 13
    thumb = cv2.resize(extracted_region, (30, 30))
    thumb_h, thumb_w = thumb.shape[:2]

    cap = cv2.VideoCapture(0)
    failed_reads = 0
    try:
        with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    failed_reads += 1
                    if failed_reads >= max_failed_reads:
                        break
                    continue
                failed_reads = 0

                # Mirror the frame so on-screen movement matches the user's hand.
                frame = cv2.flip(frame, 1)
                results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                frame_height, frame_width = frame.shape[:2]

                for hand_landmarks in results.multi_hand_landmarks or ():
                    if len(hand_landmarks.landmark) <= anchor:
                        continue
                    joint = hand_landmarks.landmark[anchor]

                    # Landmark coordinates are normalised to the frame size.
                    x_pixel = int(joint.x * frame_width)
                    y_pixel = int(joint.y * frame_height)

                    # Paste only when the thumbnail lies fully inside the
                    # frame; a negative start would silently wrap the slice
                    # and make the assignment fail on a shape mismatch.
                    fits_horizontally = 0 <= x_pixel and x_pixel + thumb_w <= frame_width
                    fits_vertically = 0 <= y_pixel and y_pixel + thumb_h <= frame_height
                    if fits_horizontally and fits_vertically:
                        frame[y_pixel:y_pixel + thumb_h, x_pixel:x_pixel + thumb_w] = thumb

                cv2.imshow('Hand Tracking', frame)

                key = cv2.waitKey(1)
                if key == 27 or key == ord("q"):
                    break
    finally:
        # Always free the camera opened here, including on errors.
        cap.release()
        cv2.destroyAllWindows()
                

def select_object(event, x, y, flags, param):
    """Mouse callback that selects the first object whose outline contains a left click.

    Reads the module-level ``boxes`` and ``masks`` lists and, when the click at
    ``(x, y)`` lies inside or on a mask outline, stores that entry's index in
    the module-level ``selected_index``. The selection is left unchanged when
    the click hits no outline or the event is not a left-button press.

    Args:
        event: OpenCV mouse event code.
        x: Click x coordinate in window pixels.
        y: Click y coordinate in window pixels.
        flags: Unused; required by the OpenCV callback signature.
        param: Unused; required by the OpenCV callback signature.
    """
    global selected_index

    if event != cv2.EVENT_LBUTTONDOWN:
        return

    # zip keeps the index aligned with both lists, as the display code
    # indexes boxes and masks with the same selected_index.
    for idx, (_, mask) in enumerate(zip(boxes, masks)):
        outline = np.array(mask.xy, dtype=np.int32).reshape((-1, 1, 2))
        if cv2.pointPolygonTest(outline, (x, y), False) >= 0:
            selected_index = idx
            break

# Index into boxes/masks of the clicked object; -1 means nothing is selected.
selected_index = -1
# Crop of the selected object; stays None until a selection yields a non-empty crop.
extracted_region = None

model = YOLO("yolov8m-seg.pt")
cap = cv2.VideoCapture(0)

while True:
    ret, img = cap.read()
    if not ret:
        break

    img = cv2.resize(img, (750, 750))
    results = model.predict(source=img)
    boxes = []
    masks = []

    for result in results:
        # result.masks is None when nothing was segmented in this frame;
        # extending a list with None would raise TypeError.
        if result.masks is None:
            continue
        boxes.extend(result.boxes.cpu().numpy())
        masks.extend(result.masks)

    # Set up the mouse callback for object selection
    cv2.namedWindow("image")
    cv2.setMouseCallback("image", select_object)

    display_img = img.copy()
    if selected_index != -1 and selected_index < len(boxes):
        x1, y1, x2, y2 = (int(v) for v in boxes[selected_index].xyxy[0])
        seg = np.array(masks[selected_index].xy, dtype=np.int32).reshape((-1, 1, 2))
        cv2.rectangle(display_img, (x1, y1), (x2, y2), (0, 255, 0), 1)
        cv2.polylines(display_img, [seg], True, (0, 0, 255), 1)

        # Black out everything outside the selected outline.
        object_mask = np.zeros_like(img)
        cv2.fillPoly(object_mask, [seg], (255, 255, 255))
        object_img = cv2.bitwise_and(img, object_mask)

        # Re-run detection on the isolated object and outline what is found.
        # Prediction finishes before any rectangle is drawn on object_img.
        for result in model.predict(source=object_img):
            for found in result.boxes.cpu().numpy():
                fx1, fy1, fx2, fy2 = (int(v) for v in found.xyxy[0])
                cv2.rectangle(object_img, (fx1, fy1), (fx2, fy2), (0, 255, 0), 2)

        # Clamp the crop to the image and skip degenerate boxes, since an
        # empty crop can be neither displayed nor resized later.
        height, width = img.shape[:2]
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 > x1 and y2 > y1:
            extracted_region = object_img[y1:y2, x1:x2]
            cv2.imshow("Extracted Region", extracted_region)

    cv2.imshow("image", display_img)
    key = cv2.waitKey(1)

    if key == 13:  # Enter key confirms the selection
        break

    # If window is closed, reopen it and reattach the mouse callback
    if cv2.getWindowProperty("image", cv2.WND_PROP_VISIBLE) < 1:
        cv2.namedWindow("image")
        cv2.setMouseCallback("image", select_object)

# Free the camera before image_on_hand opens its own capture on the same device.
cap.release()
cv2.destroyAllWindows()
if extracted_region is None:
    print("No object was selected; nothing to overlay on the hand.")
else:
    image_on_hand(extracted_region)
cv2.destroyAllWindows()


Directory code (object is selected from an image file in the working directory)

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from ultralytics import YOLO

def image_on_hand(extracted_region):
    """Show the webcam feed with a thumbnail of ``extracted_region`` pinned to each detected hand.

    The region is resized to 30x30 pixels and pasted into the mirrored camera
    frame with its top-left corner at the middle-finger MCP joint (MediaPipe
    hand landmark 9) of every detected hand. The loop ends when Esc or ``q``
    is pressed, or when the camera repeatedly fails to deliver a frame; the
    camera and all OpenCV windows are released on exit.

    Args:
        extracted_region: Non-empty image array to overlay; it is pasted
            directly into the camera frame, so it must have the same channel
            layout as the frames.

    Raises:
        ValueError: If ``extracted_region`` is ``None`` or contains no pixels.
    """
    if extracted_region is None or extracted_region.size == 0:
        raise ValueError("extracted_region must be a non-empty image")

    # Give up after this many consecutive failed reads instead of spinning
    # forever in a loop that never polls the keyboard.
    max_failed_reads = 100

    mp_hands = mp.solutions.hands
    anchor = mp_hands.HandLandmark.MIDDLE_FINGER_MCP  # landmark index 9
    thumb = cv2.resize(extracted_region, (30, 30))
    thumb_h, thumb_w = thumb.shape[:2]

    cap = cv2.VideoCapture(0)
    failed_reads = 0
    try:
        with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    failed_reads += 1
                    if failed_reads >= max_failed_reads:
                        break
                    continue
                failed_reads = 0

                # Mirror the frame so on-screen movement matches the user's hand.
                frame = cv2.flip(frame, 1)
                results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                frame_height, frame_width = frame.shape[:2]

                for hand_landmarks in results.multi_hand_landmarks or ():
                    if len(hand_landmarks.landmark) <= anchor:
                        continue
                    joint = hand_landmarks.landmark[anchor]

                    # Landmark coordinates are normalised to the frame size.
                    x_pixel = int(joint.x * frame_width)
                    y_pixel = int(joint.y * frame_height)

                    # Paste only when the thumbnail lies fully inside the
                    # frame; a negative start would silently wrap the slice
                    # and make the assignment fail on a shape mismatch.
                    fits_horizontally = 0 <= x_pixel and x_pixel + thumb_w <= frame_width
                    fits_vertically = 0 <= y_pixel and y_pixel + thumb_h <= frame_height
                    if fits_horizontally and fits_vertically:
                        frame[y_pixel:y_pixel + thumb_h, x_pixel:x_pixel + thumb_w] = thumb

                cv2.imshow('Hand Tracking', frame)

                key = cv2.waitKey(1)
                if key == 27 or key == ord("q"):
                    break
    finally:
        # Release the camera and windows even if processing raised.
        cap.release()
        cv2.destroyAllWindows()


def select_object(event, x, y, flags, param):
    """Mouse callback that selects the first object whose outline contains a left click.

    Reads the module-level ``boxes`` and ``masks`` lists and, when the click at
    ``(x, y)`` lies inside or on a mask outline, stores that entry's index in
    the module-level ``selected_index``. The selection is left unchanged when
    the click hits no outline or the event is not a left-button press.

    Args:
        event: OpenCV mouse event code.
        x: Click x coordinate in window pixels.
        y: Click y coordinate in window pixels.
        flags: Unused; required by the OpenCV callback signature.
        param: Unused; required by the OpenCV callback signature.
    """
    global selected_index

    if event != cv2.EVENT_LBUTTONDOWN:
        return

    # zip keeps the index aligned with both lists, as the display code
    # indexes boxes and masks with the same selected_index.
    for idx, (_, mask) in enumerate(zip(boxes, masks)):
        outline = np.array(mask.xy, dtype=np.int32).reshape((-1, 1, 2))
        if cv2.pointPolygonTest(outline, (x, y), False) >= 0:
            selected_index = idx
            break

# Index into boxes/masks of the clicked object; -1 means nothing is selected.
selected_index = -1
# Crop of the selected object; stays None until a click yields a non-empty crop.
extracted_region = None

model = YOLO("yolov8m-seg.pt")
src = "test.jpeg"
img = cv2.imread(src)
# cv2.imread reports an unreadable file by returning None instead of raising.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {src}")
img = cv2.resize(img, (750, 750))
results = model.predict(source=img)

boxes = []
masks = []

for result in results:
    # result.masks is None when nothing was segmented in the image;
    # extending a list with None would raise TypeError.
    if result.masks is None:
        continue
    boxes.extend(result.boxes.cpu().numpy())
    masks.extend(result.masks)

cv2.namedWindow("image")
cv2.setMouseCallback("image", select_object)

display_img = img.copy()
# Selection the current display_img / extracted_region were rendered for.
# The image is static, so the overlay and the second inference are only
# recomputed when the selection changes.
rendered_index = -1

while True:
    if selected_index != -1 and selected_index != rendered_index:
        display_img = img.copy()
        x1, y1, x2, y2 = (int(v) for v in boxes[selected_index].xyxy[0])
        seg = np.array(masks[selected_index].xy, dtype=np.int32).reshape((-1, 1, 2))
        cv2.rectangle(display_img, (x1, y1), (x2, y2), (0, 255, 0), 1)
        cv2.polylines(display_img, [seg], True, (0, 0, 255), 1)

        # Black out everything outside the selected outline.
        object_mask = np.zeros_like(img)
        cv2.fillPoly(object_mask, [seg], (255, 255, 255))
        object_img = cv2.bitwise_and(img, object_mask)

        # Re-run detection on the isolated object and outline what is found.
        # Prediction finishes before any rectangle is drawn on object_img.
        for result in model.predict(source=object_img):
            for found in result.boxes.cpu().numpy():
                fx1, fy1, fx2, fy2 = (int(v) for v in found.xyxy[0])
                cv2.rectangle(object_img, (fx1, fy1), (fx2, fy2), (0, 255, 0), 2)

        # Crop with the SELECTED object's box (not a re-detected one), clamped
        # to the image; skip degenerate boxes because an empty crop can be
        # neither displayed nor resized later.
        height, width = img.shape[:2]
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 > x1 and y2 > y1:
            extracted_region = object_img[y1:y2, x1:x2]

        rendered_index = selected_index

    if extracted_region is not None:
        cv2.imshow("Extracted Region", extracted_region)

    cv2.imshow("image", display_img)
    key = cv2.waitKey(1)

    if key == 13:  # Enter key confirms the selection
        break

    # If window is closed, reopen it and reattach the mouse callback
    if cv2.getWindowProperty("image", cv2.WND_PROP_VISIBLE) < 1:
        cv2.namedWindow("image")
        cv2.setMouseCallback("image", select_object)

cv2.destroyAllWindows()
if extracted_region is None:
    print("No object was selected; nothing to overlay on the hand.")
else:
    image_on_hand(extracted_region)


                                                                    THANK YOU