# In [None]:
import cv2
import mediapipe as mp
import numpy as np


# MediaPipe hand tracker: at most two hands, with 0.7 detection and
# tracking confidence thresholds.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Capture device 0. Fail early with a clear message: an unopened capture
# reports a 0x0 frame size, which would otherwise only surface later as an
# opaque cv2.resize error.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    hands.close()
    raise RuntimeError("Could not open camera device 0")

image_path = 'image.png'
drag_image = cv2.imread(image_path)
if drag_image is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    cap.release()
    hands.close()
    raise FileNotFoundError(f"Could not load image: {image_path!r}")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Scale the draggable image to 2/5 of the reported frame size in each dimension.
resize_factor = 2 / 5
image_width = int(frame_width * resize_factor)
image_height = int(frame_height * resize_factor)
drag_image = cv2.resize(drag_image, (image_width, image_height))

# Top-left corner [x, y] of the draggable image within the frame.
image_pos = [0, 0]

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two points.

    Both points are coordinate sequences of the same length (for example
    ``(x, y)`` tuples).
    """
    start = np.array(point1)
    end = np.array(point2)
    offset = start - end
    return np.linalg.norm(offset)

# Main loop: track hands in the mirrored camera feed and move the overlay
# image to the index fingertip while thumb and index finger are pinched.

# Blend weight of the layer that contains the draggable image; the remaining
# 1 - alpha comes from the plain camera frame, so the image is translucent.
alpha = 0.7
# Thumb-to-index fingertip distance (in pixels) below which a pinch is assumed.
pinch_threshold = 20

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame but keep it in BGR: the landmark drawing, the
        # overlay of the imread-loaded image and cv2.imshow all work on BGR.
        frame = cv2.flip(frame, 1)

        # MediaPipe expects RGB input, so convert a separate copy exactly
        # once. (Converting twice, as before, swapped the channels back.)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
                # Landmark coordinates are normalized; scale to pixels.
                index_finger_pos = (int(index_finger_tip.x * w), int(index_finger_tip.y * h))
                thumb_pos = (int(thumb_tip.x * w), int(thumb_tip.y * h))

                length = calculate_distance(index_finger_pos, thumb_pos)

                if length < pinch_threshold:
                    # Center the image on the index fingertip.
                    image_pos = [index_finger_pos[0] - image_width // 2,
                                 index_finger_pos[1] - image_height // 2]

        # Keep the image fully inside the frame so the slice assignment below
        # always matches the shape of drag_image.
        image_pos[0] = max(0, min(image_pos[0], frame.shape[1] - image_width))
        image_pos[1] = max(0, min(image_pos[1], frame.shape[0] - image_height))

        # Paste the image onto a copy, then blend the copy with the original
        # frame so the camera feed shows through the image.
        overlay = frame.copy()
        overlay[image_pos[1]:image_pos[1] + image_height, image_pos[0]:image_pos[0] + image_width] = drag_image
        frame = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)

        cv2.imshow('Drag and Drop', frame)

        # Esc (key code 27) quits.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Release the camera, tracker and windows even if an iteration raised.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()


