In [1]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torch

class KeypointClassifier(nn.Module):
    """Fully connected classifier: 21*3 inputs -> 512 -> 256 -> 128 -> 64 -> 2 outputs.

    ReLU is applied after every layer except the last one, whose raw outputs
    are returned as-is.
    """

    def __init__(self):
        super().__init__()
        # Layer names fc1..fc5 are left untouched (presumably previously saved
        # models refer to them -- confirm before renaming).
        in_features = 21 * 3
        self.fc1 = nn.Linear(in_features, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 2)

    def forward(self, x):
        """Flatten ``x`` into rows of 63 values and return the two outputs per row."""
        activations = x.view(-1, self.fc1.in_features)
        for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            activations = F.relu(hidden_layer(activations))
        return self.fc5(activations)

In [2]:
import pyautogui

# pyautogui sleeps for PAUSE seconds after each of its calls (the library
# default is 0.1 s). Lowered here, presumably so the three calls made by
# zoom() do not stall the per-frame capture loop -- confirm.
pyautogui.PAUSE = 0.01

def zoom(amount):
    """Send a Ctrl + mouse-wheel gesture whose size is proportional to ``amount``.

    Args:
        amount: Signed number. It is multiplied by 2000, truncated to an
            integer and negated to obtain the value given to
            ``pyautogui.scroll``, so a positive ``amount`` produces a
            negative scroll value and vice versa.

    Nothing is sent when the resulting scroll value is zero.
    """
    # Compute the scroll value first so a bad ``amount`` (e.g. NaN) fails
    # before the Ctrl key has been pressed.
    clicks = -int(amount * 2000)
    if clicks == 0:
        return

    pyautogui.keyDown('ctrl')
    try:
        pyautogui.scroll(clicks)
    finally:
        # Always release Ctrl, even if scroll() raises; otherwise the key
        # would stay held down for the rest of the session.
        pyautogui.keyUp('ctrl')


In [3]:
import cv2
import mediapipe as mp
import numpy as np
import pickle
import time

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# SECURITY: pickle.load can execute arbitrary code stored in the file, so only
# load model files that come from a trusted source.
# NOTE(review): unpickling presumably requires the KeypointClassifier class to
# be defined in this session first -- confirm how model1401.pkl was produced.
with open('model1401.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# The capture loop moves every input tensor to `device` before calling the
# model, so put the model's parameters on the same device to avoid a device
# mismatch, and switch it to inference mode.
if isinstance(loaded_model, nn.Module):
    loaded_model = loaded_model.to(device)
    loaded_model.eval()

class model():
    """Small wrapper around MediaPipe Hands that remembers the latest detection.

    ``findHands`` stores the detector output in ``self.results``; ``drawHands``
    and ``getKeypoints`` then read that stored result.
    """

    # Latest value returned by ``self.hands.process``. Defaults to None so that
    # drawHands/getKeypoints can be called before findHands without raising.
    results = None

    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands()

    def findHands(self, image):
        """Run the hand detector on ``image`` and keep its output in ``self.results``."""
        self.results = self.hands.process(image)

    def _detectedHands(self):
        """Return the detected hands as a list (empty when nothing was found)."""
        if self.results is None:
            return []
        return self.results.multi_hand_landmarks or []

    def drawHands(self, image):
        """Draw landmarks and connections of every detected hand on ``image``.

        Returns:
            The same ``image`` object that was passed in.
        """
        for hand_landmarks in self._detectedHands():
            mp.solutions.drawing_utils.draw_landmarks(image, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
        return image

    def getKeypoints(self):
        """Collect the landmark coordinates of the latest detection.

        Returns:
            dict with three entries:
              "hands":    array of shape (number_of_hands, 21, 3) holding the
                          x, y, z value of every landmark;
              "centers":  array of shape (number_of_hands, 3), the mean of
                          each hand's 21 landmarks;
              "distance": distance between the first two centers using only
                          their x and y components, or None when fewer than
                          two hands were detected.
        """
        points = [
            [landmark.x, landmark.y, landmark.z]
            for hand_landmarks in self._detectedHands()
            for landmark in hand_landmarks.landmark
        ]

        # Landmarks arrive hand after hand, so consecutive groups of 21 rows
        # belong to the same hand. With no points this yields shape (0, 21, 3).
        hands = np.array(points, dtype=float).reshape(-1, 21, 3)
        centers = hands.mean(axis=1)

        if centers.shape[0] >= 2:
            dist = np.linalg.norm(centers[0, :2] - centers[1, :2])
        else:
            dist = None

        return {
            "hands": hands,
            "centers": centers,
            "distance": dist
        }
   

In [4]:
     
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920//4)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080//4)
# Nothing in this cell appends to `dataset`; the name is kept because a later
# cell passes it to np.save.
dataset = []

h = model()
prev = time.time()
# Distances between the two hands, recorded only while every visible hand is
# classified as a fist. Cleared as soon as that stops being true.
history = []

try:
    while True:
        # Read a frame; stop cleanly instead of crashing in cvtColor when the
        # camera delivers nothing.
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Could not read a frame from the camera; stopping.")
            break

        # Convert to RGB and mirror the frame
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = cv2.flip(image, 1)

        # Find keypoints, draw them and get back useful data
        h.findHands(image)
        image = h.drawHands(image)
        data = h.getKeypoints()

        # Back to BGR for OpenCV drawing/display, at a fixed working size
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = cv2.resize(image, (1280, 720))

        # Mark the center of every detected hand (centers are scaled by the
        # 1280x720 working size to get pixel positions)
        for x, y, z in data["centers"]:
            image = cv2.circle(image, (int(x*1280), int(y*720)), 10, (255, 255, 255), -1)

        # Classify each hand; tuttiPugni stays True only if all hands are fists
        # ("Pugno" = fist, "Mano aperta" = open hand).
        tuttiPugni = True
        n = 0
        for n, hand in enumerate(data["hands"]):
            # Inference only: no gradients are needed.
            with torch.no_grad():
                scores = loaded_model(torch.tensor(hand).view(1, 63).to(device).float())
            pugno = bool(scores[0][0] > 0.5)
            if pugno:
                cv2.putText(image, "Pugno", (20, 50+50*n), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            else:
                tuttiPugni = False
                cv2.putText(image, "Mano aperta", (20, 50+50*n), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        if tuttiPugni and data["distance"] is not None:
            history.append(data["distance"])
            # Only the last few entries are ever read, so drop older ones to
            # keep the list from growing while the fists are held.
            del history[:-4]
            cv2.putText(image, str(data["distance"]), (20, 50+50*(n+1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            if len(history) > 3:
                # Zoom by the change in distance over the last two frames
                zoom(history[-3]-history[-1])
        else:
            history = []
            cv2.putText(image, str(data["distance"]), (20, 50+50*(n+1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Frames per second, guarded against a zero time difference
        now = time.time()
        elapsed = now - prev
        fps = 1/elapsed if elapsed > 0 else 0.0
        cv2.putText(image, str(round(fps, 1)), (20, 50+50*(n+2)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        prev = now

        cv2.imshow('Hand Tracking', cv2.resize(image, (1600//2, 900//2)))

        # Leave the loop when the user presses the 'q' key
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close the window, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()


In [11]:
# `dataset` is created as an empty list by the capture cell and nothing visible
# in this notebook fills it, so saving unconditionally would overwrite
# notfists.npy with an empty array. Only save when there is something to keep.
if len(dataset) > 0:
    np.save("notfists.npy", dataset)
else:
    print("dataset is empty; notfists.npy was not written.")

In [36]:
import pyautogui
import time

# Wait five seconds before doing anything (presumably to leave time to switch
# to the window that should receive the scroll -- confirm).
time.sleep(5)

# Screen dimensions as reported by pyautogui
screen_width, screen_height = pyautogui.size()

# Glide the pointer to the middle of the screen over a quarter of a second
center_x = screen_width / 2
center_y = screen_height / 2
pyautogui.moveTo(x=center_x, y=center_y, duration=0.25)

# Send a single scroll event at the screen center; change scroll_amount to
# scroll more or less.
# NOTE(review): pyautogui documents positive values as scrolling up, while the
# earlier note here said "down" -- confirm the intended direction.
scroll_amount = 1250
pyautogui.scroll(clicks=scroll_amount, x=center_x, y=center_y)


In [71]:
import pyautogui
import time
# Short delay before acting (presumably to allow switching to the target
# window -- confirm).
time.sleep(2)

# Get the size of the screen and its center point
screen_width, screen_height = pyautogui.size()
center_x, center_y = screen_width / 2, screen_height / 2

# Measure how long one pointer move takes; perf_counter is a monotonic,
# high-resolution clock, which suits short intervals better than time.time.
t0 = time.perf_counter()
pyautogui.moveTo(center_x, center_y)
print(time.perf_counter()-t0)

# Ctrl + scroll at the screen center
pinch_amount = 1000  # adjust this value to control the zoom amount
pyautogui.keyDown('ctrl')
try:
    pyautogui.scroll(pinch_amount, x=center_x, y=center_y)
finally:
    # Release Ctrl even if scroll() raises, so the key is never left held down
    pyautogui.keyUp('ctrl')


0.10937190055847168


In [2]:
import math
import pyautogui

def pinch_to_zoom(amount):
    """Send Ctrl + scroll at the center of the screen.

    The pointer is first moved to the screen center, but only when it is
    currently more than 50 pixels away from it.

    Args:
        amount: Scroll value passed to ``pyautogui.scroll`` unchanged.
    """
    # Center of the screen
    screen_width, screen_height = pyautogui.size()
    center_x, center_y = screen_width / 2, screen_height / 2

    # Skip the pointer move when the pointer is already close to the center
    current_x, current_y = pyautogui.position()
    if math.hypot(current_x - center_x, current_y - center_y) > 50:
        pyautogui.moveTo(center_x, center_y)

    pyautogui.keyDown('ctrl')
    try:
        pyautogui.scroll(amount, x=center_x, y=center_y)
    finally:
        # Release Ctrl even if scroll() raises, so the key is never left held down
        pyautogui.keyUp('ctrl')


In [64]:
import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Built once: the landmark drawing style does not change between frames.
landmark_style = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4)

# Landmarks whose y-coordinates are compared, in this order, by the fist check below.
FIST_CHAIN = (
    mp_hands.HandLandmark.WRIST,
    mp_hands.HandLandmark.THUMB_CMC,
    mp_hands.HandLandmark.INDEX_FINGER_MCP,
    mp_hands.HandLandmark.MIDDLE_FINGER_MCP,
    mp_hands.HandLandmark.RING_FINGER_MCP,
    mp_hands.HandLandmark.PINKY_MCP,
)

# Give up after this many consecutive failed reads, so a missing or
# disconnected camera cannot turn the loop into an endless busy-wait.
MAX_EMPTY_FRAMES = 100

# OpenCV video capture
cap = cv2.VideoCapture(0)
empty_frames = 0

try:
    # The with-statement closes the MediaPipe hands solution when the loop ends.
    with mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while True:
            success, image = cap.read()

            if not success:
                print("Ignoring empty camera frame.")
                empty_frames += 1
                if empty_frames >= MAX_EMPTY_FRAMES:
                    print("Camera keeps returning empty frames; stopping.")
                    break
                continue
            empty_frames = 0

            # Flip the image horizontally for a later selfie-view display
            image = cv2.flip(image, 1)

            # Convert the BGR image to RGB before processing
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Process the image with Mediapipe hands
            results = hands.process(image_rgb)

            if results.multi_hand_landmarks:
                # Get the landmark coordinates for the first hand
                landmarks = results.multi_hand_landmarks[0].landmark

                # Rule-based fist check: the y-coordinates must be strictly
                # increasing along FIST_CHAIN (wrist, thumb CMC, then the four MCPs).
                # NOTE(review): whether this ordering really indicates a fist
                # is not established by this code -- verify on real data.
                ys = [landmarks[index].y for index in FIST_CHAIN]
                is_fist = all(upper < lower for upper, lower in zip(ys, ys[1:]))
                mp_drawing.draw_landmarks(image, results.multi_hand_landmarks[0], mp_hands.HAND_CONNECTIONS, landmark_style)

                if is_fist:
                    # Print the message "Fist detected" on the image
                    cv2.putText(image, "Fist detected", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Show the image
            cv2.imshow("Hand tracking", image)

            # Press "q" to exit the loop
            if cv2.waitKey(5) & 0xFF == ord("q"):
                break
finally:
    # Release the resources, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
