In [1]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import Sequential, load_model
from sklearn.metrics.pairwise import cosine_similarity
import pickle
from collections import Counter

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Create an intermediate model excluding the last layer
intermediate_model = Sequential()
for layer in model.layers[:-1]:
    intermediate_model.add(layer)

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            msg = actions[np.argmax(res)] if np.max(res) >= 0.60 else "None"
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [3]:
#remove intermediate layer


import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
from sklearn.metrics.pairwise import cosine_similarity
import pickle
from collections import Counter

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            msg = actions[np.argmax(res)] if np.max(res) >= 0.60 else "None"
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


click occurred
click occurred
Swipe Up
Enter
Swipe Up
click occurred
click occurred
click occurred
Swipe Up
None
Enter
Enter
Swipe Left
Swipe Right
Swipe Right
Enter
None
Enter
Backspace
Swipe Right
Enter
Swipe Right
Ctrl_A
Swipe Down
Swipe Down
Swipe Down
Swipe Down
Swipe Up
Swipe Down
Enter
Enter
Backspace
Swipe Down
Tab
Tab
Tab


In [1]:
#logic : smooth


import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Swipe Up
Swipe Down
Swipe Left
Swipe Right
Swipe Right
Enter
Enter


In [6]:
!pip install pyttsx3

Collecting pyttsx3
  Obtaining dependency information for pyttsx3 from https://files.pythonhosted.org/packages/33/9a/de4781245f5ad966646fd276259ef7cfd400ba3cf7d5db7c0d5aab310c20/pyttsx3-2.90-py3-none-any.whl.metadata
  Downloading pyttsx3-2.90-py3-none-any.whl.metadata (3.6 kB)
Collecting comtypes (from pyttsx3)
  Obtaining dependency information for comtypes from https://files.pythonhosted.org/packages/f5/c0/14dae7492649d7b41cc4f1dd392dd7bb2bc46a07f099f1d2cf4d8dff03e5/comtypes-1.4.4-py3-none-any.whl.metadata
  Downloading comtypes-1.4.4-py3-none-any.whl.metadata (6.4 kB)
Collecting pypiwin32 (from pyttsx3)
  Obtaining dependency information for pypiwin32 from https://files.pythonhosted.org/packages/d0/1b/2f292bbd742e369a100c91faa0483172cd91a1a422a6692055ac920946c5/pypiwin32-223-py3-none-any.whl.metadata
  Downloading pypiwin32-223-py3-none-any.whl.metadata (236 bytes)
Downloading pyttsx3-2.90-py3-none-any.whl (39 kB)
Downloading comtypes-1.4.4-py3-none-any.whl (210 kB)
   ------------

In [7]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
            engine.say(msg)  # Add this line for voice output
            engine.runAndWait()  # Add this line to wait until the speech is finished
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        engine.say(msg)  # Add this line for voice output
                        engine.runAndWait()  # Add this line to wait until the speech is finished
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))
    engine.say(stat_msgs.get(msg, "None"))  # Add this line for voice output
    engine.runAndWait()  # Add this line to wait until the speech is finished

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Like
Dislike
Swipe Up
Swipe Down
Like
Dislike
Swipe Up
Swipe Down
Swipe Right
Swipe Right


In [5]:
    cap.release()
    cv2.destroyAllWindows()

In [3]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    if action == "Swipe Up":
        pyautogui.hotkey('volumeup')  # volume up
    elif action == "Swipe Down":
        pyautogui.hotkey('volumedown')  # volume down
    elif action == "Swipe Right":
        pyautogui.hotkey('p')  # for previous song
    elif action == "Swipe Left":
        pyautogui.hotkey('n')  # for next song
    elif action == "Enter":
        pyautogui.press('space')  # pause/play
    elif action == "Backspace":
        pyautogui.press('backspace')
    elif action == "Tab":
        pyautogui.press('tab')
    elif action == "Ctrl_A":
        pyautogui.hotkey('ctrl', 'a')  # select all

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Enter
Swipe Up


In [3]:
#pause logic


import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Swipe Up
Swipe Down
Like
Enter
Swipe Down
Swipe Up
Swipe Up
Enter
Enter
Swipe Down
Swipe Right
Swipe Left
Swipe Right
Swipe Right
Swipe Left
Swipe Left
Swipe Right
Swipe Right
Swipe Right
Swipe Right
Swipe Right
Swipe Right
Swipe Left
Swipe Right
Swipe Right
Swipe Right
Swipe Left
Swipe Right
Swipe Right
Swipe Right
Enter
Swipe Down
Swipe Down
Swipe Down
Swipe Down
Enter
Enter
Enter
Enter
Enter
Swipe Up
Swipe Down
Enter
Swipe Up


In [1]:
#impliment Like -- Test1


import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Coordinates for YouTube Like button (to be adjusted as per actual position on your screen)
like_button_coords = (100, 200)  # Example coordinates; replace with actual coordinates

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.75:  # Increased confidence threshold
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(f"Prediction: {actions[prediction]}, Confidence: {confidence}, Smoothed Prediction: {msg}")
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    if msg == "like":
        print("Like detected. Clicking the Like button.")
        pyautogui.moveTo(*like_button_coords)
        pyautogui.click()
    else:
        print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Prediction: Swipe Up, Confidence: 0.9932403564453125, Smoothed Prediction: Swipe Up
Prediction: Swipe Up, Confidence: 0.9146641492843628, Smoothed Prediction: Swipe Up
Prediction: Swipe Right, Confidence: 0.49095842242240906, Smoothed Prediction: Swipe Right
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Prediction: Swipe Right, Confidence: 0.9997652173042297, Smoothed Prediction: Swipe Right
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Prediction: Swipe Left, Confidence: 0.46716180443763733, Smoothed Prediction: Swipe Left
Like detected. Clicking the Like button.
Like detected. Clicking the Like button.
Like detected. Clicking the L

In [None]:
### Hotkey Impliment ##

In [32]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5'        
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            return

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    print(stat_msgs.get(msg, "None"))

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):  
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    if action == "Swipe Up":
#         pyautogui.hotkey('volumeup')  # volume up
#         pyautogui.scroll(10)  # screen scroll upward
    elif action == "Swipe Down":
        pyautogui.hotkey('volumedown')  # volume down
#         pyautogui.scroll(-10)  # screen scroll downward
    elif action == "Swipe Right":
#         pyautogui.hotkey('p')  # for previous song
#         pyautogui.hotkey('right') 
    elif action == "Swipe Left": 
           pyautogui.hotkey('left')   
    elif action == "Enter":
#         pyautogui.press('space')  # pause/play
    elif action == "Backspace":
#         pyautogui.press('backspace')
    elif action == "Tab":
#         pyautogui.press('tab')
    elif action == "Ctrl_A":
#         pyautogui.hotkey('ctrl', 'a')  # select all

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Swipe Up
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
Swipe Down
Swipe Left
Swipe Left
Swipe Right
click occurred
Enter
Ctrl_A
Ctrl_A
Enter
Backspace
Enter
Enter
Tab
Enter
Enter
Tab
Tab
Swipe Right
Like
Like
Like
Swipe Right
Dislike
Dislike
Ctrl_A
Love
Request
Request
Ctrl_A


In [9]:
    cap.release()
    cv2.destroyAllWindows()

In [36]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time  # Import time for adding delay

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return 0, msg

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    action = stat_msgs.get(msg, "None")
    if action != "None":
        print(action)
        engine.say(action)
        engine.runAndWait()

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):  
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    print(f"Executing action: {action}")  # Add this line for debugging
    if action == "Swipe Up":
#         pyautogui.scroll(50)
        engine.say("Page Scroll Up")
    elif action == "Swipe Down":
#         pyautogui.scroll(-50)
        engine.say("Page Scroll Down")
    elif action == "Swipe Right":
#         pyautogui.hotkey('j')
        engine.say("Next Post")
    elif action == "Swipe Left":
        # Uncomment and add functionality as needed
#         pyautogui.hotkey('k')
        engine.say("Previous Post")
#     elif action == "Enter":
        # pyautogui.press('space')  # Example: pause/play
#         engine.say("Press Enter")
    elif action == "Backspace":
#         pyautogui.hotkey('esc')
        engine.say("go to back")
#     elif action == "Tab":
#         pyautogui.press('tab')
#         engine.say("Select Button")
#     elif action == "Ctrl_A":
#         pyautogui.hotkey('ctrl', 'a')
#         engine.say("Select all")


# # Implement PyAutoGUI actions based on gestures based on YT & VLC
# def execute_action(action):
#     print(f"Executing action: {action}")  # Add this line for debugging
#     if action == "Swipe Up":
#         pyautogui.scroll(50)
#         engine.say("Page Scroll Up")
#     elif action == "Swipe Down":
#         pyautogui.scroll(-50)
#         engine.say("Page Scroll Down")
# #     elif action == "Swipe Right":
# #         pyautogui.hotkey('n')
# #         engine.say("Next Item")
# #     elif action == "Swipe Left":
# #         # Uncomment and add functionality as needed
# #         # pyautogui.hotkey('p')
# #         engine.say("Previous Item")
# #     elif action == "Enter":
#         # pyautogui.press('space')  # Example: pause/play
# #         engine.say("Press Enter")
#     elif action == "Backspace":
#         pyautogui.hotkey('esc')
#         engine.say("go to back")
# #     elif action == "Tab":
# #         pyautogui.press('tab')
# #         engine.say("Select Button")
# #     elif action == "Ctrl_A":
# #         pyautogui.hotkey('ctrl', 'a')
# #         engine.say("Select all")


# # Implement PyAutoGUI actions based on gestures
# def execute_action(action):
#     print(f"Executing action: {action}")  # Add this line for debugging
#     if action == "Swipe Up":
# #         pyautogui.scroll(50)
#         engine.say("Swipe Up")
#     elif action == "Swipe Down":
# #         pyautogui.scroll(-50)
#         engine.say("Swipe Down")
#     elif action == "Swipe Right":
# #         pyautogui.hotkey('n')
#         engine.say("Swipe Righ")
#     elif action == "Swipe Left":
# #         # Uncomment and add functionality as needed
# #         # pyautogui.hotkey('p')
#         engine.say("Swipe Left")
#     elif action == "Enter":
#         # pyautogui.press('space')  # Example: pause/play
#         engine.say("Enter")
#     elif action == "Backspace":
# #         pyautogui.hotkey('esc')
#         engine.say("Backspace")
#     elif action == "Tab":
# #         pyautogui.press('tab')
#         engine.say("Tab")
#     elif action == "Ctrl_A":
# #         pyautogui.hotkey('ctrl', 'a')
#         engine.say("Select all")
    
    engine.runAndWait()
    # time.sleep(0.8)  # Optionally add a delay after each gesture
    
# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Swipe Down
Executing action: Swipe Down
click occurred
click occurred
click occurred
Swipe Down
Executing action: Swipe Down
Swipe Down
Executing action: Swipe Down
Swipe Up
Executing action: Swipe Up
Swipe Down
Executing action: Swipe Down
Swipe Left
Executing action: Swipe Left
Swipe Right
Executing action: Swipe Right
Enter
Executing action: Enter
Enter
Executing action: Enter
Enter
Executing action: Enter
Executing action: 
Swipe Down
Executing action: Swipe Down
Enter
Executing action: Enter
Enter
Executing action: Enter
Enter
Executing action: Enter
Enter
Executing action: Enter
Enter
Executing action: Enter
Enter
Executing action: Enter
Backspace
Executing action: Backspace
Swipe Right
Executing action: Swipe Right
Swipe Up
Executing action: Swipe Up
Tab
Executing action: Tab
Tab
Executing action: Tab
Tab
Executing action: Tab
Tab
Executing action: Tab
Tab
Executing action: Tab
Tab
Executing action: Tab
Tab
Executing action: Tab
Tab
Executing action: Tab
Enter
Executing action: 

In [None]:
###########    START     ###################

In [None]:
## Facebook ##

In [42]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time  # Import time for adding delay

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return 0, msg

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    action = stat_msgs.get(msg, "None")
    if action != "None":
        print(action)
        engine.say(action)
        engine.runAndWait()

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):  
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    print(f"Executing action: {action}")  # Add this line for debugging
    if action == "Swipe Up":
        pyautogui.scroll(50)
        engine.say("Page Scroll Up")
    elif action == "Swipe Down":
        pyautogui.scroll(-50)
        engine.say("Page Scroll Down")
    elif action == "Swipe Right":
        pyautogui.hotkey('j')
        engine.say("Next Post")
    elif action == "Swipe Left":
        pyautogui.hotkey('k')
        engine.say("Previous Post")
    elif action == "like":
        pyautogui.hotkey('l')
        engine.say("Like The Post") 
    elif action == "dislike":
        pyautogui.hotkey('l')
        engine.say("DisLike The Post")
#     elif action == "Enter":
        # pyautogui.press('space')  # Example: pause/play
#         engine.say("Press Enter")
    elif action == "Backspace":
        pyautogui.hotkey('esc')
        engine.say("go to back")
#     elif action == "Tab":
#         pyautogui.press('tab')
#         engine.say("Select Button")
#     elif action == "Ctrl_A":
#         pyautogui.hotkey('ctrl', 'a')
#         engine.say("Select all")


   
    engine.runAndWait()
    # time.sleep(0.8)  # Optionally add a delay after each gesture
    
# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
Ctrl_A
Executing action: Ctrl_A
Swipe Down
Executing action: Swipe Down
Enter
Executing action: Enter
Enter
Executing action: Enter
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
Swipe Up
Executing action: Swipe Up
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Like
Executing action: like
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
Executing action: 
click occurred
click occurred
Ctrl_A
Executing action: Ctrl_A
click occurred
click occurred
click occurred
click occurred
Enter
Executing action: Enter
Backspace
Executing action: Backspace
Executing action: 
Executing action: 


In [None]:
##gmail##

In [41]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time  # Import time for adding delay

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Set text-to-speech voice to 'hi-IN'
voices = engine.getProperty('voices')
for voice in voices:
    if 'hi-IN' in voice.id:
        engine.setProperty('voice', voice.id)
        break

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return 0, msg

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    action = stat_msgs.get(msg, "None")
    if action != "None":
        print(action)
        engine.say(action)
        engine.runAndWait()

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):  
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    print(f"Executing action: {action}")  # Add this line for debugging
    if action == "Swipe Up":
        pyautogui.hotkey('k')
        engine.say("Previous Message")
    elif action == "Swipe Down":
        pyautogui.hotkey('j')
        engine.say("Next Message")
    elif action == "Swipe Right":
        pyautogui.hotkey('g', 'n')
        engine.say("Go to Next Page")
    elif action == "Swipe Left":
        pyautogui.hotkey('g', 'p')
        engine.say("Go to Previous Page")
    elif action == "Enter":
        pyautogui.press('enter')  # Example: pause/play
        engine.say("Open Message")
    elif action == "Backspace":
        pyautogui.hotkey('g', 'i')
        engine.say("Go back")
#     elif action == "Tab":
#         pyautogui.press('tab')
#         engine.say("Select Button")
#     elif action == "Ctrl_A":
#         pyautogui.hotkey('ctrl', 'a')
#         engine.say("Select all")


   
    engine.runAndWait()
    # time.sleep(0.8)  # Optionally add a delay after each gesture
    
# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
click occurred
Executing action: 
Executing action: 
Swipe Down
Executing action: Swipe Down
Tab
Executing action: Tab
Love
Executing action: love
Tab
Executing action: Tab
Tab
Executing action: Tab
Love
Executing action: love
Enter
Executing action: Enter
click occurred
click occurred
click occurred
click occurred
Enter
Executing action: Enter
Executing action: 
Swipe Left
Executing action: Swipe Left
Backspace
Executing action: Backspace
Swipe Up
Executing action: Swipe Up
Executing action: 
Swipe Down
Executing action: Swipe Down
Swipe Down
Executing action: Swipe Down
Enter
Executing action: Enter
Backspace
Executing action: Backspace
click occurred
click occurred
Enter
Executing action: Enter
click occurred
Swipe Left
Executing action: Swipe Left
Swipe Right
Executing action: Swipe Right
Swipe Left
Executing action: Swipe Left


In [None]:
##youtube

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Set text-to-speech voice to 'hi-IN'
voices = engine.getProperty('voices')
for voice in voices:
    if 'hi-IN' in voice.id:
        engine.setProperty('voice', voice.id)
        break

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return 0, msg

def find_button_location(button_location):
    if button_location is not None:
        button_center = pyautogui.center(button_location)
        pyautogui.moveTo(button_center)
        pyautogui.click()
    else:
        print("Button not found on the screen.")

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    action = stat_msgs.get(msg, "None")
    if action != "None":
        print(action)
        engine.say(action)
        engine.runAndWait()

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    print(f"Executing action: {action}")
    if action == "Swipe Up":
        pyautogui.hotkey('volumeup')
        engine.say("Volume Increase")
    elif action == "Swipe Down":
        pyautogui.hotkey('volumedown')
        engine.say("Volume Decrease")
    elif action == "Swipe Right":
#         pyautogui.hotkey('l')
        pyautogui.hotkey('right')
        engine.say("5 seconds forward")
    elif action == "Swipe Left":
#         pyautogui.hotkey('j')
        pyautogui.hotkey('left')
        engine.say("5 seconds backward")
    elif action == "Enter":
        pyautogui.press('space')
        engine.say("Play or Pause")
    elif action == "Backspace":
        # Uncomment and adjust as necessary
        # pyautogui.hotkey('g', 'i')
        # engine.say("Go back")
        pass
    elif action == 'like':
        print('like')
        try:
            like_location = pyautogui.locateOnScreen('like_1.png', confidence=0.8)
            if like_location:
                find_button_location(like_location)
            else:
                like_location = pyautogui.locateOnScreen('like_button1.png', confidence=0.8)
                if like_location:
                    find_button_location(like_location)
                else:
                    like_location = pyautogui.locateOnScreen('Like_dark.png', confidence=0.8)
                    if like_location:
                        find_button_location(like_location)
                    else:
                        print("Like button not found on the screen.")
        except pyautogui.ImageNotFoundException:
            print("Button not found on the screen.")
    elif action == 'dislike':
        print('dislike')
        try:
            dislike_location = pyautogui.locateOnScreen('dis_like.png', confidence=0.8)
            if dislike_location:
                find_button_location(dislike_location)
            else:
                dislike_location = pyautogui.locateOnScreen('dis_like2.png', confidence=0.8)
                if dislike_location:
                    find_button_location(dislike_location)
                else:
                    dislike_location = pyautogui.locateOnScreen('Dis_like_dark.png', confidence=0.8)
                    if dislike_location:
                        find_button_location(dislike_location)
                    else:
                        print("Dislike button not found on the screen.")
        except pyautogui.ImageNotFoundException:
            print("Button not found on the screen.")
    engine.runAndWait()

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)

        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Enter
Executing action: Enter
Swipe Up
Executing action: Swipe Up
Executing action: 
Executing action: 
Swipe Left
Executing action: Swipe Left
Executing action: 
Swipe Right
Executing action: Swipe Right
Swipe Down
Executing action: Swipe Down
Swipe Up
Executing action: Swipe Up
Enter
Executing action: Enter
Like
Executing action: like
like
Swipe Up
Executing action: Swipe Up
Dislike
Executing action: dislike
dislike
Swipe Up
Executing action: Swipe Up
Executing action: 
Executing action: 
Swipe Down
Executing action: Swipe Down
Executing action: 


In [None]:
###########END###################

In [6]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
from gtts import gTTS
import pygame
import time
import os

# Disable PyAutoGUI failsafe
pyautogui.FAILSAFE = False

# Initialize Pygame mixer
pygame.mixer.init()

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Helper functions
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results, clr):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

def extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])

def s_extract_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        if i < 63:
            k = 0
        else:
            k = 63
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    action_msgs = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    return action_msgs.get(actions[k], "None")

async def fun1(initial_sequence, flag):
    sequence = initial_sequence if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    return math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)

def pointer(dis, msg):
    first_time = 1
    buffer = 5 if dis < 0 else -2
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                val, msg = pointer(-1, msg)
                if val == 1:
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return 0, msg

def map_stat(msg):
    stat_msgs = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    action = stat_msgs.get(msg, "None")
    if action != "None":
        print(action)
        tts = gTTS(action, lang='en', tld='co.in')
        tts.save("temp.mp3")
        pygame.mixer.music.load("temp.mp3")
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():
            time.sleep(0.1)
        os.remove("temp.mp3")

def static():
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    frequency = Counter(res)
    for element, count in frequency.items():
        if count == total:
            msg = element
            break
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    print(f"Executing action: {action}")
    tts = gTTS("Unknown Action", lang='en', tld='co.in')
    if action == "Swipe Up":
        pyautogui.hotkey('k')
        tts = gTTS("Previous Message", lang='en', tld='co.in')
    elif action == "Swipe Down":
        pyautogui.hotkey('j')
        tts = gTTS("Next Message", lang='en', tld='co.in')
    elif action == "Swipe Right":
        pyautogui.hotkey('g', 'n')
        tts = gTTS("Go to Next Page", lang='en', tld='co.in')
    elif action == "Swipe Left":
        pyautogui.hotkey('g', 'p')
        tts = gTTS("Go to Previous Page", lang='en', tld='co.in')
    elif action == "Enter":
        pyautogui.press('enter')
        tts = gTTS("Open Message", lang='en', tld='co.in')
    elif action == "Backspace":
        pyautogui.hotkey('g', 'i')
        tts = gTTS("Go back", lang='en', tld='co.in')

    tts.save("temp.mp3")
    pygame.mixer.music.load("temp.mp3")
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        time.sleep(0.1)
    time.sleep(1)  # Small delay to ensure file is not being used
    os.remove("temp.mp3")

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Swipe Up
Executing action: Swipe Up


PermissionError: [Errno 13] Permission denied: 'temp.mp3'

In [17]:
    cap.release()
    cv2.destroyAllWindows()

In [None]:
#######Test COde

In [2]:
import pyautogui
import time

# Add a delay to allow the user to switch to the YouTube video
print("You have 5 seconds to switch to the YouTube video window...")
time.sleep(5)

# Simulate pressing the 'L' key
pyautogui.hotkey('l')
print("The 'L' key was pressed.")

You have 5 seconds to switch to the YouTube video window...
The 'L' key was pressed.


In [4]:
!pip install keyboard

Collecting keyboard
  Obtaining dependency information for keyboard from https://files.pythonhosted.org/packages/55/88/287159903c5b3fc6d47b651c7ab65a54dcf9c9916de546188a7f62870d6d/keyboard-0.13.5-py3-none-any.whl.metadata
  Downloading keyboard-0.13.5-py3-none-any.whl.metadata (4.0 kB)
Downloading keyboard-0.13.5-py3-none-any.whl (58 kB)
   ---------------------------------------- 0.0/58.1 kB ? eta -:--:--
   --------------------- ------------------ 30.7/58.1 kB 1.4 MB/s eta 0:00:01
   ---------------------------------------- 58.1/58.1 kB 613.6 kB/s eta 0:00:00
Installing collected packages: keyboard
Successfully installed keyboard-0.13.5


In [None]:
import pyautogui
import keyboard
import time

def like_video():
    # Locate the 'Like' button on the screen using image recognition
    try:
        like_button_location = pyautogui.locateCenterOnScreen('like_button.png', confidence=0.5)
        if like_button_location:
            pyautogui.click(like_button_location)
            print("Clicked the 'Like' button")
        else:
            print("Could not find the 'Like' button")
    except Exception as e:
        print(f"Error: {e}")

# Keyboard event listener
keyboard.add_hotkey('a', like_video)

# Keep the script running
try:
    while True:
        time.sleep(0.1)  # Adjust if needed
except KeyboardInterrupt:
    print("Stopping script")

In [4]:
import pyttsx3

engine = pyttsx3.init()
voices = engine.getProperty('voices')

for voice in voices:
    print("Voice:")
    print(" - ID: %s" % voice.id)
    print(" - Name: %s" % voice.name)
    print(" - Languages: %s" % voice.languages)
    print(" - Gender: %s" % voice.gender)
    print(" - Age: %s" % voice.age)
    print("\n")

Voice:
 - ID: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0
 - Name: Microsoft David Desktop - English (United States)
 - Languages: []
 - Gender: None
 - Age: None


Voice:
 - ID: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0
 - Name: Microsoft Zira Desktop - English (United States)
 - Languages: []
 - Gender: None
 - Age: None




In [6]:
import pyttsx3

engine = pyttsx3.init()
voices = engine.getProperty('voices')

for voice in voices:
    print("Voice:")
    print(" - Name: %s" % voice.name)
    print(" - Languages: %s" % voice.languages)
    print("\n")

Voice:
 - Name: Microsoft David Desktop - English (United States)
 - Languages: []


Voice:
 - Name: Microsoft Zira Desktop - English (United States)
 - Languages: []




In [5]:
import pyttsx3

engine = pyttsx3.init()

# Set properties before adding voice
engine.setProperty('rate', 150)    # Speed percent (can go over 100)
engine.setProperty('volume', 0.9)  # Volume 0-1

# Specify a voice with Indian English accent
engine.setProperty('voice', 'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-IN_KALPANA_11.0')

# Test the voice
engine.say("Hello, how are you?")
engine.runAndWait()

In [14]:
from gtts import gTTS
tts = gTTS('Backspace - moving the right-hand fist from right to left with the thumb pointing towards the left.', lang='en', tld='co.in')
tts.save('hello1.mp3')

In [15]:
# from gtts import gTTS
# from io import BytesIO

# mp3_fp = BytesIO()
# tts = gTTS('Backspace - moving the right-hand fist from right to left with the thumb pointing towards the left.', lang='en',tld='co.in')
# tts.write_to_fp(mp3_fp)

<gtts.tts.gTTS at 0x1c11682e7d0>

In [17]:
!pip install gtts playsound



In [3]:
!pip install pygame

