In [None]:
###########    START     ###################

In [None]:
## Facebook ##

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time  # Import time for adding delay

# Disable PyAutoGUI failsafe
# NOTE(review): with the fail-safe off, PyAutoGUI's documented mouse-to-corner abort
# is disabled; the only exit visible in this cell is pressing 'q' in the OpenCV window.
pyautogui.FAILSAFE = False

# Get screen size
screen_width, screen_height = pyautogui.size()

# Define actions and statuses
# `actions` is indexed by the argmax of the LSTM output; `statuses` is indexed by
# the SVM prediction (see fun1 and static below).
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
# NOTE(review): load_weights re-reads the same file load_model just loaded;
# presumably redundant - confirm before removing.
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
# NOTE(review): pickle.load can execute arbitrary code from the file; only use a
# svm_model.pkl that comes from a trusted source.
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
# NOTE(review): loaded_list is not referenced again in the visible code of this cell;
# allow_pickle=True carries the same trust requirement as pickle.load above.
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Helper functions
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: frame in BGR channel order.
        model: object exposing ``process(rgb_image)``.

    Returns:
        ``(bgr_image, results)`` - the frame converted back to BGR and the
        value returned by ``model.process``.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The buffer is flagged read-only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_landmarks(image, results, clr):
    """Draw the left and right hand landmarks on ``image`` using colour ``clr``."""
    for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    """Draw both hands on ``image`` with a distinct colour pair per hand."""
    # (landmarks, landmark colour, connection colour) - left hand first, then right.
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand_landmarks, point_color, line_color in hand_styles:
        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2))

def extract_keypoints(results):
    """Flatten both hands' landmarks into one 126-value vector.

    Layout is left hand (63 values: x, y, z per landmark) followed by right
    hand (63 values). A hand that was not detected contributes 63 zeros.
    """
    def _flatten_hand(hand):
        if not hand:
            return np.zeros(21 * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    left_values = _flatten_hand(results.left_hand_landmarks)
    right_values = _flatten_hand(results.right_hand_landmarks)
    return np.concatenate([left_values, right_values])

def s_extract_keypoints(results):
    """Build the 126-value feature vector fed to the static-gesture classifier.

    Layout matches ``extract_keypoints`` (left hand 63 values, right hand 63
    values, zeros for a missing hand); each value then has the same-axis
    coordinate of its hand's first landmark subtracted, in place.

    NOTE(review): because the subtraction is in place and the first landmark
    is processed first, it becomes 0 before any other landmark uses it. The
    net effect is only that each hand's first x, y, z are zeroed; all other
    values are unchanged. If coordinates relative to the first landmark were
    intended, the reference must be captured before the loop. Left as-is
    because the pickled classifier was presumably trained on these exact
    features - confirm before changing.
    """
    hand_size = 21 * 3

    def _flatten_hand(hand):
        if not hand:
            return np.zeros(hand_size)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    res = np.concatenate([_flatten_hand(results.left_hand_landmarks),
                          _flatten_hand(results.right_hand_landmarks)])
    for base in (0, hand_size):
        for idx in range(base, base + hand_size):
            # idx % 3 selects the x / y / z slot of this hand's first landmark.
            res[idx] -= res[base + idx % 3]
    return res

async def do_map(k):
    """Return the display label for ``actions[k]``, or "None" when unmapped."""
    label_pairs = (
        ("Swipe Up", "Scrolling up"),
        ("Swipe Down", "Scrolling down"),
        ("Swipe Right", "Scrolling right"),
        ("Swipe Left", "Scrolling left"),
        ("Ctrl_A", "Select All"),
        ("Tab", "Tab"),
        ("Backspace", "Backspace"),
        ("Enter", "Enter"),
    )
    return dict(label_pairs).get(actions[k], "None")

async def fun1(initial_sequence, flag):
    """Collect webcam frames and classify a dynamic gesture with the LSTM model.

    Frames are read from the global ``cap`` and processed with the global
    ``holistic``. When 10 keypoint frames have been gathered they are padded
    with 5 zero frames on each side (20 total) and passed to ``model``.

    Args:
        initial_sequence: keypoint vectors already collected by the caller.
        flag: truthy to start from ``initial_sequence``, falsy to start empty.

    Returns:
        The predicted action label, or "" when more than 5 of the first 10
        frames contained no hand or no prediction was made.
    """
    # Work on a copy so the caller's list is not extended as a side effect.
    sequence = list(initial_sequence) if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera returned no frame; cv2.cvtColor would raise on None.
            break
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            # NOTE(review): prediction_history is created per call and receives at
            # most one entry here, so it never reaches maxlen and the else branch
            # always runs - the 0.60 confidence threshold currently has no effect
            # on the returned label.
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    """Return the Euclidean distance between two points in the x-y plane.

    Both arguments only need ``x`` and ``y`` attributes; any ``z`` is ignored.
    """
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

def pointer(dis, msg):
    """Move the mouse with the right hand while landmarks 4 and 12 stay close together.

    Reads frames from the global ``cap`` and processes them with the global
    ``holistic``. The function runs in one of two modes chosen by ``dis``:

    * ``dis >= 0`` (tracking mode, ``buffer == -2``): follow landmark 8 with
      the mouse while the 4-12 distance stays below 0.10. When the distance
      opens up or the right hand disappears, it calls itself with ``dis = -1``.
    * ``dis < 0`` (click-watch mode, ``buffer == 5``): for up to 5 frames,
      watch for landmark 11 being above landmark 7 (smaller y) and click if
      so; return as soon as the 4-12 distance is below 0.10 again.

    Args:
        dis: current 4-12 landmark distance, or a negative value to select
            click-watch mode.
        msg: label shown as "Last Gesture" in the overlay.

    Returns:
        ``(1, msg)`` when click-watch mode saw the pinch resume or performed a
        click (``msg`` is then "Click"); ``(0, msg)`` otherwise.
    """
    first_time = 1
    # Negative dis -> click-watch mode with a 5-frame budget; otherwise -2 marks tracking mode.
    buffer = 5 if dis < 0 else -2
    # `results` is first assigned inside the loop; the `first_time == 1` short-circuit
    # keeps the condition from reading it before that assignment.
    while dis < 0.10 and (first_time == 1 or (results.right_hand_landmarks and results.left_hand_landmarks is None)):
        first_time = 0
        # NOTE(review): `ret` is not checked; a failed read would pass None on to cv2.
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        if results.right_hand_landmarks:
            l4 = results.right_hand_landmarks.landmark[4]
            l12 = results.right_hand_landmarks.landmark[12]
            draw_landmarks(image, results, (0, 0, 255))
            dis = eucal(l4, l12)
            if dis < 0.10:
                l1 = results.right_hand_landmarks.landmark[8]
                # x is mirrored (1 - l1.x); both axes are scaled by the screen size plus 10.
                ix = (screen_width + 10) - (screen_width + 10) * l1.x
                iy = (screen_height + 10) * l1.y
                pyautogui.moveTo(ix, iy)
                if buffer > 0:
                    # Click-watch mode: the pinch resumed, hand control back to the tracking caller.
                    return 1, msg
        if dis >= 0.10 or results.right_hand_landmarks is None:
            if buffer == -2:
                # Tracking mode lost the pinch: give click-watch mode a chance.
                val, msg = pointer(-1, msg)
                if val == 1:
                    # Force another loop iteration regardless of the last measured distance.
                    dis = 0
                    first_time = 1
            elif buffer > 0:
                cv2.putText(image, "In rec", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
                if results.right_hand_landmarks:
                    l11 = results.right_hand_landmarks.landmark[11].y
                    l7 = results.right_hand_landmarks.landmark[7].y
                    # Smaller y means higher in the image: landmark 11 above landmark 7 triggers the click.
                    if l11 < l7:
                        pyautogui.press('ctrl', presses=5)
                        pyautogui.click()
                        msg = "Click"
                        print("click occurred")
                        return 1, msg
                # Spend one frame of the budget and keep the loop running.
                buffer -= 1
                dis = 0
                first_time = 1
            else:
                # Click-watch budget exhausted (buffer reached 0).
                return 0, msg
        cv2.putText(image, "Action : Pointer", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return 0, msg

def map_stat(msg):
    """Print and speak the display name of a static gesture label.

    Labels that are unknown, or that map to "None", produce no output.
    """
    spoken_names = dict(
        like="Like",
        love="Love",
        dislike="Dislike",
        request="Request",
        victory="Victory",
        closed_fist="Fist Closed",
        none="None",
    )
    action = spoken_names.get(msg, "None")
    if action == "None":
        return
    print(action)
    engine.say(action)
    engine.runAndWait()

def static():
    """Classify a static hand gesture over 5 consecutive webcam frames.

    Frames are read from the global ``cap`` and processed with the global
    ``holistic``; each frame is classified by ``smodel``. A gesture is
    accepted only when all 5 frames agree and the label is neither "none"
    nor "closed_fist"; an accepted gesture is announced via ``map_stat``.

    Returns:
        ``(1, keypoints, label)`` when a gesture is accepted, otherwise
        ``(0, keypoints, "None")``. ``keypoints`` is the list of raw
        ``extract_keypoints`` vectors for the frames that were read.
    """
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera returned no frame; cv2.cvtColor would raise on None.
            break
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        # assumes smodel predicts integer indices into `statuses` - TODO confirm
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    # Accept only a unanimous vote over the full window; an early exit can never qualify.
    if len(res) == total and len(set(res)) == 1:
        msg = res[0]
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    """Perform the Facebook key/scroll binding for a gesture label and announce it.

    Labels without a binding (including "", "Enter", "Tab" and "Ctrl_A")
    trigger no input; the speech queue is flushed in every case.
    """
    print(f"Executing action: {action}")  # Add this line for debugging
    # label -> (input to send, phrase to speak)
    bindings = {
        "Swipe Up": (lambda: pyautogui.scroll(50), "Page Scroll Up"),
        "Swipe Down": (lambda: pyautogui.scroll(-50), "Page Scroll Down"),
        "Swipe Right": (lambda: pyautogui.hotkey('j'), "Next Post"),
        "Swipe Left": (lambda: pyautogui.hotkey('k'), "Previous Post"),
        "like": (lambda: pyautogui.hotkey('l'), "Like The Post"),
        "dislike": (lambda: pyautogui.hotkey('l'), "DisLike The Post"),
        "Backspace": (lambda: pyautogui.hotkey('esc'), "go to back"),
    }
    binding = bindings.get(action)
    if binding is not None:
        send_input, phrase = binding
        send_input()
        engine.say(phrase)

    engine.runAndWait()
    # time.sleep(0.8)  # Optionally add a delay after each gesture
    
# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            if results.right_hand_landmarks and not results.left_hand_landmarks:
                l4 = results.right_hand_landmarks.landmark[4]
                l12 = results.right_hand_landmarks.landmark[12]
                dis = eucal(l4, l12)
                if dis < 0.10:
                    pointer(dis, "")
                    continue
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [None]:
##gmail##

In [2]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import pygetwindow as gw
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time  # Import time for adding delay

# Disable PyAutoGUI failsafe
# NOTE(review): with the fail-safe off, PyAutoGUI's documented mouse-to-corner abort
# is disabled; the only exit visible in this cell is pressing 'q' in the OpenCV window.
pyautogui.FAILSAFE = False

# Define actions and statuses
# `actions` is indexed by the argmax of the LSTM output; `statuses` is indexed by
# the SVM prediction (see fun1 and static below).
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
# NOTE(review): load_weights re-reads the same file load_model just loaded;
# presumably redundant - confirm before removing.
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
# NOTE(review): pickle.load can execute arbitrary code from the file; only use a
# svm_model.pkl that comes from a trusted source.
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
# NOTE(review): loaded_list is not referenced again in the visible code of this cell;
# allow_pickle=True carries the same trust requirement as pickle.load above.
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Set text-to-speech voice to 'hi-IN'
# Picks the first installed voice whose id contains 'hi-IN'; when none matches,
# no voice is set and the engine keeps whatever voice it started with.
voices = engine.getProperty('voices')
for voice in voices:
    if 'hi-IN' in voice.id:
        engine.setProperty('voice', voice.id)
        break

# Helper functions
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: frame in BGR channel order.
        model: object exposing ``process(rgb_image)``.

    Returns:
        ``(bgr_image, results)`` - the frame converted back to BGR and the
        value returned by ``model.process``.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The buffer is flagged read-only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_landmarks(image, results, clr):
    """Draw the left and right hand landmarks on ``image`` using colour ``clr``."""
    for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    """Draw both hands on ``image`` with a distinct colour pair per hand."""
    # (landmarks, landmark colour, connection colour) - left hand first, then right.
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand_landmarks, point_color, line_color in hand_styles:
        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2))

def extract_keypoints(results):
    """Flatten both hands' landmarks into one 126-value vector.

    Layout is left hand (63 values: x, y, z per landmark) followed by right
    hand (63 values). A hand that was not detected contributes 63 zeros.
    """
    def _flatten_hand(hand):
        if not hand:
            return np.zeros(21 * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    left_values = _flatten_hand(results.left_hand_landmarks)
    right_values = _flatten_hand(results.right_hand_landmarks)
    return np.concatenate([left_values, right_values])

def s_extract_keypoints(results):
    """Build the 126-value feature vector fed to the static-gesture classifier.

    Layout matches ``extract_keypoints`` (left hand 63 values, right hand 63
    values, zeros for a missing hand); each value then has the same-axis
    coordinate of its hand's first landmark subtracted, in place.

    NOTE(review): because the subtraction is in place and the first landmark
    is processed first, it becomes 0 before any other landmark uses it. The
    net effect is only that each hand's first x, y, z are zeroed; all other
    values are unchanged. If coordinates relative to the first landmark were
    intended, the reference must be captured before the loop. Left as-is
    because the pickled classifier was presumably trained on these exact
    features - confirm before changing.
    """
    hand_size = 21 * 3

    def _flatten_hand(hand):
        if not hand:
            return np.zeros(hand_size)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    res = np.concatenate([_flatten_hand(results.left_hand_landmarks),
                          _flatten_hand(results.right_hand_landmarks)])
    for base in (0, hand_size):
        for idx in range(base, base + hand_size):
            # idx % 3 selects the x / y / z slot of this hand's first landmark.
            res[idx] -= res[base + idx % 3]
    return res

async def do_map(k):
    """Return the display label for ``actions[k]``, or "None" when unmapped."""
    label_pairs = (
        ("Swipe Up", "Scrolling up"),
        ("Swipe Down", "Scrolling down"),
        ("Swipe Right", "Scrolling right"),
        ("Swipe Left", "Scrolling left"),
        ("Ctrl_A", "Select All"),
        ("Tab", "Tab"),
        ("Backspace", "Backspace"),
        ("Enter", "Enter"),
    )
    return dict(label_pairs).get(actions[k], "None")

async def fun1(initial_sequence, flag):
    """Collect webcam frames and classify a dynamic gesture with the LSTM model.

    Frames are read from the global ``cap`` and processed with the global
    ``holistic``. When 10 keypoint frames have been gathered they are padded
    with 5 zero frames on each side (20 total) and passed to ``model``.

    Args:
        initial_sequence: keypoint vectors already collected by the caller.
        flag: truthy to start from ``initial_sequence``, falsy to start empty.

    Returns:
        The predicted action label, or "" when more than 5 of the first 10
        frames contained no hand or no prediction was made.
    """
    # Work on a copy so the caller's list is not extended as a side effect.
    sequence = list(initial_sequence) if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera returned no frame; cv2.cvtColor would raise on None.
            break
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            # NOTE(review): prediction_history is created per call and receives at
            # most one entry here, so it never reaches maxlen and the else branch
            # always runs - the 0.60 confidence threshold currently has no effect
            # on the returned label.
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    """Return the Euclidean distance between two points in the x-y plane.

    Both arguments only need ``x`` and ``y`` attributes; any ``z`` is ignored.
    """
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

def map_stat(msg):
    """Print and speak the display name of a static gesture label.

    Labels that are unknown, or that map to "None", produce no output.
    """
    spoken_names = dict(
        like="Like",
        love="Love",
        dislike="Dislike",
        request="Request",
        victory="Victory",
        closed_fist="Fist Closed",
        none="None",
    )
    action = spoken_names.get(msg, "None")
    if action == "None":
        return
    print(action)
    engine.say(action)
    engine.runAndWait()

def static():
    """Classify a static hand gesture over 5 consecutive webcam frames.

    Frames are read from the global ``cap`` and processed with the global
    ``holistic``; each frame is classified by ``smodel``. A gesture is
    accepted only when all 5 frames agree and the label is neither "none"
    nor "closed_fist"; an accepted gesture is announced via ``map_stat``.

    Returns:
        ``(1, keypoints, label)`` when a gesture is accepted, otherwise
        ``(0, keypoints, "None")``. ``keypoints`` is the list of raw
        ``extract_keypoints`` vectors for the frames that were read.
    """
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera returned no frame; cv2.cvtColor would raise on None.
            break
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        # assumes smodel predicts integer indices into `statuses` - TODO confirm
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    # Accept only a unanimous vote over the full window; an early exit can never qualify.
    if len(res) == total and len(set(res)) == 1:
        msg = res[0]
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def execute_action(action):
    """Perform the Gmail key binding for a gesture label and announce it.

    Labels without a binding (including "") trigger no input; the speech
    queue is flushed in every case.
    """
    print(f"Executing action: {action}")  # Add this line for debugging
    # label -> (input to send, phrase to speak)
    bindings = {
        "Swipe Up": (lambda: pyautogui.hotkey('k'), "Previous Message"),
        "Swipe Down": (lambda: pyautogui.hotkey('j'), "Next Message"),
        "Swipe Right": (lambda: pyautogui.hotkey('g', 'n'), "Go to Next Page"),
        "Swipe Left": (lambda: pyautogui.hotkey('g', 'p'), "Go to Previous Page"),
        "Enter": (lambda: pyautogui.press('enter'), "Open Message"),
        "Backspace": (lambda: pyautogui.hotkey('g', 'i'), "Go back"),
        "Tab": (lambda: pyautogui.press('tab'), "Select Button"),
        "Ctrl_A": (lambda: pyautogui.hotkey('ctrl', 'a'), "Select all"),
    }
    binding = bindings.get(action)
    if binding is not None:
        send_input, phrase = binding
        send_input()
        engine.say(phrase)

    engine.runAndWait()
    # time.sleep(0.8)  # Optionally add a delay after each gesture
    
# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        
        if results.left_hand_landmarks or results.right_hand_landmarks:
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Executing action: 
Executing action: 
Executing action: 
Swipe Right
Executing action: Swipe Right
Tab
Executing action: Tab
Swipe Up
Executing action: Swipe Up
Executing action: 
Swipe Down
Executing action: Swipe Down
Executing action: 
Swipe Up
Executing action: Swipe Up
Swipe Up
Executing action: Swipe Up
Swipe Down
Executing action: Swipe Down
Swipe Down
Executing action: Swipe Down
Swipe Down
Executing action: Swipe Down
Executing action: 
Swipe Left
Executing action: Swipe Left
Swipe Right
Executing action: Swipe Right
Swipe Left
Executing action: Swipe Left
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Enter
Executing action: Enter
Backspace
Executing action: Backspace
Backspace
Executing action: Backspace
Backspace
Executing action: Backspace


In [None]:
##youtube & VLC ##

In [3]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time

# Disable PyAutoGUI failsafe
# NOTE(review): with the fail-safe off, PyAutoGUI's documented mouse-to-corner abort
# is disabled; the only exit visible in this cell is pressing 'q' in the OpenCV window.
pyautogui.FAILSAFE = False

# Define actions and statuses
# `actions` is indexed by the argmax of the LSTM output; `statuses` is indexed by
# the SVM prediction (see fun1 and static below).
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Initialize MediaPipe holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
# NOTE(review): load_weights re-reads the same file load_model just loaded;
# presumably redundant - confirm before removing.
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
# NOTE(review): pickle.load can execute arbitrary code from the file; only use a
# svm_model.pkl that comes from a trusted source.
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
# NOTE(review): loaded_list is not referenced again in the visible part of this cell;
# allow_pickle=True carries the same trust requirement as pickle.load above.
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Set text-to-speech voice to 'hi-IN'
# Picks the first installed voice whose id contains 'hi-IN'; when none matches,
# no voice is set and the engine keeps whatever voice it started with.
voices = engine.getProperty('voices')
for voice in voices:
    if 'hi-IN' in voice.id:
        engine.setProperty('voice', voice.id)
        break

# Helper functions
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: frame in BGR channel order.
        model: object exposing ``process(rgb_image)``.

    Returns:
        ``(bgr_image, results)`` - the frame converted back to BGR and the
        value returned by ``model.process``.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The buffer is flagged read-only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_landmarks(image, results, clr):
    """Draw the left and right hand landmarks on ``image`` using colour ``clr``."""
    for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    """Draw both hands on ``image`` with a distinct colour pair per hand."""
    # (landmarks, landmark colour, connection colour) - left hand first, then right.
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand_landmarks, point_color, line_color in hand_styles:
        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2))

def extract_keypoints(results):
    """Flatten both hands' landmarks into one 126-value vector.

    Layout is left hand (63 values: x, y, z per landmark) followed by right
    hand (63 values). A hand that was not detected contributes 63 zeros.
    """
    def _flatten_hand(hand):
        if not hand:
            return np.zeros(21 * 3)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    left_values = _flatten_hand(results.left_hand_landmarks)
    right_values = _flatten_hand(results.right_hand_landmarks)
    return np.concatenate([left_values, right_values])

def s_extract_keypoints(results):
    """Build the 126-value feature vector fed to the static-gesture classifier.

    Layout matches ``extract_keypoints`` (left hand 63 values, right hand 63
    values, zeros for a missing hand); each value then has the same-axis
    coordinate of its hand's first landmark subtracted, in place.

    NOTE(review): because the subtraction is in place and the first landmark
    is processed first, it becomes 0 before any other landmark uses it. The
    net effect is only that each hand's first x, y, z are zeroed; all other
    values are unchanged. If coordinates relative to the first landmark were
    intended, the reference must be captured before the loop. Left as-is
    because the pickled classifier was presumably trained on these exact
    features - confirm before changing.
    """
    hand_size = 21 * 3

    def _flatten_hand(hand):
        if not hand:
            return np.zeros(hand_size)
        return np.array([[pt.x, pt.y, pt.z] for pt in hand.landmark]).flatten()

    res = np.concatenate([_flatten_hand(results.left_hand_landmarks),
                          _flatten_hand(results.right_hand_landmarks)])
    for base in (0, hand_size):
        for idx in range(base, base + hand_size):
            # idx % 3 selects the x / y / z slot of this hand's first landmark.
            res[idx] -= res[base + idx % 3]
    return res

async def do_map(k):
    """Return the display label for ``actions[k]``, or "None" when unmapped."""
    label_pairs = (
        ("Swipe Up", "Scrolling up"),
        ("Swipe Down", "Scrolling down"),
        ("Swipe Right", "Scrolling right"),
        ("Swipe Left", "Scrolling left"),
        ("Ctrl_A", "Select All"),
        ("Tab", "Tab"),
        ("Backspace", "Backspace"),
        ("Enter", "Enter"),
    )
    return dict(label_pairs).get(actions[k], "None")

async def fun1(initial_sequence, flag):
    """Collect webcam frames and classify a dynamic gesture with the LSTM model.

    Frames are read from the global ``cap`` and processed with the global
    ``holistic``. When 10 keypoint frames have been gathered they are padded
    with 5 zero frames on each side (20 total) and passed to ``model``.

    Args:
        initial_sequence: keypoint vectors already collected by the caller.
        flag: truthy to start from ``initial_sequence``, falsy to start empty.

    Returns:
        The predicted action label, or "" when more than 5 of the first 10
        frames contained no hand or no prediction was made.
    """
    # Work on a copy so the caller's list is not extended as a side effect.
    sequence = list(initial_sequence) if flag else []
    msg = ""
    prediction_history = deque(maxlen=5)
    while len(sequence) <= 20:
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera returned no frame; cv2.cvtColor would raise on None.
            break
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            # NOTE(review): prediction_history is created per call and receives at
            # most one entry here, so it never reaches maxlen and the else branch
            # always runs - the 0.60 confidence threshold currently has no effect
            # on the returned label.
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            print(msg)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
        cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
    return msg

def eucal(p1, p2):
    """Return the Euclidean distance between two points in the x-y plane.

    Both arguments only need ``x`` and ``y`` attributes; any ``z`` is ignored.
    """
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

def map_stat(msg):
    """Print and speak the display name of a static gesture label.

    Labels that are unknown, or that map to "None", produce no output.
    """
    spoken_names = dict(
        like="Like",
        love="Love",
        dislike="Dislike",
        request="Request",
        victory="Victory",
        closed_fist="Fist Closed",
        none="None",
    )
    action = spoken_names.get(msg, "None")
    if action == "None":
        return
    print(action)
    engine.say(action)
    engine.runAndWait()

def static():
    """Classify a static hand gesture over 5 consecutive webcam frames.

    Frames are read from the global ``cap`` and processed with the global
    ``holistic``; each frame is classified by ``smodel``. A gesture is
    accepted only when all 5 frames agree and the label is neither "none"
    nor "closed_fist"; an accepted gesture is announced via ``map_stat``.

    Returns:
        ``(1, keypoints, label)`` when a gesture is accepted, otherwise
        ``(0, keypoints, "None")``. ``keypoints`` is the list of raw
        ``extract_keypoints`` vectors for the frames that were read.
    """
    res = []
    total = 5
    msg = ""
    ret_keypoints = []
    for _ in range(total):
        ret, frame = cap.read()
        if not ret or frame is None:
            # The camera returned no frame; cv2.cvtColor would raise on None.
            break
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))
        keypoints = s_extract_keypoints(results)
        keypts = extract_keypoints(results)
        ret_keypoints.append(keypts)
        gest = smodel.predict([keypoints])
        # assumes smodel predicts integer indices into `statuses` - TODO confirm
        res.append(statuses[gest[0]])
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, "Last Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    # Accept only a unanimous vote over the full window; an early exit can never qualify.
    if len(res) == total and len(set(res)) == 1:
        msg = res[0]
    if msg and msg not in {"none", "closed_fist"}:
        map_stat(msg)
        return 1, ret_keypoints, msg
    else:
        return 0, ret_keypoints, "None"

# Implement PyAutoGUI actions based on gestures
def _click_first_match(image_files, confidence=0.8):
    """Click the first reference image that can be located on screen.

    Every image is searched on its own, so a miss on one template never stops
    the remaining templates from being tried. Depending on the installed
    PyAutoGUI / PyScreeze version a miss is reported either by returning None
    or by raising ImageNotFoundException; both are treated as "not found".

    Returns True when a match was clicked, False when no template matched.
    """
    for image_file in image_files:
        try:
            location = pyautogui.locateOnScreen(image_file, confidence=confidence)
        except pyautogui.ImageNotFoundException:
            location = None
        if location:
            pyautogui.click(location)
            return True
    return False


def execute_action(action):
    """Translate a recognised gesture label into a desktop action.

    action: a dynamic-gesture label ("Swipe Up", "Swipe Down", "Swipe Left",
        "Swipe Right", "Enter", "Backspace") or a static-gesture label
        ("like", "dislike"). Any other value only produces the log line.

    Swipes and "Enter" send a key press and queue a spoken confirmation;
    "like" / "dislike" click the first matching button screenshot on screen.
    The speech queue is processed at the end of every call.
    """
    print(f"Executing action: {action}")
    if action == "Swipe Up":
        pyautogui.hotkey('volumeup')
        engine.say("Volume Increase")
    elif action == "Swipe Down":
        pyautogui.hotkey('volumedown')
        engine.say("Volume Decrease")
    elif action == "Swipe Right":
        pyautogui.hotkey('right')
        engine.say("5 seconds forward")
    elif action == "Swipe Left":
        pyautogui.hotkey('left')
        engine.say("5 seconds backward")
    elif action == "Enter":
        pyautogui.press('space')
        engine.say("Play or Pause")
    elif action == "Backspace":
        pass  # No action for backspace
    elif action == 'like':
        print('like')
        # Light-theme templates first, dark-theme template as the last fallback.
        if not _click_first_match(['like_1.png', 'like_button1.png', 'Like_dark.png']):
            print("Like button not found on the screen.")
    elif action == 'dislike':
        print('dislike')
        if not _click_first_match(['dis_like.png', 'dis_like2.png', 'Dis_like_dark.png']):
            print("Dislike button not found on the screen.")
    engine.runAndWait()

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)

        if results.left_hand_landmarks or results.right_hand_landmarks:
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Executing action: 
Swipe Up
Executing action: Swipe Up
Swipe Up
Executing action: Swipe Up
Swipe Down
Executing action: Swipe Down
Swipe Down
Executing action: Swipe Down
Swipe Left
Executing action: Swipe Left
Executing action: 
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Swipe Left
Executing action: Swipe Left
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Swipe Right
Executing action: Swipe Right
Swipe Left
Executing action: Swipe Left
Enter
Executing action: Enter
Enter
Executing action: Enter
Dislike
Executing action: dislike
dislike
Dislike button not found on the screen.
Dislike
Executing action: dislike
dislike
Dislike button not found on the screen.
Enter
Executing action: Enter
Ctrl_A
Executing action: Ctrl_A
Dislike
Executing action: dislike
dislike
Dislike button not found o

In [11]:
import cv2
import numpy as np
import mediapipe as mp
import pyautogui
import math
from tensorflow.keras.models import load_model
import pickle
from collections import Counter, deque
import pyttsx3
import time

# Disable PyAutoGUI failsafe
# NOTE(review): with FAILSAFE off, moving the mouse into a screen corner no
# longer aborts PyAutoGUI, so a misfiring gesture loop cannot be stopped that way.
pyautogui.FAILSAFE = False

# Define actions and statuses
# `actions` is indexed by the argmax of the LSTM output in fun1();
# `statuses` is indexed by the label predicted by the SVM in static().
actions = np.array(["Swipe Up", "Swipe Down", "Swipe Left", "Swipe Right", "Backspace", "Tab", "Enter", "Ctrl_A"])
statuses = ["like", "love", "request", "victory", "dislike", "closed_fist", "none"]

# Short aliases for the MediaPipe holistic solution and its drawing helpers.
# The Holistic detector itself is only instantiated in the main loop.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Load pre-trained model and weights
# NOTE(review): the weights are loaded a second time from the same file that
# load_model() has just read; presumably redundant — confirm before removing.
model = load_model(r'AllRemain-LSTMv2.h5')
model.load_weights(r'AllRemain-LSTMv2.h5')

# Load SVM model for static gesture recognition
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load a svm_model.pkl that comes from a trusted source.
with open('svm_model.pkl', 'rb') as file:
    smodel = pickle.load(file)

# Load feature vectors
# NOTE(review): `loaded_list` is not referenced anywhere else in this cell, and
# allow_pickle=True carries the same trust requirement as pickle.load above.
loaded_list = np.load(r'avg_600_feature_vector1.npy', allow_pickle=True)

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Set text-to-speech voice to 'hi-IN'
# Picks the first installed voice whose id contains 'hi-IN'; when no voice
# matches, the engine keeps whatever voice it was initialised with.
voices = engine.getProperty('voices')
for voice in voices:
    if 'hi-IN' in voice.id:
        engine.setProperty('voice', voice.id)
        break

# Helper functions
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    The frame is converted to RGB for the model, flagged read-only while
    ``model.process`` runs, made writeable again and converted back to BGR.

    Returns ``(bgr_image, results)`` where ``results`` is whatever
    ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_landmarks(image, results, clr):
    """Draw every detected hand onto ``image`` using a single colour.

    image: frame to draw on (modified in place by the drawing helper).
    results: detection results exposing ``left_hand_landmarks`` and
        ``right_hand_landmarks``; a hand that was not detected is skipped.
    clr: colour passed to ``DrawingSpec`` for the landmarks.
    """
    detected_hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    for hand_landmarks in detected_hands:
        if not hand_landmarks:
            continue
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=clr))

def draw_styled_landmarks(image, results):
    """Draw the detected hands onto ``image`` with a distinct style per hand.

    Each hand gets its own pair of drawing specs (one with circle radius 4,
    one with circle radius 2); a hand that was not detected is skipped.
    """
    hand_styles = (
        # (landmarks, colour of the first spec, colour of the second spec)
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand_landmarks, first_colour, second_colour in hand_styles:
        if hand_landmarks:
            mp_drawing.draw_landmarks(
                image, hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=first_colour, thickness=2, circle_radius=4),
                mp_drawing.DrawingSpec(color=second_colour, thickness=2, circle_radius=2))

def extract_keypoints(results):
    """Flatten both hands' landmarks into a single 1-D feature vector.

    The left hand comes first, then the right hand. A detected hand
    contributes ``x, y, z`` for each of its landmarks; a hand that was not
    detected contributes 63 zeros.
    """
    def hand_vector(hand):
        # Missing hand -> zero block of 21 landmarks * 3 coordinates.
        if not hand:
            return np.zeros(21 * 3)
        coordinates = [[point.x, point.y, point.z] for point in hand.landmark]
        return np.array(coordinates).flatten()

    left = hand_vector(results.left_hand_landmarks)
    right = hand_vector(results.right_hand_landmarks)
    return np.concatenate([left, right])

def s_extract_keypoints(results):
    """Build the feature vector that static() feeds to the SVM classifier.

    Concatenates the left- and right-hand landmark coordinates (x, y, z per
    landmark, 63 zeros for a hand that was not detected) and then subtracts,
    in place, the first landmark of each hand from every coordinate of that
    hand.

    NOTE(review): the subtraction is done in place and starts with the base
    landmark itself, so res[k], res[k + 1] and res[k + 2] are already 0 by the
    time any other landmark is processed. Every later subtraction therefore
    subtracts 0: the net effect is that each hand's first landmark is zeroed
    while all other coordinates stay absolute. If coordinates relative to the
    first landmark were intended, the base values must be captured before the
    loop. The pickled classifier was presumably trained on features produced
    exactly this way, so confirm against the training pipeline before changing
    the behaviour.
    """
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    res = np.concatenate([lh, rh])
    for i in range(len(res)):
        # k is the offset of the hand's first landmark: 0 for the left-hand
        # block, 63 for the right-hand block.
        if i < 63:
            k = 0
        else:
            k = 63
        # i % 3 selects the axis (0 = x, 1 = y, 2 = z); subtract the same axis
        # of the base landmark. See the NOTE in the docstring: the base values
        # have already been overwritten with 0 once i moves past k + 2.
        if i % 3 == 0:
            res[i] = res[i] - res[k]
        elif i % 3 == 1:
            res[i] = res[i] - res[k + 1]
        elif i % 3 == 2:
            res[i] = res[i] - res[k + 2]
    return res

async def do_map(k):
    """Return the human-readable description of the action at index ``k``.

    ``k`` indexes the global ``actions`` array. A label that has no entry in
    the description table yields the string "None". The function is a
    coroutine, so callers have to await it.
    """
    label = actions[k]
    descriptions = {
        "Swipe Up": "Scrolling up",
        "Swipe Down": "Scrolling down",
        "Swipe Right": "Scrolling right",
        "Swipe Left": "Scrolling left",
        "Ctrl_A": "Select All",
        "Tab": "Tab",
        "Backspace": "Backspace",
        "Enter": "Enter"
    }
    if label in descriptions:
        return descriptions[label]
    return "None"

async def fun1(initial_sequence, flag):
    """Capture frames and classify one dynamic gesture with the LSTM model.

    initial_sequence: list of per-frame keypoint vectors already captured by
        the caller. When ``flag`` is truthy it is used as the start of the
        sequence and is appended to in place until the sequence is padded.
    flag: truthy to continue from ``initial_sequence``, falsy to start from
        an empty sequence.

    Returns the predicted entry of ``actions``, or "" when more than 5 of the
    first 10 frames contain no hand keypoints or when no prediction was made
    before the loop ended (for example because 'q' was pressed).

    The function is declared async but contains no await; callers still have
    to await it. Reads the globals ``cap``, ``holistic``, ``model`` and
    ``actions``.
    """
    sequence = initial_sequence if flag else []
    msg = ""
    # NOTE(review): this history is local to a single call, and the prediction
    # branch below runs at most once per call (len(sequence) == 10 is reached
    # once, after which the sequence is padded to 20). It therefore never
    # reaches maxlen, so the majority-vote branch does not run and msg is
    # always actions[prediction] — including when confidence < 0.60.
    prediction_history = deque(maxlen=5)
    action_time = time.time()
    while len(sequence) <= 20:
        # NOTE(review): `ret` is not checked; a failed read leaves frame as None.
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        if len(sequence) == 10:
            # Give up when more than 5 of the 10 frames are all-zero (no hand seen).
            if sum(np.all(seq == 0) for seq in sequence) > 5:
                return ""
            # Pad the 10 captured frames with 5 all-zero frames on each side
            # (20 frames in total) before handing them to the model.
            sequence = [np.zeros(126)] * 5 + sequence + [np.zeros(126)] * 5
            res = model.predict(np.array([sequence]))
            prediction = np.argmax(res)
            confidence = np.max(res)
            if confidence >= 0.60:
                prediction_history.append(actions[prediction])
            if len(prediction_history) == prediction_history.maxlen:
                most_common_prediction = Counter(prediction_history).most_common(1)[0][0]
                msg = most_common_prediction
            else:
                msg = actions[prediction]
            # This time gate only controls the on-screen overlay, not the
            # returned action: action_time was taken when the call started, so
            # the frame is shown only if more than 0.5 s has passed since then.
            if time.time() - action_time > 0.5:
                action_time = time.time()
                cv2.putText(image, "Last Gesture : " + msg, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                cv2.imshow('OpenCV Feed', image)
        # After the padded prediction the sequence has 20 entries, so the loop
        # reads one more frame (21 entries) and then ends.
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    return msg

def eucal(p1, p2):
    """Compute the straight-line distance between two points in the x/y plane.

    Only the ``x`` and ``y`` attributes of ``p1`` and ``p2`` are read.
    """
    delta_x, delta_y = p1.x - p2.x, p1.y - p2.y
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

def map_stat(msg):
    """Print and speak the display name of a static gesture label.

    msg: static-gesture label (for example "like" or "closed_fist").

    The label "none" and any label missing from the table both map to the
    display name "None", in which case nothing is printed or spoken.
    Returns None in every case.
    """
    label_to_name = {
        "like": "Like",
        "love": "Love",
        "dislike": "Dislike",
        "request": "Request",
        "victory": "Victory",
        "closed_fist": "Fist Closed",
        "none": "None"
    }
    name = label_to_name[msg] if msg in label_to_name else "None"
    if name == "None":
        return
    print(name)
    # Queue the phrase, then let the speech engine process its queue.
    engine.say(name)
    engine.runAndWait()

def static():
    """Classify a static hand gesture by majority vote over a frame burst.

    Reads up to five frames from the global ``cap``, runs the holistic
    detector and the SVM classifier ``smodel`` on each, and shows the
    per-frame prediction on the preview window. Pressing 'q' ends the burst
    early.

    Returns a 3-tuple ``(predictions, frame_keypoints, label)``:
      * ``predictions``: the SVM output for every processed frame;
      * ``frame_keypoints``: the ``extract_keypoints`` vector of every
        processed frame;
      * ``label``: the ``statuses`` entry of the most frequent prediction.
    """
    frames_wanted = 5
    last_label = ""
    font = cv2.FONT_HERSHEY_SIMPLEX

    predictions = []
    frame_keypoints = []

    for _ in range(frames_wanted):
        _, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results, (0, 255, 0))

        # Features for the SVM, and the plain vector handed back to the caller.
        svm_features = s_extract_keypoints(results)
        frame_keypoints.append(extract_keypoints(results))

        gesture_index = smodel.predict([svm_features])[0]
        predictions.append(gesture_index)  # Keep the index of the gesture

        overlays = (
            ("NO HANDS", (3, 30), (255, 255, 255)),
            ("Last Gesture : " + last_label, (3, 60), (0, 255, 0)),
            ("Current Gesture : " + statuses[gesture_index], (3, 90), (0, 0, 255)),
        )
        for text, origin, colour in overlays:
            cv2.putText(image, text, origin, font, 1, colour, 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)

        key = cv2.waitKey(10) & 0xff
        if key == ord('q'):
            break

    (winning_index, _votes), = Counter(predictions).most_common(1)
    return predictions, frame_keypoints, statuses[winning_index]


# Implement PyAutoGUI actions based on gestures
def _click_first_match(image_files, confidence=0.8):
    """Click the first reference image that can be located on screen.

    Every image is searched on its own, so a miss on one template never stops
    the remaining templates from being tried. Depending on the installed
    PyAutoGUI / PyScreeze version a miss is reported either by returning None
    or by raising ImageNotFoundException; both are treated as "not found".

    Returns True when a match was clicked, False when no template matched.
    """
    for image_file in image_files:
        try:
            location = pyautogui.locateOnScreen(image_file, confidence=confidence)
        except pyautogui.ImageNotFoundException:
            location = None
        if location:
            pyautogui.click(location)
            return True
    return False


def execute_action(action):
    """Translate a recognised gesture label into a desktop action.

    action: a dynamic-gesture label ("Swipe Up", "Swipe Down", "Swipe Left",
        "Swipe Right", "Enter", "Backspace") or a static-gesture label
        ("like", "dislike"). Any other value only produces the log line.

    Swipes and "Enter" send a key press and queue a spoken confirmation;
    "like" / "dislike" click the first matching button screenshot on screen.
    The speech queue is processed at the end of every call.
    """
    print(f"Executing action: {action}")
    if action == "Swipe Up":
        pyautogui.hotkey('volumeup')
        engine.say("Volume Increase")
    elif action == "Swipe Down":
        pyautogui.hotkey('volumedown')
        engine.say("Volume Decrease")
    elif action == "Swipe Right":
        pyautogui.hotkey('right')
        engine.say("5 seconds forward")
    elif action == "Swipe Left":
        pyautogui.hotkey('left')
        engine.say("5 seconds backward")
    elif action == "Enter":
        pyautogui.press('space')
        engine.say("Play or Pause")
    elif action == "Backspace":
        pass  # No action for backspace
    elif action == 'like':
        print('Attempting to like...')
        # Light-theme templates first, dark-theme template as the last fallback.
        if _click_first_match(['like_1.png', 'like_button1.png', 'Like_dark.png']):
            print('Liked successfully')
        else:
            print("Like button not found on the screen.")
    elif action == 'dislike':
        print('Attempting to dislike...')
        if _click_first_match(['dis_like.png', 'dis_like2.png', 'Dis_like_dark.png']):
            print('Disliked successfully')
        else:
            print("Dislike button not found on the screen.")
    engine.runAndWait()

# Main code
cap = cv2.VideoCapture(0)
sequence = []
msg = ""
s_msg = ""
c = 0

with mp_holistic.Holistic(min_detection_confidence=0.6, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image, results = mediapipe_detection(frame, holistic)
        cv2.putText(image, "NO HANDS", (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last D-Gesture : " + msg, (3, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.putText(image, "Last S-Gesture : " + s_msg, (3, 90), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Feed', image)

        if results.left_hand_landmarks or results.right_hand_landmarks:
            t_msg = s_msg
            s, keys, s_msg = static()
            if s_msg == "None":
                s_msg = t_msg
            if s == 1:
                execute_action(s_msg)  # Execute static gesture
                continue
            else:
                msg = await fun1(keys, 1)
                execute_action(msg)
            c = 0
        if cv2.waitKey(10) & 0xff == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Executing action: 
Executing action: Enter
Executing action: Swipe Up
Executing action: Enter
Executing action: Enter
Executing action: Swipe Down
Executing action: Swipe Down
Executing action: Swipe Right
Executing action: Swipe Right
Executing action: Swipe Right
Executing action: Swipe Right
Executing action: 
Executing action: 
Executing action: 
Executing action: 
Executing action: 
Executing action: 
Executing action: 
Executing action: Swipe Right
Executing action: Swipe Right
Executing action: 
Executing action: 
Executing action: Swipe Right
Executing action: Swipe Right
Executing action: Swipe Right
Executing action: Backspace
Executing action: Backspace
Executing action: Backspace
Executing action: 


In [10]:

    # Manual cleanup: release the webcam handle and close all OpenCV windows.
    # Presumably meant to be run by hand when the capture loop was interrupted
    # before reaching its own cleanup — confirm.
    cap.release()
    cv2.destroyAllWindows()
