In [1]:
import cv2
import time
import numpy as np
import mediapipe as mp
import HandGesture as hg
import math

# The imports below follow the usage example from AndreMiras's pycaw repository on GitHub
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

In [2]:
class handDetector():
    """Small convenience wrapper around the MediaPipe Hands solution.

    Parameters
    ----------
    mode : bool
        Forwarded as ``static_image_mode`` to ``mp.solutions.hands.Hands``.
    maxHands : int
        Forwarded as ``max_num_hands``.
    modelComplexity : int
        Forwarded as ``model_complexity`` (coerced with ``int``).
    detectionCon : float
        Forwarded as ``min_detection_confidence`` (coerced with ``float``).
    trackCon : float
        Forwarded as ``min_tracking_confidence`` (coerced with ``float``).
    """

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = int(modelComplexity)
        self.detectionCon = float(detectionCon)
        self.trackCon = float(trackCon)

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon
        )
        self.mpDraw = mp.solutions.drawing_utils
        # No frame has been processed yet; findPosition() treats None as
        # "no hands" instead of failing with AttributeError.
        self.results = None

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR frame and optionally draw the result.

        The detection result is stored on ``self.results`` for a later
        ``findPosition`` call.

        Parameters
        ----------
        img : BGR image (converted to RGB before being processed).
        draw : bool
            When true, landmarks and hand connections are drawn onto ``img``
            in place.

        Returns
        -------
        The same ``img`` object that was passed in.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return pixel coordinates of the landmarks of one detected hand.

        Parameters
        ----------
        img : image whose ``shape[:2]`` (height, width) is used to scale the
            landmark ``x``/``y`` values to pixels; also the drawing target.
        handNo : int
            Index into the detected hands of the most recent ``findHands``
            call.
        draw : bool
            When true, a filled circle is drawn on ``img`` at every landmark.

        Returns
        -------
        list of ``[id, x, y]`` entries, one per landmark. The list is empty
        when ``findHands`` has not run yet, when no hand was detected, or
        when ``handNo`` does not refer to a detected hand.
        """
        lmList = []
        results = getattr(self, "results", None)
        hands = results.multi_hand_landmarks if results is not None else None
        if not hands:
            return lmList
        # Guard the index: fewer hands may be visible than the caller expects.
        if not -len(hands) <= handNo < len(hands):
            return lmList

        # Only height and width are needed, so grayscale (2-D) frames work too,
        # and the shape is read once instead of once per landmark.
        h, w = img.shape[:2]
        for idx, lm in enumerate(hands[handNo].landmark):
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
        return lmList

In [3]:
# --- Camera setup -----------------------------------------------------------
CamWidth, Camheight = 640, 480  # requested capture width and height in pixels
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message rather than later inside the frame loop.
    raise RuntimeError("Could not open camera 0")
# Named property constants instead of the bare ids 3 and 4.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CamWidth)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, Camheight)
start = time.time()  # timestamp of the previous frame, used for the FPS readout

# --- Hand detector ----------------------------------------------------------
# handDetector is the class defined earlier in this notebook; 0.7 is passed as
# both the minimum detection and the minimum tracking confidence.
detect = handDetector(detectionCon=0.7, trackCon=0.7)

# --- Audio endpoint (pattern taken from the pycaw usage example) -------------
device = AudioUtilities.GetSpeakers()
interface = device.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# Range accepted by SetMasterVolumeLevel. The original author observed
# -65.25 (silent) to 0 (full volume); presumably this is device dependent.
volRange = volume.GetVolumeRange()

minVol = volRange[0]
maxVol = volRange[1]

# --- Initial display state --------------------------------------------------
vol = 0
volBar = 400  # y coordinate of the top of the filled volume bar (400 = empty)
volPer = 0    # volume percentage shown under the bar

In [4]:
# Main loop: read a frame, measure the thumb-tip/index-tip distance and map it
# onto the system volume, then draw the volume bar and FPS overlay.
# The camera and the preview window are released when the loop ends (including
# on error or interrupt), so the setup cell must be re-run before restarting.
try:
    while True:
        success, img = cap.read()
        if not success:
            # A failed read yields no usable frame; stop before touching img.
            break
        img = detect.findHands(img)

        # Landmark list of [id, x, y] entries returned by findPosition.
        lmList = detect.findPosition(img, draw=False)
        if len(lmList) > 8:  # indices 4 and 8 are read below
            # Landmark 4 is used as the thumb tip and landmark 8 as the
            # index-finger tip.
            x1, y1 = lmList[4][1], lmList[4][2]
            x2, y2 = lmList[8][1], lmList[8][2]
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2  # midpoint of the two tips

            # Mark both finger tips, the line joining them and its midpoint.
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

            # Pixel distance between the two finger tips drives the volume.
            length = math.hypot(x2 - x1, y2 - y1)

            # Finger distance 50..300 px is mapped linearly onto
            # minVol..maxVol, the bar top 400..150 and the percentage 0..100.
            # np.interp clamps values outside 50..300 to the end points.
            vol = np.interp(length, [50, 300], [minVol, maxVol])
            volBar = np.interp(length, [50, 300], [400, 150])
            volPer = np.interp(length, [50, 300], [0, 100])
            volume.SetMasterVolumeLevel(vol, None)  # apply to the system volume

            if length < 50:  # fingers pinched: recolor the midpoint green
                cv2.circle(img, (cx, cy), 15, (0, 255, 0), cv2.FILLED)

        # Volume bar: outline first, then the filled part up to volBar.
        cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3)
        cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)
        cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX,
                    1, (255, 0, 0), 3)

        # Frames per second from the time elapsed since the previous frame.
        curr = time.time()
        elapsed = curr - start
        # Two consecutive timestamps can be equal on coarse clocks.
        fps = 1 / elapsed if elapsed > 0 else 0
        start = curr
        cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX,
                    1, (255, 0, 0), 3)

        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
            break
finally:
    # Always free the camera and close the preview window.
    cap.release()
    cv2.destroyAllWindows()

