In [1]:
import cv2
import numpy as np
import mediapipe as mp

In [2]:
import math

In [3]:
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

In [4]:
# Look up the speaker device and activate its endpoint-volume COM interface.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)

# Cast the activated interface to an IAudioEndpointVolume pointer; `volume`
# is the handle the rest of the notebook uses to read and set the level.
volume = cast(interface, POINTER(IAudioEndpointVolume))

In [5]:
# Ask the endpoint for its volume range; the first two entries are kept as
# the lower and upper bounds.
vol_Range = volume.GetVolumeRange()
minV, maxV = vol_Range[0], vol_Range[1]

In [6]:
# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mpHands, mpDraw = mp.solutions.hands, mp.solutions.drawing_utils

In [7]:
# Single shared hand tracker used by findHands: at most one hand, with a
# detection confidence threshold of 0.7 and a tracking threshold of 0.5.
hands = mpHands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5,
)

In [8]:
# Landmark indices of the five fingertips, ordered thumb -> pinky.
# fingersUp() indexes this list positionally, so the order matters.
tipIds = [
    4,   # thumb
    8,   # index finger
    12,  # middle finger
    16,  # ring finger
    20,  # pinky
]

In [9]:
def findHands(img, draw=True):
    """Run the shared hand tracker on a BGR frame.

    The frame is converted from BGR to RGB before being passed to
    ``hands.process``. When ``draw`` is truthy, the landmarks and
    connections of every detected hand are drawn onto ``img`` in place.

    Args:
        img: BGR image (as produced by ``cv2.VideoCapture.read``).
        draw: Whether to overlay the detected hand skeletons on ``img``.

    Returns:
        A ``(img, results)`` tuple: the (possibly annotated) frame and the
        raw object returned by ``hands.process``.
    """
    results = hands.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    detected = results.multi_hand_landmarks
    if draw and detected:
        for hand_landmarks in detected:
            mpDraw.draw_landmarks(img, hand_landmarks, mpHands.HAND_CONNECTIONS)

    return img, results

In [10]:
def findPosition(img, results, draw=True):
    """Convert the first detected hand's landmarks to pixel coordinates.

    Landmark ``x``/``y`` values are multiplied by the frame width/height
    and truncated to integers.

    Args:
        img: Frame whose ``shape[:2]`` gives ``(height, width)``; also the
            drawing target when ``draw`` is truthy.
        results: Detection result exposing ``multi_hand_landmarks``.
        draw: Whether to draw a green rectangle around the hand, padded by
            20 pixels on every side.

    Returns:
        ``(lmList, bbox)`` where ``lmList`` is a list of ``[index, x, y]``
        entries and ``bbox`` is an ``(xmin, ymin, xmax, ymax)`` tuple. When
        no hand is present both are empty lists.
    """
    lmList = []
    bbox = []

    height, width = img.shape[:2]

    detected = results.multi_hand_landmarks
    if not detected:
        return lmList, bbox

    # Only the first hand in the result is considered.
    for index, point in enumerate(detected[0].landmark):
        lmList.append([index, int(point.x * width), int(point.y * height)])

    xs = [entry[1] for entry in lmList]
    ys = [entry[2] for entry in lmList]
    bbox = min(xs), min(ys), max(xs), max(ys)

    if draw:
        pad = 20
        top_left = (bbox[0] - pad, bbox[1] - pad)
        bottom_right = (bbox[2] + pad, bbox[3] + pad)
        cv2.rectangle(img, top_left, bottom_right, (0,255,0), 2)

    return lmList, bbox

In [11]:
def findDistance(lmList, p1, p2, img, draw=True):
    """Measure the pixel distance between two landmarks.

    Args:
        lmList: List of ``[index, x, y]`` entries (as built by findPosition).
        p1: Position in ``lmList`` of the first landmark.
        p2: Position in ``lmList`` of the second landmark.
        img: Frame to annotate when ``draw`` is truthy; returned unchanged
            otherwise.
        draw: Whether to mark both points and their midpoint with filled
            circles and connect the two points with a line.

    Returns:
        ``(length, img, [x1, y1, x2, y2, cx, cy])`` where ``length`` is the
        Euclidean distance and ``(cx, cy)`` is the integer midpoint.
    """
    start, end = lmList[p1], lmList[p2]
    x1, y1 = start[1], start[2]
    x2, y2 = end[1], end[2]

    # Integer midpoint of the segment (floor division).
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2

    if draw:
        for center in ((x1, y1), (x2, y2), (cx, cy)):
            cv2.circle(img, center, 5, (0,0,0), cv2.FILLED)
        cv2.line(img, (x1, y1), (x2, y2), (230,85,73), 3)

    dx, dy = x2 - x1, y2 - y1
    length = math.hypot(dx, dy)

    return length, img, [x1, y1, x2, y2, cx, cy]

In [12]:
def fingersUp(lmList):
    """Report which of the five fingers are raised.

    Args:
        lmList: List of ``[index, x, y]`` entries covering every landmark
            referenced through ``tipIds``.

    Returns:
        A list of five ints (1 = up, 0 = down) ordered like ``tipIds``.
        The thumb counts as up when its tip's x is greater than the x of
        the landmark one index below it; each other finger counts as up
        when its tip's y is smaller than the y of the landmark two indices
        below it.
    """
    # Thumb: compared along x against the preceding landmark.
    thumb_tip = tipIds[0]
    thumb_state = 1 if lmList[thumb_tip][1] > lmList[thumb_tip - 1][1] else 0

    # Remaining four fingers: compared along y against the landmark two below the tip.
    other_states = [
        1 if lmList[tip][2] < lmList[tip - 2][2] else 0
        for tip in (tipIds[slot] for slot in range(1, 5))
    ]

    return [thumb_state] + other_states

In [13]:
# Open the camera at index 0 and request 1080x1080 frames.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1080)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Overlay state. These persist across frames so the last computed reading
# stays on screen while no (suitably sized) hand is visible.
vol = 0
volBar = 400
volPer = 0

volColor = (255,0,0)

# Volume percentage is snapped to multiples of this step to reduce jitter.
smoothness = 5

try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # A failed grab yields no image; stop instead of passing None
            # into cv2.cvtColor inside findHands.
            print('Could not read a frame from the camera; stopping.')
            break

        # Find Hands
        img, results = findHands(img)

        lmList, bbox = findPosition(img, results)
        if len(lmList) != 0:

            # Filter based on size: bounding-box area scaled down by 100
            area = (bbox[2]-bbox[0]) * (bbox[3]-bbox[1]) // 100
            if 250 < area < 1000:

                # Find Distance between index and Thumb
                length, img, line_info = findDistance(lmList, 4, 8, img)

                # Convert Volume: a pinch length of 50..200 px maps to bar
                # y-coordinates 400..150 and to 0..100 percent.
                volBar = np.interp(length, [50,200], [400,150])
                volPer = np.interp(length, [50,200], [0,100])

                # Reduce Resolution to make it smoother
                volPer = smoothness * round(volPer / smoothness)

                # Check Fingers
                fingers = fingersUp(lmList)

                # If Pinky finger is down, commit the reading as the system volume
                if not fingers[4]:
                    volume.SetMasterVolumeLevelScalar(volPer / 100, None)
                    cv2.circle(img, (line_info[4], line_info[5]), 5, (0,255,0), cv2.FILLED)
                    volColor = (0,255,0)
                else:
                    volColor = (255,0,0)

        # Drawings: bar outline, filled level and percentage label
        cv2.rectangle(img, (50,150), (85,400), (0,255,0), 3)
        cv2.rectangle(img, (50,int(volBar)), (85,400), (0,255,0), cv2.FILLED)
        cv2.putText(img, f'{int(volPer)}%', (40,450), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 3)

        # Show the volume currently reported by the audio endpoint
        cVol = int(volume.GetMasterVolumeLevelScalar()*100)
        cv2.putText(img, f'Vol Set : {int(cVol)}', (400,50), cv2.FONT_HERSHEY_COMPLEX, 1, volColor, 3)

        cv2.imshow('Gesture Volume Control', img)
        if cv2.waitKey(1)&0xff == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()