# A simple CV-based (computer vision) volume controller.

In [1]:
import cv2
import mediapipe as mp
import time
import math
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
from pycaw.pycaw import AudioUtilities, ISimpleAudioVolume
import numpy as np
  
def fingup(lml,finindex):
    """Return a list of 0/1 flags describing which fingers count as raised.

    ``lml`` is a landmark list whose entries are ``[id, x, y]`` and
    ``finindex`` holds the landmark indices of the fingertips to test.

    The first flag belongs to the thumb and is 1 when landmark 4 has a
    smaller x than landmark 3.  One flag per entry of ``finindex`` follows,
    each being 1 when that landmark has a smaller y than the landmark two
    indices before it.
    """
    # Thumb is judged on the horizontal axis, the other fingers vertically.
    thumb_flag = 1 if lml[4][1] < lml[3][1] else 0
    finger_flags = [
        1 if lml[tip][2] < lml[tip - 2][2] else 0
        for tip in finindex
    ]
    return [thumb_flag] + finger_flags
    
def handpos(results, img, draw=True):
    """Collect the pixel coordinates of every detected hand landmark.

    Args:
        results: Object exposing ``multi_hand_landmarks``; a falsy value
            means no hand was found.  Each hand must expose a ``landmark``
            sequence whose items carry ``x`` and ``y`` attributes, which are
            scaled by the image width and height respectively.
        img: Image whose ``shape`` starts with ``(height, width)``.  It is
            annotated in place when ``draw`` is true.
        draw: When true (the default), draw the landmarks and their
            connections on ``img``.  When false, ``img`` is left untouched.

    Returns:
        A list of ``[id, cx, cy]`` entries.  Landmarks of all detected hands
        are appended to the same list, with ``id`` restarting at 0 for each
        hand.  The list is empty when no hand was detected.
    """
    lml=[]
    if not results.multi_hand_landmarks:
        return lml

    # Only height and width are needed; slicing also accepts 2-D images and
    # avoids re-reading the shape for every single landmark.
    h, w = img.shape[:2]
    for handlms in results.multi_hand_landmarks:
        if draw:
            mpdraw.draw_landmarks(img, handlms,mpHands.HAND_CONNECTIONS)
        for _id, lm in enumerate(handlms.landmark):
            # Convert the fractional landmark position to integer pixels.
            lml.append([_id, int(lm.x*w), int(lm.y*h)])
    return lml
def getdist(lml,p1,p2,img,draw=True,rad=12,thic=3):
    """Measure the pixel distance between two landmarks.

    ``lml`` holds ``[id, x, y]`` entries; ``p1`` and ``p2`` index into it.
    When ``draw`` is true, a line joining the two points, a filled circle of
    radius ``rad`` on each point and a slightly smaller filled circle on the
    midpoint are drawn on ``img`` (line thickness ``thic``).

    Returns a tuple ``(distance, img, [x1, x2, y1, y2, cx, cy])`` where
    ``cx, cy`` is the integer midpoint of the two points.
    """
    _, x1, y1 = lml[p1]
    _, x2, y2 = lml[p2]
    mid_x = (x1 + x2) // 2
    mid_y = (y1 + y2) // 2

    if draw:
        stroke = (5,100,200)
        cv2.line(img, (x1, y1), (x2, y2), stroke, thic)
        for centre in ((x1, y1), (x2, y2)):
            cv2.circle(img, centre, rad, stroke, cv2.FILLED)
        cv2.circle(img, (mid_x, mid_y), rad-2, (255,0,25), cv2.FILLED)

    return math.hypot(x2 - x1, y2 - y1), img, [x1, x2, y1, y2, mid_x, mid_y]
def vol(volu):
    """Set the master volume level of the module-level ``volume`` endpoint.

    ``volu`` is forwarded unchanged to ``SetMasterVolumeLevel``; the caller
    derives it from the bounds reported by ``volume.GetVolumeRange()``.
    """
    set_master_level = volume.SetMasterVolumeLevel
    # The second argument is left as None, as in every call in this script.
    set_master_level(volu, None)


In [None]:

# Webcam capture on device index 0.
cap= cv2.VideoCapture(0)

mpHands = mp.solutions.hands
hands= mpHands.Hands()
mpdraw= mp.solutions.drawing_utils    #for drawing dots and connections
lmltop=[]
# Landmark indices handed to fingup() as the fingertips to test.
finindex=[8,12,16,20]

ptime=0
ctime=0

ndist=0


# Endpoint-volume interface of the device returned by GetSpeakers().
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
    IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# Lower and upper bounds of the level accepted by SetMasterVolumeLevel.
volRange = volume.GetVolumeRange()
minVol = volRange[0]
maxVol = volRange[1]


try:
    while True:
        success, img1 = cap.read()
        if not success:
            # No frame was delivered (camera missing, busy or unplugged).
            # Flipping an empty frame would raise, so stop cleanly instead.
            break
        # Mirror the frame so on-screen motion matches the user's hand.
        img = cv2.flip(img1, 1)
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(imgRGB)

        # Current volume as a percentage, and the matching y coordinate of
        # the top of the on-screen bar (0 % -> 400, 100 % -> 150).
        volper=int(volume.GetMasterVolumeLevelScalar()*100)
        volbar = np.interp(volper, [0, 100], [400, 150])

        lml=handpos(results,img)
        cv2.rectangle(img, (40,140), (95, 410), (255,255,255), 3)
        cv2.putText(img,'Gesture-based volume Controller',(40,40),cv2.FONT_HERSHEY_PLAIN,2,(255,255,255),2)
        cv2.putText(img,str(int(volper))+'%',(28,120),cv2.FONT_HERSHEY_PLAIN,3,(255,255,255),3)
        cv2.rectangle(img, (50,int(volbar)), (85, 400), (0,255,0), cv2.FILLED)
        if len(lml) !=0:
            finup=fingup(lml,finindex)
            # Act only when the first two flags are set and the rest are
            # clear (thumb plus the first tested fingertip).
            if finup == [1,1,0,0,0]:
                ndist, img, pt=getdist(lml,4,8,img)
                # Map a 50..150 px gap between landmarks 4 and 8 onto the
                # device's volume range.
                volu = np.interp(ndist, [50, 150], [minVol,maxVol])

                # Gaps of 50 px or less leave the volume untouched.
                if ndist > 50:
                    vol(volu)
        ctime=time.time()
        elapsed=ctime-ptime
        # Guard against two iterations sharing the same timestamp.
        fps=1/elapsed if elapsed > 0 else 0
        ptime=ctime

        #cv2.putText(img,str(int(fps)),(10,70),cv2.FONT_HERSHEY_PLAIN,3,(255,0,255),3)
        cv2.imshow("Image",img)
        k = cv2.waitKey(1)
        if k == 27:  # ESC key
            break
finally:
    # Always free the camera and close the preview window, whether the loop
    # ended through ESC, a failed read or an exception.
    cap.release()
    cv2.destroyAllWindows()