# In [3]:  (Jupyter cell prompt left over from the notebook export)
import cv2 #opencv imported
import numpy as np #scientific computing
import math #mathematical functions
from pygame import mixer #for playing the sound

# Video source: device index 0, i.e. the first webcam.
capture = cv2.VideoCapture(0)

# Audio output. The mixer arguments must be preset before init() is called;
# per the pygame mixer API they are frequency, sample size, channels, buffer.
mixer.pre_init(44100, -16, 1, 512)
mixer.init()

# One clip per recognised sign; sound<N> is the clip played for sign N.
sound1, sound2, sound3, sound4, sound5 = [
    mixer.Sound(wav_name)
    for wav_name in (
        'RELATIVE.wav',
        'DELIVERY PERSONNEL.wav',
        'CLEANER.wav',
        'NEWSPAPER.wav',
        'SALESMAN.wav',
    )
]

# Together these two act as a crude delay: predictions are collected in `lis`
# frame by frame, and `count` decides when the collected votes are evaluated.
count = 0  # number of video frames handled so far
lis = []  # sign predicted for each frame since the last evaluation
     
# ---------------------------------------------------------------------------
# Main loop: grab a frame, segment skin-coloured pixels inside a fixed region
# of interest (ROI), count the extended fingers from the convexity defects of
# the largest contour, overlay the prediction on the frame and, every 200
# iterations, play the sound of the most frequently predicted sign.
# Pressing ESC ends the loop; the camera and windows are always released.
# ---------------------------------------------------------------------------

# Loop-invariant settings, built once instead of on every frame.
kernel = np.ones((3, 3), np.uint8)  # structuring element used for dilation
Skin_HSV_Range_Lower = np.array([0, 10, 60], dtype=np.uint8)  # lower HSV bound of skin colour
Skin_HSV_Range_Upper = np.array([20, 150, 255], dtype=np.uint8)  # upper HSV bound of skin colour
font = cv2.FONT_HERSHEY_SIMPLEX

# Clip to play for each sign: 1 RELATIVE, 2 DELIVERY PERSONNEL, 3 CLEANER,
# 4 NEWSPAPER, 5 SALESMAN (as loaded into sound1..sound5 earlier in the file).
sign_sounds = {1: sound1, 2: sound2, 3: sound3, 4: sound4, 5: sound5}


def _put_label(frame, text):
    """Draw `text` in red in the top-left corner of `frame`."""
    cv2.putText(frame, text, (0, 50), font, 2, (0, 0, 255), 3, cv2.LINE_AA)


def _defect_angle_and_depth(start, end, far):
    """Measure one convexity defect.

    `start` and `end` are the hull points on either side of the defect and
    `far` is the contour point farthest from the hull between them.

    Returns `(angle_deg, depth)`: the angle at `far` in degrees (cosine rule)
    and the distance of `far` from the line start-end (triangle area via
    Heron's formula, divided by half the base). Returns None when two of the
    points coincide, because the triangle is then degenerate.
    """
    a = math.hypot(end[0] - start[0], end[1] - start[1])
    b = math.hypot(far[0] - start[0], far[1] - start[1])
    c = math.hypot(end[0] - far[0], end[1] - far[1])
    if a == 0 or b == 0 or c == 0:
        return None
    semi = (a + b + c) / 2  # semi-perimeter
    # Rounding can make the product marginally negative for collinear points.
    area = math.sqrt(max(semi * (semi - a) * (semi - b) * (semi - c), 0.0))
    depth = (2 * area) / a
    # Clamp so rounding cannot push the cosine outside acos's domain.
    cos_far = max(-1.0, min(1.0, (b ** 2 + c ** 2 - a ** 2) / (2 * b * c)))
    return math.degrees(math.acos(cos_far)), depth


def _detect_sign(frame):
    """Detect the hand sign shown inside the ROI of `frame`.

    Draws the ROI rectangle, the hull lines, the defect dots and the text
    label onto `frame` in place.

    Returns `(mask_frame, sign)` where `mask_frame` is the processed skin
    mask of the ROI and `sign` is the predicted sign (1-5), or None when the
    frame does not yield a vote (no hand, closed fist, or too many defects).
    May raise cv2.error if OpenCV rejects the contour.
    """
    ROI = frame[100:300, 100:300]  # view into frame: drawing on it shows up in frame
    cv2.rectangle(frame, (100, 100), (300, 300), (0, 255, 0), 0)  # green ROI outline

    HSV_Space = cv2.cvtColor(ROI, cv2.COLOR_BGR2HSV)
    mask_frame = cv2.inRange(HSV_Space, Skin_HSV_Range_Lower, Skin_HSV_Range_Upper)
    mask_frame = cv2.dilate(mask_frame, kernel, iterations=4)  # thicken the mask
    mask_frame = cv2.GaussianBlur(mask_frame, (5, 5), 100)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x/4.x; the contours are second-to-last in both.
    contours = cv2.findContours(mask_frame, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        _put_label(frame, 'Put hand in the box')
        return mask_frame, None

    MAX_cnt = max(contours, key=cv2.contourArea)  # largest contour is taken to be the hand
    epsilon = 0.0005 * cv2.arcLength(MAX_cnt, True)
    approximate = cv2.approxPolyDP(MAX_cnt, epsilon, True)  # light simplification

    Area_Hull = cv2.contourArea(cv2.convexHull(MAX_cnt))
    Area_cnt = cv2.contourArea(MAX_cnt)
    # Percentage of the convex hull that is not covered by the hand.
    if Area_cnt > 0:
        Area_Ratio = ((Area_Hull - Area_cnt) / Area_cnt) * 100
    else:
        Area_Ratio = 0.0

    hull_indices = cv2.convexHull(approximate, returnPoints=False)
    defects = cv2.convexityDefects(approximate, hull_indices)  # None when there are no defects

    defect_count = 0
    if defects is not None:
        # Each row is [start index, end index, farthest index, fixed-point depth].
        for s, e, f, _ in defects[:, 0]:
            # tolist() yields plain Python ints for the drawing calls.
            start = tuple(approximate[s][0].tolist())
            end = tuple(approximate[e][0].tolist())
            far = tuple(approximate[f][0].tolist())

            measured = _defect_angle_and_depth(start, end, far)
            if measured is not None:
                angle, depth = measured
                # Gaps between fingers are sharp and deep; wide angles and
                # shallow defects close to the hull are generally noise.
                if angle <= 90 and depth > 30:
                    defect_count += 1
                    cv2.circle(ROI, far, 3, [255, 0, 0], -1)  # blue dot on the defect
            cv2.line(ROI, start, end, [0, 255, 0], 2)  # hull outline around the hand

    fingers = defect_count + 1  # n gaps between fingers -> n + 1 fingers

    sign = None
    if fingers == 1:
        if Area_cnt < 2000:  # blob too small to be a hand
            _put_label(frame, 'Put hand in the box')
        elif Area_Ratio < 12:  # hull almost filled: closed fist
            _put_label(frame, '0')
        else:
            _put_label(frame, '1')
            sign = 1
    elif fingers <= 5:
        _put_label(frame, str(fingers))
        sign = fingers
    else:
        _put_label(frame, 'reposition')
    return mask_frame, sign


try:
    while True:
        count = count + 1
        ret, frame = capture.read()  # ret is False when no frame is available

        if ret:
            try:
                frame = cv2.flip(frame, 1)  # mirror horizontally
                mask_frame, sign = _detect_sign(frame)
            except cv2.error as err:
                # A contour OpenCV cannot process: skip this frame only.
                print('Skipping frame', count, '- OpenCV error:', err)
            else:
                if sign is not None:
                    lis.append(sign)
                cv2.imshow('mask', mask_frame)
                cv2.imshow('frame', frame)

        # Every 200 iterations play the clip of the most frequent prediction;
        # collecting votes in between acts as a delay between sounds.
        if count % 200 == 0:
            if lis:
                most_common = max(set(lis), key=lis.count)
                sign_sounds[most_common].play()
            lis.clear()

        k = cv2.waitKey(5) & 0xFF  # hitting ESC breaks the loop
        if k == 27:
            break
finally:
    # Release the windows and the camera even if the loop exits with an error.
    cv2.destroyAllWindows()
    capture.release()