<a href="https://colab.research.google.com/github/Satorumi/Machine-Learning/blob/main/Computer_Vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Import Libraries

In [None]:
!pip install mediapipe

In [9]:
import cv2 as cv
import mediapipe as mp
import time
import math
import os

#### Hand-Tracking

In [None]:
capture = cv.VideoCapture(1)
mpHands = mp.solutions.hands
hands = mpHands.Hands() # keep default parameter
mpDraw = mp.solutions.drawing_utils

while True:
  _, img = capture.read()
  imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
  results = hands.process(imgRGB)
  h, w, c = img.shape

  if results.multi_hand_landmarks #if detected multi hand
    for hand_lm in results.multi_hand_landmarks:
      for index, lm in enumerate(hand_lm.landmark): # each landmark has x, y, z values
        cx, cy = int(lm.x*w), int(lm.y*h)
        if index == 4: # the tip of the thumb
          cv.circle(img, 25, (0,0,255), -1) # draw a circle
      # draw connected line on detected hand
      mpDraw.draw_landmarks(img, hand_lm, mpHands.HAND_CONNECTIONS) 

  # compute fps
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previoud_time = current_time
  # display fps
  cv.putText(img, str(int(fps)), org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,255), thickness=2)


Hand-tracking Module

In [None]:
class handDetector():
  def __init__(self, mode=False, max_hand=2, min_detection_confidence=0.5, min_tracking_confidence=0.5):
    self.mode = mode
    self.max_hand = max_hand
    self.min_detection_confidence=min_detection_confidence
    self.min_tracking_confidence=min_tracking_confidence

    self.mpHands = mp.solutions.hands
    self.hands = self.mpHands.Hands(self.mode, self.max_hand, self.min_detection_confidence, self.min_tracking_confidence)
    self.mpDraw = mp.solutions.drawing_utils

  def hand_detect(self, img, draw=True):
    imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    self.results = self.hands.process(imgRGB)
    h, w, c = img.shape

    if self.results.multi_hand_landmarks 
      for hand_lm in self.results.multi_hand_landmarks:
        if draw:
          self.mpDraw.draw_landmarks(img, hand_lm, mpHands.HAND_CONNECTIONS) 
      return img

  def landmarks_detect(self, img, landmark_num=0, draw=True):
    landmarks = []
    if self.results.multi_hand_landmarks:
      hand = self.results.multi_hand_landmarks[landmark_num]
      for index, lm in enumerate(hand.landmark): 
          cx, cy = int(lm.x*w), int(lm.y*h)
          landmarks.append([id, cx, cy])
          if draw: 
            cv.circle(img, 25, (0,0,255), -1) 
    return landmarks

In [None]:
prev_time = 0
capture = cv.VideoCapture(1)
handDetector = handDetector()
while True:
  _, img = capture.read()
  handDetect.hand_detect(img)
  landmarks = handDetect.landmark_detect(img)
  if landmarks:
    print(landmarks[0]) # print location of specific landmark

  cur_time = time.time()
  fps = 1 / (cur_time-prev_time)
  prev_time = cur_time
  cv.putText(img, str(int(fps)), org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,255), thickness=2)


####Pose Estimation

In [None]:
cap = cv.VideoCapture('video_path')
mpPose = mp.solution.pose
pose = mpPose.Pose()
mpDraw = mp.solutions.drawing_utils

while True:
  _, img = cap.read()
  imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
  results = pose.process(imgRGB)
  w, h, c = img.shape
  
  landmarks = []
  if result.pose_landmarks:
    for index, lm in result.pose_landmarks:
      cx, cy = int(lm.x*w), int(lm.y*h)
      landmarks.append(index, cx, cy, lm.z])
      if index == 2: # left-eye
        cv.circle(img, (cx,cy), radius=10, (0,0,255), 1)
    mpDraw.draw_landmarks(img, result.pose_landmarks, mpPose.POSE_CONNECTIONS)

  #compute fps
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previous_time = current_time
  # display fps
  cv.putText(img, str(int(fps)), org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,255), thickness=2)
  cv.waitKey(1)

print(landmarks[14])  # the elblow
cv.circle(img, (landmarks[14][1], landmarks[14][2]),  15, (0,0,255), -1) # tracking the elbow


Pose Estimation Module

In [None]:
class poseDetector():
    def __init__(self, mode=False, upBodyOnly=False, smooth=True,
                 detectionConfidence=0.5, trackingConfidence=0.5):
        self.mode = mode
        self.upBodyOnly = upBodyOnly
        self.smooth = smooth
        self.detectionConfidence = detectionConfidence
        self.trackingConfidence = trackingConfidence
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        self.pose = self.mpPose.Pose(self.mode, self.upBodyOnly, self.smooth, self.detectionConfidence, self.trackingConfidence)

    def pose_detect(self, img, draw=True):
      imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
      self.results = self.pose.process(imgRGB)
      if self.results.pose_landmarks:
        if draw:
          mpDraw.draw_landmarks(img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS)

    def landmark_detect(self, img, draw=True):
      self.landmarks = []
      if self.results.pose_landmarks::
        for index, lm in self.results.pose_landmarks:
          h, w, c = img.shape
          cx, cy = int(lm.x*w), int(lm.y*h)
          self.landmarks.append([index, cx, cy])
          if draw:
            cv.circle(img, (cx, cy), 5, (255, 0, 0), -1)
      return self.landmarks


#### Face Detection

In [1]:
previous_time = 0
cap = cv.VideoCapture('video_path')
mpFaceDetection = mp.solution.face_detection
faceDetection = mpFaceDetection.FaceDetection()
mpDraw = mp.solutions.drawing_utils

while True:
  _, img = cap.read()
  imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
  results = faceDetection.process(imgRGB)
  h, w, c = img.shape
  
  detections = {}
  if results.detections:
    for index, detection in enumerate(results.detections):
      box = detection.location_data.relative_bounding_box # acees to info in dict
      x, y, w, h = int(box.xmin * w), int(box.ymin * h),
                   int(box.width * w), int(box.height * h)
      x1, y1 = x + w, y + h

      # draw rectangle box around face
      cv.rectangle(img, (x, y), (w, h), (255, 0, 255), 2)
      cv.line(img, (x, y), (x, y+20), (255, 0, 255), 5)
      cv.line(img, (x, y), (x+20, y), (255, 0, 255), 5)
      # display detection score on top of box
      cv.putText(img, f'{int(detection_score[0]/100}%',
                (x, y - 20), cv.FONT_HERSHEY_PLAIN,
                3, (255, 0, 255), 2) 

  
  # compute fps
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previous_time = current_time
  # display fps
  cv.putText(img, str(int(fps)), org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,255), thickness=2)
  cv.waitKey(1)

IndentationError: ignored

####Face Mesh

In [None]:
cap = cv.VideoCapture('video_path')
mpFaceMesh = mp.solution.face_mesh
FaceMesh = mpFaceMesh.FaceMesh(max_num_face=2)
mpDraw = mp.solutions.drawing_utils
drawSpec = mpDraw.DrawingSpec(thickness=2, circle_radius=1) # face display

while True:
  _, img = cap.read()
  imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
  results = faceDetection.process(imgRGB)
  h, w, c = img.shape

  faces = []
  if results.multi_face_landmarks:
    for face_lms in results.multi_face_landmarks:
      mpDraw.draw_landmarks(img, face_lms, mp.FaceMesh.FACE_CONNECTIONS)
      
      landmarks = []
      for index, lm in face_lms.landmark:
        x, y = int(lm.x * w), int(lm.y * h)
        landmarks.append([index, x, y])
        # display idex of landmark
        cv.putText(img, str(index), (x, y), cv.FONT_HERSHEY_COMPLEX, 0.5, (0,255,0), 3)
      faces.append(landmarks)
  

  # compute fps
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previoud_time = current_time
  # display fps
  cv.putText(img, f'FPS: {str(int(fps))}', org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,255), thickness=2)
  cv.waitKey(1)



#### Project #1 - Gesture Control

In [None]:
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

In [None]:
cap = cv.CaptureVideo(0)

# define volume
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()
minVol = volRange[0] # - 65
maxVol = volRange[1] # 0
vol = 0
volBar = 400
volPer = 0 # default volume percentage

camera_width, camera_height = 1280, 720 # define camera size
# set camera size
cap.set(3, camera_width)
cap.set(4, camera_height)
previous_time = 0

# using HandTracking Module
handDetector = handDetector()

while True:
  _, img = capture.read()
  handDetect.hand_detect(img)
  landmarks = handDetect.landmarks_detect(img, draw=False)
  if landmarks: # if not None
    # get x, y position of specific landmark
    x1, y1 = landmarks[4][1], landmarks[4][2]  # the thumb
    x2, y2 = landmarks[8][1], landmarks[8][2] # index finger
    cx, cy = (x1+x2) //2, (x1+x2) //2 # get the midpoint

    cv.circle(img, (x1, y1), 7, (0, 0, 255), -1) # circle at thumb
    cv.circle(img, (x2, y2), 7, (0, 0, 255), -1) # circle at index finger
    cv.circle(img, (cx, cy), 7, (0, 0, 255), -1) # circle in the middle
    cv.line(img, (x1, y1), (x2, y2), (0, 0, 255), 2)

    length = math.hypot(x2 - x1, y2 - y1) # distance bewtween two fingers
    if length < 50:  # change color if smaller distance
      cv.circle(img, (cx, cy), 7, (0, 255, 0), -1)

    # hand range from 50 - 300
    # vol range -65 - 0

  vol = np.interp(length, [50, 300], [minVol, max_Val])
  volBar = np.interp(length, [50, 300], [400, 150]) # range for bar vol
  volPer = np.interp(length, [50, 300], [0, 100])

  volume.SetMasterVolumeLevel(vol, None)

    # create the volume bar on the image
  cv.rectangle(img, (50, 150), (85, 400), (255,0,0), 3)
  cv.rectangle(img, (50, int(vol)), (85, 400), (255,0,0), 3)
  # display vol percentage below vol bar
  cv.putText(img, f'{int(volPer)}%', org=(40, 450), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,0), thickness=2)
  
  # compute fps
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previous_time = current_time
  # display fps
  cv.putText(img, f'FPS: {str(int(fps))}', org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,0), thickness=2)
  cv.waitKey(1)


In [None]:
# update handDetector class
class handDetector():
  def __init__(self, mode=False, max_hand=2, min_detection_confidence=0.5, min_tracking_confidence=0.5):
    # define congifure
    self.mode = mode
    self.max_hand = max_hand
    self.min_detection_confidence=min_detection_confidence
    self.min_tracking_confidence=min_tracking_confidence

    self.mpHands = mp.solutions.hands
    self.hands = self.mpHands.Hands(self.mode, self.max_hand, self.min_detection_confidence, self.min_tracking_confidence)
    self.mpDraw = mp.solutions.drawing_utils

  def hand_detect(self, img, draw=True):
    imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    self.results = self.hands.process(imgRGB)

    if self.results.multi_hand_landmarks 
      for hand_lm in self.results.multi_hand_landmarks:
        if draw:
          self.mpDraw.draw_landmarks(img, hand_lm, mpHands.HAND_CONNECTIONS) 
      return img

  def landmarks_detect(self, img, landmark_num=0, draw=True):
    self.landmarks = [] # a list to store all landmarks

    if self.results.multi_hand_landmarks:
      hand = self.results.multi_hand_landmarks[landmark_num]
      h, w, c = img.shape
      for index, lm in enumerate(hand.landmark): 
          cx, cy = int(lm.x*w), int(lm.y*h)
          self.landmarks.append([id, cx, cy])
          if draw: 
             cv2.circle(img, (cx, cy), 5, (255, 0, 255), -1) 
    return self.landmarks

  def find_distance(self, img, point1, point2, draw=True):
    x1, y1 = self.landmarks[point1][1], self.landmarks[point1][2]
    x2, y2 = self.landmarks[point2][1], self.landmarks[point2][2]
    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 # compute midpoint between two points
    if draw:
      cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED) # point 1
      cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED) # point 2
      cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3) # line connect
      cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED) # middle point 
    distance = math.hypot(x2 - x1, y2 - y1) # distance between
    locations = [x1, y1, x2, y2, cx, cy]
    return distance, img, locations
    

#### Project #2: Finger Counter

In [None]:
cap = cv.CaptureVideo(0)
camera_width, camera_height = 1280, 720 # define camera size
# set camera size
cap.set(3, camera_width)
cap.set(4, camera_height)
previous_time = 0

folder_path = r'folder_path' # all images with hand counting from 0-5
folder = os.listdir(folder_path)
img_list = []
for img_path in folder:
  img = cv.imread(f'{folder_path}/{img_path}') # image path
  img_list.append(img)
 
# using HandTracking Module
handDetector = handDetector()

while True:
  _, img = capture.read()
  handDetect.hand_detect(img)
  landmarks = handDetect.landmarks_detect(img, draw=False)
  if landmarks: # if not None
  
    finger_points = [4, 8, 12, 16, 20] # all fingers tips
    counting = 0 # keep track of counting finger
    for point in finger_points:
      if point == 4: # the thumb
        if landmarks[4][1] > landmarks[3][1]: # check if the tips is on the righ
          counting += 1 # increase the counting finger 
      elif landmarks[point][2] < landmarks[point-2][2]:
        counting += 1 # increase the counting finger 

    # display counting image on top-left
    h, w, c = img_list[1].shape
    img[0:w, 0:h] = img_list[counting]
    # display the number of finger
    cv.rectangle(img, (0,200), (0,350), (0,0,255), -1)
    cv.putText(img, str(counting), (30, 170), cv.FONT_HERSHEY_COMPLEX, 1.2, (255,0,0), 20)



  
  # compute fps
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previous_time = current_time
  # display fps
  cv.putText(img, f'FPS: {str(int(fps))}', org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,0), thickness=2)
  cv.waitKey(1)

In [None]:
# update Hand tracking module
def finger_counter(self, img, counting_img=True, draw=True):
  folder_path = r'folder_path' # all images with hand counting from 0-5
  folder = os.listdir(folder_path)
  img_list = []
  for img_path in folder:
    img = cv.imread(f'{folder_path}/{img_path}') # image path
    img_list.append(img)

  if self.landmarks:  
    finger_points = [4, 8, 12, 16, 20]
    counting = 0 
    for point in finger_points:
      if point == 4:
        if landmarks[4][1] > landmarks[3][1]: 
          counting += 1 
      elif landmarks[point][2] < landmarks[point-2][2]:
        counting += 1 

    if counting_img:
      h, w, c = img_list[1].shape
      img[0:w, 0:h] = img_list[counting]
    if draw:
      cv.rectangle(img, (0,200), (0,350), (0,0,255), -1)
      cv.putText(img, str(counting), (30, 170), cv.FONT_HERSHEY_COMPLEX, 1.2, (255,0,0), 20)

####Project #3 - AI Personal Trainer


In [1]:
# Update PoseDetector Module
def angle_detect(self, img, point1, point2, point3, bar=True, draw=True):
  # Get the landmarks
  x1, y1 = self.lmList[point1][1:]
  x2, y2 = self.lmList[point2][1:]
  x3, y3 = self.lmList[point3][1:]
  points = [[x1, y1], [x2, y2], [x3, y3]]

  # Calculate the Angle
  self.angle = math.degrees(math.atan2(y3 - y2, x3 - x2) -
                      math.atan2(y1 - y2, x1 - x2))
  if self.angle < 0:
    self.angle += 360

  if draw:
    cv2.line(img, (x1, y1), (x2, y2), (255, 255, 255), 3)
    cv2.line(img, (x3, y3), (x2, y2), (255, 255, 255), 3)

    for x, y in points: # loop through every point
      cv2.circle(img, (x, y), 8, (0, 0, 255), cv2.FILLED)
      cv2.circle(img, (x, y), 11, (0, 0, 255), 2)

    cv2.putText(img, str(int(angle)), (x2 - 40, y2 + 40),
                cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 0, 255), 2)
 
  return angle

In [2]:
def reps_track(self, img, bar=True, draw=True):
  global reps
  direction = 0
  per = np.interp(self.angle, (210, 310), (0,100))
  
  # count number of rep in an exercise
  # example, dumbell curls
  if per == 100 and direction == 0:
    reps += 0.5
    direction = 1
    color = (0,255,0)
  if per == 0 and direction == 1:
    reps += 0.5
    direction = 0
    color = (0,0,255)
    
  # display reps
  cv2.rectangle(img, (0, 450), (250, 720), (0, 255, 0), cv2.FILLED)
  cv2.putText(img, f'Reps:{str(self.reps)}', (45, 650),
                cv2.FONT_HERSHEY_PLAIN, 1.5, (0,0,255), 2) 
  
  if bar: # display the bar, represent action progress
    bar = np.interp(self.angle, (210, 310), (650, 100))
    cv2.rectangle(img, (1100, 100), (1175, 650), color, 3)
    cv2.rectangle(img, (1100, int(bar)), (1175, 650), color, cv2.FILLED)
    cv2.putText(img, f'{int(per)} %', (1100, 75), cv2.FONT_HERSHEY_PLAIN, 4, (0,0,255), 4)

In [3]:
cap = cv.VideoCapture('video_path')
poseDetector = poseDetector() # using pose detector module

while True:
  _, img = cap.read()
  img = cv.resize(img, 1280, 720)
  cv.waitKey(1)
  poseDetector.pose_detect(img, draw=False)
  landmarks = poseDetector.landmark_detect(img, draw=False)

  if landmarks: # not None
    poseDetector.angle_detect(img, 20, 24, 14)
    reps = 0
    reps_track(self, img)  

NameError: ignored

####Project #4 - AI Virtual Painter

In [None]:
# update hand tracking module
self.finger_tips = [4, 8, 12, 16, 20] # define tips of all fingers

def finger_up(self, img):
  fingers_up = []  # store all fingers status
  for i in range(5):
      # check if Thumb on the right
    if i == 0 and self.landmarks[self.finger_tips[0]][1] > self.landmarks[self.finger_tips[0] - 1][1]:
      fingers.append(True)
    elif self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
      fingers.append(True)
    else: # finger is not up
      fingers.append(False)

    return fingers_up

In [None]:
# using canvas to create painting board
folder_path = r'folder_path' # contain 4 images of 3 brushes and one eraser
img_paths = os.listdir(folderpath)
img_list = []
for img_path in imgg_paths:
  img = cv.read(f'{folder_path}/{img_path}')
  img_list.append(img)

In [None]:
cap = cv.CaptureVideo(0)
camera_width, camera_height = 640, 480 # define camera size
# set camera size
cap.set(3, camera_width)
cap.set(4, camera_height)
previous_time = 0


header = img_list[0] # the default img
handDetector = handDetector(0.85) # change confidence level
brush_thicknesss = 2
canvas = np.zeros(shape=(640,480,3), dtype=np.uint8) # the drawing image

while True:
  _, img = cap.read()
  
  img = cv.flip(img, 1) # flip image 
  
  landmarks = handDetector.landmarks_detect(img, draw=False)
  if landmarks: # if hand is detected
    x1, y1 = landmarks[8][1:] # tip of index finger
    x2, y2 = landmarks[12][1:] # tip of middle finger
    
    fingers_up = handDetector.finger_up(img) 

    # selection mode
    if y1 < 125:
      if fingers_up[1] and fingers_up[2]: # both index & middle are up 
        prev_x, prev_y = 0, 0 # default location of hand
        if 250 < x1 < 450: # select purple brush
          header = img_list[1] 
          draw_color = (255,0,255)
        elif 800 < x1 < 950: # select green brush
          header = img_list[2]  
          draw_color = (0,0,255)
        elif 800 < x1 < 950: # select thrid brush
          header = img_list[3]
          draw_color = (255,0,0)
        elif 1050 < x1 < 1200: # select eraser 
          header = img_list[4]
          draw_color = (0,0,0)
          eraser_thickness = 4

    if fingers_up[1] and not fingers_up[2]: # only index is up -> drawing mode

      if (prev_x and prev_y) == 0
        prev_x, prev_y = x1, y1

      if draw_color == (0,0,0): # eraser selected
        cv.circle(img,  6, (x1, y1), draw_color, -1) # a circle on index finger
        cv.line(canvas, (prev_x, prev_y), (x1, y1), draw_color, eraser_thickness)
      else:
        cv.circle(img,  6, (x1, y1), draw_color, -1) # a circle on index finger
        cv.line(canvas, (prev_x, prev_y), (x1, y1), draw_color, brush_thickness)

      prev_x, prev_y = x1, y1 # set previous to current point

    gray_canvas = cv.cvtColor(canvas, cv.COLOR_BGR2GRAY) # convert color to gray
    _, thresh = cv.threshold(gray_canvas, cv.THRESH_BINARY) # turn canvas to balck & white image
    thresh = cv.cvtColor(thresh, cv.COLOR_GRAY2BGR) # convert to bgr color
    img = cv.bitwise_and(img, thresh) # extract color appear in BOTH 
    img = cv.bitwise_or(img, canvas) # extract color appear in EITHER 

    img[0:125, 0:1280] = header # set the header image on-top
    cv.imshow('Image', img)
    cv.imshow('Canvas', canvas)
    
  cv.waitKey(1)


#### Project #5 - AI Virtual Mouse
description: simply simulate mouse behavior by hand with previous built-in hand tracking module

In [None]:
!pip install autopy

In [None]:
import autopy

In [None]:
cap = cv.CaptureVideo(0)
camera_width, camera_height = 640, 480 # define camera size
screen_width, screen_height = autopy.screen.size() # size of screen
reduction = 100 # reduce frame to track mouse movement
smooth_val = 7 # smoothing value to slow

# set camera size
cap.set(3, camera_width)
cap.set(4, camera_height)
previous_time = 0
# using hand tracking module
handDetector = handDetector()
prev_x, prev_y = 0, 0
cur_x, cur_y = 0, 0

while True:
  _, img = cap.read()
  handDetector.hand_detect(img, draw=False)
  landmarks = handDetector.landmarks_detect(img, draw=False)

  if landmarks:
    x1, y1 = landmarks[8][1:] # index finger
    x2, y2 = landmarks[12][1:] # middle finger
    fingers_up - handDetector.finger_up() # list of raising fingers

    if fingers_up[1] and fingers_up[2]: # clicking mode
      # check distance with class function
      distance, _ , locations = find_distance(img, 8, 12, draw=False)
      midpoint = locations[4:]

      if distance < 40: # small distance
        # cv.circle(img, (x1, y1), 10, (0,0,255), -1)
        autopy.mouse.click() # click



    if fingers_up[1]: # moving mode: only index finger is up
      # w, h of mouse tracking area
      track_w, track_h = camera_width - reduction, camera_height - reduction

      # draw a rectangle around the mouse tracking area
      cv.rectangle(img, (reduction, reduction), (track_w, track_h), (0,0,255), 2)
      cv.circle(img, (x1, y1), 10, (0,0,255), -1)

      # convert coordinates to screen size
      cx = np.interp(x1, [0, track_w], [0, screen_width])
      cy = np.interp(y1, [0, track_h], [0, screen_height])
      
      cur_x = prev_x + (cx - prev_x) / smooth_val
      cur_y = prev_y + (cy - prev_y) / smooth_val

      # move the mouse
      autopy.mouse.move(screen_width-cur_x, cur_y) # flip it, when we move to the right it go to the right 

      # update previous location of x, y
      prev_x, prev_y = cur_x, cur_y

  # frame rate
  current_time = time.time()
  fps = 1/(current_time-previous_time)
  previous_time = current_time
  # display fps
  cv.putText(img, f'FPS: {str(int(fps))}', org=(10, 50), fontFace=cv.FONT_HERSHEY_COMPLEX, 
             fontScale=1.2, color=(255,0,0), thickness=2)
  cv.waitKey(1)

  cv.waitKey(1)
