# Importing Libraries

#### pycaw interacts with the Windows Core Audio APIs to manage audio devices and audio sessions.

In [1]:
pip install pycaw

Note: you may need to restart the kernel to use updated packages.


#### cv2 for OpenCV functions

#### mediapipe for hand-landmark detection and tracking

#### math and time for mathematical operations and time-related functions

#### numpy for numerical operations

#### ctypes and comtypes for working with Windows COM interfaces

#### pycaw for audio endpoint volume control.

In [2]:
import cv2
import mediapipe as mp
import math
import time
import numpy as np
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

This `HandDetector` class utilizes MediaPipe and OpenCV to detect and track hand landmarks in an image, draw landmarks, determine hand orientation (left/right), identify which fingers are up, and calculate distances between specific hand landmarks.

In [3]:

class HandDetector:
    """Detect and track hand landmarks with MediaPipe Hands and OpenCV.

    Typical use is ``findHands(img)`` followed by ``findPosition(img)``;
    ``fingersUp``, ``findDistance`` and ``handType`` then work on the pixel
    landmarks that ``findPosition`` stored in ``self.lmList``.

    Attributes:
        mode, maxHands, detectionCon, minTrackCon: The constructor arguments.
        mpHands, hands, mpDraw: MediaPipe hands module, the ``Hands`` instance
            built from it, and the MediaPipe drawing utilities.
        tipIds: Landmark indices of the five finger tips
            (thumb, index, middle, ring, little).
        fingers: Empty list created by the constructor (not updated by any
            method of this class).
        lmList: ``[x, y]`` pixel coordinates of the hand selected by the last
            ``findPosition`` call.
        results: Raw result of the last ``hands.process`` call, or ``None``
            before ``findHands`` has run.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """Create the MediaPipe ``Hands`` solution.

        Args:
            mode: Whether to use static image mode.
            maxHands: Maximum number of hands to detect.
            detectionCon: Minimum confidence for hand detection.
            minTrackCon: Minimum confidence for hand tracking.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        # Keyword arguments on purpose: the positional order of Hands() is not
        # the same across MediaPipe releases (newer ones insert
        # ``model_complexity`` before the confidence thresholds), so passing
        # positionally can put ``detectionCon`` into the wrong parameter.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.minTrackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
        self.fingers = []
        self.lmList = []
        # Defined up front so the query methods are safe before findHands().
        self.results = None

    def _detectedHands(self):
        """Return the landmark sets of the last ``findHands`` call ([] if none)."""
        results = getattr(self, "results", None)
        return getattr(results, "multi_hand_landmarks", None) or []

    def findHands(self, img, draw=True):
        """Run hand detection on ``img`` and optionally draw the landmarks.

        Args:
            img: Image in BGR format (it is converted to RGB for MediaPipe).
            draw: Whether to draw landmarks and connections onto ``img``.

        Returns:
            ``img`` itself, drawn on in place when hands were found and
            ``draw`` is true.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw:
            for handLms in self._detectedHands():
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Convert one detected hand's landmarks to pixel coordinates.

        Args:
            img: Image the hand was detected in; only its height and width
                are read, and it is drawn on when ``draw`` is true.
            handNo: Index of the hand to process when several were detected.
            draw: Whether to draw the landmarks and a padded bounding box.

        Returns:
            ``(lmList, bboxInfo)``. ``lmList`` is a list of ``[x, y]`` pixel
            coordinates, one per landmark (also stored in ``self.lmList``).
            ``bboxInfo`` is a dict with keys ``"id"`` (index of the last
            landmark processed, kept for backward compatibility), ``"bbox"``
            (``(xmin, ymin, width, height)``) and ``"center"`` (``(cx, cy)``).
            When no hand is available, or ``handNo`` does not refer to a
            detected hand, both are empty lists.
        """
        self.lmList = []
        bboxInfo = []

        hands = self._detectedHands()
        # Negative indices keep working as they did with plain list indexing.
        if not -len(hands) <= handNo < len(hands):
            return self.lmList, bboxInfo

        # Only height/width are needed, so 2-D (grayscale) images work too.
        h, w = img.shape[:2]
        for lm in hands[handNo].landmark:
            # Landmark coordinates are scaled by the image size to get pixels.
            px, py = int(lm.x * w), int(lm.y * h)
            self.lmList.append([px, py])
            if draw:
                cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)

        if not self.lmList:
            return self.lmList, bboxInfo

        xList = [point[0] for point in self.lmList]
        yList = [point[1] for point in self.lmList]
        xmin, xmax = min(xList), max(xList)
        ymin, ymax = min(yList), max(yList)
        boxW, boxH = xmax - xmin, ymax - ymin
        bbox = xmin, ymin, boxW, boxH
        cx, cy = xmin + boxW // 2, ymin + boxH // 2
        bboxInfo = {"id": len(self.lmList) - 1, "bbox": bbox, "center": (cx, cy)}

        if draw:
            # The drawn rectangle is padded by 20 px on every side.
            cv2.rectangle(img, (xmin - 20, ymin - 20), (xmax + 20, ymax + 20),
                          (0, 255, 0), 2)

        return self.lmList, bboxInfo

    def fingersUp(self):
        """Report which fingers of the current hand are raised.

        Uses ``self.lmList`` (set by ``findPosition``). The thumb is judged by
        comparing the x-coordinate of its tip with the landmark just before
        it, with the direction depending on ``handType()``; the other four
        fingers are judged by comparing the y-coordinate of the tip with the
        landmark two indices before it.

        Returns:
            A list of five ints (1 = raised, 0 = not raised) in the order
            thumb, index, middle, ring, little. An empty list when no hand is
            detected or ``findPosition`` has not produced a full landmark
            list yet.
        """
        if not self._detectedHands() or len(self.lmList) <= max(self.tipIds):
            return []

        thumbTip = self.tipIds[0]
        tipX = self.lmList[thumbTip][0]
        jointX = self.lmList[thumbTip - 1][0]
        if self.handType() == "Right":
            fingers = [int(tipX > jointX)]
        else:
            fingers = [int(tipX < jointX)]

        # Image y grows downwards, so a smaller y means the tip is higher.
        for tip in self.tipIds[1:5]:
            fingers.append(int(self.lmList[tip][1] < self.lmList[tip - 2][1]))
        return fingers

    def findDistance(self, p1, p2, img, draw=True):
        """Measure the pixel distance between two landmarks of the current hand.

        Args:
            p1: Index of the first landmark in ``self.lmList``.
            p2: Index of the second landmark in ``self.lmList``.
            img: Image to draw on when ``draw`` is true.
            draw: Whether to draw circles on both landmarks and their
                midpoint, and a line between the landmarks.

        Returns:
            ``(length, img, [x1, y1, x2, y2, cx, cy])`` where ``length`` is
            the Euclidean distance and ``(cx, cy)`` the integer midpoint, or
            ``None`` when no hand is detected or ``self.lmList`` is empty.
        """
        if not self._detectedHands() or not self.lmList:
            return None

        x1, y1 = self.lmList[p1][0], self.lmList[p1][1]
        x2, y2 = self.lmList[p2][0], self.lmList[p2][1]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        if draw:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        length = math.hypot(x2 - x1, y2 - y1)
        return length, img, [x1, y1, x2, y2, cx, cy]

    def handType(self):
        """Classify the current hand as right or left.

        Compares the x-coordinates of landmarks 17 and 5 in ``self.lmList``.
        NOTE(review): the result presumably assumes a particular image
        orientation (mirrored vs. not) and a palm-facing camera - confirm
        against the caller.

        Returns:
            ``"Right"`` when landmark 17 lies left of landmark 5 in the
            image, otherwise ``"Left"``; ``None`` when no hand is detected or
            the landmark list is incomplete.
        """
        if not self._detectedHands() or len(self.lmList) <= 17:
            return None
        return "Right" if self.lmList[17][0] < self.lmList[5][0] else "Left"


In [4]:
def main():
    """Control the system master volume with a thumb-index pinch gesture.

    Input:
        Frames from the default webcam (``cv2.VideoCapture(0)``) and, per
        frame, the hand landmarks found by ``HandDetector``.

    Output:
        - A window titled "hands" showing the mirrored camera feed with hand
          landmarks, the thumb-index distance indicator, a volume bar with a
          percentage, and the current FPS.
        - The master volume of the default speakers, set from the distance
          between the thumb tip (landmark 4) and index tip (landmark 8).

    The loop ends when "q" is pressed or the camera stops delivering frames.
    """
    # Pinch distance (pixels) mapped to minimum / maximum volume.
    minLength, maxLength = 50, 300
    # Vertical pixel range of the on-screen volume bar (bottom, top).
    barBottom, barTop = 400, 150

    cap = cv2.VideoCapture(0)
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

        # NOTE(review): detectionCon=0 is kept exactly as it was; confirm how
        # HandDetector forwards it to MediaPipe before tuning this value.
        detector = HandDetector(detectionCon=0)

        devices = AudioUtilities.GetSpeakers()
        interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
        volume = cast(interface, POINTER(IAudioEndpointVolume))
        volRange = volume.GetVolumeRange()
        minVol, maxVol = volRange[0], volRange[1]

        pTime = 0
        while True:
            success, img = cap.read()
            if not success:
                # No frame (camera missing or disconnected): stop cleanly
                # instead of passing None to OpenCV.
                break

            # Mirror the frame so on-screen motion matches the user's hand.
            img = cv2.flip(img, 1)
            img = detector.findHands(img)
            lmList, _ = detector.findPosition(img)

            if lmList:
                length, _, _ = detector.findDistance(4, 8, img)
                cursor = lmList[8]

                # np.interp clamps outside [minLength, maxLength].
                span = [minLength, maxLength]
                vol = np.interp(length, span, [minVol, maxVol])
                volBar = np.interp(length, span, [barBottom, barTop])
                volPer = np.interp(length, span, [0, 100])
                # The level is in the units of GetVolumeRange (decibels per
                # the Windows Core Audio documentation).
                volume.SetMasterVolumeLevel(vol, None)

                if length < minLength:
                    # Fingers pinched together: mark the index tip in green.
                    cv2.circle(img, (cursor[0], cursor[1]), 15, (0, 255, 0), cv2.FILLED)

                # Draw the volume bar
                cv2.rectangle(img, (50, barTop), (85, barBottom), (255, 0, 0), 3)
                cv2.rectangle(img, (50, int(volBar)), (85, barBottom), (255, 0, 0), cv2.FILLED)
                cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 3)

            cTime = time.time()
            elapsed = cTime - pTime
            # Two frames can share a timestamp on coarse clocks.
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime

            cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 3)
            cv2.imshow("hands", img)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always free the camera and close the window, even on errors.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
