# Using Media Pipe to detect hands and create models

In [1]:
import mediapipe as mp
import cv2
import numpy as np
import uuid
import os
import csv
import time
import uuid
%matplotlib inline

## Using drawing_utils and solutions.hands for detecting and outlining hands

In [2]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [3]:
OUTPUT_DATA_DIR = 'data'
KEYPOINTS_FNAME = 'keypoints'
DIRECTION = {
    'up': 0,
    'down': 1,
    'back': 2,
    'forward': 3,
    'left': 4,
    'right': 5
}

## Defining Keypoints

Keypoints is used to store Mediapipe hand landmarks temporary before storing it in a keypoints.csv file

![Landmarks by media pipe](https://google.github.io/mediapipe/images/mobile/hand_landmarks.png)

> “Hands,” mediapipe. https://google.github.io/mediapipe/solutions/hands.html.


Keypoints store data as [sample1, sample2, sample3,..., sampleN]<br />
where<br />
```
sample1 = [label, x0, y0, x1, y1, ..., x21, y21] 
```

In [4]:
keypoints = []

In [5]:
# landmark = results.multi_hand_landmarks[0]
def extract_keypoints(landmark, direction):
    if not landmark: return
    temp_list = [DIRECTION[direction]]
    for data_point in enumerate(landmark[0].landmark):
        temp_list.extend(
            [data_point[1].x, data_point[1].y]
        )
    
    keypoints.append(temp_list)


## Creating hand connection using add_hands_connection()

### <p>Mediapipe works with RGB not BGR <br />

```
  frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
```
</p>

### Writeable = False
<p>Turn off writeable so image is passed as reference into hands.process so it can be processed faster</p>


In [6]:
def add_hands_connection(frame, hands):
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
        for num, hand in enumerate(results.multi_hand_landmarks):
            mp_drawing.draw_landmarks(
                image, hand, 
                mp_hands.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(121,22,78), thickness=2, circle_radius=4),
                mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2),
            )
    return image, results

In [7]:
def write_text_on_video(frame, index, number_img, direction):
    index += 1
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(frame,
               f"{direction}: {index}/{number_img}",
               (50, 50),
                font, 1, 
                (255, 255, 255),
                2,
                cv2.LINE_4)

## Using solutions.hands.Hands

### min_detection_confidence 
> threshold for initial hand detection to successful 0.8 means 80% chance of succefully detecting hand
### min_tracking_confidence 
> threshold to track detected hands




In [8]:
def creating_keypoint_data(number_img=10):
    cap = cv2.VideoCapture(0)
    for key in DIRECTION:
        capturing_image_idx = 0
        print(f"Capturing for {key}")
        
        last_recorded_time = time.time()
        with mp_hands.Hands(min_detection_confidence=0.9, min_tracking_confidence=0.7) as hands:
                while cap.isOpened():  
                    if (capturing_image_idx >= number_img): break
                    ret, frame = cap.read()
                    image, results = add_hands_connection(frame, hands)
                    image = cv2.flip(image, 1)
                    write_text_on_video(image, 
                                    capturing_image_idx, 
                                    number_img, 
                                    key)
                    cv2.imshow("Hand tacking", image)
                    curr_time = time.time()
                    if (curr_time - last_recorded_time) >= 3:
                        capturing_image_idx += 1
                        print(f" \
                              \tcapturing image ->>>> \
                              {capturing_image_idx} of {key}\n")
                        landmark_result = results.multi_hand_landmarks
                        extract_keypoints(landmark_result, key) 
                        last_recorded_time = curr_time
                        
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                            break

    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)
            

In [9]:
creating_keypoint_data(20)

Capturing for up


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


                               	capturing image ->>>>                               1 of up

Capturing for down
Capturing for back
Capturing for forward
Capturing for left
Capturing for right


In [24]:
unique_id = uuid.uuid4().hex

In [25]:
if not os.path.isdir(OUTPUT_DATA_DIR):
    os.mkdir(OUTPUT_DATA_DIR)
    
path_to_csv = os.path.join(OUTPUT_DATA_DIR, f"{KEYPOINTS_FNAME}_{unique_id}.csv")    
with open(path_to_csv, 'w') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows(keypoints)

In [1]:
import os
os.rename(path_to_csv, os.path.join(OUTPUT_DATA_DIR, f"{KEYPOINTS_FNAME".csv}))

NameError: name 'path_to_csv' is not defined