In [7]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
import pandas as pd
import os

DEMO_LENGTH = 10000

In [None]:
# Smoke test: grab up to 100 webcam frames and show them in a preview window.

cap = cv2.VideoCapture(0)

try:
	if not cap.isOpened():
		# Without this check every read() below fails and the loop only
		# prints the same failure message 100 times.
		print("Couldn't open webcam\n")
	else:
		for _ in range(100):
			res, frame = cap.read()
			if res:
				cv2.imshow("frame", frame)
				# waitKey gives the GUI event loop time to actually paint the frame
				cv2.waitKey(1)
			else:
				print("Couldn't grab frame\n")
finally:
	# Always close the window and free the camera, even if the cell is
	# interrupted, so the next cell can open device 0 again.
	cv2.destroyAllWindows()
	cap.release()

In [8]:
# Build the MediaPipe Hands objects and open the video stream used by later cells

# MediaPipe module aliases (hand model, drawing helpers, default drawing styles)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Hand tracker limited to a single hand, with 0.75 detection/tracking confidence thresholds
hands = mp_hands.Hands(
	max_num_hands=1,
	min_detection_confidence=0.75,
	min_tracking_confidence=0.75,
)

# Video stream from device 0. Note, you need a webcam to be plugged in
cap = cv2.VideoCapture(0)

In [9]:
# Helper function for drawing the landmarks on the screen

def drawHands(results, image):
	"""Overlay the detected hand landmarks on a frame.

	The frame is converted with ``cv2.COLOR_RGB2BGR`` first; each entry of
	``results.multi_hand_landmarks`` is then passed to
	``mp_drawing.draw_landmarks`` together with the converted frame, the hand
	connections and the default landmark/connection styles.

	Returns:
		tuple: ``(status, image)`` where status is 0 when
		``results.multi_hand_landmarks`` is non-empty and -1 otherwise, and
		image is the converted frame in both cases.
	"""
	bgr_frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	detected_hands = results.multi_hand_landmarks

	# Nothing to draw: report failure but still hand back the converted frame
	if not detected_hands:
		return -1, bgr_frame

	for single_hand in detected_hands:
		mp_drawing.draw_landmarks(
			bgr_frame,
			single_hand,
			mp_hands.HAND_CONNECTIONS,
			mp_drawing_styles.get_default_hand_landmarks_style(),
			mp_drawing_styles.get_default_hand_connections_style())

	return 0, bgr_frame

In [11]:
# Demo of capturing landmarks: keeps the hands.process results of every frame
# in which a hand was detected. Press ESC in the preview window to stop early.
landmark_list = []

try:
	for _ in range(DEMO_LENGTH):
		res, frame = cap.read()
		if not res:
			print("Couldn't grab frame, continuing to next iteration")
			continue

		# Convert to RGB (opencv is natively BGR)
		frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
		# Get hand landmarks
		results = hands.process(frame)
		# Draw landmarks on the frame; stat is 0 only when a hand was found
		stat, image = drawHands(results, frame)
		if stat == 0:
			landmark_list.append(results)

		# Refresh the preview and poll the keyboard on EVERY frame. Skipping
		# this when no hand is visible froze the window and made ESC unusable
		# until a hand came back into view.
		cv2.imshow("Landmarks", cv2.flip(image, 1))
		if cv2.waitKey(5) & 0xFF == 27:
			# Exit demo
			break
finally:
	# cap is intentionally left open: later cells keep reading from it and it
	# is released once data collection is finished.
	cv2.destroyAllWindows()

In [None]:
# Peek at what was captured: the landmarks of the first stored result,
# followed by how many results were stored in total

first_result = landmark_list[0]
print(first_result.multi_hand_landmarks)

captured_count = len(landmark_list)
print(captured_count)

In [27]:
# Function to create tabular data given landmarks

def tabulate_training(class_name, landmark_list):
	"""Flatten hand-landmark results into a table with one labelled row per hand.

	Args:
		class_name: label stored in the "class" column of every row.
		landmark_list: iterable of results exposing ``multi_hand_landmarks``;
			each hand in it exposes ``landmark``, a sequence of points with
			``x``, ``y`` and ``z`` attributes.

	Returns:
		pandas.DataFrame: columns ``class, x0, y0, z0, x1, y1, z1, ...`` in
		landmark order, with a fresh 0..n-1 index. Empty when there is nothing
		to tabulate.
	"""
	# Collect plain dict rows and build the frame once at the end:
	# DataFrame.append was removed in pandas 2.0 and copied the whole frame
	# on every call.
	records = []
	for res in landmark_list:
		# multi_hand_landmarks may be None/empty (drawHands treats that as
		# "no hand"); such results simply contribute no rows.
		for lms in res.multi_hand_landmarks or ():
			row = {"class": class_name}
			for idx, lm in enumerate(lms.landmark):
				row[f"x{idx}"] = lm.x
				row[f"y{idx}"] = lm.y
				row[f"z{idx}"] = lm.z
			records.append(row)
	return pd.DataFrame(records)

In [95]:
# Test data tabulation on the results captured by the demo cell.
# tabulate_training needs the landmark list as its second argument; calling it
# with only the class name raises a TypeError.

df = tabulate_training("stop", landmark_list)
# head() shows the column names together with the first rows
df.head()

Unnamed: 0,class,x0,y0,z0,x1,y1,z1,x2,y2,z2,...,z17,x18,y18,z18,x19,y19,z19,x20,y20,z20
0,stop,0.25226,0.741342,0.0,0.333063,0.721361,-0.038322,0.393355,0.652485,-0.070456,...,-0.124288,0.15372,0.514585,-0.167335,0.144373,0.453468,-0.186584,0.139733,0.397697,-0.199893
1,stop,0.254086,0.740715,0.0,0.339084,0.700779,-0.028755,0.402925,0.624416,-0.051615,...,-0.095957,0.145521,0.470683,-0.129994,0.129839,0.408852,-0.147285,0.121764,0.350553,-0.159227
2,stop,0.259596,0.734767,0.0,0.34505,0.688579,-0.027602,0.410532,0.606795,-0.047735,...,-0.090573,0.143592,0.443679,-0.123582,0.125729,0.383944,-0.140936,0.115205,0.327756,-0.15264
3,stop,0.259695,0.720178,0.0,0.348577,0.677222,-0.032187,0.415211,0.593063,-0.053567,...,-0.092355,0.140997,0.421667,-0.127465,0.121394,0.361382,-0.14622,0.108424,0.304768,-0.158666
4,stop,0.272157,0.710538,0.0,0.359213,0.666697,-0.035065,0.424839,0.579234,-0.058048,...,-0.096894,0.143589,0.40769,-0.133802,0.122728,0.346034,-0.154512,0.10851,0.288053,-0.168556


In [13]:
# Capture training data for a particular class
def capture_training_data():
	"""Capture hand-landmark results from the webcam for one gesture class.

	Reads up to ``DEMO_LENGTH`` frames from the module-level ``cap``, runs each
	one through ``hands.process`` and keeps the result whenever ``drawHands``
	reports a hand (status 0). Every successfully grabbed frame is shown
	mirrored in the "Landmarks" window; pressing ESC (key code 27) stops the
	capture early.

	Returns:
		list: the ``hands.process`` results of the frames in which a hand was
		detected, in capture order.
	"""
	landmark_list = []
	try:
		for _ in range(DEMO_LENGTH):
			res, frame = cap.read()
			if not res:
				print("Couldn't grab frame, continuing to next iteration")
				continue

			# Convert to RGB (opencv is natively BGR)
			frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
			# Get hand landmarks
			results = hands.process(frame)
			# Draw landmarks on the frame; stat is 0 only when a hand was found
			stat, image = drawHands(results, frame)
			if stat == 0:
				landmark_list.append(results)

			# Refresh the preview and poll the keyboard on EVERY frame.
			# Skipping this when no hand is visible froze the window and made
			# ESC unusable until a hand came back into view.
			cv2.imshow("Landmarks", cv2.flip(image, 1))
			if cv2.waitKey(5) & 0xFF == 27:
				# Exit capture
				break
	finally:
		# Close the preview even if the capture is interrupted
		cv2.destroyAllWindows()
	return landmark_list

In [28]:
# Record webcam landmarks for the "stop" command and tabulate them
stop_landmark_list = capture_training_data()
stp_df = tabulate_training(
	class_name="stop",
	landmark_list=stop_landmark_list,
)
stp_df.head()
# Number of rows captured for this class
len(stp_df)

In [31]:
# Record webcam landmarks for the "scroll_down" command and tabulate them
scroll_down_landmark_list = capture_training_data()
scr_dwn_df = tabulate_training(
	class_name="scroll_down",
	landmark_list=scroll_down_landmark_list,
)
scr_dwn_df.head()
# Number of rows captured for this class
len(scr_dwn_df)

In [34]:
# Record webcam landmarks for the "scroll_up" command and tabulate them
scroll_up_landmark_list = capture_training_data()
scr_up_df = tabulate_training(
	class_name="scroll_up",
	landmark_list=scroll_up_landmark_list,
)
scr_up_df.head()
# Number of rows captured for this class
len(scr_up_df)

In [37]:
# Record webcam landmarks for the "window_right" command and tabulate them
wnd_right_landmark_list = capture_training_data()
wnd_right_df = tabulate_training(
	class_name="window_right",
	landmark_list=wnd_right_landmark_list,
)
wnd_right_df.head()
# Number of rows captured for this class
len(wnd_right_df)

In [40]:
# Record webcam landmarks for the "power_off" (turn off pc) command and tabulate them
power_off_landmark_list = capture_training_data()
power_off_df = tabulate_training(
	class_name="power_off",
	landmark_list=power_off_landmark_list,
)
power_off_df.head()
# Number of rows captured for this class
len(power_off_df)

In [57]:
# Done grabbing training data: release the video capture that the collection
# cells above were reading frames from.

cap.release()

In [54]:
# Stitch the per-gesture dataframes together into one training table

# DataFrame.append was removed in pandas 2.0; a single concat builds the same
# combined frame (rows in the same order, fresh 0..n-1 index) in one pass.
overall_df = pd.concat(
	[stp_df, scr_dwn_df, scr_up_df, wnd_right_df, power_off_df],
	ignore_index=True,
)

In [56]:
# Inspect the combined dataframe

overall_df.shape[0]
overall_df.head()
overall_df.tail()

# Sanity check: the per-class row counts must add up to the combined row count
per_class_rows = [
	stp_df.shape[0],
	scr_dwn_df.shape[0],
	scr_up_df.shape[0],
	wnd_right_df.shape[0],
	power_off_df.shape[0],
]
print(sum(per_class_rows) == overall_df.shape[0])

True


In [58]:
# Write training data to csv
training_csv_path = "../data/training.csv"
# to_csv does not create missing directories, so make sure ../data exists first
os.makedirs(os.path.dirname(training_csv_path), exist_ok=True)
# NOTE(review): the row index is written as an unnamed first column, which
# presumably comes back as "Unnamed: 0" when the file is re-read. Consider
# index=False once the readers of this file are confirmed not to rely on it.
overall_df.to_csv(training_csv_path)