Author: Kyle Herbruger
Date: 10/17/2023

Most of the hand tracking code comes from: https://www.section.io/engineering-education/creating-a-hand-tracking-module/

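This program tracks the hands of the user on camera and saves the landmark data to .csv files (separate X, Y and Z files for the left and the right hand, plus a landmark-ID file).
When no hand is detected in a frame, the hand location is written as 0 so that the time scale stays accurate.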

In [1]:
import cv2
import mediapipe as mp
import numpy as np
import sys
import matplotlib.pyplot as plt
import time

# Used to convert protobuf message to a dictionary. 
from google.protobuf.json_format import MessageToDict

In [2]:
class handTracker():
    """Small wrapper around MediaPipe Hands for detecting and reading hand landmarks.

    Call ``handsFinder`` on a frame first; it stores the detection result in
    ``self.results``. ``positionFinder`` then reads the landmarks of one
    detected hand from that stored result.
    """

    # Scale factors applied to the landmark x/y/z values reported by MediaPipe
    # (x and y are documented by MediaPipe as normalized to the image size).
    # NOTE(review): 1920x1080 is hard-coded rather than taken from the frame,
    # so the drawn marker only lines up on 1920x1080 input -- confirm the
    # source videos use that resolution.
    X_SCALE = 1920.0
    Y_SCALE = 1080.0
    Z_SCALE = 1000.0

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5,modelComplexity=1,trackCon=0.5):
        """Create the MediaPipe Hands solution.

        Args:
            mode: Passed as ``static_image_mode``.
            maxHands: Passed as ``max_num_hands``.
            detectionCon: Passed as ``min_detection_confidence``.
            modelComplexity: Passed as ``model_complexity``.
            trackCon: Passed as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.modelComplex = modelComplexity
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        # Keyword arguments keep each setting bound to the intended option
        # even if the positional order of Hands() differs between releases.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplex,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Populated by handsFinder(); None until the first frame is processed.
        self.results = None

    def handsFinder(self,image,draw=True):
        """Run hand detection on a BGR frame and optionally draw the skeletons.

        Args:
            image: BGR frame (as read by OpenCV).
            draw: When true, draw landmarks and connections onto ``image``.

        Returns:
            The same ``image`` object (annotated in place when ``draw`` is true).
        """
        imageRGB = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imageRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(image, handLms, self.mpHands.HAND_CONNECTIONS)
        return image

    def positionFinder(self,image, handNo=0, draw=True):
        """Return the scaled landmarks of one hand from the last ``handsFinder`` call.

        Args:
            image: Frame to draw on; only used when ``draw`` is true.
            handNo: Index into the detected hands.
            draw: When true, draw a filled pink circle on the last landmark
                (the pinky tip, per the original comment in this file).

        Returns:
            A list of ``[index, x, y, z]`` integer entries, one per landmark.
            Empty when no detection has run yet, no hand was detected, or
            ``handNo`` does not refer to a detected hand.
        """
        results = getattr(self, 'results', None)
        hands = results.multi_hand_landmarks if results is not None else None
        if not hands:
            return []
        try:
            hand = hands[handNo]
        except IndexError:
            # Fewer hands were detected than the caller asked for.
            return []

        lmlist = [
            [
                index,
                int(lm.x * self.X_SCALE),
                int(lm.y * self.Y_SCALE),
                int(lm.z * self.Z_SCALE),
            ]
            for index, lm in enumerate(hand.landmark)
        ]

        if draw and lmlist:
            _, cx, cy, _ = lmlist[-1]
            cv2.circle(image,(cx,cy), 15 , (255,0,255), cv2.FILLED)

        return lmlist

In [3]:
def _format_mmss(seconds):
    """Format a duration in seconds as ``M:SS`` (seconds zero-padded)."""
    minutes, secs = divmod(int(seconds), 60)
    return f'{minutes}:{secs:02d}'


def _split_hands_by_label(tracker, image):
    """Return ``(left, right)`` landmark lists for the frame last processed by ``tracker``.

    Each detected hand is placed according to its handedness label. If two
    hands carry the same label, the second one fills the remaining slot so
    that no detection is dropped. A missing hand is returned as ``[]``.
    """
    left, right = [], []
    for hand_no, handedness in enumerate(tracker.results.multi_handedness or []):
        label = MessageToDict(handedness)['classification'][0]['label']
        landmarks = tracker.positionFinder(image, hand_no)
        if label == 'Left' and not left:
            left = landmarks
        elif label == 'Right' and not right:
            right = landmarks
        elif not left:
            left = landmarks
        elif not right:
            right = landmarks
    return left, right


def _landmarks_to_array(frames, num_rows):
    """Pack per-frame landmark lists into an int array of shape (num_rows, 21, 4).

    Frames with no landmarks (``[]``) stay all-zero, which keeps one row per
    frame and therefore preserves the time scale.
    """
    arr = np.zeros((num_rows, 21, 4), dtype=int)
    for row, landmarks in enumerate(frames):
        if landmarks:
            arr[row, :len(landmarks)] = landmarks
    return arr


def main(song, side):
    """Track both hands through one video and write the landmarks to CSV files.

    Reads ``./TriCam/<SONG>/<song>_cut_<side>.mp4`` frame by frame, mirrors
    each frame, runs the hand tracker on it, and records the landmarks of the
    left and right hand per frame (zeros when a hand is not detected).
    Progress and an ETA are printed every 500 frames.

    Output files: ``handIDNum.csv`` in the working directory (landmark ids of
    the left hand) and ``./TriCam/<SONG>/<song>_<side>_{L,R}{X,Y,Z}.csv``.

    Args:
        song: Song identifier used in the folder (upper-cased) and file names.
        side: Camera side used in the file names.

    Raises:
        FileNotFoundError: If the video cannot be opened.
    """
    fileLoc = f'./TriCam/{song.upper()}/{song}'
    videoPath = f'./TriCam/{song.upper()}/{song}_cut_{side}.mp4'
    cap = cv2.VideoCapture(videoPath)
    if not cap.isOpened():
        raise FileNotFoundError(f'Could not open video: {videoPath}')

    fps = cap.get(cv2.CAP_PROP_FPS)
    numFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    tracker = handTracker()
    # Some containers report 0 fps; avoid dividing by zero in that case.
    duration = numFrames / fps if fps > 0 else 0
    print(f'{song}_{side}: {_format_mmss(duration)} ({fps} fps)')

    timer = int(numFrames) - 5 # Roughly how many frames there are to check in the video
    t = timer
    lmListLeft_CSV = []
    lmListRight_CSV = []

    # Clock to show user on time progress/left/taken
    clock_start = time.time()
    try:
        # Loop over the video frames and process each one
        while t > 0:
            success, image = cap.read()
            if not success:
                # The reported frame count can overshoot; stop instead of
                # passing an empty frame to OpenCV. The remaining rows stay
                # zero because the output arrays are sized from `timer`.
                print(f'   Could not read frame; stopping with {t} frames left.')
                break

            image = cv2.flip(image, 1)
            image = tracker.handsFinder(image)

            left, right = _split_hands_by_label(tracker, image)
            lmListLeft_CSV.append(left)
            lmListRight_CSV.append(right)

            cv2.waitKey(1)
            t -= 1
            if t % 500 == 0:
                # Updates user on time taken, progress, and eta for completion.
                time_ran = time.time() - clock_start
                perc_done = ((timer - t) * 100) / timer
                time_left_guess = time_ran / perc_done * (100 - perc_done)

                print(f'   Time: {_format_mmss(time_ran)} ({perc_done:.2f}%)')
                print(f'    ETA: {_format_mmss(time_left_guess)}')
    finally:
        cap.release()
        cv2.destroyAllWindows()
    print(f'   Took {_format_mmss(time.time() - clock_start)}.')

    # One row per frame, plus the extra trailing row the original layout had.
    npArrLeft = _landmarks_to_array(
        lmListLeft_CSV, max(len(lmListLeft_CSV), timer) + 1)
    npArrRight = _landmarks_to_array(
        lmListRight_CSV, max(len(lmListRight_CSV), timer) + 1)

    # Landmark ids of the left hand; written to the working directory and
    # overwritten on every call.
    np.savetxt('handIDNum.csv', npArrLeft[:,:,0], delimiter=',')
    # Columns 1..3 of each landmark entry are x, y, z.
    for hand_prefix, arr in (('L', npArrLeft), ('R', npArrRight)):
        for column, axis in enumerate('XYZ', start=1):
            np.savetxt(f'{fileLoc}_{side}_{hand_prefix}{axis}.csv',
                       arr[:, :, column], delimiter=',', fmt='%d')

In [4]:
if __name__ == "__main__":
    # Process every song from every camera side, reporting the time per
    # video and the total time for the whole batch.
    songs = ['wu2s','drwn','strfl']
    sides = ['left','right','center']
    time_top_start = time.time()
    for song in songs:
        for side in sides:
            top_proc_start = time.time()
            main(song, side)
            proc_min, proc_sec = divmod(int(time.time() - top_proc_start), 60)
            print(f'Processed {song}_{side}. Took {proc_min}:{proc_sec:02d}')
    # Elapsed time is now minus start (the reverse order yields a negative value).
    total_min, total_sec = divmod(int(time.time() - time_top_start), 60)
    print(f'Processed all songs. Took {total_min}:{total_sec:02d}')
    print("Done XD")

wu2s_left: 3:28 (30.0 fps)
   Time: 0:21 (4.09%)
    ETA: 8:28
   Time: 1:0 (12.08%)
    ETA: 7:16
   Time: 1:38 (20.08%)
    ETA: 6:32
   Time: 2:16 (28.07%)
    ETA: 5:50
   Time: 3:0 (36.06%)
    ETA: 5:20
   Time: 3:39 (44.05%)
    ETA: 4:38
   Time: 4:19 (52.05%)
    ETA: 3:58
   Time: 4:59 (60.04%)
    ETA: 3:19
   Time: 5:38 (68.03%)
    ETA: 2:39
   Time: 6:19 (76.02%)
    ETA: 1:59
   Time: 6:59 (84.02%)
    ETA: 1:19
   Time: 7:39 (92.01%)
    ETA: 0:39
   Time: 8:20 (100.00%)
    ETA: 0:0
   Took 8:20.
Processed wu2s_left. Took -8:38
wu2s_right: 3:28 (30.0 fps)
   Time: 0:21 (4.09%)
    ETA: 8:33
   Time: 1:2 (12.08%)
    ETA: 7:33
   Time: 1:41 (20.08%)
    ETA: 6:42
   Time: 2:19 (28.07%)
    ETA: 5:58
   Time: 2:58 (36.06%)
    ETA: 5:16
   Time: 3:37 (44.05%)
    ETA: 4:36
   Time: 4:21 (52.05%)
    ETA: 4:1
   Time: 5:12 (60.04%)
    ETA: 3:28
   Time: 5:55 (68.03%)
    ETA: 2:46
   Time: 6:35 (76.02%)
    ETA: 2:4
   Time: 7:14 (84.02%)
    ETA: 1:22
   Time: 7:54 (92.