In [1]:
# imports 
import cv2
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import ipywidgets as widgets
import mediapipe as mp
import tensorflow as tf
from mediapipe import solutions
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
from fastai.vision.all import show_image
from ipywidgets import interact, interactive, fixed, interact_manual
import import_ipynb
from landmarks import *

# Short module-level aliases into MediaPipe's `solutions` package
# (`solutions` is the same module object as `mp.solutions`, imported above).
mp_drawing = solutions.drawing_utils
mp_drawing_styles = solutions.drawing_styles
mp_pose = solutions.pose
# mp_holistic = mp.solutions.holistic

importing Jupyter notebook from landmarks.ipynb


In [2]:
# MediaPipe hand detector shared by the rest of the notebook.
# The model bundle 'hand_landmarker.task' is loaded through a relative path,
# so it has to be present in the working directory.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(
    base_options=base_options,
    num_hands=2,                        # report at most two hands per frame
    min_hand_detection_confidence=.01,  # very low detection threshold
)
detector = vision.HandLandmarker.create_from_options(options)

def convert_to_mediapipe(path):
    """Decode a video and run the module-level hand `detector` on every frame.

    Parameters
    ----------
    path : str
        Location of a video file that ``cv2.VideoCapture`` can open.

    Returns
    -------
    images : list
        The decoded frames exactly as OpenCV returned them (OpenCV decodes
        video frames in BGR channel order by default).
    landmarks : list
        One ``detector.detect`` result per frame, in the same order as
        ``images``.

    Raises
    ------
    AssertionError
        If the video cannot be opened or its first frame cannot be read.
    """

    ### initialize ###
    # open video
    vid = cv2.VideoCapture(path)
    try:
        assert vid.isOpened(), 'Make sure that the video is in a format accepted by cv2.VideoCapture()'

        # load first frame
        boo, image = vid.read()
        assert boo, 'Unable to load first frame of video'

        ### loop over frames and apply mediapipe ###
        images, landmarks = [], []
        while boo:

            # The mp.Image below is declared as SRGB, but OpenCV hands back BGR,
            # so swap the channel order before giving the pixels to MediaPipe.
            rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

            # use mediapipe to find the hand landmarks in this frame
            result = detector.detect(mp_image)

            # keep the untouched OpenCV frame alongside its detection result
            images.append(image)
            landmarks.append(result)

            # advance to the next frame; `boo` turns False once the stream ends
            boo, image = vid.read()
    finally:
        # always hand the capture back, even when an assertion or detection fails
        vid.release()

    return images, landmarks
    

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
# Labeling

# Encode: character -> integer code. Codes are simply the position of each
# character in the ordered string below (" " is 0, "~" is 58).
E = {
    char: code
    for code, char in enumerate(
        " !#$%&'()*+,-./0123456789:;=?@[_abcdefghijklmnopqrstuvwxyz~"
    )
}
# Decode: integer code -> character (the inverse of E).
D = dict(zip(E.values(), E.keys()))

# Encode - converts letter to number
def Encode(x):
    """Return the integer code stored for character ``x`` in the table ``E``.

    Raises ``KeyError`` when ``x`` is not one of the keys of ``E``.
    """
    code = E[x]
    return code

# Decode - converts number to letter
def Decode(x):
    """Return the character stored for integer code ``x`` in the table ``D``.

    Raises ``KeyError`` when ``x`` is not one of the keys of ``D``.
    """
    letter = D[x]
    return letter


# TODO: current only has right hand
def video_labels(self):
    """Predict one character label per frame and store the list on ``self.labels``.

    For each entry of ``self.landmarks`` that has at least one detected hand,
    the first hand (``hand_landmarks[0]``) is copied into a ``LandmarkList``,
    passed through ``center`` and turned into features with
    ``Landmark_vector(..., i)`` for ``i`` in 1..20. Frames without a detected
    hand are labelled ``'?'``.

    All frames with a hand are classified in a single ``predict`` call rather
    than one call per frame, which avoids the per-call overhead of Keras.

    Parameters
    ----------
    self : object
        Must expose ``landmarks`` (an iterable with a length, whose items have
        a ``hand_landmarks`` attribute). ``labels`` is written onto it.

    Returns
    -------
    list
        ``self.labels``: one label per entry of ``self.landmarks``.
    """

    # load prediction model; the extra Softmax layer is applied to its output
    M = tf.keras.models.load_model('alphabetmodel.h5')
    X = tf.keras.Sequential([ M, tf.keras.layers.Softmax() ])

    # every frame starts as '?'; frames with a detected hand are overwritten below
    labels = ['?'] * len(self.landmarks)
    detected_indices, features = [], []

    for index, frame in enumerate(self.landmarks):
        if not frame.hand_landmarks:
            continue

        # create the landmark list from the first detected hand
        hand = landmark_pb2.LandmarkList()
        for v in frame.hand_landmarks[0]:
            hand.landmark.add( x=v.x, y=v.y, z=v.z )

        # center the landmark (helper presumably provided by the landmarks notebook)
        hand = center(hand)

        # extract the data for landmarks 1..20 (index 0 is not used)
        features.append([ Landmark_vector(hand, i) for i in range(1,21) ])
        detected_indices.append(index)

    if features:
        # one batched forward pass for all frames that contain a hand
        p = X.predict(np.array(features), verbose=0)

        # flatten each sample before argmax so the chosen index matches what
        # np.argmax would give on that sample's output alone
        predictions = p.reshape(len(features), -1).argmax(axis=1)

        for index, pred in zip(detected_indices, predictions):
            labels[index] = Decode(pred)

    self.labels = labels

    return self.labels

In [4]:
#######################
##### Video class #####
#######################
                
### Video file Class
class video_file:
    """A video decoded into frames together with per-frame hand detections.

    Attributes set by ``__init__``: ``path``, ``frames``, ``landmarks``,
    ``total_frames`` and ``landmark_percentage``. ``video_labels`` is also
    called on the new instance, which stores ``labels`` on it.
    """

    def __init__(self, path):
        # decode the video and run hand detection on every frame
        decoded_frames, detections = convert_to_mediapipe(path)

        self.path = path
        self.frames = decoded_frames
        self.landmarks = detections
        self.total_frames = len(decoded_frames)

        # Share of frames with at least one detected hand. Despite the name
        # this is a fraction (hits / total), not a value scaled to 0-100.
        if self.total_frames == 0:
            self.landmark_percentage = 0
        else:
            frames_with_hand = sum(1 for detection in self.landmarks if detection.hand_landmarks)
            self.landmark_percentage = frames_with_hand / self.total_frames

        video_labels(self) # turn this off if you do not want to automatically label video

    def __repr__(self):
        # three-line summary: path, frame count, landmark share
        template = " path: {0}\n Number of frames: {1}\n Percentage of frames with landmarks: {2}"
        return template.format(self.path, self.total_frames, self.landmark_percentage)


In [5]:
# Settings for the handedness caption drawn next to each hand.
MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected hand drawn on it.

    For each hand in ``detection_result.hand_landmarks`` the landmarks and
    their connections are drawn with MediaPipe's default hand styles, and the
    matching handedness category name is written ``MARGIN`` pixels above the
    top-left corner of the hand's landmark bounding box.

    Parameters
    ----------
    rgb_image : array
        Image to annotate; it is copied, never modified.
    detection_result : object
        Must expose ``hand_landmarks`` and ``handedness`` sequences that are
        indexed in parallel.

    Returns
    -------
    array
        The annotated copy.
    """
    hands = detection_result.hand_landmarks
    handedness_list = detection_result.handedness
    canvas = np.copy(rgb_image)

    # Visit each detected hand together with its handedness entry.
    for idx, hand in enumerate(hands):
        handedness = handedness_list[idx]

        # Repackage the landmarks as a proto message for the drawing utility.
        points = [
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in hand
        ]
        hand_proto = landmark_pb2.NormalizedLandmarkList()
        hand_proto.landmark.extend(points)
        solutions.drawing_utils.draw_landmarks(
            canvas,
            hand_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style(),
        )

        # Landmark coordinates are scaled by the image size to get pixels;
        # the caption is anchored at the smallest x / smallest y of the hand.
        height, width, _ = canvas.shape
        leftmost = min(point.x for point in hand)
        topmost = min(point.y for point in hand)
        anchor = (int(leftmost * width), int(topmost * height) - MARGIN)

        # Write the handedness category name at the anchor.
        cv2.putText(
            canvas,
            f"{handedness[0].category_name}",
            anchor,
            cv2.FONT_HERSHEY_DUPLEX,
            FONT_SIZE,
            HANDEDNESS_TEXT_COLOR,
            FONT_THICKNESS,
            cv2.LINE_AA,
        )

    return canvas
  

In [6]:
def show_annotated_video(self):
    """Build a viewer function that displays one annotated frame at a time.

    Parameters
    ----------
    self : object
        Must expose ``frames`` and ``landmarks`` (parallel sequences).
        ``labels`` is optional: it is only present after ``video_labels`` has
        run, so when it is missing every frame is captioned ``'n/a'`` instead
        of raising ``AttributeError``.

    Returns
    -------
    callable
        ``show_frame(i)``, which draws the landmarks of frame ``i`` and shows
        the result with the frame index and predicted label in the title.
        Suitable for ``ipywidgets.interact``.
    """

    data = self.frames
    land = self.landmarks

    # labels are optional (automatic labelling can be switched off)
    labels = getattr(self, 'labels', None)
    if labels is None:
        labels = ['n/a'] * len(data)

    # show function
    def show_frame(i):

        # Frames read through cv2.VideoCapture are in BGR order, while the
        # drawing helper and show_image treat the array as RGB.
        rgb_frame = cv2.cvtColor(data[i], cv2.COLOR_BGR2RGB)

        # annotated image and show
        annotated_image = draw_landmarks_on_image(rgb_frame, land[i])
        show_image(annotated_image, figsize=(6,6), title=f'Frame: {i} of {len(data)}     Prediction: {labels[i]}')

    return show_frame

In [7]:
# Build the video object in this cell so that `V` exists on a fresh kernel
# (Restart & Run All); previously print(V) relied on leftover kernel state.
path = 'IMG_0937.MOV'
V = video_file(path)
print(V)

2023-08-31 11:01:24.836900: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [9]:
#f = show_annotated_video(V)
#interact(f, i=widgets.IntSlider(min=0, max=len(V.frames)-1, step=1, value=0, layout=widgets.Layout(width='1000px')))

interactive(children=(IntSlider(value=0, description='i', layout=Layout(width='1000px'), max=587), Output()), …

<function __main__.show_annotated_video.<locals>.show_frame(i)>

In [8]:
# model_path = '/Users/benjaminbreen/Desktop/test_hand.jpg'
# model_path = 

# Load the input image from an image file.
# mp_image = mp.Image.create_from_file(model_path)

# create img
# img = cv2.imread(model_path)

# STEP 2: Create a HandLandmarker object.
#base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
#options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
#detector = vision.HandLandmarker.create_from_options(options)

# STEP 3: Load the input image.
# image = mp.Image.create_from_file(model_path)
#mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)

# STEP 4: Detect hand landmarks from the input image.
#detection_result = detector.detect(mp_image)