In [22]:
import numpy as np
import cv2
import time
import matplotlib.pyplot as plt
import glob
import mediapipe as mp
from constants import LIPS_POSITIONS, FACE_OVAL,HAND_POSITIONS,HAND_CONNECTIONS
from google.protobuf.json_format import MessageToDict
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks.python.components.containers import NormalizedLandmark

# Array read from "defaultHand.npy" (relative path, so it resolves against the kernel's
# working directory). No cell visible in this notebook reads DEFAULT_HAND afterwards —
# presumably a fallback hand pose for frames without a detection; TODO confirm.
DEFAULT_HAND = np.load("defaultHand.npy")


In [7]:
class Drawing:
    """Draws extracted face and hand landmarks onto a frame with OpenCV primitives."""

    def __init__(self):
        """Keep references to the mediapipe solution modules and build the drawing specs."""
        self.mpFace = mp.solutions.face_mesh
        self.mpDrawHands = mp.solutions.drawing_utils  # drawing helper used for hands
        self.mpDrawFace = mp.solutions.drawing_utils  # drawing helper used for the face
        self.mp_drawing_styles = mp.solutions.drawing_styles
        # Drawing specifications for the face and for the hands.
        self.mp_drawing_face = self.mpDrawFace.DrawingSpec(color=(0,0,200),thickness=0,circle_radius=1)
        self.mp_drawing_hands = self.mpDrawHands.DrawingSpec(color=(255,0,0),thickness=0,circle_radius=1)
        self.mpHands = mp.solutions.hands

    def drawLandmarks(self,img,resultsFace,resultsHands,img_size=(700,720)):
        """Return a copy of ``img`` with the face and hand landmarks drawn on it.

        Args:
            img: Image to annotate; it is copied, the input is left untouched.
            resultsFace: Iterable of landmarks whose first two entries are x and y,
                or ``None`` to skip the face.
            resultsHands: Mapping from hand label ("Right" / "Left") to an iterable
                of landmarks whose first two entries are x and y.
            img_size: ``(width, height)`` used to scale landmark coordinates to pixels
                (assumes the landmarks are normalised to [0, 1] — the extractor's
                default ``scale=False`` output).

        Returns:
            The annotated copy of ``img``.

        Raises:
            KeyError: If ``resultsHands`` contains a label other than "Right"/"Left",
                or a HAND_CONNECTIONS index has no matching landmark.
        """
        canvas = img.copy()
        colors = {"Right": (100,100,100), "Left": (0,0,255)}

        def to_pixel(landmark):
            # x scales with the width and y with the height. The previous code used
            # img_size[0] for both, which misplaced every point on non-square frames.
            return (int(landmark[0]*img_size[0]), int(landmark[1]*img_size[1]))

        if resultsFace is not None:
            for landmark in resultsFace:
                cv2.circle(canvas, to_pixel(landmark), 1, (0, 0, 255), -1)

        for key in resultsHands:
            # Pixel position of every landmark of this hand, keyed by its list index,
            # so the connection pass below can look up both endpoints.
            points = {}
            for i, landmark in enumerate(resultsHands[key]):
                point = to_pixel(landmark)
                cv2.circle(canvas, point, 3, colors[key], -1)
                points[i] = point
            # HAND_CONNECTIONS is used as a mapping: start index -> end index.
            for conn in HAND_CONNECTIONS:
                cv2.line(canvas, points[conn], points[HAND_CONNECTIONS[conn]], (216, 223, 230), 2)
        return canvas
        

In [13]:
class LandmarkExtractor:
    """Runs the mediapipe Hands and FaceMesh models and converts results to NumPy arrays.

    Every ``get*Landmarks`` method returns an array of shape ``(N, 3)`` holding
    ``(x, y, z)`` rows. When nothing is detected the array has shape ``(0, 3)``,
    so column indexing such as ``arr[:, 0]`` works whether or not a face was found.
    """

    def __init__(self):
        """Create the Hands and FaceMesh models in video (tracking) mode."""
        self.mpHands = mp.solutions.hands  # mediapipe hands module
        self.mpFace = mp.solutions.face_mesh  # mediapipe face mesh module
        self.hands = self.mpHands.Hands(
            max_num_hands=2,
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            static_image_mode=False)
        self.faces = self.mpFace.FaceMesh(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            static_image_mode=False)

    @staticmethod
    def _asCoordinateArray(positions):
        """Turn a list of (x, y, z) tuples into an (N, 3) array, also when the list is empty."""
        return np.array(positions, dtype=float).reshape(-1, 3)

    def getHandLandmarks(self,hand,scale=False,img_size=(700,720)):
        """Return the HAND_POSITIONS landmarks of one hand as an (N, 3) array.

        Args:
            hand: Object exposing ``.landmark[index].x/.y/.z`` (one entry of
                ``multi_hand_landmarks``).
            scale: When true, x is multiplied by ``img_size[0]`` and y by ``img_size[1]``.
            img_size: ``(width, height)`` used when ``scale`` is true.
        """
        positions = [self.__getCoordinates(hand, cord, scale, img_size) for cord in HAND_POSITIONS]
        return self._asCoordinateArray(positions)

    def findHands(self,img,resultsFace):
        """Detect hands in a BGR image and return ``{label: (N, 3) array}``.

        Args:
            img: BGR image; it is converted to RGB before being fed to the model.
            resultsFace: Unused; kept so existing callers keep working.

        Returns:
            Dict keyed by "Left"/"Right". It is empty when no hand is detected. If two
            detections end up with the same label, the later one replaces the earlier.
        """
        imgRGB = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        results = self.hands.process(imgRGB)

        hands = {}
        if results.multi_handedness is not None:
            for i, hand in enumerate(results.multi_handedness):
                # The model's label is deliberately flipped here — presumably because
                # mediapipe reports handedness for a mirrored (selfie) image while these
                # videos are not mirrored; TODO confirm against the data.
                handType = "Right" if hand.classification[0].label == "Left" else "Left"
                hands[handType] = self.getHandLandmarks(results.multi_hand_landmarks[i])
        return hands

    def findFace(self, img):
        """Run FaceMesh on a BGR image and return the raw mediapipe result object."""
        imgRGB = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        return self.faces.process(imgRGB)

    def __getCoordinates(self,landmarks,index,scale,img_size):
        """Return ``(x, y, z)`` of ``landmarks.landmark[index]``; x/y are scaled to pixels if ``scale``."""
        point = landmarks.landmark[index]
        x, y, z = point.x, point.y, point.z
        if scale:
            x = x*img_size[0]
            y = y*img_size[1]
        return x, y, z

    def getLipsLandmarks(self,resultsFace,scale=False,img_size=(700,720)):
        """Return one averaged point per LIPS_POSITIONS pair as an (N, 3) array.

        Each entry of LIPS_POSITIONS is a pair of landmark indices; x and y of the two
        landmarks are averaged. Only the first face in ``resultsFace`` is used.
        """
        positions = []
        if resultsFace.multi_face_landmarks:
            face = resultsFace.multi_face_landmarks[0]
            for cord in LIPS_POSITIONS:
                x1, y1, z1 = self.__getCoordinates(face, cord[0], scale, img_size)
                x2, y2, _ = self.__getCoordinates(face, cord[1], scale, img_size)
                # NOTE(review): z is taken from the first landmark only, not averaged
                # like x and y — kept as-is; confirm whether that is intended.
                positions.append((float((x1+x2)/2), float((y1+y2)/2), z1))
        return self._asCoordinateArray(positions)

    def getFaceLandmarks(self,resultsFace,scale=False,img_size=(700,720)):
        """Return the FACE_OVAL landmarks of the first detected face as an (N, 3) array."""
        positions = []
        if resultsFace.multi_face_landmarks:
            face = resultsFace.multi_face_landmarks[0]
            positions = [self.__getCoordinates(face, cord, scale, img_size) for cord in FACE_OVAL]
        return self._asCoordinateArray(positions)

In [14]:
class VideoLoader:
    """Reads a video, extracts landmarks per kept frame and optionally writes an annotated copy."""

    def __init__(self):
        """Prepare the mp4v codec plus the landmark extractor and drawing helper."""
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.landmark_extractor = LandmarkExtractor()
        self.drawing = Drawing()

    def loadVideo(self,path,output_path=None):
        """Load every other frame of a video, cropped to columns 300..999.

        Landmarks are extracted for each kept frame. When ``output_path`` is given,
        the frame with the landmarks drawn on it is appended to an mp4 written at
        15 fps and 700x720.

        Args:
            path: Path of the video to read.
            output_path: Where to write the annotated video, or ``None`` to skip writing.

        Returns:
            List of the kept, cropped frames. It is empty if no frame could be read.
        """
        cap = cv2.VideoCapture(path)
        out = None
        frames = []
        use_frame = True  # toggled each iteration so that every other frame is skipped
        try:
            if output_path is not None:
                # NOTE(review): the writer size is fixed at 700x720; this assumes the
                # source frames are 720 px high and at least 1000 px wide — confirm.
                out = cv2.VideoWriter(output_path,self.fourcc, 15,(700,720))

            while True:
                ret, frame = cap.read()
                if not ret:  # no more frames (or the video could not be opened)
                    break
                if use_frame:
                    frame = frame[:, 300:1000,:]  # crop the width, keep all rows and channels
                    frames.append(frame)
                    resultsFace = self.landmark_extractor.findFace(frame)
                    resultsFace = self.landmark_extractor.getFaceLandmarks(resultsFace)
                    resultsHands = self.landmark_extractor.findHands(frame,resultsFace)
                    if out is not None:
                        out.write(self.drawing.drawLandmarks(frame.copy(),resultsFace,resultsHands))
                use_frame = not use_frame
        finally:
            # Release the capture and the writer even if extraction or drawing raises.
            # The capture was previously never released at all.
            cap.release()
            if out is not None:
                out.release()
        return frames

In [15]:
# Build the loader once; its constructor also creates the LandmarkExtractor
# (mediapipe Hands + FaceMesh models) and the Drawing helper.
videoLoader = VideoLoader()

In [16]:
# Input clip and destination of the annotated copy, named so they are easy to change.
VIDEO_PATH = "../../ASLens - test data 1/-g45vqccdzI-1-rgb_front.mp4"
ANNOTATED_VIDEO_PATH = 'novitest41.mp4'

# Returns the kept (every other), cropped frames and writes the annotated video as a side effect.
frames = videoLoader.loadVideo(VIDEO_PATH, output_path=ANNOTATED_VIDEO_PATH)

In [None]:
# Re-run landmark extraction on one stored frame, using a fresh extractor.
r = 15  # index into `frames` of the frame to inspect
landmark_extractor = LandmarkExtractor()
sample_frame = frames[r]
# findFace gives the raw mediapipe result; getFaceLandmarks turns it into the coordinate array.
resultsFace = landmark_extractor.getFaceLandmarks(landmark_extractor.findFace(sample_frame))
resultsHands = landmark_extractor.findHands(sample_frame, resultsFace)

In [None]:
# Display the face-oval landmark array produced by the extraction cell above
# (one (x, y, z) row per FACE_OVAL index).
resultsFace

In [19]:
# Per-landmark mean of the x column and the y column of the face-oval array.
# Needs `resultsFace` from the extraction cell to exist in the kernel first.
face_x, face_y = resultsFace[:, 0], resultsFace[:, 1]
(face_x + face_y) / 2

NameError: name 'resultsFace' is not defined

In [210]:
# Scratch dict: keys 1 and 2, both mapped to 3 (used by the iteration check below).
tada = {key: 3 for key in (1, 2)}

In [212]:
# Scratch check: iterating a dict walks its keys, so this prints 1 and 2.
for i in tada.keys():
    print(i)

1
2


In [None]:
# Incomplete attribute access, presumably left over from tab-completion. On its own it is a
# SyntaxError that stops Restart & Run All, so it is kept only as a comment.
# mp.