In [1]:
import numpy as np
import cv2
import time
import matplotlib.pyplot as plt
import glob
import mediapipe as mp
from constants import LIPS_POSITIONS
from google.protobuf.json_format import MessageToDict


In [2]:
class Drawing:
    def __init__(self):
        self.mpFace = mp.solutions.face_mesh
        self.mpDrawHands = mp.solutions.drawing_utils # Initializing drawing object for hands
        self.mpDrawFace = mp.solutions.drawing_utils # Initializing drawing object for Face
        self.mp_drawing_styles =mp.solutions.drawing_styles
        self.mp_drawing_face = self.mpDrawFace.DrawingSpec(color=(0,0,200),thickness=0,circle_radius=1) #Initializing drawing specifications for face
        self.mp_drawing_hands = self.mpDrawHands.DrawingSpec(color=(255,0,0),thickness=0,circle_radius=1) #Initializing drawing specifications for hand
        self.mpHands = mp.solutions.hands
    def drawLandmarks(self,img,resultsFace,resultsHands):
        img=img.copy()
        if resultsFace.multi_face_landmarks:
            for face_landmarks in resultsFace.multi_face_landmarks:   
                self.mpDrawFace.draw_landmarks( # Draw face lendmark
                  image=img,
                  landmark_list=face_landmarks,
                  connections=self.mpFace.FACEMESH_CONTOURS,
                  landmark_drawing_spec=self.mp_drawing_face,
                  connection_drawing_spec=self.mp_drawing_styles.get_default_face_mesh_tesselation_style())
            
        if "Left" in resultsHands.keys():
            self.mpDrawHands.draw_landmarks(image=img, # Draw hand landmarks
                                landmark_list=resultsHands["Left"],
                                      
                                connections=self.mpHands.HAND_CONNECTIONS,
                                landmark_drawing_spec=self.mp_drawing_hands)
        return img
        

In [7]:
class LandmarkExtractor:
    def __init__(self):
        self.mpHands = mp.solutions.hands # Load mediapipe hands module
        self.mpFace = mp.solutions.face_mesh
        self.hands = self.mpHands.Hands( # Initialize hands model
            max_num_hands=2,
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            static_image_mode=False)
        
         # Load mediapipe face module
        self.faces = self.mpFace.FaceMesh( # Initialize Face model
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            static_image_mode=False)
        
    def findHands(self,img):
        hands={}
        imgRGB = cv2.cvtColor(img,cv2.COLOR_BGR2RGB) # Transform to RGB
        results = self.hands.process(imgRGB) # Feeding image through Hands model
        if results.multi_handedness!=None:
            classification=[]
            for hand in results.multi_handedness:
                classification.append(MessageToDict(hand)['classification'][0])
            print(classification)
            if len(classification)==1:
                hands[results.multi_hand_landmarks[0].classification[0].label]=results.multi_hand_landmarks[0]
            else:
                hands[results.multi_hand_landmarks[0].classification[0].label]=results.multi_hand_landmarks[0]
                hands[results.multi_hand_landmarks[1].classification[0].label]=results.multi_hand_landmarks[1]

            #for hand in classification:
            #    index=hand['index']
            #    if len(classification)==1 and index>0:
            #        index=index-1
            #    hands[hand['label']]=results.multi_hand_landmarks[index]
        return hands # Returning values from model prediction
        
    def findFace(self, img):
        imgRGB = cv2.cvtColor(img,cv2.COLOR_BGR2RGB) # Transform image to RGB
        results = self.faces.process(imgRGB) # Feeding image through Face model
        return results # Returning values from model prediction
        
    
    def __getCoordinates(self,landmarks,index,scale,img_size): 
        x=landmarks.landmark[index].x
        y=landmarks.landmark[index].y
        z=landmarks.landmark[index].z
        if scale:
            x=x*img_size[0]
            y=y*img_size[1]
        return x,y,z  
        
    def getLipsLandmarks(self,resultsFace,scale=False,img_size=(700,720)):
        list_lips_positions=[]
        for cord in LIPS_POSITIONS:
            landmarkovi=resultsFace.multi_face_landmarks[0]
            x1,y1,z1=self.__getCoordinates(landmarkovi,cord[0],scale,img_size)
            x2,y2,z2=self.__getCoordinates(landmarkovi,cord[1],scale,img_size)
            
            avg_x=float((x1+x2)/2)
            avg_y=float((y1+y2)/2)
            
            list_lips_positions.append((avg_x,avg_y,z1))
        return list_lips_positions
 
    

In [8]:
class VideoLoader:
    def __init__(self):
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.landmark_extractor=LandmarkExtractor()
        self.drawing = Drawing()

    def loadVideo(self,path,output_path=None):
        
        cap = cv2.VideoCapture(path)
        if output_path is not None:
            out = cv2.VideoWriter(output_path,self.fourcc, 15,(700,720))

        use_frame=True
        frames=[]  
        i = 0
        while(True):
            ret, frame = cap.read() #reading frames
            if ret: #if frame exist ret=True, otherwise False
                if use_frame: # this means we will skip every other frame
                    frame=frame[:, 300:1000,:] #cropping image, retainig all 3 rgb channels
                    frames.append(frame)
                    i+=1
                    print((i*(1000/15))/1000)
                    resultsFace=self.landmark_extractor.findFace(frame) #using function defined above to detect facial landmarks in a frame (findFace)
                    resultsHands=self.landmark_extractor.findHands(frame) #using function defined above to detect hand landmarks in a frame (findHnds)
                    if output_path is not None:
                        out.write(self.drawing.drawLandmarks(frame.copy(),resultsFace,resultsHands)) #drawing landmarks on frames by using function defined above (drawLadmarks)
        
                    use_frame=False
                else:
                    use_frame=True
            else:
                break
        if output_path is not None:
            out.release() #close writing stream
        return frames

In [9]:
videoLoader = VideoLoader()

In [10]:
videoLoader.loadVideo("../../ASLens - test data 1/-g45vqccdzI-1-rgb_front.mp4",output_path='new11.mp4')

0.06666666666666667
0.13333333333333333
0.2
0.26666666666666666
[{'index': 0, 'score': 0.993051, 'label': 'Left'}]


AttributeError: classification