In [1]:
# import gdown
import zipfile
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import os
import matplotlib.pyplot as plt
import numpy as np
import pickle
import xml.etree.ElementTree as ET
import torch
from torchvision.transforms import v2
import albumentations as A
from torch.utils.data import DataLoader, Dataset
from albumentations.pytorch import ToTensorV2
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import mediapipe as mp
from ldmodel import EfficientNetLandMark
# from torchsummary import summary

In [2]:
class Filter():
    def __init__(self, landmark_model, transform):
        self.modelFile = "res10_300x300_ssd_iter_140000.caffemodel"
        self.configFile = "deploy.prototxt"
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.landmark_detect = landmark_model
        self.transform = transform
        self.face_detect = cv2.dnn.readNetFromCaffe(self.configFile, self.modelFile)
        self.landmark_detect.eval()
        

    def landmarks_detect(self, image):
        height, width, channels = image.shape
        
        image = self.transform(image=image)['image']
        image = image.to(self.device)
        in_height, in_width, in_channel = image.shape
        image = image.unsqueeze(0)
        landmarks = self.landmark_detect(image)
        landmarks = landmarks.reshape(68, 2)
        
        # convert to 0-224 axis
        landmarks = (landmarks + 0.5) * in_width
        # convert to original axis
        landmarks[:, 0] = landmarks[:, 0] / in_width * width
        landmarks[:, 1] = landmarks[:, 1] / in_width * height
        
        return landmarks
        
        
    def full_landmarks_detect_image(self, image_path, radius=1):
        # mediapipe
        self.face_detect = mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=1)

        # Load the image (np.ndarray)
        image = cv2.imread(image_path)

        # Convert BGR to RGB (np.ndarray)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width, channels = image_rgb.shape

        # Detect faces in the image
        faces = self.face_detect.process(image_rgb)

        # Draw rectangles around the detected faces
        for face in faces.detections:
            bbox = face.location_data.relative_bounding_box
            left = int(round(bbox.xmin * width))
            top = int(round(bbox.ymin * height))
            right = int(round((bbox.xmin + bbox.width) * width))
            bottom = int(round((bbox.ymin + bbox.height) * height))
            
            landmarks = self.landmarks_detect(image[top:bottom, left:right])
            landmarks = landmarks.detach().cpu().numpy()
            landmarks = landmarks + [left, top]
            
            cv2.rectangle(image_rgb, (left, top), (right, bottom), (0, 255, 0), 2)
            for x, y in landmarks:
                cv2.circle(image_rgb, (int(round(x)), int(round(y))), radius=radius, color=(0, 255, 0), thickness=-1)

        # Display the image with detected faces
        plt.imshow(image_rgb)
        plt.show()


    def full_landmarks_detect_video(self, radius=1):
        # ResNet
        self.face_detect = cv2.dnn.readNetFromCaffe(self.configFile, self.modelFile)

        # Load the image (np.ndarray)
        cap = cv2.VideoCapture(0)

        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                break

            # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            height, width, channels = image.shape
            
            # Detect face
            blob = cv2.dnn.blobFromImage(image=cv2.resize(image, (300, 300)), scalefactor=1.0, 
                                        size=(300, 300), mean=(104.0, 177.0, 123.0))
            self.face_detect.setInput(blob)
            detections = self.face_detect.forward()

            # Draw rectangles around the detected faces
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                
                if confidence > 0.5:
                    left, top, right, bottom = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
                    top = max(int(round(top)) + 20, 0)
                    left = max(int(round(left)) - 20, 0)
                    bottom = min(int(round(bottom)), height)
                    right = min(int(round(right)) + 20, width)
                    
                    landmarks = self.landmarks_detect(image[top:bottom, left:right])
                    landmarks = landmarks.detach().cpu().numpy()
                    landmarks = landmarks + [left, top]
                    
                    cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
                    for x, y in landmarks:
                        cv2.circle(image, (int(round(x)), int(round(y))), radius=radius, color=(0, 255, 0), thickness=-1)

            # Display the image with detected faces
            cv2.imshow("Result", image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()


    def full_landmarks_detect_video2(self, radius=1):
        # mediapipe
        self.face_detect = mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.5, model_selection=1)

        # Load the image (np.ndarray)
        cap = cv2.VideoCapture(0)

        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                break

            # Convert BGR to RGB (np.ndarray)
            image_rgb = image
            height, width, channels = image_rgb.shape

            cv2.imshow("Result", image_rgb)
            
            # Detect faces in the image
            faces = self.face_detect.process(image_rgb)
            if not faces.detections:    
                continue

            # Draw rectangles around the detected faces
            for face in faces.detections:
                bbox = face.location_data.relative_bounding_box
                left = int(round(bbox.xmin * width))
                top = int(round(bbox.ymin * height))
                right = int(round((bbox.xmin + bbox.width) * width))
                bottom = int(round((bbox.ymin + bbox.height) * height))
                
                landmarks = self.landmarks_detect(image[top:bottom, left:right])
                landmarks = landmarks.detach().cpu().numpy()
                landmarks = landmarks + [left, top]
                
                cv2.rectangle(image_rgb, (left, top), (right, bottom), (0, 255, 0), 2)
                for x, y in landmarks:
                    cv2.circle(image_rgb, (int(round(x)), int(round(y))), radius=radius, color=(0, 255, 0), thickness=-1)

            # Display the image with detected faces
            cv2.imshow("Result", image_rgb)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()

In [3]:
transform_pred = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [4]:
numOfPoints = 68
model = EfficientNetLandMark(numOfPoints)
model.load_state_dict(torch.load('best_model.pth', map_location=torch.device('cpu')))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model.to(device)



cuda


EfficientNetLandMark(
  (model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (

In [5]:
filter = Filter(model, transform_pred)

In [None]:
image_path = 'image\\3.jpg'
filter.full_landmarks_detect_image(image_path, 5)

In [6]:
filter.full_landmarks_detect_video(radius=2)