In [130]:
import dlib
import os
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset

import torchvision.transforms as transforms

from skimage import io
import cv2

In [152]:
# Sample image used to try out face_bbox in the cell that follows its definition.
path_to_file = 'data/Menpo_68p/test/aflw__face_44574.jpg'

In [153]:
def _get_face_detector():
    """Return a shared dlib frontal-face detector, building it on first use."""
    detector = getattr(_get_face_detector, "_cached", None)
    if detector is None:
        detector = dlib.get_frontal_face_detector()
        _get_face_detector._cached = detector
    return detector


def face_bbox(file_path: str) -> torch.FloatTensor:
    """Detect a face in an image file and return its bounding box.

    Arguments:
        file_path (string): Path to an image readable by ``skimage.io.imread``.

    Returns:
        A float tensor ``[left, top, right, bottom]`` taken from the first
        detection dlib reports. When dlib reports no detection at all, a
        message is printed and the sentinel list ``[[None]*4]`` is returned
        instead of a tensor -- callers must check for it.
    """
    img = io.imread(file_path)
    # The detector is reused across calls instead of being rebuilt per image.
    # Positional args follow dlib's run(image, upsample_num_times,
    # adjust_threshold) -- presumably 1 upsampling pass and a lowered (-1)
    # score threshold; confirm against the installed dlib version.
    dets, _, _ = _get_face_detector().run(img, 1, -1)
    try:
        first = dets[0]
    except IndexError:
        print(f"Following face wasn't recognized {file_path}")
        # Sentinel kept as-is for backward compatibility with existing callers.
        return [[None]*4]
    return torch.tensor(
        [first.left(), first.top(), first.right(), first.bottom()],
        dtype=torch.float,
    )

In [154]:
# Smoke test: detect the face box on the sample image.
face_bbox(path_to_file)

tensor([ 81.,  53., 210., 182.])

In [21]:
# Sample landmark annotation (.pts) used to try out read_keypoint.
path_to_points = 'data/Menpo_68p/test/aflw__face_39844.pts'

In [136]:
def read_keypoint(jpg_path: str, num_keypoints: int = 68) -> torch.FloatTensor:
    """Load the landmark annotation stored next to an image.

    The annotation is expected in the same directory as the image, with the
    same base name and a ``.pts`` extension. Two layouts are accepted:

    * files whose first line starts with ``version``: the coordinates are
      the lines between the ``{`` line and the ``}`` line;
    * anything else: every line is treated as coordinate data.

    Arguments:
        jpg_path (string): Path to the image (or to the ``.pts`` file itself);
            only its extension is replaced.
        num_keypoints (int, optional): Number of ``x y`` pairs the file must
            contain. Defaults to 68.

    Returns:
        Float tensor of shape ``[num_keypoints, 3]`` with rows
        ``[x, y, visibility]``; visibility is always set to 1.

    Raises:
        ValueError: if the braces are missing in a ``version`` file, a token
            is not a number, or the number of values is not
            ``2 * num_keypoints``.
    """
    # Replace the real extension rather than chopping a fixed 4 characters,
    # so '.jpeg' (or any other extension length) resolves correctly.
    pts_name = os.path.splitext(jpg_path)[0] + '.pts'
    with open(pts_name) as f:
        lines = f.readlines()

    if lines and lines[0].startswith('version'):  # to support different formats
        # Locate the braces explicitly instead of assuming a 3-line header and
        # a closing brace on the very last line (trailing blank lines are common).
        stripped = [line.strip() for line in lines]
        try:
            first = stripped.index('{') + 1
            last = stripped.index('}', first)
        except ValueError:
            raise ValueError(
                f"Malformed .pts file (missing '{{' or '}}'): {pts_name}"
            ) from None
        lines = lines[first:last]

    # Whitespace-split parsing fails loudly on a non-numeric token.
    values = np.array(' '.join(lines).split(), dtype=np.float64)
    if values.size != num_keypoints * 2:
        raise ValueError(
            f"Expected {num_keypoints} keypoints ({num_keypoints * 2} values) "
            f"in {pts_name}, found {values.size} values"
        )

    coords = torch.tensor(values.reshape((num_keypoints, 2)), dtype=torch.float)
    visibility = torch.ones([num_keypoints, 1], dtype=torch.float)
    return torch.cat((coords, visibility), dim=1)

In [82]:
# Sanity check on the result shape. read_keypoint replaces the extension with
# '.pts', so passing the .pts path itself resolves to the same file.
read_keypoint(path_to_points).shape

torch.Size([68, 3])

In [144]:
class FaceLandmarksDataset(Dataset):
    """Face Landmarks dataset.

    Each sample is an RGB image paired with a target dict holding the face
    box found by ``face_bbox`` and the landmarks loaded by ``read_keypoint``
    from the ``.pts`` file next to the image.
    """

    # Landmarks per face; used to shape the empty keypoint target.
    NUM_KEYPOINTS = 68

    def __init__(self, dir_to_jpgs: str, transform=None):
        """
        Arguments:
            dir_to_jpgs (string): Path to folder with .jpg and .pts files.
            transform (callable, optional): Optional transform to be applied
                on the image. When omitted, the image is converted to a
                (C, H, W) tensor directly.
        """
        self.root_dir = dir_to_jpgs
        self.transform = transform
        # os.listdir order is arbitrary; sorting keeps index -> file stable
        # across runs and machines.
        self.images = [
            os.path.join(self.root_dir, fname)
            for fname in sorted(os.listdir(self.root_dir))
            if fname.endswith('.jpg')
        ]

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the image at position ``idx``.

        ``img`` is a float tensor. ``target`` is a dict with keys:
            'path'      -- path of the image file,
            'boxes'     -- ``[x1, y1, x2, y2]`` from ``face_bbox``, or an
                           empty ``(0, 4)`` tensor when no face was found,
            'keypoints' -- ``[K, 3]`` rows of ``[x, y, visibility]``, or an
                           empty ``(0, K, 3)`` tensor when no face was found,
            'labels'    -- ``tensor([1])`` for a face, ``tensor([0])`` otherwise.

        Raises:
            OSError: if OpenCV cannot read the image file.
        """
        img_path = self.images[idx]

        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        img = img / 255 # normalize values

        bbox = face_bbox(img_path) # [x1, y1, x2, y2]
        if isinstance(bbox, torch.Tensor):
            keypoints = read_keypoint(img_path) # (FloatTensor[K, 3]) format K : [x, y, visibility]
            labels = torch.tensor([1], dtype=torch.int64)
        else:
            # face_bbox returned its "no detection" sentinel -> empty targets
            bbox = torch.empty((0, 4), dtype=torch.float)
            keypoints = torch.empty((0, self.NUM_KEYPOINTS, 3), dtype=torch.float)
            # NOTE(review): kept as in the original; confirm whether the
            # consumer expects an empty label tensor for face-less images.
            labels = torch.tensor([0], dtype=torch.int64)

        if self.transform is not None:
            img = self.transform(img) # to tensor, from shape (H, W, C) -> (C, H, W)
        else:
            # Without a transform the image is still a NumPy array; convert it
            # here so the .to() call below works.
            img = torch.from_numpy(img).permute(2, 0, 1)
        img = img.to(torch.float)

        target = {
            'path': img_path,
            'boxes': bbox,
            'keypoints': keypoints,
            'labels': labels
        }

        return img, target

    def __len__(self):
        """Number of .jpg images found in the folder."""
        return len(self.images)

In [145]:
# Folder scanned for .jpg images (with their .pts annotations alongside).
path_to_folder = 'data/Menpo_68p/test/'
# Number of samples per DataLoader batch.
batch_size = 16

In [146]:
# ToTensor turns the (H, W, C) array produced by the dataset into a (C, H, W) tensor.
transform = transforms.Compose([transforms.ToTensor()])

# NOTE(review): named "trainset", but path_to_folder points at the test/ split -- confirm intent.
trainset = FaceLandmarksDataset(dir_to_jpgs=path_to_folder, transform=transform)

In [147]:
# NOTE(review): no collate_fn is given, so the default collation will try to
# stack images and targets; this presumably fails if image sizes differ or a
# sample has empty (0, 4) boxes -- confirm, and consider a custom collate_fn.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

In [151]:
# Fetch the first sample straight from the dataset (no batching or shuffling involved).
train_loader.dataset.__getitem__(0)

(tensor([[[0.1725, 0.1569, 0.1725,  ..., 0.9804, 1.0000, 0.3490],
          [0.1686, 0.1529, 0.1647,  ..., 0.9804, 1.0000, 0.3490],
          [0.1608, 0.1451, 0.1569,  ..., 0.9804, 1.0000, 0.3490],
          ...,
          [0.8863, 0.9961, 0.9882,  ..., 0.9804, 1.0000, 0.3490],
          [0.9490, 0.9922, 0.9490,  ..., 0.9804, 1.0000, 0.3490],
          [1.0000, 0.9647, 0.9255,  ..., 0.9804, 1.0000, 0.3490]],
 
         [[0.0784, 0.0667, 0.0627,  ..., 0.9804, 1.0000, 0.3490],
          [0.0745, 0.0588, 0.0627,  ..., 0.9804, 1.0000, 0.3490],
          [0.0667, 0.0510, 0.0549,  ..., 0.9804, 1.0000, 0.3490],
          ...,
          [0.8353, 0.9569, 0.9529,  ..., 0.9804, 1.0000, 0.3490],
          [0.9059, 0.9569, 0.9216,  ..., 0.9804, 1.0000, 0.3490],
          [0.9686, 0.9373, 0.8941,  ..., 0.9804, 1.0000, 0.3490]],
 
         [[0.0314, 0.0118, 0.0157,  ..., 0.9804, 1.0000, 0.3490],
          [0.0275, 0.0118, 0.0118,  ..., 0.9804, 1.0000, 0.3490],
          [0.0196, 0.0039, 0.0039,  ...,

In [None]:
def get_model(num_keypoints: int = 68):
    """Build a Keypoint R-CNN (ResNet-50 FPN) detector without pretrained detection weights.

    Arguments:
        num_keypoints (int, optional): Number of keypoints the model predicts
            per detected instance. Defaults to 68.

    Returns:
        The model returned by
        ``torchvision.models.detection.keypointrcnn_resnet50_fpn``.
    """
    # The notebook's import cell only binds `transforms`
    # (`import torchvision.transforms as transforms`), which leaves the name
    # `torchvision` undefined; import it here so the function works on a
    # fresh kernel.
    import torchvision.models.detection

    # initialize the model
    # NOTE(review): recent torchvision releases deprecate `pretrained=` in
    # favour of `weights=`; kept for compatibility -- confirm the installed version.
    model = torchvision.models.detection.keypointrcnn_resnet50_fpn(
        pretrained=False,
        num_keypoints=num_keypoints,
    )

    return model