In [1]:
import torch
import sys
import cv2
import torchvision
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from torchvision import transforms
from insightface.insight_face import iresnet100

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weights = torch.load("./insightface/16_backbone.pth", map_location = device)

model = iresnet100()
model.load_state_dict(weights)
model.to(device)
model.eval()

IResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (prelu): PReLU(num_parameters=64)
  (layer1): Sequential(
    (0): IBasicBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (prelu): PReLU(num_parameters=64)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): IBas

In [3]:
face_preprocessor = transforms.Compose([transforms.ToTensor(), 
                                   transforms.Resize((112, 112)),
                                   transforms.Normalize(mean = [0.5, 0.5, 0.5], std = [0.5, 0.5, 0.5])])

In [4]:
def get_embedded_databases(root_img_path):
    images_paths, images_embedding = [], []
    
    for folder in os.listdir(root_img_path):
        full_folder = os.path.join(root_img_path, folder)
        if os.path.isdir(full_folder):
            for name in os.listdir(full_folder):
                img_path = full_folder + "/" + name
                img_face = face_preprocessor(Image.open(img_path).convert("RGB")).to(device)
                with torch.no_grad():
                    embeded_img_face = model(img_face[None, :])[0].cpu().numpy()
                
                images_embedding.append(embeded_img_face)
                images_paths.append(name.split(".")[0])
    
    images_paths = np.array(images_paths)
    images_embedding = np.array(images_embedding)
    
    return images_paths, images_embedding/np.linalg.norm(images_embedding, axis=1, keepdims=True)

In [5]:
root_img_path = "./faces_database/"
images_paths, images_embedding = get_embedded_databases(root_img_path)
print(images_embedding.shape)

(10, 512)


In [6]:
query_path = "./face_My.jpg"
img_query = face_preprocessor(Image.open(query_path).convert("RGB")).to(device)
with torch.no_grad():
    emb_query = model(img_query[None, :]).cpu().numpy()
    emb_query = emb_query/np.linalg.norm(emb_query)

cos_measure = images_embedding @ emb_query.T
print(images_paths[cos_measure.argmax()].split("_")[0])

# mse_measure = np.linalg.norm(images_embedding - emb_query, axis=-1)
# images_paths[mse_measure.argmin()].split("_")[0]

My


## Recognize in one image (leverage OD with Yolov5)
Follow these steps:

- Get all images in database to Insightface model to create database vectors.
- Crop query images by Yolov5, then feed it into Insightface model to create query vector.
- Compare query image's vector with database vectors with cosine similarity or MSE.

In [7]:
sys.path.insert(0, "yolov5_face")
from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import check_img_size, non_max_suppression_face, scale_coords
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = attempt_load("yolov5_face/yolov5m-face.pt", map_location=device)

Fusing layers... 


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [8]:
converted_size = 640
conf_threshold = 0.4
iou_threshold = 0.5

In [9]:
def resize_image(img0, img_size):
    h0, w0 = orgimg.shape[:2]  # orig hw
    r = img_size / max(h0, w0)  # resize image to img_size
    if r != 1:  # always resize down, only resize up if training with augmentation
        interp = cv2.INTER_AREA if r < 1  else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    imgsz = check_img_size(img_size, s=model.stride.max())  # check img_size
    img = letterbox(img0, new_shape=imgsz)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()  # BGR to RGB, to 3x416x416

    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    
    return img

In [10]:
%%time

path_query = "./test_people.jpg"
orgimg = cv2.imread(path_query)  # BGR 
img = resize_image(orgimg.copy(), converted_size)

with torch.no_grad():
    pred = model(img[None, :])[0]
    
# Apply NMS
det = non_max_suppression_face(pred, conf_threshold, iou_threshold)[0]
bboxs = np.int32(scale_coords(img.shape[1:], det[:, :4], orgimg.shape).round().cpu().numpy())
    

for i in range(len(bboxs)):
    x1, y1, x2, y2 = bboxs[i]
    roi = orgimg[y1:y2, x1:x2]
    roi = face_preprocessor(Image.fromarray(roi)).to(device)
    
    with torch.no_grad():
        emb_query = model(roi[None, :]).cpu().numpy()
        emb_query = emb_query/np.linalg.norm(emb_query)
    
    scores = (emb_query @ images_embedding.T)[0]
    # scores = np.linalg.norm(norm_roi_emb - norm_images_emb)
    id_min = np.argmax(scores)
    score = scores[id_min]
    name = images_paths[id_min]
    
    if score < 0.2:
        caption= "UN_KNOWN"
    else:
        caption = f"{name.split('_')[0].upper()}:{score:.2f}"

    t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
    cv2.rectangle(orgimg, (x1, y1), (x2, y2), (0, 146, 230), 3)
    cv2.rectangle(
        orgimg, (x1, y1), (x1 + t_size[0], y1 + t_size[1]), (0, 146, 230), -1)
    cv2.putText(orgimg, caption, (x1, y1 + t_size[1]), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

RuntimeError: Sizes of tensors must match except in dimension 2. Got 7 and 8 (The offending index is 0)

In [None]:
plt.figure(figsize=(5, 3), dpi=300)
plt.axis('off')
plt.imshow(orgimg[:,:,::-1]);

## Test video recognition

In [None]:
l = []
fps = 25
numsecond = 10