In [None]:
# Step 0: Load the libraries and models used by the rest of the notebook
from PIL import Image 
import cv2 
import numpy as np 

# YOLO object detector, built from 'yolov8x.pt'
from ultralytics import YOLO 
yolo_model = YOLO('yolov8x.pt') 

# import torchreid
# reid_model = torchreid.models.build_model(
#     name='mudeep',
#     num_classes=1000,
#     pretrained=True
# )

# ReID model: use a pre-trained model such as ResNet
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` - confirm against the installed version.
import torch 
from torchvision import models, transforms 
resnet_model = models.resnet50(pretrained=True) 
resnet_model.eval()  # switch the network to evaluation (inference) mode

# Face recognition model 
import face_recognition

In [1]:
# import torch 
# from torchvision import models, transforms 
# resnet_model = models.resnet50(pretrained=True) 
# resnet_model.eval() 

# print(type(resnet_model))



<class 'torchvision.models.resnet.ResNet'>


In [None]:
# Step 1: Detect objects with YOLO
def detect_objects(image, yolo_model):
    """
    Detect objects in an image and return one cropped image per detection.

    Para:
        image: PIL Image - already converted with convert('RGB')
        yolo_model: ultralytics YOLO model used for detection
    Result:
        object_images: list of single-object images, one crop per detected
        bounding box (empty list when nothing is detected)

    How to use - Eg:
        from ultralytics import YOLO
        yolo_model = YOLO('yolov8x.pt')
        img1 = Image.open("/path/to/img").convert('RGB')
        object_images_1 = detect_objects(img1, yolo_model)
    """
    # Run YOLO on the image; only the first result entry is used.
    results = yolo_model(image)

    # Read corner coordinates (xmin, ymin, xmax, ymax).
    # `boxes.xywh` stores the box CENTRE plus width/height, so using it as a
    # top-left corner shifts every crop by half a box; `boxes.xyxy` gives the
    # corners that PIL's crop() expects.
    boxes = results[0].boxes.xyxy

    # Crop and collect the region of every detected object.
    object_images = []
    for box in boxes:
        # int() truncates the float coordinates to whole pixels.
        xmin, ymin, xmax, ymax = (int(value) for value in box)
        object_images.append(image.crop((xmin, ymin, xmax, ymax)))

    return object_images

# Step 2: ReID model - treat the same object seen in each camera as one identity
def extract_features(image, reid_model):
    """
    Run a single object crop through the ReID network and return its output.

    Para:
        image: PIL Image - picture of a single object, already convert('RGB')
        reid_model: model to use
    Result:
        the output of reid_model for a batch that holds only this image,
        computed with gradient tracking disabled

    How to use - Eg:
        resnet_model = models.resnet50(pretrained=True)
        resnet_model.eval()
        img1 = Image.open("/path/to/img").convert('RGB')
        features1 = extract_features(img1, resnet_model)
    """
    # Resize -> tensor -> per-channel normalisation, applied in this order.
    steps = [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    to_model_input = transforms.Compose(steps)

    # Insert a leading dimension so the single image becomes a batch of one.
    batch = to_model_input(image).unsqueeze(0)

    with torch.no_grad():
        return reid_model(batch)

def reid_similar(features1, features2, threshold = 0.8):
    """
    Decide whether two sets of feature vectors describe the same object,
    using cosine similarity.

    Para:
        features1, features2: output of extract_features(image, reid_model) -
            feature vectors of shape (n, d), or a single 1-D vector of
            length d. Torch tensors, numpy arrays and nested lists are accepted.
        threshold: double - minimum cosine similarity for two objects to be
            considered the same
    Result:
        Boolean numpy array of shape (n1, n2); entry [i, j] is True when row i
        of features1 and row j of features2 have cosine similarity >= threshold.
        For a single pair this is a (1, 1) array, which can be used directly
        in an `if`.
    """
    def _as_unit_rows(features):
        # Tensors are detached and copied to host memory before conversion.
        if hasattr(features, "detach"):
            features = features.detach().cpu().numpy()
        rows = np.atleast_2d(np.asarray(features, dtype=np.float64))
        norms = np.linalg.norm(rows, axis=1, keepdims=True)
        # An all-zero vector has no direction: leave it as zeros (similarity 0)
        # instead of dividing by zero.
        norms[norms == 0.0] = 1.0
        return rows / norms

    # Dot products of unit-length rows are the pairwise cosine similarities.
    similarity = _as_unit_rows(features1) @ _as_unit_rows(features2).T
    return similarity >= threshold

# Step 3: Face recognition on each object crop identified by the ReID model
def _face_rec(image):
    """
    Locate faces in the image and encode the first one found.

    Note: the input image should contain only one person.
    Uses the imported face_recognition module as the model.

    Para:
        image: numpy array, e.g. np.array(PIL.Image.open(file).convert('RGB')).
            That is what face_recognition.load_image_file(file, mode='RGB')
            returns; only 'RGB' (8-bit RGB, 3 channels) and 'L' (black and
            white) modes are supported there.
    Result:
        (bounding box of face, feature vector of the face) for the first
        located face, or (None, None) when no face is located
    """
    locations = face_recognition.face_locations(image)
    if len(locations) == 0:
        return (None, None)

    # Encodings are computed for every located face; only the first pair is returned.
    encodings = face_recognition.face_encodings(image, locations)
    first_box, first_vector = locations[0], encodings[0]
    return (first_box, first_vector)

def _draw_boxes_with_names(image, face_locations, face_names):
    """
    Draw a bounding box and a name label onto the picture for every face.

    The drawing is done directly on `image`; nothing is returned.

    face_locations: iterable of (top, right, bottom, left) boxes
    face_names: iterable of names, paired with face_locations by position
    """
    box_colour = (0, 255, 0)
    text_colour = (255, 255, 255)
    label_font = cv2.FONT_HERSHEY_DUPLEX

    for location, label in zip(face_locations, face_names):
        top, right, bottom, left = location

        # Box outline, 2 px thick.
        cv2.rectangle(image, (left, top), (right, bottom), box_colour, 2)

        # Name label, offset 6 px inside the bottom-left corner of the box.
        cv2.putText(image, label, (left + 6, bottom - 6), label_font, 0.5, text_colour, 1)

def load_data_base(path="face_dictionary.txt"):
    """
    Load the registered-face database from a text file.

    Each non-blank line has the form ``name: [v0, v1, ...]``. A line is split
    on its LAST colon, so a name may itself contain ':'. Blank lines are
    skipped.

    :param path: file to read; defaults to "face_dictionary.txt" in the
        current working directory
    :return: dict{name (str): vector feature of the face with that name
        (numpy array from face_recognition.face_encodings)}
    :raises ValueError: if a non-blank line contains no ':' separator
    """
    read_face_dict = {}
    with open(path, "r") as file:
        for line_number, line in enumerate(file, start=1):
            if not line.strip():
                continue  # tolerate blank / trailing empty lines

            person_name, separator, face_encoding_str = line.rpartition(":")
            if not separator:
                raise ValueError(
                    f"{path}, line {line_number}: expected 'name: [encoding]'"
                )

            # SECURITY NOTE(review): eval() executes whatever expression is
            # stored in the file. Only load files you created yourself; if the
            # stored value is always a plain list literal, consider switching
            # to ast.literal_eval.
            face_encoding = np.array(eval(face_encoding_str))  # turn the string into a list
            read_face_dict[person_name.strip()] = face_encoding
    return read_face_dict

def face_similar(face_db_regis, image, tolerance=0.6):
    """ 
    Check whether the only face in the image is in the database.

    When a match is found, the face's bounding box and the matching name are
    drawn onto `image` before returning.

    :param face_db_regis: The dictionary {name: vector feature} from load_data_base()
    :param image: The image containing only one person - cropped from the big image in the YOLO detection step
    :param tolerance: Smaller values make the recognition stricter
    :return: True if the face is found in the database, False if not
        (including when no face is detected in the image)
    """
    face_to_check = _face_rec(image)
    # _face_rec reports "no face" as (None, None), which has length 2, so the
    # encoding itself must be checked rather than the tuple length.
    if not face_to_check or face_to_check[1] is None:
        return False
    face_box, face_vector = face_to_check

    for name, vector in face_db_regis.items():
        # Compare the face in the image against each face in the database.
        match = face_recognition.compare_faces([vector], face_vector, tolerance=tolerance)[0]

        if match:
            # The drawing helper zips a sequence of boxes with a sequence of
            # names, so the single box and name are wrapped in lists.
            _draw_boxes_with_names(image, [face_box], [name])
            return True

    return False

      


In [None]:
# Open the input video (path is relative to the working directory).
video_path = 'VideoSend_0.mkv'
cap = cv2.VideoCapture(video_path)

# Read the video frame by frame until a read fails.
while True:
    ret, frame = cap.read()
    # `ret` is falsy once no further frame could be read.
    if not ret:
        break

    # TODO: no per-frame processing happens here yet; each frame is read and then dropped.

# Release the capture object once the loop has finished.
cap.release()