In [1]:
!pip install ultralytics face_recognition

Collecting ultralytics
  Downloading ultralytics-8.1.34-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l- done
Collecting dlib>=19.7 (from face_recognition)
  Downloading dlib-19.24.2.tar.gz (11.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB[0m [31m90.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependen

In [2]:
import os
import shutil
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
import face_recognition

In [3]:
def calculate_pose_score(results):
    list_keypoints_labels = ["Nose", "Left-eye", "Right-eye", "Left-ear", "Right-ear", "Left-shoulder", "Right-shoulder", "Left-elbow", "Right-elbow", "Left-wrist", "Right-wrist", "Left-hip", "Right-hip", "Left-knee", "Rigt-knee", "Left-ankle", "Right-ankle"]
    
    pose_keypoints_conf = results[0].keypoints.conf[0].tolist()
    num_detected_keypoints = 0
    total_keypoints = len(pose_keypoints_conf)
    
    for cf in pose_keypoints_conf:
        if cf > 0.7:  # Check if the landmark is visible
            num_detected_keypoints += 1

    # Calculate the percentage of detected landmarks compared to total landmarks.
    pose_score = (num_detected_keypoints / total_keypoints) * 100
    
    dico_pose = {list_keypoints_labels[i]: pose_keypoints_conf[i] for i in range(len(list_keypoints_labels))}

    return pose_score, dico_pose

In [4]:
# Load a model
model = YOLO('yolov8m-pose.pt')  # load an official model

video_path = './Mens Fall-Winter 2024 Show LOUIS VUITTON.mp4'
box_folder = './defile-2024-lv-box'
best_folder = './defile-2024-lv-best'

if not os.path.exists(box_folder):
    os.makedirs(box_folder)

if not os.path.exists(best_folder):
    os.makedirs(best_folder)

index = 0
START = 1500 # index frame beginning of the show
END = 56000 # index frame ending of the show

# Ouvrir la vidéo
video_capture = cv2.VideoCapture(video_path)

bestconf = 0
box_amount = 0
first_box = True
new_mannequin = 0

if not os.path.exists(os.path.join(box_folder, str(new_mannequin))):
    os.makedirs(os.path.join(box_folder, str(new_mannequin)))
    
if not os.path.exists(os.path.join(best_folder, str(new_mannequin))):
    os.makedirs(os.path.join(best_folder, str(new_mannequin)))

dico_boxes = {}
logs = []

while True:
    success, frame = video_capture.read()

    if not success:
        break

    if index >= START and index <= END:
        if index % 2 == 0:            
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convertir en RGB pour matplotlib

            # Predict with the model
            results = model(image)  # predict on an image
            
            if len(results[0].boxes.conf) != 0:
            
                conf = results[0].boxes.conf[0].tolist()

                if conf > 0.90 and (results[0].keypoints.conf[0].tolist()[0] > 0.9 and results[0].keypoints.conf[0].tolist()[1] > 0.9 and results[0].keypoints.conf[0].tolist()[2] > 0.9):
                    
                    box_coords = results[0].boxes.xyxy[0].tolist()

                    # Extraire les coordonnées de la boîte
                    left, top, right, bottom = map(int, box_coords)

                    left = (0 if left - 50 < 0 else left - 50)
                    top = (0 if top - 50 < 0 else top - 50)

                    right = (image.shape[1] if right + 50 > image.shape[1] else right + 50)
                    bottom = (image.shape[0] if bottom + 50 > image.shape[0] else bottom + 50)

                    # Extraire la région de la boîte de l'image d'origine
                    box_region = image[top:bottom, left:right]

                    # Créer une nouvelle image avec la boîte sur un fond blanc
                    box_image = np.zeros((bottom - top, right - left, 3), dtype=np.uint8)
                    box_image.fill(255)  # Remplir avec du blanc
                    box_image[0:bottom - top, 0:right - left] = box_region
                    
                    # plt.imshow(box_image)
                    
                    new_box_encoding = face_recognition.face_encodings(box_image)
                    
                    if new_box_encoding != []:        
                        
                        if first_box:
                            last_box_encoding = new_box_encoding
                            first_box = False

                        else:
                            face_matches = face_recognition.compare_faces(last_box_encoding, new_box_encoding[0])

                            if face_matches[0] == True:
                                pass
                            else:
                                print("It's not the same mannequin!")

                                chemin_box_max_score, score_max = max(dico_boxes.items(), key=lambda x: x[1])

                                shutil.copy(chemin_box_max_score, os.path.join(best_folder, str(new_mannequin)))

                                bestconf = 0
                                box_amount = 0

                                new_mannequin += 1

                                if not os.path.exists(os.path.join(box_folder, str(new_mannequin))):
                                    os.makedirs(os.path.join(box_folder, str(new_mannequin)))

                                if not os.path.exists(os.path.join(best_folder, str(new_mannequin))):
                                    os.makedirs(os.path.join(best_folder, str(new_mannequin)))

                                logs.append(dico_boxes)
                                dico_boxes = {}

                            last_box_encoding = new_box_encoding                        
                    
                        if conf > bestconf:
                            box_amount += 1

                            bestconf = conf

                            box_image_save = cv2.cvtColor(box_image, cv2.COLOR_BGR2RGB)

                            box_path = f"{box_folder}/{new_mannequin}/box_{index}.jpg"
                            cv2.imwrite(box_path, box_image_save)

                            pose_score, dico_pose = calculate_pose_score(results)

                            dico_boxes[box_path] = pose_score
    
    index += 1

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-pose.pt to 'yolov8m-pose.pt'...


100%|██████████| 50.8M/50.8M [00:00<00:00, 71.3MB/s]



0: 384x640 22 persons, 132.9ms
Speed: 10.7ms preprocess, 132.9ms inference, 456.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 13.6ms
Speed: 4.2ms preprocess, 13.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 13.5ms
Speed: 2.9ms preprocess, 13.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 13.5ms
Speed: 3.2ms preprocess, 13.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 persons, 13.5ms
Speed: 3.0ms preprocess, 13.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 13.6ms
Speed: 3.0ms preprocess, 13.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 13.5ms
Speed: 3.1ms preprocess, 13.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 13.6ms
Speed: 3.0ms preprocess, 13.6ms inference, 1.5ms postprocess per ima