# Working with YOLO

In [1]:
# Import necessary libraries
import cv2
import numpy as np
from pathlib import Path
import fitz
import torch
import matplotlib.pyplot as plt
import time

# YOLO
from ultralytics import YOLO

In [2]:
# Input locations: photos live in sub-folders of input_dir, PDFs directly in pdf_dir.
input_dir = Path("photo/images")
pdf_dir = Path("pdfs")

# Output locations: annotated images, crops from photos, crops from PDF pages.
output_dir_pose = Path("face_detected_YOLO")
cropped_dir_pose = Path("cropped_faces_YOLO")
pdf_cropped_dir = Path("pdf_cropped_faces_YOLO")

# Create every output directory up front; existing directories are left untouched.
for _out_dir in (output_dir_pose, cropped_dir_pose, pdf_cropped_dir):
    _out_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Load the YOLOv8 nano pose model; the weights file is downloaded on first use.
# Previous YOLOv5 loader via torch.hub, left commented out for reference:
# model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
model = YOLO('yolov8n-pose.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-pose.pt to 'yolov8n-pose.pt'...


100%|██████████| 6.52M/6.52M [00:02<00:00, 3.37MB/s]


In [4]:
def detect_faces_yolo_pose(img, img_name, save_dir):
    """Detect people in an RGB image, save one crop per detection and an annotated copy.

    Despite the name, the crops are the model's *person* bounding boxes
    (class 0), not face regions.

    Args:
        img: Image as a NumPy array in RGB channel order (the callers in this
            notebook pass H x W x 3 uint8 arrays).
        img_name: Base name (without extension) used to build output file names.
        save_dir: ``pathlib.Path`` directory that receives the crops, named
            ``{img_name}_face_{k}.jpg``.

    Side effects:
        Runs the module-level ``model``, writes crops into ``save_dir`` and an
        annotated image into the module-level ``output_dir_pose``, and prints
        one line per file written.
    """
    # Ultralytics interprets NumPy input as BGR (OpenCV convention), so convert
    # once here and use the BGR image for inference, cropping and writing.
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    img_h, img_w = img_bgr.shape[:2]
    results = model(img_bgr)

    face_count = 0
    for result in results:
        # Keep only bounding boxes of the person class (class id 0).
        person_mask = result.boxes.cls == 0
        boxes = result.boxes.xyxy[person_mask].cpu().numpy()

        for box in boxes:
            xmin, ymin, xmax, ymax = map(int, box[:4])
            # Clamp to the image: a negative index would wrap around when slicing.
            xmin, ymin = max(xmin, 0), max(ymin, 0)
            xmax, ymax = min(xmax, img_w), min(ymax, img_h)
            if xmax <= xmin or ymax <= ymin:
                # Degenerate box: nothing to crop, and OpenCV rejects empty images.
                continue

            face_count += 1
            face_filename = save_dir / f"{img_name}_face_{face_count}.jpg"
            # cv2.imwrite reports failure via its return value instead of raising.
            if cv2.imwrite(str(face_filename), img_bgr[ymin:ymax, xmin:xmax]):
                print(f"Saved: {face_filename.name}")
            else:
                print(f"Failed to save: {face_filename.name}")

        # Save annotated image in output_dir_pose. plot() draws on the BGR input
        # and returns BGR, so it can be written without another conversion.
        annotated_path = output_dir_pose / f"{img_name}_annotated.jpg"
        if cv2.imwrite(str(annotated_path), result.plot()):
            print(f"Annotated image saved: {annotated_path.name}")
        else:
            print(f"Failed to save: {annotated_path.name}")

In [5]:
def process_images():
    """Run person detection on every ``*.jpg`` in each sub-folder of ``input_dir``.

    Entries of ``input_dir`` that are not directories are ignored. Each image
    is read with OpenCV, converted to RGB and handed to
    ``detect_faces_yolo_pose`` with ``cropped_dir_pose`` as the crop folder.

    Note: a later cell in this notebook defines ``process_images(max_images=25)``,
    which replaces this definition once that cell has run.
    """
    # Sort so the processing order does not depend on the filesystem.
    for folder in sorted(input_dir.iterdir()):
        if not folder.is_dir():
            continue
        for img_path in sorted(folder.glob("*.jpg")):
            img = cv2.imread(str(img_path))
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files instead of raising.
                print(f"Skipping unreadable image: {img_path}")
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            detect_faces_yolo_pose(img_rgb, img_path.stem, cropped_dir_pose)

In [6]:
def pdf_to_images(pdf_path, dpi=300):
    """Render every page of a PDF to an image array.

    Args:
        pdf_path: Path to the PDF file (anything ``str()`` turns into a path).
        dpi: Render resolution; PDF user space is 72 units per inch, so the
            zoom factor applied to each page is ``dpi / 72``.

    Returns:
        A list of ``(img_array, page_number)`` tuples, one per page, where
        ``img_array`` is a ``(height, width, 3)`` uint8 array built from the
        pixmap samples and ``page_number`` starts at 1.
    """
    # The zoom matrix is the same for every page, so build it once.
    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)

    pages = []
    # Context manager guarantees the document is closed even if rendering fails.
    with fitz.open(str(pdf_path)) as pdf_document:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            # alpha=False (the default) keeps exactly 3 samples per pixel,
            # which the reshape below relies on.
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                (pix.height, pix.width, 3)
            )
            pages.append((img_array, page_num + 1))
    return pages

In [7]:
def process_images(max_images=25):
    """Run person detection on up to ``max_images`` JPEGs from ``input_dir``.

    Walks each sub-folder of ``input_dir`` (non-directory entries are ignored),
    reads every ``*.jpg`` with OpenCV, converts it to RGB and passes it to
    ``detect_faces_yolo_pose`` with ``cropped_dir_pose`` as the crop folder.

    Args:
        max_images: Maximum number of images to process. Only images that were
            actually read and processed count toward the limit.
    """
    image_counter = 0
    # Sort folders and files so the capped subset is the same on every run.
    for folder in sorted(input_dir.iterdir()):
        if not folder.is_dir():
            continue
        for img_path in sorted(folder.glob("*.jpg")):
            if image_counter >= max_images:
                print(f"Reached the max limit of {max_images} images.")
                return
            img = cv2.imread(str(img_path))
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files instead of raising.
                print(f"Skipping unreadable image: {img_path}")
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            detect_faces_yolo_pose(img_rgb, img_path.stem, cropped_dir_pose)
            image_counter += 1

In [8]:
# Run person detection on at most 25 images from the sub-folders of input_dir.
process_images(max_images=25)


0: 640x384 (no detections), 57.0ms
Speed: 3.7ms preprocess, 57.0ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)
Annotated image saved: 00_annotated.jpg

0: 640x384 (no detections), 34.3ms
Speed: 1.5ms preprocess, 34.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)
Annotated image saved: 01_annotated.jpg

0: 640x384 (no detections), 36.0ms
Speed: 1.7ms preprocess, 36.0ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 384)
Annotated image saved: 02_annotated.jpg

0: 640x384 (no detections), 34.4ms
Speed: 1.7ms preprocess, 34.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)
Annotated image saved: 03_annotated.jpg

0: 640x384 1 person, 32.8ms
Speed: 1.7ms preprocess, 32.8ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 384)
Saved: 04_face_1.jpg
Annotated image saved: 04_annotated.jpg

0: 640x384 1 person, 35.7ms
Speed: 1.8ms preprocess, 35.7ms inference, 2.9ms postprocess per image at shape (1, 3, 640, 

In [9]:
def process_pdfs():
    """Run person detection on every page of every ``*.pdf`` in ``pdf_dir``.

    Each PDF is rendered with ``pdf_to_images``; every page is passed to
    ``detect_faces_yolo_pose`` under the name ``{pdf_stem}_page_{n}`` with
    ``pdf_cropped_dir`` as the crop folder. An error while handling one PDF is
    printed and processing continues with the next file (best effort).
    """
    # Sort so PDFs are processed in a stable order.
    for pdf_file in sorted(pdf_dir.glob("*.pdf")):
        print(f"Processing PDF: {pdf_file.name}")
        try:
            for img_array, page_num in pdf_to_images(pdf_file):
                # PyMuPDF pixmap samples are already RGB, which is what
                # detect_faces_yolo_pose expects, so no channel swap is needed.
                # Copy to hand over a writable array that owns its data.
                img_rgb = img_array.copy()
                img_name = f"{pdf_file.stem}_page_{page_num}"
                detect_faces_yolo_pose(img_rgb, img_name, pdf_cropped_dir)
        except Exception as e:
            print(f"Error processing {pdf_file.name}: {str(e)}")

In [10]:
# Run person detection on every page of every PDF found in pdf_dir.
process_pdfs()