In [None]:
# Import necessary libraries
import cv2
import numpy as np
from pathlib import Path
import fitz
import torch
import matplotlib.pyplot as plt
import time

# YOLO
from ultralytics import YOLO

In [None]:
# Source locations: folders of .jpg images, and PDF files
input_dir = Path("photo") / "images"
pdf_dir = Path("pdfs")

# Destinations: annotated images, crops taken from images, crops taken from PDF pages
output_dir_pose = Path("face_detected_YOLO")
cropped_dir_pose = Path("cropped_faces_YOLO")
pdf_cropped_dir = Path("pdf_cropped_faces_YOLO")

# Make sure every destination exists before anything is written into it
for _destination in (output_dir_pose, cropped_dir_pose, pdf_cropped_dir):
    _destination.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the YOLOv8 pose weights (`yolov8n-pose.pt`) through the Ultralytics `YOLO` wrapper.
# `model` is the module-level detector used by `detect_faces_yolo_pose`.
model = YOLO('yolov8n-pose.pt')

In [None]:
def detect_faces_yolo_pose(img, img_name, save_dir):
    """Run the pose model on one image, save a crop per class-0 box and an annotated copy.

    NOTE(review): the boxes come from ``result.boxes`` filtered to class 0 (person), so
    the saved "face" crops appear to be whole-person crops — confirm this is intended.

    Args:
        img: Image as an HxWx3 uint8 array in RGB channel order (callers convert from
            OpenCV's BGR before calling).
        img_name: Base name used for the output files.
        save_dir: Directory (``Path``) that receives ``{img_name}_face_{k}.jpg`` crops.

    Side effects:
        Writes the crops into ``save_dir`` and ``{img_name}_annotated.jpg`` into
        ``output_dir_pose``; prints one line per saved file.
    """
    # Ultralytics interprets numpy input as BGR, so convert once and stay in BGR:
    # the model sees the right channels and cv2.imwrite needs no further conversion.
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    results = model(img_bgr)
    height, width = img_bgr.shape[:2]

    face_count = 0
    for result in results:
        # Keep only boxes whose class id is 0 (person)
        boxes = result.boxes.xyxy[result.boxes.cls == 0].cpu().numpy()

        for box in boxes:
            xmin, ymin, xmax, ymax = map(int, box[:4])
            # Clip to the image: a negative index would wrap around when slicing
            xmin, ymin = max(xmin, 0), max(ymin, 0)
            xmax, ymax = min(xmax, width), min(ymax, height)
            if xmax <= xmin or ymax <= ymin:
                continue  # degenerate box, nothing to save

            face_count += 1
            face_filename = save_dir / f"{img_name}_face_{face_count}.jpg"
            cv2.imwrite(str(face_filename), img_bgr[ymin:ymax, xmin:xmax])
            print(f"Saved: {face_filename.name}")

    # Nothing to annotate when the model returned no result object
    if len(results) == 0:
        return

    # A single input image yields a single result; plot() draws on the BGR input
    annotated_path = output_dir_pose / f"{img_name}_annotated.jpg"
    cv2.imwrite(str(annotated_path), results[0].plot())
    print(f"Annotated image saved: {annotated_path.name}")

In [None]:
def process_images():
    """Run `detect_faces_yolo_pose` on every ``*.jpg`` in each sub-folder of `input_dir`.

    NOTE: a later cell redefines `process_images` with a ``max_images`` cap; when the
    notebook is run top to bottom that definition replaces this one.

    Files that OpenCV cannot decode are reported and skipped instead of crashing the run.
    Crops are written to `cropped_dir_pose`.
    """
    for folder in input_dir.iterdir():
        if not folder.is_dir():
            continue
        for img_path in folder.glob("*.jpg"):
            img = cv2.imread(str(img_path))
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning None
                print(f"Skipping unreadable image: {img_path}")
                continue
            # The detector expects RGB; OpenCV loads BGR
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            detect_faces_yolo_pose(img_rgb, img_path.stem, cropped_dir_pose)

In [None]:
def pdf_to_images(pdf_path, dpi=300):
    """Render every page of a PDF to an RGB image array.

    Args:
        pdf_path: Path of the PDF file to open.
        dpi: Render resolution; pages are scaled by ``dpi / 72``.

    Returns:
        A list of ``(image, page_number)`` tuples, where ``image`` is an
        HxWx3 uint8 array in RGB order and ``page_number`` starts at 1.
    """
    # The scale is the same for every page, so build the matrix once
    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)

    rendered_pages = []
    # The context manager closes the document even if a page fails to render
    with fitz.open(str(pdf_path)) as pdf_document:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            # Request RGB without alpha explicitly so the 3-channel reshape is always valid
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB, alpha=False)
            img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                (pix.height, pix.width, 3))
            rendered_pages.append((img_array, page_num + 1))
    return rendered_pages

In [None]:
def process_images(max_images=25):
    """Run `detect_faces_yolo_pose` on ``*.jpg`` files under `input_dir`, up to a limit.

    Each immediate sub-folder of `input_dir` is scanned. Files that OpenCV cannot
    decode are reported, skipped, and do not count towards the limit. Crops are
    written to `cropped_dir_pose`.

    Args:
        max_images: Maximum number of images to process before stopping.
    """
    image_counter = 0
    for folder in input_dir.iterdir():
        if not folder.is_dir():
            continue
        for img_path in folder.glob("*.jpg"):
            if image_counter >= max_images:
                print(f"Reached the max limit of {max_images} images.")
                return
            img = cv2.imread(str(img_path))
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning None
                print(f"Skipping unreadable image: {img_path}")
                continue
            # The detector expects RGB; OpenCV loads BGR
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            detect_faces_yolo_pose(img_rgb, img_path.stem, cropped_dir_pose)
            image_counter += 1

In [None]:
# Run the image pipeline; it stops once 25 images have been processed.
process_images(max_images=25)

In [None]:
def process_pdfs():
    """Run `detect_faces_yolo_pose` on every page of every ``*.pdf`` in `pdf_dir`.

    Pages are rendered with `pdf_to_images`; crops are written to `pdf_cropped_dir`
    under the name ``{pdf stem}_page_{n}``. A failure in one PDF is printed and the
    remaining files are still processed.
    """
    # Sorted so the processing order is the same on every run
    pdf_files = sorted(pdf_dir.glob("*.pdf"))
    for pdf_file in pdf_files:
        print(f"Processing PDF: {pdf_file.name}")
        try:
            pages = pdf_to_images(pdf_file)
            for img_rgb, page_num in pages:
                # pdf_to_images already yields RGB arrays, which is what the detector
                # expects; swapping channels here would save crops with red/blue exchanged.
                img_name = f"{pdf_file.stem}_page_{page_num}"
                detect_faces_yolo_pose(img_rgb, img_name, pdf_cropped_dir)
        except Exception as e:
            # Best effort: report the failing file and continue with the next one
            print(f"Error processing {pdf_file.name}: {str(e)}")

In [None]:
# Run the PDF pipeline over every *.pdf file found in pdf_dir.
process_pdfs()

---

# YOLO

In [1]:
# Import necessary libraries

# Images
import cv2
import numpy as np
from pathlib import Path
import torch
import matplotlib.pyplot as plt
import time
from typing import List, Optional, Dict, Any
from ultralytics import YOLO

# PDF's
import fitz

In [2]:
# Face detection using YOLO with precision, efficiency, and portability considerations.
class FaceDetector:
    """Detect people in images with a YOLOv8 pose model and save one crop per detection.

    NOTE(review): the boxes that get cropped are the class-0 entries of
    ``result.boxes``; for the pose weights these appear to be whole-person boxes
    rather than face boxes, despite the "face" naming — confirm this is intended.
    """

    def __init__(self):
        """Set the input/output folders, create the output folders and load the model."""
        # Configuration
        self.input_dir = Path("photo/images")
        self.output_dir = Path("face_detected_YOLO")    # annotated copies of the inputs
        self.cropped_dir = Path("cropped_faces_YOLO")   # one file per detection
        # Image files for which detection ran but found nothing
        self.no_face_detected: List[Path] = []

        # Create directories
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.cropped_dir.mkdir(parents=True, exist_ok=True)

        # Load YOLO model
        self.model = YOLO('yolov8n-pose.pt')

    def detect_faces(self, img_path: Path, is_pdf: bool = False) -> Optional[int]:
        """Detect, crop and (for regular images) annotate one image; print timing.

        Args:
            img_path: Path of the image to read. An already-loaded BGR array is also
                accepted, in which case no annotated copy is written.
            is_pdf: True when the image is a rendered PDF page; suppresses the
                annotated copy and keeps the page out of ``no_face_detected``.

        Returns:
            The number of class-0 detections, or ``None`` if the image could not be
            read or processing failed (the error is printed).
        """
        start_time = time.time()
        source_name = getattr(img_path, 'name', 'PDF_page')

        try:
            from_file = isinstance(img_path, Path)
            img = cv2.imread(str(img_path)) if from_file else img_path
            if img is None:
                print(f"Error reading image: {img_path}")
                return None

            # Detection. Ultralytics interprets numpy input as BGR, so the OpenCV
            # image is passed through unchanged rather than converted to RGB.
            detect_start = time.time()
            results = self.model(img)
            detect_time = time.time() - detect_start

            face_count = 0
            for result in results:
                boxes = result.boxes.xyxy[result.boxes.cls == 0].cpu().numpy()
                for box in boxes:
                    # Increment per box so each crop gets its own file name
                    face_count += 1
                    self._process_detection(img, img_path, box, face_count, is_pdf)

            if not is_pdf and from_file:
                # Save annotated image; plot() draws on the BGR input, so it can be
                # written directly.
                if len(results) > 0:
                    output_path = self.output_dir / img_path.name
                    cv2.imwrite(str(output_path), results[0].plot())
                # Remember images with no detection for the summary and the samples
                if face_count == 0 and img_path not in self.no_face_detected:
                    self.no_face_detected.append(img_path)

            elapsed = time.time() - start_time
            print(f"YOLO: {source_name} | "
                  f"Faces: {face_count} | Detect: {detect_time:.3f}s | Total: {elapsed:.3f}s")

            return face_count

        except Exception as e:
            # Best effort: report the failure and let the caller move on
            print(f"Error processing {source_name}: {str(e)}")
            return None

    def _process_detection(self, img: np.ndarray, img_path, box, idx: int, is_pdf: bool) -> None:
        """Crop one detection out of ``img`` and write it to ``self.cropped_dir``.

        Args:
            img: Source image in BGR order (as loaded by OpenCV). It is not modified.
            img_path: Source of the image; its ``stem`` names the crop
                (``'PDF_page'`` is used when it has none).
            box: Sequence whose first four values are ``xmin, ymin, xmax, ymax``.
            idx: 1-based index of the detection within the image, used in the file name.
            is_pdf: Unused; kept so existing call sites keep working.
        """
        height, width = img.shape[:2]
        xmin, ymin, xmax, ymax = map(int, box[:4])
        # Clip to the image: a negative index would wrap around when slicing
        xmin, ymin = max(xmin, 0), max(ymin, 0)
        xmax, ymax = min(xmax, width), min(ymax, height)
        if xmax <= xmin or ymax <= ymin:
            return  # degenerate box, nothing to save

        crop_name = f"{getattr(img_path, 'stem', 'PDF_page')}_face_{idx}.jpg"
        output_path = self.cropped_dir / crop_name
        cv2.imwrite(str(output_path), img[ymin:ymax, xmin:xmax])

    def show_no_face_samples(self, sample_size: int = 3) -> None:
        """Display up to ``sample_size`` images in which nothing was detected."""
        print(f"\nImages without faces detected: {len(self.no_face_detected)}")
        for img_path in self.no_face_detected[:sample_size]:
            try:
                img = cv2.imread(str(img_path))
                if img is None:
                    print(f"Error displaying {img_path}: image could not be read")
                    continue
                plt.figure(figsize=(8, 6))
                # matplotlib expects RGB; OpenCV loads BGR
                plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                plt.title(f"No face detected: {img_path.name}")
                plt.axis('off')
                plt.show()
            except Exception as e:
                print(f"Error displaying {img_path}: {e}")

In [3]:
class PDFProcessor:
    """Render PDF pages to images and run a `FaceDetector` on each page."""

    def __init__(self, face_detector: FaceDetector, dpi: int = 300):
        """Store the detector and folders, and create the output folders.

        Args:
            face_detector: Detector used for every rendered page. Its ``cropped_dir``
                is only redirected to ``pdf_cropped_dir`` while a PDF is being
                processed, so the detector can still be used for regular images.
            dpi: Resolution used to render pages (scale factor is ``dpi / 72``).
        """
        self.pdf_dir = Path('pdfs')
        self.output_img_dir = Path('pdf_images_YOLO')          # temporary page images
        self.pdf_cropped_dir = Path('pdf_cropped_faces_YOLO')  # crops from PDF pages
        self.detector = face_detector
        self.dpi = dpi

        self.output_img_dir.mkdir(parents=True, exist_ok=True)
        self.pdf_cropped_dir.mkdir(parents=True, exist_ok=True)

    def process_pdfs(self) -> float:
        """Process all PDF files in ``pdf_dir`` and return the total time in seconds.

        Returns ``0.0`` when no PDF file is found.
        """
        # Sorted so the processing order is the same on every run
        pdf_files = sorted(self.pdf_dir.glob("*.pdf"))
        if not pdf_files:
            print("No PDF files found")
            return 0.0

        start_time = time.time()
        print("\nStarting PDF processing...")

        for pdf_file in pdf_files:
            self._process_pdf(pdf_file)

        return time.time() - start_time

    def _process_pdf(self, pdf_file: Path) -> None:
        """Process a single PDF file page by page, printing per-page and total timings.

        Errors are printed rather than raised so that one bad file does not stop
        the remaining ones.
        """
        print(f"\nProcessing PDF: {pdf_file.name}")
        pdf_start = time.time()

        # Send crops to the PDF folder only for the duration of this call
        previous_cropped_dir = self.detector.cropped_dir
        self.detector.cropped_dir = self.pdf_cropped_dir
        try:
            zoom = self.dpi / 72
            render_matrix = fitz.Matrix(zoom, zoom)
            # The context manager closes the document even if a page fails
            with fitz.open(str(pdf_file)) as doc:
                for page_num in range(len(doc)):
                    page_start = time.time()
                    faces = self._detect_on_page(
                        doc.load_page(page_num), render_matrix, pdf_file.stem, page_num)
                    page_time = time.time() - page_start
                    print(f"Page {page_num+1}: {faces or 0} faces, processed in {page_time:.2f}s")

        except Exception as e:
            print(f"Error processing {pdf_file.name}: {str(e)}")
        finally:
            self.detector.cropped_dir = previous_cropped_dir

        pdf_time = time.time() - pdf_start
        print(f"Finished {pdf_file.name} in {pdf_time:.2f} seconds")

    def _detect_on_page(self, page, render_matrix, pdf_stem: str, page_num: int) -> Optional[int]:
        """Render one page, run the detector on it and return the detection count.

        The page is written to a temporary PNG because the detector derives crop
        file names from the path it is given; the file is removed afterwards even
        if writing or detection fails.
        """
        # Request RGB without alpha explicitly so the 3-channel reshape is always valid
        pix = page.get_pixmap(matrix=render_matrix, colorspace=fitz.csRGB, alpha=False)
        img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
            (pix.height, pix.width, 3))
        # The pixmap is RGB; OpenCV writes BGR
        img = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

        temp_path = self.output_img_dir / f"temp_{pdf_stem}_p{page_num}.png"
        try:
            cv2.imwrite(str(temp_path), img)
            return self.detector.detect_faces(temp_path, is_pdf=True)
        finally:
            temp_path.unlink(missing_ok=True)

In [None]:
# Main execution: time detector start-up, process up to `max_images` images, then all PDFs,
# printing a summary after each stage.
if __name__ == "__main__":
    from itertools import islice

    # Initialize face detector with timing
    print("Initializing YOLO face detector...")
    init_start = time.perf_counter()
    face_detector = FaceDetector()
    init_time = time.perf_counter() - init_start
    print(f"Detector initialized in {init_time:.2f} seconds\n")

    # Process images with detailed timing and averages
    print("\n" + "="*50)
    print("Starting image processing...")
    img_start = time.perf_counter()

    # Track processing metrics
    image_counter = 0
    total_faces = 0
    total_time = 0.0
    max_images = 25

    # Lazily walk every *.jpg in each sub-folder of the input directory
    if face_detector.input_dir.is_dir():
        candidate_images = (
            img_path
            for folder in face_detector.input_dir.iterdir()
            if folder.is_dir()
            for img_path in folder.glob("*.jpg")
        )
    else:
        print(f"Input directory not found: {face_detector.input_dir}")
        candidate_images = iter(())

    # islice stops the whole walk at the limit, not just the current folder
    for img_path in islice(candidate_images, max_images):
        start_time = time.perf_counter()
        faces_detected = face_detector.detect_faces(img_path)
        elapsed = time.perf_counter() - start_time

        total_time += elapsed
        if faces_detected is not None:
            total_faces += faces_detected
        image_counter += 1

        # Display running averages
        avg_time = total_time / image_counter
        avg_faces = total_faces / image_counter
        print(f"Progress: {image_counter}/{max_images} | "
              f"Avg time: {avg_time:.3f}s | "
              f"Avg faces: {avg_faces:.1f}")

    img_elapsed = time.perf_counter() - img_start

    # Averages are reported as 0 when no image was processed (avoids dividing by zero)
    avg_time_per_image = total_time / image_counter if image_counter else 0.0
    avg_faces_per_image = total_faces / image_counter if image_counter else 0.0

    # Image processing summary
    print("\n" + "="*50)
    print("IMAGE PROCESSING SUMMARY")
    print(f"Total images processed: {image_counter}")
    print(f"Images with faces detected: {image_counter - len(face_detector.no_face_detected)}")
    print(f"Images without faces: {len(face_detector.no_face_detected)}")
    print(f"Total faces detected: {total_faces}")
    print(f"Total processing time: {img_elapsed:.2f} seconds")
    print(f"Average time per image: {avg_time_per_image:.3f} seconds")
    print(f"Average faces per image: {avg_faces_per_image:.1f}")
    print("="*50 + "\n")

    # Show samples with no faces detected
    if face_detector.no_face_detected:
        print(f"Showing {min(3, len(face_detector.no_face_detected))} samples without detected faces...")
        face_detector.show_no_face_samples(sample_size=3)

    # Process PDFs with comprehensive timing
    print("\n" + "="*50)
    print("Starting PDF processing...")
    pdf_start = time.perf_counter()

    pdf_processor = PDFProcessor(face_detector)
    pdf_time = pdf_processor.process_pdfs()
    pdf_elapsed = time.perf_counter() - pdf_start

    print("\n" + "="*50)
    print("PDF PROCESSING SUMMARY")
    print(f"Total processing time: {pdf_elapsed:.2f} seconds")
    if pdf_time > 0:
        print(f"PDF-only processing time: {pdf_time:.2f} seconds")
    print("="*50 + "\n")

    # Final summary
    print("\n" + "="*50)
    print("FINAL SUMMARY")
    print(f"Total execution time: {time.perf_counter() - init_start:.2f} seconds")
    print(f"Total images processed: {image_counter}")
    print(f"Total faces detected: {total_faces}")
    print(f"Average time per image: {avg_time_per_image:.3f} seconds")
    print(f"Average faces per image: {avg_faces_per_image:.1f}")
    print("="*50)

Initializing YOLO face detector...
Detector initialized in 0.38 seconds


Starting image processing...

0: 640x384 (no detections), 44.3ms
Speed: 3.2ms preprocess, 44.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)
YOLO: 00.jpg | Faces: 0 | Detect: 1.374s | Total: 1.613s
Progress: 1/25 | Avg time: 1.617s | Avg faces: 0.0

0: 640x384 (no detections), 32.1ms
Speed: 1.4ms preprocess, 32.1ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)
YOLO: 01.jpg | Faces: 0 | Detect: 0.038s | Total: 0.280s
Progress: 2/25 | Avg time: 0.950s | Avg faces: 0.0

0: 640x384 (no detections), 33.5ms
Speed: 1.3ms preprocess, 33.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)
YOLO: 02.jpg | Faces: 0 | Detect: 0.039s | Total: 0.287s
Progress: 3/25 | Avg time: 0.730s | Avg faces: 0.0

0: 640x384 (no detections), 32.8ms
Speed: 1.5ms preprocess, 32.8ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 384)
YOLO: 03.jpg | Faces: 0 | Detect: 0.039s 