In [1]:
# Extra
import cv2
import numpy as np
from sort import Sort

# System
import os
import tempfile
import urllib.request

# Data processing
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.ndimage import rotate

# Styling
from tqdm import tqdm  # Barra de carga
from IPython.display import clear_output # Limpiar la salida de la celda
from time import sleep  # Importar sleep

# Deep learning
from ultralytics import YOLO
from deepface import DeepFace

# Openvino
import subprocess
from openvino.runtime import Core


## Modelos

In [2]:
# Modelos
yolo_model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)

# asignar modelo de detección de rostros
detector = "RetinaFace"

In [3]:
'''
RetinaFace Citation

@article{serengil2024lightface,
  title     = {A Benchmark of Facial Recognition Pipelines and Co-Usability Performances of Modules},
  author    = {Serengil, Sefik and Ozpinar, Alper},
  journal   = {Journal of Information Technologies},
  volume    = {17},
  number    = {2},
  pages     = {95-107},
  year      = {2024},
  doi       = {10.17671/gazibtd.1399077},
  url       = {https://dergipark.org.tr/en/pub/gazibtd/issue/84331/1399077},
  publisher = {Gazi University}
}

@inproceedings{serengil2020lightface,
  title        = {LightFace: A Hybrid Deep Face Recognition Framework},
  author       = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  booktitle    = {2020 Innovations in Intelligent Systems and Applications Conference (ASYU)},
  pages        = {23-27},
  year         = {2020},
  doi          = {10.1109/ASYU50717.2020.9259802},
  url          = {https://ieeexplore.ieee.org/document/9259802},
  organization = {IEEE}
}

@inproceedings{serengil2021lightface,
  title        = {HyperExtended LightFace: A Facial Attribute Analysis Framework},
  author       = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  booktitle    = {2021 International Conference on Engineering and Emerging Technologies (ICEET)},
  pages        = {1-4},
  year         = {2021},
  doi          = {10.1109/ICEET53442.2021.9659697},
  url          = {https://ieeexplore.ieee.org/document/9659697},
  organization = {IEEE}
}
'''

'\nRetinaFace Citation\n\n@article{serengil2024lightface,\n  title     = {A Benchmark of Facial Recognition Pipelines and Co-Usability Performances of Modules},\n  author    = {Serengil, Sefik and Ozpinar, Alper},\n  journal   = {Journal of Information Technologies},\n  volume    = {17},\n  number    = {2},\n  pages     = {95-107},\n  year      = {2024},\n  doi       = {10.17671/gazibtd.1399077},\n  url       = {https://dergipark.org.tr/en/pub/gazibtd/issue/84331/1399077},\n  publisher = {Gazi University}\n}\n\n@inproceedings{serengil2020lightface,\n  title        = {LightFace: A Hybrid Deep Face Recognition Framework},\n  author       = {Serengil, Sefik Ilkin and Ozpinar, Alper},\n  booktitle    = {2020 Innovations in Intelligent Systems and Applications Conference (ASYU)},\n  pages        = {23-27},\n  year         = {2020},\n  doi          = {10.1109/ASYU50717.2020.9259802},\n  url          = {https://ieeexplore.ieee.org/document/9259802},\n  organization = {IEEE}\n}\n\n@inproceedin

## Proceso

In [4]:
def create_heatmap(df, heatmap_path):
    heatmap = df.groupby(["cuadrante_x", "cuadrante_y"]).size().unstack(fill_value=0)
    heatmap = heatmap.div(heatmap.max().max())
    heatmap = rotate(heatmap, 270)
    
    plt.figure(figsize=(16, 8))  # Adjusting the figure size
    ax = sns.heatmap(heatmap, cmap="viridis", cbar_kws={'label': 'Frecuencia'})

    # Set axis labels and title
    ax.set_xlabel("Cuadrante X")
    ax.set_ylabel("Cuadrante Y")
    plt.title("Heatmap de Frecuencia de Personas en el Video")

    # Save the heatmap as an image
    plt.savefig(heatmap_path)
    
def apply_model_to_video(video_url, output_video_path, heatmap_path, last_frame_path):    
    cap = cv2.VideoCapture(video_url)
    if not cap.isOpened():
        print("Error opening video file")
        return None, None

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

    heatmap = np.zeros((h, w), dtype=np.float32)
    tracker = Sort()
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=total_frames, desc="Processing Frames")
    grid_size = 50
    data = []
    last_saved_second = -1
    
    last_frame = None  # Para guardar el último frame

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_number = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
        total_seconds = frame_number / fps
        minutes = int(total_seconds // 60)
        seconds = int(total_seconds % 60)
        timestamp = f"{minutes:02d}:{seconds:02d}"

        results = yolo_model.track(frame, persist=True)

        for detection in results:
            class_id = None
            if (detection.boxes is not None) and (len(detection.boxes) > 0):
                class_id = int(detection.boxes.cls[0])

            if class_id == None or yolo_model.names[class_id] != "person":
                continue

            if yolo_model.names[class_id] != "person":
                continue

            box = detection.boxes.xyxy.cpu().numpy().astype(int)
            tracks = tracker.update(box).astype(int)

            for xmin, ymin, xmax, ymax, track_id in tracks:
                # Analizamos la cara detectada con DeepFace
                face_region = frame[ymin:ymax, xmin:xmax]
                analysis = DeepFace.analyze(face_region, actions=['age', 'gender'], enforce_detection=False)
                print(analysis)

                if analysis:
                    gender = analysis[0].get('gender', None)
                    # Verificar si 'gender' es un diccionario y obtener la clave con el valor máximo
                    if isinstance(gender, dict):
                        gender = max(gender, key=gender.get)
                    age = analysis[0].get('age', None)
                    
                    if age != 0 and gender != 0:
                        text = f"ID:{track_id} Age:{age}, Gender:{gender}"
                    elif age != 0:
                        text = f"ID:{track_id} Age:{age}"
                    elif gender != 0:
                        text = f"ID:{track_id} Gender:{gender}"
                    else:
                        text = f"ID:{track_id}"
                else:
                    text = f"ID:{track_id}"

                # Ajusta la posición del texto
                text_position = (xmin, ymin - 10)  # Justo arriba del rectángulo

                cv2.putText(frame, text, text_position, cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

                grid_x = (xmin + xmax) // 2 // grid_size
                grid_y = (ymin + ymax) // 2 // grid_size

                current_second = int(total_seconds)
                if current_second != last_saved_second:
                    data.append({
                        "ID": track_id,
                        "cuadrante_x": grid_x,
                        "cuadrante_y": grid_y,
                        "timestamp": timestamp
                    })
                    last_saved_second = current_second

                heatmap[ymin:ymax, xmin:xmax] += 1

        heatmap_normalized = cv2.normalize(heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        heatmap_colored = cv2.applyColorMap(heatmap_normalized, cv2.COLORMAP_JET)
        overlay = cv2.addWeighted(frame, 0.6, heatmap_colored, 0.4, 0)

        for x in range(0, w, grid_size):
            cv2.line(overlay, (x, 0), (x, h), (255, 255, 255), 1, cv2.LINE_AA)
        for y in range(0, h, grid_size):
            cv2.line(overlay, (0, y), (w, y), (255, 255, 255), 1, cv2.LINE_AA)

        
        last_frame = overlay.copy()

        clear_output(wait=True)
        out.write(overlay)
        pbar.update(1)

    df = pd.DataFrame(data)
    cap.release()
    out.release()
    
    create_heatmap(df, heatmap_path)
    cv2.imwrite(last_frame_path, last_frame)

In [5]:
# Apply the model to the video in this path
urlVideo = 'https://pub-f9ef82ae3ee74240886857c6bf5f4495.r2.dev/1726401447453_whatsap.mp4'

apply_model_to_video(urlVideo, "output.mp4", "heatmap.png", "last_frame.png")

Processing Frames:  20%|██        | 284/1413 [20:11<1:31:35,  4.87s/it]


0: 384x640 7 persons, 82.0ms
Speed: 2.0ms preprocess, 82.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)


Action: gender: 100%|██████████| 2/2 [00:00<00:00,  3.50it/s]


[{'age': 38, 'region': {'x': 0, 'y': 0, 'w': 107, 'h': 65, 'left_eye': None, 'right_eye': None}, 'face_confidence': 0, 'gender': {'Woman': 12.311319261789322, 'Man': 87.68868446350098}, 'dominant_gender': 'Man'}]


Action: gender: 100%|██████████| 2/2 [00:00<00:00,  3.81it/s]


[{'age': 34, 'region': {'x': 0, 'y': 0, 'w': 95, 'h': 181, 'left_eye': None, 'right_eye': None}, 'face_confidence': 0, 'gender': {'Woman': 7.481583952903748, 'Man': 92.51841902732849}, 'dominant_gender': 'Man'}]


Action: gender: 100%|██████████| 2/2 [00:00<00:00,  4.03it/s]


[{'age': 33, 'region': {'x': 0, 'y': 0, 'w': 92, 'h': 173, 'left_eye': None, 'right_eye': None}, 'face_confidence': 0, 'gender': {'Woman': 22.95670509338379, 'Man': 77.04329490661621}, 'dominant_gender': 'Man'}]


Action: gender: 100%|██████████| 2/2 [00:00<00:00,  3.98it/s]


[{'age': 33, 'region': {'x': 0, 'y': 0, 'w': 193, 'h': 311, 'left_eye': None, 'right_eye': None}, 'face_confidence': 0, 'gender': {'Woman': 19.886426627635956, 'Man': 80.11357188224792}, 'dominant_gender': 'Man'}]


