Imports

In [1]:
import os
import re
import datetime
from typing import List, Tuple

from PIL import Image
import numpy as np

# Detector: ultralytics YOLO
from ultralytics import YOLO

Configurações

In [None]:
# Folder scanned recursively for input frames (see list_frame_files).
INPUT_FRAMES_FOLDER = "../HighResImage" 
# Folder where person crops are written; created by run_pipeline if missing.
OUTPUT_CROPS_FOLDER = "CroppedPersonsHighResTeste"

# Weights file handed to the YOLO detector.
YOLO_WEIGHTS = "../yolo11n.pt"  # Ideally use the most recent release and its "n" variant, since it is lighter
# Passed as `conf` to the detector's predict call (minimum detection confidence).
YOLO_CONF = 0.35                           
# Embedded in every crop filename as "cam<CAMERA_ID>".
CAMERA_ID = 1

# Minimum IoU for a detection to be matched to an existing track.
IOU_MATCH_THRESHOLD = 0.3                 
# Person boxes whose (rounded) width * height is below this value are discarded.
MIN_BOX_AREA = 400                         
# NOTE(review): ATTR_TOP_K and ATTR_CONF_THRESHOLD are not referenced anywhere in the
# visible code — presumably used by an attribute-classification step elsewhere; confirm or remove.
ATTR_TOP_K = 2
ATTR_CONF_THRESHOLD = 0.05

# Cap on the number of frames processed (handy for quick tests); a falsy value disables the cap.
FRAME_LIMIT = 1000

# File extensions accepted as frames; compared against the lower-cased extension of each file.
IMAGE_EXTS = (".jpg", ".jpeg")

Funções de apoio

In [3]:
# Guarantee that a directory exists before anything is written into it.
def ensure_dir(path: str):
    """Create ``path`` together with any missing parents.

    An already existing directory is left untouched and is not an error.
    """
    os.makedirs(name=path, exist_ok=True)

# List the frame images stored under a folder (recursively), in a reproducible order.
def list_frame_files(input_folder: str, exts: Tuple[str, ...] = None) -> List[str]:
    """Collect the paths of all frame images below ``input_folder``.

    Args:
        input_folder: Root folder to walk recursively.
        exts: Accepted file extensions including the leading dot (matched
            case-insensitively). Defaults to the module-level ``IMAGE_EXTS``,
            looked up at call time.

    Returns:
        A list of file paths. Files directly inside a directory come before the
        files of its sub-directories; directories and file names are both
        visited in sorted order, so the result does not depend on the order in
        which the operating system enumerates directory entries.
    """
    if exts is None:
        exts = IMAGE_EXTS
    allowed = tuple(e.lower() for e in exts)

    files = []
    for root, dirnames, filenames in os.walk(input_folder):
        # Sorting in place tells os.walk (top-down mode) in which order to
        # descend; without it the sub-folder order is filesystem dependent.
        dirnames.sort()
        files.extend(
            os.path.join(root, f)
            for f in sorted(filenames)
            if os.path.splitext(f)[1].lower() in allowed
        )
    return files

# Extract the capture timestamp encoded in a frame's file name.
#
# Candidate layout: YYYY[-_]MM[-_]DD[_T-]HH[:_]MM[:_]SS, every separator optional.
# The pattern sits inside a lookahead so that candidates are produced at every
# position of the name, including overlapping ones.
_FILENAME_TS_RE = re.compile(
    r"(?=(\d{4})[-_]?(\d{2})[-_]?(\d{2})[_T\-]?(\d{2})[:_]?(\d{2})[:_]?(\d{2}))"
)


def parse_timestamp_from_filename(fname: str):
    """Return an ISO-8601 timestamp string for a frame file.

    The base name is scanned left to right for date-time candidates; the first
    candidate that forms a real calendar date/time wins. Candidates that do not
    (for example a frame counter such as ``00001234`` in front of the real
    timestamp) are skipped instead of ending the search. When the name holds no
    valid timestamp, the file's modification time is used.

    Args:
        fname: Path of the frame file. Only the base name is parsed; the full
            path is used for the modification-time fallback.

    Returns:
        ``datetime.isoformat()`` string, e.g. ``"2023-05-17T12:30:45"``.

    Raises:
        OSError: if the fallback is needed and ``fname`` cannot be stat'ed.
    """
    base = os.path.basename(fname)
    for candidate in _FILENAME_TS_RE.finditer(base):
        try:
            return datetime.datetime(*map(int, candidate.groups())).isoformat()
        except ValueError:
            # Digits matched the layout but are not a real date/time
            # (month 13, hour 25, year 0, ...): try the next position.
            continue
    return datetime.datetime.fromtimestamp(os.path.getmtime(fname)).isoformat()

# Intersection over Union (IoU) of two axis-aligned bounding boxes.
def iou(boxA, boxB):
    """Compute the IoU of two boxes given as ``(x1, y1, x2, y2)`` sequences.

    Args:
        boxA: First box, indexable with positions 0..3.
        boxB: Second box, same layout.

    Returns:
        Overlap ratio in ``[0, 1]``; ``0.0`` when the boxes do not overlap
        (touching edges count as no overlap).
    """
    inter_w = max(0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]))
    inter_h = max(0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]))
    inter_area = inter_w * inter_h
    if inter_area <= 0:
        return 0.0

    # A positive intersection implies both boxes have positive width and
    # height, so the union below is strictly positive. The areas are NOT
    # clamped to >= 1: doing so distorts the ratio for boxes smaller than one
    # unit (e.g. coordinates normalised to [0, 1]).
    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return inter_area / float(area_a + area_b - inter_area)

Tracker baseado em IoU para dar ID único para indivíduos/grupos

In [4]:
class SimpleTracker:
    """
    Minimal multi-object tracker based on IoU (Intersection over Union).

    On every frame it:
      - associates new detections with existing tracks by box overlap,
      - gives a fresh unique ID to every detection left unmatched,
      - drops tracks that have gone unmatched for more than ``max_lost`` updates.
    """

    def __init__(self, iou_threshold=0.3, max_lost=30, iou_fn=None):
        """
        Args:
            iou_threshold: Minimum overlap for a detection to continue a track.
            max_lost: A track is removed once its consecutive-miss counter
                exceeds this value.
            iou_fn: Optional ``f(box_a, box_b) -> float`` overlap function.
                When omitted, the module-level ``iou`` is used (looked up at
                update time).
        """
        self.next_id = 1
        # track_id -> {'box': last matched box, 'last_seen': frame index, 'lost': miss counter}
        self.tracks = {}
        self.iou_threshold = iou_threshold
        self.max_lost = max_lost
        self.iou_fn = iou_fn

    def update(self, detections: List[Tuple[float, float, float, float]], frame_idx: int):
        """
        Update the tracks with the detections of the current frame.

        Matching is globally greedy: all (track, detection) pairs that overlap
        by at least ``iou_threshold`` are ranked by overlap and assigned best
        first. This keeps an older track from claiming a detection that fits a
        newer track much better, which would swap their IDs.

        Args:
            detections: Boxes as ``(x1, y1, x2, y2)`` sequences.
            frame_idx: Index of the current frame, stored as ``last_seen``.

        Returns:
            List of ``(track_id, box)`` tuples: continued tracks first (in
            track creation order), then newly created tracks (in detection order).
        """
        overlap = self.iou_fn if self.iou_fn is not None else iou

        # Every admissible pairing; zero overlap never matches, even with a
        # threshold of 0.
        candidates = []
        for tid, info in self.tracks.items():
            for di, det in enumerate(detections):
                val = overlap(info['box'], det)
                if val > 0.0 and val >= self.iou_threshold:
                    candidates.append((val, tid, di))
        # Highest overlap first; ties resolved by lowest track id, then lowest
        # detection index, to keep the outcome deterministic.
        candidates.sort(key=lambda c: (-c[0], c[1], c[2]))

        matched = {}  # track_id -> detection index
        unmatched_dets = set(range(len(detections)))
        for _, tid, di in candidates:
            if tid in matched or di not in unmatched_dets:
                continue
            matched[tid] = di
            unmatched_dets.discard(di)

        assignments = []
        for tid, info in self.tracks.items():
            di = matched.get(tid)
            if di is None:
                info['lost'] += 1
                continue
            info['box'] = detections[di]
            info['last_seen'] = frame_idx
            info['lost'] = 0
            assignments.append((tid, detections[di]))

        # Forget tracks that stayed unmatched for too long.
        expired = [tid for tid, info in self.tracks.items() if info['lost'] > self.max_lost]
        for tid in expired:
            del self.tracks[tid]

        # Whatever is still unmatched starts a new track.
        for di in sorted(unmatched_dets):
            tid = self.next_id
            self.next_id += 1
            self.tracks[tid] = {'box': detections[di], 'last_seen': frame_idx, 'lost': 0}
            assignments.append((tid, detections[di]))
        return assignments

Detector de indivíduos/grupos baseado em YOLO

In [5]:
class Detector:
    """Thin wrapper around an ultralytics ``YOLO`` model that returns plain-dict detections."""

    def __init__(self, weights: str = None, conf: float = 0.25, device: str = "cpu"):
        """
        Args:
            weights: Model/weights identifier handed to ``YOLO``; a falsy value
                selects ``"yolov8n"``.
            conf: Value forwarded as ``conf`` to every ``predict`` call.
            device: Value forwarded as ``device`` to every ``predict`` call.
        """
        self.device = device
        self.model = YOLO(weights) if weights else YOLO("yolov8n")
        self.conf = conf

    def detect(self, image: np.ndarray):
        """
        Run the model on one image and return every detection (all classes).

        NOTE(review): ultralytics documents numpy inputs as HWC / BGR — confirm
        that callers pass BGR rather than RGB arrays.

        Returns:
            List of dicts with keys ``'bbox'`` (``[x1, y1, x2, y2]`` floats),
            ``'confidence'`` (float), ``'class_id'`` (int) and ``'class_name'``
            (looked up in ``model.names``; the stringified class id when no
            name is available). Empty list when the result carries no boxes.
        """
        prediction = self.model.predict(
            image, imgsz=640, conf=self.conf, verbose=False, device=self.device
        )[0]

        if getattr(prediction, 'boxes', None) is None:
            return []

        coords = prediction.boxes.xyxy.cpu().numpy()
        confidences = prediction.boxes.conf.cpu().numpy()
        class_ids = prediction.boxes.cls.cpu().numpy().astype(int)
        label_map = getattr(self.model, "names", None)

        def label_for(idx: int):
            # Prefer the model's own label; otherwise fall back to the numeric id.
            if label_map is not None and idx in label_map:
                return label_map[idx]
            return str(idx)

        return [
            {
                'bbox': [float(xyxy[i]) for i in range(4)],
                'confidence': float(score),
                'class_id': int(cid),
                'class_name': label_for(int(cid)),
            }
            for xyxy, score, cid in zip(coords, confidences, class_ids)
        ]

Pipeline principal para recortar pessoas

In [6]:
def run_pipeline():
    """Detect people in every input frame, track them, and save one crop per track.

    Steps, driven by the module-level configuration constants:
      1. List the frames under ``INPUT_FRAMES_FOLDER`` (capped by ``FRAME_LIMIT``).
      2. Run the YOLO detector on each frame and keep person boxes whose area
         is at least ``MIN_BOX_AREA``.
      3. Feed the boxes to the IoU tracker to obtain per-person track IDs.
      4. The first time a track ID shows up, save its crop as a JPEG in
         ``OUTPUT_CROPS_FOLDER``.

    Frames that cannot be opened are reported and skipped. Returns ``None``.
    """
    ensure_dir(OUTPUT_CROPS_FOLDER)

    # Use a CUDA GPU when torch is importable and reports one; otherwise CPU.
    try:
        import torch
        device = "cuda" if torch.cuda.is_available() else "cpu"
    except Exception:
        device = "cpu"
    print(f"Using device: {device}")

    # Set up the detector and the tracker.
    detector = Detector(weights=YOLO_WEIGHTS, conf=YOLO_CONF, device=device)
    tracker = SimpleTracker(iou_threshold=IOU_MATCH_THRESHOLD, max_lost=30)

    frame_files = list_frame_files(INPUT_FRAMES_FOLDER)
    # Optional cap on the number of frames (useful for quick tests).
    if FRAME_LIMIT:
        frame_files = frame_files[:FRAME_LIMIT]
    if not frame_files:
        print("Frames não foram encontrados em ", INPUT_FRAMES_FOLDER)
        return

    print(f"Encontrou {len(frame_files)} frames, processando...")

    saved_track_ids = set()  # track IDs already written to disk (one crop per ID)

    for frame_idx, frame_path in enumerate(frame_files, start=1):
        # The context manager releases the file handle right after decoding;
        # convert() returns an independent in-memory image.
        try:
            with Image.open(frame_path) as raw:
                pil = raw.convert("RGB")
        except OSError as e:
            # Unreadable/corrupt frame: report and carry on with the next one
            # rather than aborting the whole run.
            print(f"Falha ao abrir frame {frame_path}:", e)
            continue

        # ultralytics treats numpy inputs as BGR (OpenCV convention), whereas
        # PIL yields RGB, so the channel axis is reversed before inference.
        img_bgr = np.ascontiguousarray(np.asarray(pil)[:, :, ::-1])

        dets = detector.detect(img_bgr)

        # Keep only person boxes that are large enough.
        person_dets_xyxy = []
        for d in dets:
            name = d.get('class_name', '').lower()
            if name in ("person", "people", "human") or int(d.get('class_id', -1)) == 0:
                x1, y1, x2, y2 = [int(round(v)) for v in d['bbox']]
                area = (x2 - x1) * (y2 - y1)
                if area >= MIN_BOX_AREA:
                    person_dets_xyxy.append([x1, y1, x2, y2])

        assignments = tracker.update(person_dets_xyxy, frame_idx)
        timestamp = parse_timestamp_from_filename(frame_path)
        frame_base = os.path.splitext(os.path.basename(frame_path))[0]
        # ':' and '-' are stripped so the timestamp is safe inside a file name.
        fname_ts = timestamp.replace(":", "").replace("-", "")

        # Save a crop ONLY for track IDs that have not been saved before.
        for track_id, box in assignments:
            if track_id in saved_track_ids:
                continue

            x1, y1, x2, y2 = [int(v) for v in box]
            crop = pil.crop((x1, y1, x2, y2))

            crop_fname = f"cam{CAMERA_ID}_trk{track_id}_{frame_base}_{fname_ts}.jpg"
            crop_path = os.path.join(OUTPUT_CROPS_FOLDER, crop_fname)
            try:
                crop.save(crop_path, format="JPEG", quality=90)
                # Marked as saved only on success, so a failed write is retried
                # the next time this track is matched.
                saved_track_ids.add(track_id)
            except Exception as e:
                print(f"Falha ao salvar crop para track {track_id}:", e)

        # Lightweight progress report every 50 frames.
        if frame_idx % 50 == 0:
            print(f"Processou {frame_idx}/{len(frame_files)} frames")

    print(f"Pronto. Crops de IDs únicos salvos em: {OUTPUT_CROPS_FOLDER} (total salvos: {len(saved_track_ids)})")

Rodar o pipeline principal

In [10]:
# Run the pipeline only when this code executes as the main module
# (i.e. not when it is imported from somewhere else).
if __name__ == "__main__":
    run_pipeline()

Using device: cuda
Encontrou 1 frames, processando...
Pronto. Crops de IDs únicos salvos em: CroppedPersonsHighResTeste (total salvos: 9)
