<a href="https://colab.research.google.com/github/Matheus-Godinho-Magalhaes/Final_Paper/blob/main/TG2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instalando bibliotecas necessárias

In [None]:
!pip install ultralytics gTTS moviepy

Collecting ultralytics
  Downloading ultralytics-8.3.145-py3-none-any.whl.metadata (37 kB)
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting click<8.2,>=7.1 (from gTTS)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Dow

# Conectando ao google drive

In [None]:
# Mount the user's Google Drive at /content/gdrive so that files stored there
# can be accessed through regular filesystem paths.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# Versão Final

In [None]:
## Versão final

from ultralytics import YOLO
import torch
import cv2
import random
import os
import time
from gtts import gTTS
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from pathlib import Path
import numpy as np
import logging
from datetime import datetime

# ======================= CONFIGURATION SECTION: START =======================

# 1. Input locations: trained weights and the video to process.
VIDEO_BASE_PATH_GDRIVE = "/content/gdrive/MyDrive/videos_base"
VIDEO_NAME = "longo-IMG_5389.mp4"
MODEL_PATH = "/content/gdrive/MyDrive/Modelos treinados/07-05-25/ufmg_yolov11m_run12/weights/best.pt"

# 2. Detection / tracking thresholds (passed to the tracker as conf / iou).
CONFIDENCE_THRESHOLD, IOU_THRESHOLD = 0.60, 0.40

# 3. Spoken alerts: model class name -> text that is read aloud.
#    Only the classes listed here ever trigger an alert.
CLASSES_PARA_ALERTA = dict(
    banco="Banco",
    faixa_pedestre="Faixa de pedestre",
    placa_onibus="Ponto de √¥nibus",
)

# Re-detection after occlusion: how long (in seconds of video time) the
# last-seen record of an alerted track is remembered.
MAX_TIME_LOST_SECONDS = 5

# Per-class reappearance radius, in pixels between box centres. A new track id
# of the same class closer than this to a remembered track is treated as the
# same object and is not announced again.
REAPPEAR_DISTANCE_THRESHOLDS_BY_CLASS = dict(
    banco=150,           # tight: benches stand close together and must stay distinct
    faixa_pedestre=600,  # loose: crosswalks are wide
    placa_onibus=350,    # in between, for bus-stop signs
    default=200,         # fallback for classes not listed above
)

# Silence (seconds) inserted after each alert before the next may start.
SEQUENTIAL_ALERT_GAP = 0.3

# Half-width of the "ahead" band around the frame centre, as a fraction of
# half the frame width.
VIDEO_WIDTH_PERCENTAGE_FOR_CENTER = 0.40

# ======================== CONFIGURATION SECTION: END ========================


# Report whether a CUDA GPU is available (informational only).
if torch.cuda.is_available():
    print(f"‚úÖ GPU dispon√≠vel: {torch.cuda.get_device_name(0)}")
else:
    print("‚ùå GPU n√£o est√° ativada.")

# Logger setup: one timestamped log file per run, written to the working directory.
log_filename = f"alertas_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
logging.basicConfig(
    filename=log_filename,
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
    datefmt="%H:%M:%S",
    # basicConfig() silently does nothing when the root logger already has
    # handlers, which is the case when this cell is re-run (and in hosts that
    # pre-install handlers). force=True replaces them so every run really
    # writes to its own log file.
    force=True,
)

# Load the custom YOLO weights; `model` stays None when the file is missing so
# the processing code can skip itself.
if os.path.exists(MODEL_PATH):
    model = YOLO(MODEL_PATH)
    print(f"‚úÖ Modelo carregado de: {MODEL_PATH}")
else:
    print(f"‚ùå ERRO: Arquivo do modelo n√£o encontrado em: {MODEL_PATH}")
    model = None

# Input and output locations.
video_path = os.path.join(VIDEO_BASE_PATH_GDRIVE, VIDEO_NAME)
video_temp_path = os.path.join("/content", "output_bboxes_silent", VIDEO_NAME)
video_final_path = os.path.join("/content", "output_bboxes_with_audio", VIDEO_NAME)
audio_folder = "/content/audio_alerts"

# Create the output directories, derived from the paths above so the two can
# never drift apart.
os.makedirs(audio_folder, exist_ok=True)
os.makedirs(os.path.dirname(video_temp_path), exist_ok=True)
os.makedirs(os.path.dirname(video_final_path), exist_ok=True)

# Start from an empty audio folder so clips from a previous run are not reused.
print(f"üßπ Limpando a pasta de √°udios: {audio_folder}")
for file_name in os.listdir(audio_folder):
    file_full_path = os.path.join(audio_folder, file_name)
    if os.path.isfile(file_full_path):
        try:
            os.remove(file_full_path)
        except OSError as e:
            # Best effort: report the file that could not be removed and carry on.
            print(f"Erro ao remover {file_full_path}: {e}")
print("‚úÖ Pasta de √°udios limpa.")


# Main processing: only runs when the YOLO model was loaded successfully.
if model:
    # Open the input video; `cap` is None when the file does not exist.
    if not os.path.exists(video_path):
        print(f"‚ùå ERRO: V√≠deo de entrada n√£o encontrado em: {video_path}")
        cap = None
    else:
        cap = cv2.VideoCapture(video_path)

    if cap and cap.isOpened():
        # Read the video geometry and frame rate from the capture.
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # NOTE(review): frame timestamps are later computed as frame_count / fps,
        # so a container that reports 0 FPS would fail there - consider a guard.
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"üìπ Processando v√≠deo: {VIDEO_NAME} ({width}x{height} @ {fps:.2f} FPS)")

        # Vertical band around the frame centre. A detection whose centre lies
        # left of `left_boundary` / right of `right_boundary` is announced as
        # "left" / "right"; anything in between is "ahead".
        frame_center_x = width // 2
        offset = int(frame_center_x * VIDEO_WIDTH_PERCENTAGE_FOR_CENTER)
        left_boundary = frame_center_x - offset
        right_boundary = frame_center_x + offset

        # Writer for the annotated, silent video (same size and FPS as the input).
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(video_temp_path, fourcc, fps, (width, height))

        def get_color(class_name_str):
            """Return a reproducible, not-too-bright BGR colour for a label key.

            The colour is derived from a private random generator seeded with
            the key itself, so:
              * the same key always maps to the same colour, in every run
                (the built-in ``hash()`` of a string is salted per process and
                would not give that guarantee);
              * the global ``random`` module state is left untouched.

            Args:
                class_name_str: Key to derive the colour from (converted with
                    ``str()`` before seeding).

            Returns:
                A ``(b, g, r)`` tuple of ints in 0..255 whose sum is at most
                650, i.e. near-white colours are rejected.
            """
            rng = random.Random(str(class_name_str))
            while True:
                r, g, b = (rng.randint(0, 255) for _ in range(3))
                # Re-draw near-white colours (channel sum above 650).
                if r + g + b <= 650:
                    return (b, g, r)

        # --- State carried across frames ---
        # Generated alert clips and the video time (s) at which each must play;
        # the two lists are appended to together and are index-aligned.
        alert_audio_paths = []
        alert_audio_timings = []
        frame_count = 0
        # Track ids that were already announced (or deliberately silenced).
        alerted_object_ids = set()
        # track_id -> {"class_name", "last_center", "last_timestamp"}: last-seen
        # record of every alerted track. It is refreshed while the track is
        # visible and pruned once it is older than MAX_TIME_LOST_SECONDS.
        lost_tracks = {}
        previous_frame_track_ids = set()
        # Earliest video time (s) at which the next alert may start playing.
        proximo_tempo_disponivel_para_audio = 0.0

        print(f"üöÄ Come√ßando processamento com RASTREAMENTO. Alertas para: {list(CLASSES_PARA_ALERTA.keys())}. Confian√ßa m√≠nima: {CONFIDENCE_THRESHOLD}")

        # Run the tracker over the whole video; with stream=True the call
        # returns an iterable that is consumed one result per frame below.
        device_to_use = "cuda" if torch.cuda.is_available() else "cpu"
        results_generator = model.track(
            source=video_path,
            stream=True,
            device=device_to_use,
            conf=CONFIDENCE_THRESHOLD,
            iou=IOU_THRESHOLD,
            persist=True,
            tracker="bytetrack.yaml"
        )

        # Per-frame loop: draw the tracked boxes, decide which objects deserve a
        # spoken alert, schedule the alert audio, and write the annotated frame.
        try:
            for result in results_generator:
                # Work on a copy so the tracker's own image is not drawn on.
                frame = result.orig_img.copy()
                # Position of this frame in the video, in seconds.
                current_frame_original_timestamp = frame_count / fps
                current_frame_track_ids = set()
                alerts_data_for_current_frame = []

                boxes = result.boxes
                # Skip the detection logic when the result has no boxes or no track ids.
                if boxes is not None and boxes.id is not None:
                    track_ids = boxes.id.int().tolist()
                    current_frame_track_ids.update(track_ids)

                    for i, track_id in enumerate(track_ids):
                        class_id = int(boxes.cls[i].item())
                        model_class_names_dict = result.names
                        # Ignore class ids the model has no name for.
                        if class_id not in model_class_names_dict:
                            continue

                        class_name_from_model = model_class_names_dict[class_id]
                        conf_value = boxes.conf[i].item()
                        box_coords = boxes.xyxy[i].tolist()
                        # Box centre (x, y), floor-divided midpoint of the corners.
                        center_coords = ((box_coords[0] + box_coords[2]) // 2, (box_coords[1] + box_coords[3]) // 2)

                        # Draw the box plus an "ID - class conf" label on a black
                        # background just above the box; the colour is keyed by track id.
                        x1, y1, x2, y2 = map(int, box_coords)
                        label_color = get_color(str(track_id))
                        label_text = f"ID: {track_id} - {class_name_from_model} {conf_value:.2f}"
                        cv2.rectangle(frame, (x1, y1), (x2, y2), label_color, 2)
                        (text_w, text_h), _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                        cv2.rectangle(frame, (x1, y1 - text_h - 10), (x1 + text_w + 4, y1), (0,0,0), -1)
                        cv2.putText(frame, label_text, (x1 + 2, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

                        # Only alert-worthy classes, and only track ids not handled before.
                        if class_name_from_model in CLASSES_PARA_ALERTA and track_id not in alerted_object_ids:
                            is_reappeared_object = False
                            # Reappearance radius for this class (falls back to "default").
                            current_reappear_dist_thresh = REAPPEAR_DISTANCE_THRESHOLDS_BY_CLASS.get(
                                class_name_from_model, REAPPEAR_DISTANCE_THRESHOLDS_BY_CLASS["default"]
                            )

                            # A new id close to a remembered track of the same class is
                            # treated as that object coming back under a new id: it is
                            # marked as alerted without producing audio.
                            # NOTE(review): lost_tracks also contains alerted tracks that
                            # are still visible in this frame, so a genuinely new object
                            # next to a visible one of the same class is silenced too -
                            # confirm this is intended.
                            for lost_id, lost_data in list(lost_tracks.items()):
                                if lost_data["class_name"] == class_name_from_model:
                                    dist = np.linalg.norm(np.array(center_coords) - np.array(lost_data["last_center"]))
                                    # Compare against the class-specific radius.
                                    if dist < current_reappear_dist_thresh:
                                        is_reappeared_object = True
                                        print(f"üîç Objeto ID {track_id} ({class_name_from_model}) reapareceu (era ID {lost_id} @ {dist:.0f}px). Silenciando. Thresh: {current_reappear_dist_thresh}px")
                                        logging.info(f"RECONEX√ÉO: ID novo {track_id} ({class_name_from_model}) conectado ao ID perdido {lost_id}.")
                                        alerted_object_ids.add(track_id)
                                        if lost_id in lost_tracks: del lost_tracks[lost_id]
                                        break

                            # Genuinely new object: queue a spoken alert whose direction
                            # depends on where the box centre sits relative to the centre band.
                            if not is_reappeared_object:
                                alerted_object_ids.add(track_id)
                                direction = "√† frente"
                                if center_coords[0] < left_boundary: direction = "√† esquerda"
                                elif center_coords[0] > right_boundary: direction = "√† direita"
                                spoken_class_name = CLASSES_PARA_ALERTA[class_name_from_model]
                                alert_text_to_generate = f"{spoken_class_name} {direction}"

                                alert_details = {
                                    "text": alert_text_to_generate, "class_name": class_name_from_model,
                                    "track_id": track_id, "frame_num": frame_count,
                                    "original_ts_for_log": current_frame_original_timestamp, "conf": conf_value
                                }
                                alerts_data_for_current_frame.append(alert_details)
                                print(f"‚ûï Alerta para ID {track_id} ({alert_text_to_generate}) adicionado √† fila deste frame.")

                        # Refresh the last-seen record of every alerted track that is
                        # visible in this frame.
                        if track_id in alerted_object_ids:
                             lost_tracks[track_id] = {
                                "class_name": class_name_from_model, "last_center": center_coords,
                                "last_timestamp": current_frame_original_timestamp
                            }

                # Turn this frame's queued alerts into audio clips, played back to
                # back and never before the previously scheduled alert has finished.
                if alerts_data_for_current_frame:
                    base_start_time_for_this_frame_alerts = max(current_frame_original_timestamp, proximo_tempo_disponivel_para_audio)
                    current_alert_play_time = base_start_time_for_this_frame_alerts

                    for alert_info in alerts_data_for_current_frame:
                        # Text-to-speech generation and scheduling for one alert.
                        alert_text = alert_info["text"]
                        class_name = alert_info["class_name"]
                        track_id = alert_info["track_id"]
                        original_ts = alert_info["original_ts_for_log"]
                        conf_val = alert_info["conf"]
                        print(f"üîä Gerando √°udio para: '{alert_text}' (ID: {track_id}) para tocar em {current_alert_play_time:.2f}s")
                        try:
                            tts = gTTS(alert_text, lang='pt-br')
                            # File name combines class, track id, frame number and play
                            # time in centiseconds.
                            audio_filename = f"{class_name}_{track_id}_{alert_info['frame_num']}_{int(current_alert_play_time*100)}.mp3"
                            audio_path = os.path.join(audio_folder, audio_filename)
                            tts.save(audio_path)
                            # Measure the clip so the next alert starts after it ends;
                            # fall back to 2 s when the duration cannot be read.
                            try:
                                temp_audio_clip = AudioFileClip(audio_path)
                                audio_duration = temp_audio_clip.duration
                                temp_audio_clip.close()
                            except Exception as e_dur:
                                print(f"‚ö†Ô∏è Erro ao obter dura√ß√£o do √°udio {audio_filename}: {e_dur}. Usando 2s.")
                                audio_duration = 2.0
                            alert_audio_paths.append(audio_path)
                            alert_audio_timings.append(current_alert_play_time)
                            logging.info(f"ALERTA AGENDADO (ID: {track_id}): '{alert_text}' (orig_ts: {original_ts:.2f}s, play_at: {current_alert_play_time:.2f}s, conf: {conf_val:.2f})")
                            current_alert_play_time += audio_duration + SEQUENTIAL_ALERT_GAP
                        except Exception as e_tts_seq:
                            # A failed alert is reported and skipped; it does not
                            # advance the play cursor.
                            print(f"‚ùå Erro no processo TTS/Sequenciamento para '{alert_text}': {e_tts_seq}")
                            logging.error(f"Erro TTS/Sequenciamento para '{alert_text}': {e_tts_seq}")
                    proximo_tempo_disponivel_para_audio = current_alert_play_time

                # Report tracks that were present in the previous frame but not in this one.
                disappeared_ids = previous_frame_track_ids - current_frame_track_ids
                for track_id_lost in disappeared_ids:
                    if track_id_lost in lost_tracks: # only tracks that were already alerted have a record
                        print(f"üëª Objeto ID {track_id_lost} ({lost_tracks[track_id_lost]['class_name']}) perdido de vista em ts={current_frame_original_timestamp:.2f}s.")
                        logging.info(f"OBJETO PERDIDO: ID {track_id_lost} ({lost_tracks[track_id_lost]['class_name']}) sumiu em ts={current_frame_original_timestamp:.2f}s.")

                # Forget records that have not been refreshed for more than
                # MAX_TIME_LOST_SECONDS and whose track is not in the current frame.
                active_lost_ids_to_remove = []
                for lost_id, lost_data in list(lost_tracks.items()): # iterate over a snapshot of the dict
                    if current_frame_original_timestamp - lost_data["last_timestamp"] > MAX_TIME_LOST_SECONDS:
                        if lost_id not in current_frame_track_ids:
                            print(f"üóëÔ∏è Removendo ID {lost_id} ({lost_data['class_name']}) da mem√≥ria de perdidos (muito tempo).")
                            logging.info(f"MEM√ìRIA LIMPA: ID {lost_id} ({lost_data['class_name']}) removido.")
                            active_lost_ids_to_remove.append(lost_id)
                for id_to_remove in active_lost_ids_to_remove:
                    if id_to_remove in lost_tracks: del lost_tracks[id_to_remove]

                previous_frame_track_ids = current_frame_track_ids.copy()

                # Draw the two boundaries of the centre band and write the annotated frame.
                cv2.line(frame, (left_boundary, 0), (left_boundary, height), (0, 0, 255), 2)
                cv2.line(frame, (right_boundary, 0), (right_boundary, height), (0, 0, 255), 2)
                out.write(frame)
                frame_count += 1
                if frame_count % 100 == 0:
                    print(f"  Processado frame {frame_count}...")

        # Any error inside the loop stops the processing but keeps the frames
        # written so far; the capture and the writer are always released.
        except Exception as e_proc:
            print(f"‚ùå Ocorreu um erro durante o processamento do v√≠deo: {e_proc}")
            logging.error(f"Erro no loop de processamento: {e_proc}")
        finally:
            if cap: cap.release()
            if 'out' in locals() and out.isOpened(): out.release()
            # NOTE(review): this success message is printed even when the loop
            # above ended with an error.
            print(f"‚úÖ V√≠deo processado sem √°udio salvo em: {video_temp_path}")

        # Final step: lay the generated alert clips over the silent annotated
        # video. `final_video_path_actual` tracks which file ends up being the
        # result (it stays on the silent video when muxing/copying fails).
        print("üé¨ Sincronizando √°udio...")
        final_video_path_actual = video_temp_path

        if not alert_audio_paths:
            # No alerts were produced: the final video is just a copy of the silent one.
            print("‚ö†Ô∏è Nenhum alerta de √°udio foi gerado. Copiando v√≠deo sem √°udio para final.")
            if os.path.exists(video_temp_path):
                import shutil
                try:
                    shutil.copyfile(video_temp_path, video_final_path)
                    final_video_path_actual = video_final_path
                except OSError as e_copy:
                    print(f"Erro ao copiar v√≠deo final: {e_copy}")
        else:
            # Every clip is tracked from the moment it is opened so that the
            # `finally` below can release it even when composition or encoding
            # fails half-way (previously the clips were only closed on success).
            base_clip = None
            final_audio = None
            final_video = None
            audio_clips_list = []
            try:
                base_clip = VideoFileClip(video_temp_path)
                # One audio clip per alert, shifted to its scheduled play time.
                for alert_path, alert_start in zip(alert_audio_paths, alert_audio_timings):
                    audio_clips_list.append(AudioFileClip(alert_path).set_start(alert_start))

                final_audio = CompositeAudioClip(audio_clips_list)
                final_video = base_clip.set_audio(final_audio)
                final_video.write_videofile(video_final_path, codec='libx264', audio_codec='aac',
                                            temp_audiofile=os.path.join(audio_folder, 'temp-audio.m4a'),
                                            remove_temp=True, preset='medium',
                                            ffmpeg_params=["-async", "1", "-vsync", "passthrough"])
                final_video_path_actual = video_final_path
            except Exception as e_moviepy:
                # Boundary handler: the silent video is still usable, so report and go on.
                print(f"‚ùå Erro ao juntar √°udio e v√≠deo com MoviePy: {e_moviepy}")
                print(f"‚ÑπÔ∏è O v√≠deo processado sem √°udio est√° dispon√≠vel em: {video_temp_path}")
                logging.error(f"Erro MoviePy: {e_moviepy}")
            finally:
                # Close in the same order as before; a failing close() must not
                # hide the outcome of the muxing itself.
                for clip_to_close in (base_clip, final_audio, final_video, *audio_clips_list):
                    if clip_to_close is None:
                        continue
                    try:
                        clip_to_close.close()
                    except Exception as e_close:
                        logging.warning(f"Falha ao fechar clipe: {e_close}")
        print(f"üéâ Processamento conclu√≠do. V√≠deo final salvo em: {final_video_path_actual}")
    else:
        print(f"‚ùå N√£o foi poss√≠vel abrir o v√≠deo: {video_path}")
else:
    print("‚ùå Modelo n√£o carregado. Processamento de v√≠deo n√£o iniciado.")

‚úÖ GPU dispon√≠vel: Tesla T4
‚úÖ Modelo carregado de: /content/gdrive/MyDrive/Modelos treinados/07-05-25/ufmg_yolov11m_run12/weights/best.pt
üßπ Limpando a pasta de √°udios: /content/audio_alerts
‚úÖ Pasta de √°udios limpa.
üìπ Processando v√≠deo: longo-IMG_5389.mp4 (1080x1920 @ 60.00 FPS)
üöÄ Come√ßando processamento com RASTREAMENTO. Alertas para: ['banco', 'faixa_pedestre', 'placa_onibus']. Confian√ßa m√≠nima: 0.6

video 1/1 (frame 1/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 25.1ms




‚ûï Alerta para ID 1 (Banco √† frente) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Banco √† frente' (ID: 1) para tocar em 0.00s
video 1/1 (frame 2/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 36.5ms
video 1/1 (frame 3/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 24.2ms
video 1/1 (frame 4/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 29.9ms
video 1/1 (frame 5/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 92.0ms
video 1/1 (frame 6/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 24.2ms
video 1/1 (frame 7/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 24.5ms
video 1/1 (frame 8/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 40.7ms
video 1/1 (frame 9/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 banco, 24.2ms
video 1/1 (frame 



‚ûï Alerta para ID 4 (Banco √† frente) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Banco √† frente' (ID: 4) para tocar em 12.13s
video 1/1 (frame 730/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 20.3ms
video 1/1 (frame 731/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 21.0ms
video 1/1 (frame 732/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 21.0ms
video 1/1 (frame 733/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 21.0ms
video 1/1 (frame 734/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 20.9ms
video 1/1 (frame 735/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 19.9ms
video 1/1 (frame 736/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 19.9ms
video 1/1 (frame 737/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos,



‚ûï Alerta para ID 9 (Faixa de pedestre √† frente) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Faixa de pedestre √† frente' (ID: 9) para tocar em 22.70s
video 1/1 (frame 1364/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 1 faixa_pedestre, 20.5ms
video 1/1 (frame 1365/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 19.9ms
üëª Objeto ID 9 (faixa_pedestre) perdido de vista em ts=22.73s.
video 1/1 (frame 1366/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 19.9ms
video 1/1 (frame 1367/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 19.9ms
video 1/1 (frame 1368/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 19.9ms
video 1/1 (frame 1369/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 bancos, 20.0ms
video 1/1 (frame 1370/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 2 b



‚ûï Alerta para ID 15 (Ponto de √¥nibus √† direita) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Ponto de √¥nibus √† direita' (ID: 15) para tocar em 33.58s
video 1/1 (frame 2017/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 21.8ms
üëª Objeto ID 15 (placa_onibus) perdido de vista em ts=33.60s.
video 1/1 (frame 2018/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.8ms
video 1/1 (frame 2019/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.8ms
üîç Objeto ID 16 (placa_onibus) reapareceu (era ID 15 @ 27px). Silenciando. Thresh: 350px
video 1/1 (frame 2020/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.7ms
video 1/1 (frame 2021/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.8ms
video 1/1 (frame 2022/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no det



‚ûï Alerta para ID 33 (Faixa de pedestre √† direita) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Faixa de pedestre √† direita' (ID: 33) para tocar em 36.62s
video 1/1 (frame 2134/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 21.8ms
üëª Objeto ID 33 (faixa_pedestre) perdido de vista em ts=35.55s.
video 1/1 (frame 2135/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 26.6ms
video 1/1 (frame 2136/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 22.5ms
video 1/1 (frame 2137/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 22.7ms
video 1/1 (frame 2138/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 22.6ms
üëª Objeto ID 33 (faixa_pedestre) perdido de vista em ts=35.62s.
video 1/1 (frame 2139/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 22.6ms



‚ûï Alerta para ID 36 (Faixa de pedestre √† frente) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Faixa de pedestre √† frente' (ID: 36) para tocar em 43.17s
video 1/1 (frame 2592/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 20.0ms
video 1/1 (frame 2593/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 2594/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 2595/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 2596/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 2597/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 2598/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (



‚ûï Alerta para ID 37 (Ponto de √¥nibus √† esquerda) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Ponto de √¥nibus √† esquerda' (ID: 37) para tocar em 49.87s
video 1/1 (frame 2994/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 20.3ms
üëª Objeto ID 37 (placa_onibus) perdido de vista em ts=49.88s.
video 1/1 (frame 2995/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 20.2ms
video 1/1 (frame 2996/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.3ms
video 1/1 (frame 2997/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.3ms
video 1/1 (frame 2998/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 placa_onibus, 21.4ms
üîç Objeto ID 41 (placa_onibus) reapareceu (era ID 37 @ 91px). Silenciando. Thresh: 350px
video 1/1 (frame 2999/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 plac



‚ûï Alerta para ID 42 (Faixa de pedestre √† frente) adicionado √† fila deste frame.
üîä Gerando √°udio para: 'Faixa de pedestre √† frente' (ID: 42) para tocar em 61.93s
video 1/1 (frame 3718/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 3719/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 3720/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 3721/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 3722/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 3723/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 3724/4693) /content/gdrive/MyDrive/videos_base/longo-IMG_5389.mp4: 640x384 (no detections), 19.9ms
üëª Objeto 



MoviePy - Done.
Moviepy - Writing video /content/output_bboxes_with_audio/longo-IMG_5389.mp4






Moviepy - Done !
Moviepy - video ready /content/output_bboxes_with_audio/longo-IMG_5389.mp4
üéâ Processamento conclu√≠do. V√≠deo final salvo em: /content/output_bboxes_with_audio/longo-IMG_5389.mp4


# Área normalizada (teste)

In [None]:
## ÁREA NORMALIZADA - para calcular a área da bounding box (BB) e saber se o objeto está perto ou distante

from ultralytics import YOLO
import torch
import cv2
import random
import os
import time
from gtts import gTTS
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from pathlib import Path
import numpy as np
import logging
from datetime import datetime

# --- CONFIGURATION SECTION: START ---

# Input locations: the clip to analyse and the trained YOLO weights.
VIDEO_BASE_PATH_GDRIVE = "/content/gdrive/MyDrive/videos_base"
VIDEO_NAME = "curto-IMG_5389-2.mp4"  # Use a good-quality video for this new calibration
MODEL_PATH = "/content/gdrive/MyDrive/Modelos treinados/07-05-25/ufmg_yolov11m_run12/weights/best.pt"

# Passed to the tracker as its `conf` and `iou` arguments.
IOU_THRESHOLD = 0.40
CONFIDENCE_THRESHOLD = 0.60

# Model class name -> name spoken in the audio alert. Only these classes trigger alerts.
CLASSES_PARA_ALERTA = dict(
    banco="Banco",
    faixa_pedestre="Faixa de pedestre",
    placa_onibus="Ponto de √¥nibus",
)

# Remembered track positions older than this many seconds are discarded.
MAX_TIME_LOST_SECONDS = 5

# Per-class pixel distance under which a new track ID is treated as a
# previously alerted object reappearing (and is therefore not announced again).
REAPPEAR_DISTANCE_THRESHOLDS_BY_CLASS = {
    "banco": 150,
    "faixa_pedestre": 600,
    "placa_onibus": 350,
    "default": 200,
}

# Fraction of the half-frame width, on each side of the centre line, that
# still counts as "ahead" rather than left/right.
VIDEO_WIDTH_PERCENTAGE_FOR_CENTER = 0.40
# Silence (in seconds) inserted after each alert before the next one may play.
SEQUENTIAL_ALERT_GAP = 0.3

# --- Proximity thresholds expressed as NORMALIZED bounding-box AREA ---
# These values are examples and must be calibrated by watching the output
# video, whose labels show the normalized area as 'NA'.
#   area > "proximo"                  -> announced as near
#   area < "medio"                    -> announced as distant
#   "medio" <= area <= "proximo"      -> medium range, no proximity word
# E.g. banco: more than 4% of the frame is near, less than 0.5% is distant.
# Crosswalks get larger thresholds because they can cover more of the frame.
PROXIMITY_THRESHOLDS_NORMALIZED_AREA = {
    label: {"proximo": near_above, "medio": distant_below}
    for label, near_above, distant_below in (
        ("banco", 0.04, 0.005),
        ("faixa_pedestre", 0.08, 0.01),
        ("placa_onibus", 0.03, 0.004),
        ("default", 0.04, 0.005),
    )
}

# --- CONFIGURATION SECTION: END ---

# Initial setup: GPU report, logger, model loading, output paths, audio cleanup.

# Report whether a CUDA GPU is available.
if torch.cuda.is_available():
    print(f"‚úÖ GPU dispon√≠vel: {torch.cuda.get_device_name(0)}")
else:
    print("‚ùå GPU n√£o est√° ativada.")

# Logger configuration: one timestamped log file per run of this cell.
log_filename = f"alertas_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
logging.basicConfig(
    filename=log_filename,
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
    datefmt="%H:%M:%S",
    # basicConfig() does nothing when the root logger already has handlers,
    # which is the case once any earlier cell has configured logging.
    # force=True replaces those handlers so this run really logs to log_filename.
    force=True,
)

# Load the custom YOLO model; `model` stays None when the weights are missing
# so that the processing section can skip itself.
if os.path.exists(MODEL_PATH):
    model = YOLO(MODEL_PATH)
    print(f"‚úÖ Modelo carregado de: {MODEL_PATH}")
else:
    print(f"‚ùå ERRO: Arquivo do modelo n√£o encontrado em: {MODEL_PATH}")
    model = None

# Input and output locations.
video_path = os.path.join(VIDEO_BASE_PATH_GDRIVE, VIDEO_NAME)
video_temp_path = os.path.join("/content", "output_bboxes_silent", VIDEO_NAME)
video_final_path = os.path.join("/content", "output_bboxes_with_audio", VIDEO_NAME)
audio_folder = "/content/audio_alerts"

os.makedirs(audio_folder, exist_ok=True)
os.makedirs("/content/output_bboxes_silent", exist_ok=True)
os.makedirs("/content/output_bboxes_with_audio", exist_ok=True)

# Remove audio files left in the alerts folder (sub-directories are left alone).
print(f"üßπ Limpando a pasta de √°udios: {audio_folder}")
for file_name in os.listdir(audio_folder):
    file_full_path = os.path.join(audio_folder, file_name)
    if os.path.isfile(file_full_path):
        try:
            os.remove(file_full_path)
        except OSError as e:
            # Best effort: report the file that could not be deleted and carry on.
            print(f"Erro ao remover {file_full_path}: {e}")
print("‚úÖ Pasta de √°udios limpa.")


if model:
    # Open the input video; `cap` stays None when the file does not exist.
    cap = None
    if os.path.exists(video_path):
        cap = cv2.VideoCapture(video_path)
    else:
        print(f"‚ùå ERRO: V√≠deo de entrada n√£o encontrado em: {video_path}")

    if cap and cap.isOpened():
        frame_width_video = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Frame width of the video
        frame_height_video = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Frame height of the video
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frame_area = frame_width_video * frame_height_video
        if total_frame_area == 0:  # Would make the normalized area a division by zero
            print("‚ùå ERRO: √Årea total do frame √© zero. Verifique as dimens√µes do v√≠deo.")
            # Release the capture before aborting, and raise SystemExit directly:
            # exit() is the interactive-shell helper and, inside a notebook,
            # may tear down the whole kernel instead of just stopping this cell.
            cap.release()
            raise SystemExit(1)

        print(f"üìπ Processando v√≠deo para CALIBRA√á√ÉO DE √ÅREA NORMALIZADA: {VIDEO_NAME} ({frame_width_video}x{frame_height_video} @ {fps:.2f} FPS)")

        # Vertical boundaries of the central band used to pick the spoken
        # direction (left / ahead / right) from a detection's centre x.
        frame_center_x = frame_width_video // 2
        offset = int(frame_center_x * VIDEO_WIDTH_PERCENTAGE_FOR_CENTER)
        left_boundary = frame_center_x - offset
        right_boundary = frame_center_x + offset

        # Silent annotated video; its labels include the normalized area.
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_temp_with_norm_areas_path = os.path.join("/content", "output_bboxes_silent", f"norm_areas_{VIDEO_NAME}")
        out = cv2.VideoWriter(video_temp_with_norm_areas_path, fourcc, fps, (frame_width_video, frame_height_video))

        def get_color(class_name_str):
            """Return a reproducible, not-too-bright BGR colour for a label key.

            The colour is drawn from a private generator seeded with the key
            itself, so the same key always maps to the same colour (also across
            interpreter runs, unlike the salted built-in ``hash`` of a string)
            and the module-level ``random`` state is left untouched.

            Args:
                class_name_str: Key identifying what is being coloured; it is
                    converted with ``str()`` before seeding.

            Returns:
                A ``(b, g, r)`` tuple of ints in 0..255 whose sum is at most 650.
            """
            rng = random.Random(str(class_name_str))
            while True:
                r, g, b = (rng.randint(0, 255) for _ in range(3))
                # Reject near-white colours (channel sum above 650).
                if r + g + b <= 650:
                    return (b, g, r)

        # Per-run bookkeeping used by the processing loop.
        frame_count = 0
        proximo_tempo_disponivel_para_audio = 0.0  # earliest time the next alert may start
        alert_audio_paths, alert_audio_timings = [], []  # parallel lists: file path / start time
        alerted_object_ids = set()  # track IDs that were already handled
        previous_frame_track_ids = set()
        lost_tracks = {}  # track ID -> last known class, centre and timestamp

        print(f"üöÄ Come√ßando processamento. Alertas para: {list(CLASSES_PARA_ALERTA)}. Confian√ßa m√≠nima: {CONFIDENCE_THRESHOLD}")

        # Run on the GPU when one is available, otherwise on the CPU.
        if torch.cuda.is_available():
            device_to_use = "cuda"
        else:
            device_to_use = "cpu"

        # Streaming tracker over the input video: yields one result per frame.
        results_generator = model.track(
            source=video_path,
            stream=True,
            device=device_to_use,
            conf=CONFIDENCE_THRESHOLD,
            iou=IOU_THRESHOLD,
            persist=True,
            tracker="bytetrack.yaml",
        )

        # Main loop: for every tracked frame draw the labelled boxes, decide
        # which detections deserve a spoken alert, synthesise and schedule the
        # alert audio, and write the annotated frame to the silent output video.
        try:
            for result in results_generator:
                frame = result.orig_img.copy()
                # Position of this frame in the source video, in seconds.
                current_frame_original_timestamp = frame_count / fps
                current_frame_track_ids = set()
                alerts_data_for_current_frame = []

                boxes = result.boxes
                # Frames without tracked boxes (boxes.id is None) are only drawn and written.
                if boxes is not None and boxes.id is not None:
                    track_ids = boxes.id.int().tolist()
                    current_frame_track_ids.update(track_ids)
                    # The class-id -> name mapping is the same for every box of this result.
                    model_class_names_dict = result.names

                    for i, track_id in enumerate(track_ids):
                        class_id = int(boxes.cls[i].item())
                        if class_id not in model_class_names_dict:
                            continue

                        class_name_from_model = model_class_names_dict[class_id]
                        conf_value = boxes.conf[i].item()
                        box_coords = boxes.xyxy[i].tolist()
                        center_coords = (
                            (box_coords[0] + box_coords[2]) // 2,
                            (box_coords[1] + box_coords[3]) // 2,
                        )

                        x1, y1, x2, y2 = map(int, box_coords)

                        # Box width, height and area; the area is normalized by the frame area.
                        box_width_pixels = x2 - x1
                        box_height_pixels = y2 - y1
                        box_area_pixels = box_width_pixels * box_height_pixels
                        normalized_box_area = box_area_pixels / total_frame_area if total_frame_area > 0 else 0

                        label_color = get_color(str(track_id))
                        # The label carries the normalized area ("NA") so thresholds can be calibrated by eye.
                        label_text = f"ID:{track_id} {class_name_from_model} C:{conf_value:.2f} NA:{normalized_box_area:.4f}"

                        cv2.rectangle(frame, (x1, y1), (x2, y2), label_color, 2)
                        (text_w, text_h), _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                        cv2.rectangle(frame, (x1, y1 - text_h - 10), (x1 + text_w + 4, y1), (0, 0, 0), -1)
                        cv2.putText(frame, label_text, (x1 + 2, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                        if class_name_from_model in CLASSES_PARA_ALERTA and track_id not in alerted_object_ids:
                            current_reappear_dist_thresh = REAPPEAR_DISTANCE_THRESHOLDS_BY_CLASS.get(
                                class_name_from_model, REAPPEAR_DISTANCE_THRESHOLDS_BY_CLASS["default"]
                            )
                            # A new ID close to the last known position of an already
                            # alerted object of the same class is treated as that
                            # object reappearing, and is not announced again.
                            # NOTE(review): lost_tracks also holds tracks that are
                            # still visible in this frame, so two nearby objects of
                            # the same class can silence each other - confirm intended.
                            matched_lost_id = None
                            for lost_id, lost_data in lost_tracks.items():
                                if lost_data["class_name"] != class_name_from_model:
                                    continue
                                dist = np.linalg.norm(np.array(center_coords) - np.array(lost_data["last_center"]))
                                if dist < current_reappear_dist_thresh:
                                    matched_lost_id = lost_id
                                    break

                            is_reappeared_object = matched_lost_id is not None
                            if is_reappeared_object:
                                del lost_tracks[matched_lost_id]

                            # Either way this ID is now handled and never alerts again.
                            alerted_object_ids.add(track_id)

                            if not is_reappeared_object:
                                direction = "√† frente"
                                if center_coords[0] < left_boundary:
                                    direction = "√† esquerda"
                                elif center_coords[0] > right_boundary:
                                    direction = "√† direita"
                                spoken_class_name = CLASSES_PARA_ALERTA[class_name_from_model]

                                # Proximity word derived from the normalized area.
                                current_prox_thresh_config = PROXIMITY_THRESHOLDS_NORMALIZED_AREA.get(
                                    class_name_from_model, PROXIMITY_THRESHOLDS_NORMALIZED_AREA["default"]
                                )
                                proximity_word = ""
                                if normalized_box_area > current_prox_thresh_config["proximo"]:
                                    proximity_word = "pr√≥ximo"
                                elif normalized_box_area < current_prox_thresh_config["medio"]:
                                    proximity_word = "distante"
                                # Neither near nor distant: proximity_word stays "" (medium range).

                                alert_text_to_generate = f"{spoken_class_name} {proximity_word} {direction}".replace("  ", " ").strip()

                                alert_details = {
                                    "text": alert_text_to_generate, "class_name": class_name_from_model,
                                    "track_id": track_id, "frame_num": frame_count,
                                    "original_ts_for_log": current_frame_original_timestamp, "conf": conf_value,
                                    "norm_area": normalized_box_area,  # kept for logging/debugging
                                }
                                alerts_data_for_current_frame.append(alert_details)
                                print(f"‚ûï Alerta para ID {track_id} ({alert_text_to_generate}) adicionado √† fila. NA: {normalized_box_area:.4f}")

                        # Remember where every handled object was last seen.
                        if track_id in alerted_object_ids:
                            lost_tracks[track_id] = {
                                "class_name": class_name_from_model, "last_center": center_coords,
                                "last_timestamp": current_frame_original_timestamp
                            }

                if alerts_data_for_current_frame:
                    # Alerts raised on the same frame are played one after another,
                    # and never before the previously scheduled audio has finished.
                    base_start_time_for_this_frame_alerts = max(current_frame_original_timestamp, proximo_tempo_disponivel_para_audio)
                    current_alert_play_time = base_start_time_for_this_frame_alerts
                    for alert_info in alerts_data_for_current_frame:
                        alert_text = alert_info["text"]
                        class_name = alert_info["class_name"]
                        track_id = alert_info["track_id"]
                        original_ts = alert_info["original_ts_for_log"]
                        conf_val = alert_info["conf"]
                        norm_area_val = alert_info["norm_area"]

                        print(f"üîä Gerando √°udio para: '{alert_text}' (ID: {track_id}, NA: {norm_area_val:.4f}) para tocar em {current_alert_play_time:.2f}s")
                        try:
                            tts = gTTS(alert_text, lang='pt-br')
                            audio_filename = f"{class_name}_{track_id}_{alert_info['frame_num']}_{int(current_alert_play_time*100)}.mp3"
                            audio_path = os.path.join(audio_folder, audio_filename)
                            tts.save(audio_path)
                            try:
                                temp_audio_clip = AudioFileClip(audio_path)
                                audio_duration = temp_audio_clip.duration
                                temp_audio_clip.close()
                            except Exception as e_dur:
                                # Fall back to a fixed 2 s duration when it cannot be read.
                                print(f"‚ö†Ô∏è Erro ao obter dura√ß√£o do √°udio {audio_filename}: {e_dur}. Usando 2s.")
                                audio_duration = 2.0
                            alert_audio_paths.append(audio_path)
                            alert_audio_timings.append(current_alert_play_time)
                            logging.info(f"ALERTA AGENDADO (ID: {track_id}): '{alert_text}' (orig_ts: {original_ts:.2f}s, play_at: {current_alert_play_time:.2f}s, conf: {conf_val:.2f}, NA: {norm_area_val:.4f})")
                            current_alert_play_time += audio_duration + SEQUENTIAL_ALERT_GAP
                        except Exception as e_tts_seq:
                            # Best effort: a failed alert must not abort the video processing.
                            print(f"‚ùå Erro no processo TTS/Sequenciamento para '{alert_text}': {e_tts_seq}")
                            logging.error(f"Erro TTS/Sequenciamento para '{alert_text}': {e_tts_seq}")
                    proximo_tempo_disponivel_para_audio = current_alert_play_time

                # Forget remembered positions that are older than MAX_TIME_LOST_SECONDS
                # and whose track is not present in the current frame.
                expired_lost_ids = [
                    lost_id
                    for lost_id, lost_data in lost_tracks.items()
                    if lost_id not in current_frame_track_ids
                    and current_frame_original_timestamp - lost_data["last_timestamp"] > MAX_TIME_LOST_SECONDS
                ]
                for id_to_remove in expired_lost_ids:
                    del lost_tracks[id_to_remove]
                previous_frame_track_ids = current_frame_track_ids.copy()

                # Draw the boundaries of the central band over the full frame
                # height. This must use frame_height_video: the name `height`
                # is never defined in this cell.
                cv2.line(frame, (left_boundary, 0), (left_boundary, frame_height_video), (0, 0, 255), 2)
                cv2.line(frame, (right_boundary, 0), (right_boundary, frame_height_video), (0, 0, 255), 2)
                out.write(frame)
                frame_count += 1
                if frame_count % 100 == 0:
                    print(f"  Processado frame {frame_count}...")

        except Exception as e_proc:
            print(f"‚ùå Ocorreu um erro durante o processamento do v√≠deo: {e_proc}")
            # logging.exception also records the traceback in the log file.
            logging.exception(f"Erro no loop de processamento: {e_proc}")
        finally:
            # Always release the capture and finalise the output file.
            if cap:
                cap.release()
            if out.isOpened():
                out.release()
            print(f"‚úÖ V√≠deo com √ÅREAS NORMALIZADAS (para calibra√ß√£o) salvo em: {video_temp_with_norm_areas_path}")

        # --- Merge the alert audio clips with the processed video ---
        print("üé¨ Sincronizando √°udio (se houver alertas gerados)...")
        # The final video is built from the temporary video whose labels show the normalized area.
        final_video_input_for_audio_composition = video_temp_with_norm_areas_path
        final_output_video_with_audio_path = video_final_path  # where the final video with audio is saved

        if not alert_audio_paths:
            print("‚ö†Ô∏è Nenhum alerta de √°udio foi gerado. Copiando v√≠deo (com labels de √°rea) para o destino final.")
            if os.path.exists(final_video_input_for_audio_composition):
                import shutil
                try:
                    shutil.copyfile(final_video_input_for_audio_composition, final_output_video_with_audio_path)
                    # The final video is the labelled video, with no audio added.
                    final_video_path_actual = final_output_video_with_audio_path
                    print(f"üéâ Processamento conclu√≠do. V√≠deo com labels (sem novos √°udios) salvo em: {final_video_path_actual}")
                except OSError as e_copy:
                    print(f"Erro ao copiar v√≠deo final: {e_copy}")
                    # The copy failed: point to the temporary video instead.
                    final_video_path_actual = final_video_input_for_audio_composition
                    print(f"üéâ Processamento conclu√≠do. V√≠deo com labels (sem novos √°udios) est√° em: {final_video_path_actual}")
            else:
                print(f"‚ùå Arquivo de v√≠deo base para √°udio n√£o encontrado: {final_video_input_for_audio_composition}")
                final_video_path_actual = "N/A"  # no final video was produced

        else:  # there are alert audio files to compose
            # Pre-declare every clip so the finally block can close whatever
            # was opened, even when the export fails halfway through.
            base_clip = None
            final_audio = None
            final_video = None
            audio_clips_list = []
            try:
                base_clip = VideoFileClip(final_video_input_for_audio_composition)

                # alert_audio_paths and alert_audio_timings are filled in lockstep,
                # so every audio file has a start time.
                for p, t in zip(alert_audio_paths, alert_audio_timings):
                    audio_clips_list.append(AudioFileClip(p).set_start(t))

                final_audio = CompositeAudioClip(audio_clips_list)
                final_video = base_clip.set_audio(final_audio)

                final_video.write_videofile(final_output_video_with_audio_path,
                                            codec='libx264',
                                            audio_codec='aac',
                                            temp_audiofile=os.path.join(audio_folder, 'temp-audio.m4a'),
                                            remove_temp=True,
                                            preset='medium',  # 'medium' is a good balance; others: 'ultrafast', 'fast', 'slow', 'slower'
                                            ffmpeg_params=["-async", "1", "-vsync", "passthrough"])
                final_video_path_actual = final_output_video_with_audio_path
                print(f"üéâ Processamento conclu√≠do. V√≠deo final com √°udio e labels de √°rea salvo em: {final_video_path_actual}")

            except Exception as e_moviepy:
                print(f"‚ùå Erro ao juntar √°udio e v√≠deo com MoviePy: {e_moviepy}")
                print(f"‚ÑπÔ∏è O v√≠deo processado com labels de √°rea (sem √°udio composto) est√° dispon√≠vel em: {final_video_input_for_audio_composition}")
                logging.error(f"Erro MoviePy: {e_moviepy}")
                # On failure the temporary (silent) video is the final result.
                final_video_path_actual = final_video_input_for_audio_composition
            finally:
                # MoviePy cleanup, on success and on failure alike.
                for clip_to_close in (base_clip, final_audio, final_video, *audio_clips_list):
                    if clip_to_close is None:
                        continue
                    try:
                        clip_to_close.close()
                    except Exception as e_close:
                        # Cleanup is best effort; a failing close must not hide the result.
                        logging.warning(f"Erro ao fechar clipe MoviePy: {e_close}")

    else: # `cap` is None (input file missing) or the capture could not be opened
        print(f"‚ùå N√£o foi poss√≠vel abrir o v√≠deo: {video_path}")
else: # `model` is None: the weights file was not found, so nothing is processed
    print("‚ùå Modelo n√£o carregado. Processamento de v√≠deo n√£o iniciado.")

‚úÖ GPU dispon√≠vel: Tesla T4
‚úÖ Modelo carregado de: /content/gdrive/MyDrive/Modelos treinados/07-05-25/ufmg_yolov11m_run12/weights/best.pt
üßπ Limpando a pasta de √°udios: /content/audio_alerts
‚úÖ Pasta de √°udios limpa.
üìπ Processando v√≠deo para CALIBRA√á√ÉO DE √ÅREA NORMALIZADA: curto-IMG_5389-2.mp4 (1080x1920 @ 59.94 FPS)
üöÄ Come√ßando processamento. Alertas para: ['banco', 'faixa_pedestre', 'placa_onibus']. Confian√ßa m√≠nima: 0.6

video 1/1 (frame 1/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 25.7ms




‚ûï Alerta para ID 1 (Banco √† esquerda) adicionado √† fila. NA: 0.0337
üîä Gerando √°udio para: 'Banco √† esquerda' (ID: 1, NA: 0.0337) para tocar em 0.00s
video 1/1 (frame 2/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 24.5ms
video 1/1 (frame 3/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 24.2ms
video 1/1 (frame 4/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 24.2ms
video 1/1 (frame 5/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 24.1ms
video 1/1 (frame 6/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 26.9ms
video 1/1 (frame 7/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 32.8ms
video 1/1 (frame 8/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 28.8ms
video 1/1 (frame 9/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 



‚ûï Alerta para ID 3 (Banco √† frente) adicionado √† fila. NA: 0.0075
üîä Gerando √°udio para: 'Banco √† frente' (ID: 3, NA: 0.0075) para tocar em 2.10s
video 1/1 (frame 48/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 22.2ms
video 1/1 (frame 49/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 22.2ms
video 1/1 (frame 50/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 22.2ms
video 1/1 (frame 51/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 22.2ms
video 1/1 (frame 52/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 26.2ms
video 1/1 (frame 53/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 22.3ms
video 1/1 (frame 54/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 banco, 22.2ms
video 1/1 (frame 55/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 



‚ûï Alerta para ID 11 (Faixa de pedestre distante √† frente) adicionado √† fila. NA: 0.0018
üîä Gerando √°udio para: 'Faixa de pedestre distante √† frente' (ID: 11, NA: 0.0018) para tocar em 11.26s
video 1/1 (frame 677/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 1 faixa_pedestre, 24.2ms
video 1/1 (frame 678/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 1 faixa_pedestre, 29.0ms
video 1/1 (frame 679/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 1 faixa_pedestre, 30.6ms
video 1/1 (frame 680/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 24.2ms
video 1/1 (frame 681/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 25.5ms
video 1/1 (frame 682/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 2 bancos, 24.2ms
video 1/1 (frame 683/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 64



‚ûï Alerta para ID 19 (Ponto de √¥nibus distante √† direita) adicionado √† fila. NA: 0.0007
üîä Gerando √°udio para: 'Ponto de √¥nibus distante √† direita' (ID: 19, NA: 0.0007) para tocar em 22.19s
video 1/1 (frame 1332/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 placa_onibus, 25.4ms
video 1/1 (frame 1333/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 placa_onibus, 19.9ms
video 1/1 (frame 1334/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 placa_onibus, 28.9ms
video 1/1 (frame 1335/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 placa_onibus, 20.5ms
video 1/1 (frame 1336/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 placa_onibus, 29.1ms
video 1/1 (frame 1337/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 (no detections), 22.5ms
video 1/1 (frame 1338/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 pl



‚ûï Alerta para ID 35 (Faixa de pedestre distante √† direita) adicionado √† fila. NA: 0.0056
üîä Gerando √°udio para: 'Faixa de pedestre distante √† direita' (ID: 35, NA: 0.0056) para tocar em 25.87s
video 1/1 (frame 1449/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 (no detections), 20.0ms
video 1/1 (frame 1450/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 (no detections), 19.9ms
video 1/1 (frame 1451/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 1452/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 1 faixa_pedestre, 19.9ms
video 1/1 (frame 1453/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 (no detections), 20.1ms
video 1/1 (frame 1454/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640x384 (no detections), 19.9ms
video 1/1 (frame 1455/1534) /content/gdrive/MyDrive/videos_base/curto-IMG_5389-2.mp4: 640



MoviePy - Done.
Moviepy - Writing video /content/output_bboxes_with_audio/curto-IMG_5389-2.mp4





Moviepy - Done !
Moviepy - video ready /content/output_bboxes_with_audio/curto-IMG_5389-2.mp4
üéâ Processamento conclu√≠do. V√≠deo final com √°udio e labels de √°rea salvo em: /content/output_bboxes_with_audio/curto-IMG_5389-2.mp4
