In [1]:
%load_ext autoreload
%autoreload 2

Configurações iniciais

In [1]:
import torch
import torchvision

# Report the installed library versions first.
print("Versão do PyTorch:", torch.__version__)
print("Versão do Torchvision:", torchvision.__version__)

# Query CUDA once and reuse the answer for both lines below.
cuda_ok = torch.cuda.is_available()
print("CUDA disponível:", cuda_ok)
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "Nenhuma"
print("Placa de vídeo:", gpu_name)

Versão do PyTorch: 2.5.1+cu124
Versão do Torchvision: 0.20.1+cu124
CUDA disponível: True
Placa de vídeo: NVIDIA GeForce GTX 1660 SUPER


**Treinamento**

In [None]:
from ultralytics import YOLO
import torch


def treinar_modelo():
    """Fine-tune a previously trained YOLO checkpoint on the first plate dataset.

    Loads the checkpoint ``modelo_first_640_8_2.pt``, trains it for 60 epochs
    with the AdamW optimizer and HSV colour augmentation, then saves the
    resulting weights. All paths are hard-coded to the local project layout.
    """
    # Fall back to the CPU instead of crashing in model.to() when no CUDA
    # device is available.
    device = torch.device(0) if torch.cuda.is_available() else torch.device("cpu")

    # Load the model (checkpoint from the first trained dataset).
    model = YOLO(r"C:\projetosML\auto_Plate_Detection\outputs\modelos_treinados\modelo_first_640_8_2.pt")
    model.to(device)

    print("Iniciando o treinamento...")

    # Train the model.
    model.train(
        # Raw string: the backslashes are literal path separators, not escape
        # sequences ("\p" and "\d" trigger a SyntaxWarning on Python 3.12+).
        data=r'C:\projetosML/auto_Plate_Detection\data\primeiro_dataset\data.yaml',
        epochs=60,
        batch=8,
        imgsz=544,
        optimizer="AdamW",
        pretrained=True,
        amp=False,
        augment=True,  # enables the default augmentations
        hsv_h=0.015,  # hue variation
        hsv_s=0.7,    # saturation variation
        hsv_v=0.4,    # brightness variation
        # Further augmentations that were tried and left disabled:
        #scale=0.5,      # scaling, to simulate distant objects
        #translate=0.1,  # translation
        #mosaic=1.0,     # combines images
        #mixup=0.2       # blends images
    )
    # Save the trained model.
    # NOTE(review): the file name says "20epochs" although epochs=60 above; it is
    # left unchanged because a later cell loads the weights under this exact name.
    model.save(r"C:\projetosML\auto_Plate_Detection\outputs\modelos_treinados/new_modelo_br_20epochs.pt")

# Start the training run only when this cell/script is the entry point.
if __name__ == "__main__":
    treinar_modelo()


In [None]:
from ultralytics import YOLO

# Load the custom-trained plate-detection checkpoint.
# Raw strings keep the Windows backslashes literal; the previous non-raw
# literals relied on invalid escape sequences such as "\p" and "\o".
model = YOLO(r"C:\projetosML/auto_Plate_Detection\outputs\modelos_treinados\modelo_br_dataArgumetetion_40epochs.pt")

# Define path to video file
source = r"C:\projetosML/auto_Plate_Detection\inputs/brasil1080p.mp4"

# Run inference on the source, showing the annotated frames in a window.
# stream=True yields one Results object at a time instead of keeping the
# results of every frame in memory; the loop simply drains the generator and
# also avoids echoing the whole result list as the cell output.
for _ in model.predict(source=source, show=True, conf=0.5, stream=True):
    pass

## Testando o modelo

##### Importando pacotes de outro diretório

In [2]:
import sys
import os

# The notebook runs from 'notebooks', so go up one level to reach 'src'.
projeto_path = os.path.dirname(os.getcwd())  # parent of the working directory
# Add the path only once so re-running this cell does not keep growing sys.path.
if projeto_path not in sys.path:
    sys.path.append(projeto_path)

##### Testando o modelo com um vídeo, sem OCR

In [None]:
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from time import time

def process_video(model_path, video_path, confiance=0.5):
    """Run YOLO detection on a video and show the annotated frames live (no OCR).

    Each frame is passed to the model; every detection above the confidence
    threshold gets a green bounding box and a "<class> (<confidence>)" label,
    and the per-frame processing rate is overlaid in the top-left corner.
    Press ``q`` in the preview window to stop early.

    Args:
        model_path: Path to the YOLO weights file to load.
        video_path: Path to the video opened with ``cv2.VideoCapture``.
        confiance: Confidence threshold passed to the model and used to
            filter the drawn detections.
    """
    model = YOLO(model_path)
    # Ultralytics documents device values such as 0, "cuda:0" or "cpu";
    # "gpu" is not one of them, so pick the device explicitly.
    device = 0 if torch.cuda.is_available() else "cpu"

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report the problem instead of silently doing nothing.
            print(f"Não foi possível abrir o vídeo: {video_path}")
            return

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))

        print(f"Resolução de video: {frame_width}x{frame_height}")
        print(f"FPS: {fps}")

        while cap.isOpened():
            start_time = time()

            # Read the next frame
            ret, frame = cap.read()
            if not ret:
                print("Fim do vídeo")
                break

            # Run the prediction on this single frame
            results = model.predict(frame, conf=confiance, device=device)

            # Unpack the detections: box corners, confidences and class ids
            boxes = results[0].boxes
            detections = boxes.xyxy.cpu().numpy()
            confs = boxes.conf.cpu().numpy()
            classes = boxes.cls.cpu().numpy()

            for box, conf, cls in zip(detections, confs, classes):
                if conf <= confiance:
                    continue

                # Pixel coordinates as plain Python ints for the drawing calls
                x1, y1, x2, y2 = (int(v) for v in box)

                # Draw the bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Write the class name and confidence above the box
                label = f"{model.names[int(cls)]} ({conf:.2f})"
                cv2.putText(frame,
                            label,
                            (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.5,
                            (0, 255, 0),
                            2)

            # Processing rate of this frame; kept separate from the video's
            # own FPS and guarded against a zero elapsed time.
            elapsed = time() - start_time
            current_fps = 1.0 / elapsed if elapsed > 0 else 0.0
            cv2.putText(frame,
                        f"FPS: {current_fps:.2f}",
                        (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (0, 255, 0),
                        2)

            # Show the annotated frame on screen
            cv2.imshow("Yolo Detections", frame)

            # Leave the loop when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture and close the window, even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

# Entry point: run the detector (without OCR) on a local test video.
if __name__ == "__main__":
    model_path = r"C:\projetosML\auto_Plate_Detection\outputs\modelos_treinados\new_modelo_br_20epochs.pt"
    video_path = r"C:\projetosML\auto_Plate_Detection\inputs\rife4x.mp4"
    process_video(model_path=model_path, video_path=video_path, confiance=0.5)



#### Testando o modelo com um vídeo, **com OCR**

In [None]:
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from time import time
from src import paddleOCRdetect


def _is_valid_plate(text):
    """Return True when ``text`` matches the old (ABC1234) or Mercosul (ABC1D23) format."""
    if len(text) != 7 or not text[:3].isalpha():
        return False
    padrao_antigo = text[3:].isdigit()
    padrao_mercosul = text[3].isdigit() and text[4].isalpha() and text[5:].isdigit()
    return padrao_antigo or padrao_mercosul


def process_video(model_path, video_path, confiance=0.5):
    """Run YOLO detection plus OCR on a video and show the annotated frames live.

    For every detection above the confidence threshold the box region is
    cropped, upscaled 4x and passed to ``paddleOCRdetect.detect_text``. Any
    recognised text is drawn above the box; when the cleaned-up text matches
    a 7-character plate pattern the upscaled crop is saved as
    ``placa_<n>.jpg`` in the hard-coded tests folder.
    Press ``q`` in the preview window to stop early.

    Args:
        model_path: Path to the YOLO weights file to load.
        video_path: Path to the video opened with ``cv2.VideoCapture``.
        confiance: Confidence threshold passed to the model and used to
            filter the processed detections.
    """
    model = YOLO(model_path)
    # Ultralytics documents device values such as 0, "cuda:0" or "cpu";
    # "gpu" is not one of them, so pick the device explicitly.
    device = 0 if torch.cuda.is_available() else "cpu"

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report the problem instead of silently doing nothing.
            print(f"Não foi possível abrir o vídeo: {video_path}")
            return

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))

        print(f"Resolução de video: {frame_width}x{frame_height}")
        print(f"FPS: {fps}")
        idx = 0  # counter used to name the saved plate crops

        while cap.isOpened():
            start_time = time()

            # Read the next frame
            ret, frame = cap.read()
            if not ret:
                print("Fim do vídeo")
                break

            # Run the prediction on this single frame
            results = model.predict(frame, conf=confiance, device=device)

            # Unpack the detections: box corners, confidences and class ids
            boxes = results[0].boxes
            detections = boxes.xyxy.cpu().tolist()
            confs = boxes.conf.cpu().tolist()
            classes = boxes.cls.cpu().tolist()

            for box, conf, cls in zip(detections, confs, classes):
                if conf <= confiance:
                    continue

                # Integer pixel coordinates; negatives are clamped so the
                # slice below cannot wrap around the frame.
                x1, y1, x2, y2 = (max(int(v), 0) for v in box)
                crop_obj = frame[y1:y2, x1:x2]

                plate = None
                if crop_obj.size > 0:  # cv2.resize raises on an empty crop
                    # Upscale the crop before OCR
                    h, w = crop_obj.shape[:2]
                    crop_obj = cv2.resize(crop_obj, (w*4, h*4), interpolation=cv2.INTER_LANCZOS4)
                    plate = paddleOCRdetect.detect_text(crop_obj)

                # Test the OCR result BEFORE converting it to a string:
                # str(None) is the non-empty text "None", which used to be
                # drawn and printed as if it were a plate.
                if plate:
                    plate = str(plate)
                    print(plate)
                    cv2.putText(frame,
                                plate,
                                (x1, y1 - 30),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                0.5,
                                (0, 255, 0),
                                2)

                    # Strip spaces and the list punctuation left by str()
                    text = plate.upper().replace(' ', '').replace('[', '').replace(']', '').replace("'", '')
                    print(f'Placa detectada: {text}')

                    if _is_valid_plate(text):
                        print(f"Placa válida: {text}")
                        idx += 1
                        cv2.imwrite(f"C:/projetosML/auto_Plate_Detection/outputs/tests/placa_{idx}.jpg", crop_obj)

                # Draw the bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Write the class name and confidence above the box
                label = f"{model.names[int(cls)]} ({conf:.2f})"
                cv2.putText(frame,
                            label,
                            (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.5,
                            (0, 255, 0),
                            2)

            # Processing rate of this frame; kept separate from the video's
            # own FPS and guarded against a zero elapsed time.
            elapsed = time() - start_time
            current_fps = 1.0 / elapsed if elapsed > 0 else 0.0
            cv2.putText(frame,
                        f"FPS: {current_fps:.2f}",
                        (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (0, 255, 0),
                        2)

            # Show the annotated frame on screen
            cv2.imshow("Yolo Detections", frame)

            # Leave the loop when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture and close the window, even if OCR fails.
        cap.release()
        cv2.destroyAllWindows()

# Entry point: run detection + OCR on a local test video.
if __name__ == '__main__':
    # Raw string so the backslashes stay literal path separators; the previous
    # non-raw literal relied on invalid escapes ("\p", "\o", "\m").
    model_path = r"C:\projetosML/auto_Plate_Detection\outputs\modelos_treinados\modelo_br_dataArgumetetion_40epochs.pt"
    video_path = r"C:\projetosML\auto_Plate_Detection\inputs\202501291141.mp4"
    process_video(model_path, video_path, confiance=0.5)



In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from time import time
from src import paddleOCRdetect

def process_video(model_path, video_path, output_path, confiance=0.5):
    """Run YOLO detection plus OCR on a video and write the annotated result to a file.

    For every detection above the confidence threshold the box region is
    cropped, upscaled 4x and passed to ``paddleOCRdetect.detect_text``; any
    recognised text, the bounding box and a "<class> (<confidence>)" label are
    drawn on the frame, together with the per-frame processing rate. Each
    annotated frame is appended to an ``mp4v``-encoded output video.

    Args:
        model_path: Path to the YOLO weights file to load.
        video_path: Path to the input video opened with ``cv2.VideoCapture``.
        output_path: Path of the annotated video to create.
        confiance: Confidence threshold passed to the model and used to
            filter the processed detections.
    """
    # Imported here because this cell does not import torch itself.
    import torch

    model = YOLO(model_path)
    # Ultralytics documents device values such as 0, "cuda:0" or "cpu";
    # "gpu" is not one of them, so pick the device explicitly.
    device = 0 if torch.cuda.is_available() else "cpu"

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            # Report the problem instead of silently writing an empty file.
            print(f"Could not open video: {video_path}")
            return

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        source_fps = cap.get(cv2.CAP_PROP_FPS)
        fps = int(source_fps)

        print(f"Video resolution: {frame_width}x{frame_height}")
        print(f"FPS: {fps}")

        # Keep the fractional frame rate (e.g. 29.97) for the writer so the
        # output duration matches the input; fall back to 30 when the
        # container reports no frame rate at all.
        writer_fps = source_fps if source_fps > 0 else 30.0

        # Define the codec and create the VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))
        if not out.isOpened():
            print(f"Could not open output video for writing: {output_path}")
            return

        while cap.isOpened():
            start_time = time()
            ret, frame = cap.read()

            if not ret:
                print("End of video")
                break

            results = model.predict(frame, conf=confiance, device=device)

            # Unpack the detections: box corners, confidences and class ids
            boxes = results[0].boxes
            detections = boxes.xyxy.cpu().tolist()
            confs = boxes.conf.cpu().tolist()
            classes = boxes.cls.cpu().tolist()

            for box, conf, cls in zip(detections, confs, classes):
                if conf <= confiance:
                    continue

                # Integer pixel coordinates; negatives are clamped so the
                # slice below cannot wrap around the frame.
                x1, y1, x2, y2 = (max(int(v), 0) for v in box)
                crop_obj = frame[y1:y2, x1:x2]

                plate = None
                if crop_obj.size > 0:  # cv2.resize raises on an empty crop
                    # Upscale the cropped object
                    h, w = crop_obj.shape[:2]
                    crop_obj = cv2.resize(crop_obj, (w*4, h*4), interpolation=cv2.INTER_LANCZOS4)

                    # Perform OCR
                    plate = paddleOCRdetect.detect_text(crop_obj)

                if plate:
                    # Draw the recognised text on the frame
                    cv2.putText(frame,
                                str(plate),
                                (x1, y1 - 30),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                0.5,
                                (0, 255, 0),
                                2)
                    print(f'Detected text: {plate}')

                # Draw bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Write detection label
                label = f"{model.names[int(cls)]} ({conf:.2f})"
                cv2.putText(frame,
                            label,
                            (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.5,
                            (0, 255, 0),
                            2)

            # Processing rate of this frame; kept separate from the video's
            # own FPS and guarded against a zero elapsed time.
            elapsed = time() - start_time
            current_fps = 1.0 / elapsed if elapsed > 0 else 0.0
            cv2.putText(frame,
                        f"FPS: {current_fps:.2f}",
                        (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (0, 255, 0),
                        2)

            # Write the frame to the output video
            out.write(frame)
    finally:
        # Always release the capture and finalise the output file, even if a
        # frame fails half-way through.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# Entry point: annotate a local test video and save the processed copy.
if __name__ == "__main__":
    model_path = "C:/projetosML/auto_Plate_Detection/outputs/modelos_treinados/modelo_br_dataArgumetetion_40epochs.pt"
    video_path = r"C:\projetosML\auto_Plate_Detection\inputs\simu_ue5_2x.mp4"
    output_path = "C:/projetosML/auto_Plate_Detection/outputs/processed_video_ue5_2x.mp4"
    process_video(
        model_path=model_path,
        video_path=video_path,
        output_path=output_path,
        confiance=0.5,
    )