In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
from pathlib import Path
import math
from ultralytics import YOLO

Para criar uma rede neural nova, sem pesos aprendidos, carregamos apenas a arquitetura a partir do arquivo `.yaml`:

```python
YOLO('yolov8n.yaml')
```

Para uma rede com pesos já aprendidos (pré-treinada), utiliza-se o arquivo `.pt`:

```python
YOLO('yolov8n.pt')
```

O YOLOv8 é disponibilizado em diferentes tamanhos de modelo:

| Modelo      | Nome   | Tamanho      | Velocidade   | Precisão        |
| ----------- | ------ | ------------ | ------------ | --------------- |
| **yolov8n** | Nano   | Menor        | Muito rápida | Menor precisão  |
| **yolov8s** | Small  | Pequena      | Rápida       | Melhor que nano |
| **yolov8m** | Medium | Média        | Moderada     | Mais precisa    |
| **yolov8l** | Large  | Grande       | Mais lenta   | Alta precisão   |
| **yolov8x** | XLarge | Muito grande | A mais lenta | Máxima precisão |


In [3]:
# Load YOLOv8-nano from the pretrained checkpoint ('.pt'), i.e. WITH learned weights.
# To build only the architecture with no learned weights, pass 'yolov8n.yaml' instead.
model = YOLO('yolov8n.pt')

In [5]:
# Train `model` for 150 epochs on the dataset described by 'config.yaml'
# (resolved relative to the notebook's working directory).
# NOTE(review): the recorded log shows this ran on CPU, so a full re-run is slow.
results = model.train(data='config.yaml', epochs=150)

Ultralytics YOLOv8.0.58  Python-3.11.1 torch-2.1.2+cpu CPU
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=config.yaml, epochs=150, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, hide_labels=False, hide_conf=False, vid_stride=1, line_thickness=3, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None

In [11]:
# Load the `best.pt` weights saved under the `train2` run directory.
# NOTE(review): absolute, machine-specific path — confirm `train2` is the run
# produced by the training cell before relying on these weights.
best_model = YOLO(r'C:\CodigoAlfredo\OpenCv\ObjectDetection\runs\detect\train2\weights\best.pt')
# Run inference on a single image. The file lives under `images\train`, so this
# is a sanity check on data the model may have seen, not an evaluation.
results = best_model(r'c:\CodigoAlfredo\datasetObjectDetection\images\train\1a1cd031ac16112c.jpg')
# Only one image was passed, so keep the first (and only) entry of the returned sequence.
result = results[0]



image 1/1 C:\CodigoAlfredo\datasetObjectDetection\images\train\1a1cd031ac16112c.jpg: 480x640 2 alpacas, 147.2ms
Speed: 1.0ms preprocess, 147.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


In [22]:
# Raw detections, one row per box: [x1, y1, x2, y2, confidence, class id].
# `Boxes.data` exposes the same tensor as the legacy `Boxes.boxes` alias, which
# later Ultralytics releases deprecate and remove.
result.boxes.data

tensor([[100.1644,  60.5776, 534.1290, 765.2440,   0.9577,   0.0000],
        [619.3636, 491.7433, 890.3285, 672.5723,   0.9469,   0.0000]])

In [13]:
# Render the detections stored in `result` onto an image.
# Presumably a BGR array, since it is handed straight to cv2.imwrite — confirm.
annotated = result.plot()
# Save the annotated image to the current working directory. The cell displays
# imwrite's return value (True in the recorded output).
cv2.imwrite("saida.jpg", annotated)


True

In [None]:
import tkinter as tk

# ---------------------------
# SCREEN RESOLUTION
# ---------------------------

# A hidden Tk root is created only to query the screen size. It is destroyed
# right away so no window / Tcl interpreter lingers in the kernel.
root = tk.Tk()
root.withdraw()
try:
    largura_tela = root.winfo_screenwidth()
    altura_tela = root.winfo_screenheight()
finally:
    root.destroy()

# Frames are displayed and saved at half the screen size (rounded up).
altura_video = math.ceil(altura_tela/2)
largura_video = math.ceil(largura_tela/2)

# ---------------------------
# FILES
# ---------------------------

videos_path = Path(r'C:\CodigoAlfredo\datasetObjectDetection\videosAlpacas')

videos_list_path = [str(videos_path/'video01.mp4'),str(videos_path/'video02.mp4'),str(videos_path/'video04.mp4')]

best_model = YOLO(r'C:\CodigoAlfredo\OpenCv\ObjectDetection\runs\detect\train2\weights\best.pt')

# Used when the container reports no frame rate (CAP_PROP_FPS comes back as 0),
# which would otherwise produce an unusable writer.
FALLBACK_FPS = 30.0

# ---------------------------
# VIDEO PROCESSING
# ---------------------------

try:
    for index, video_file in enumerate(videos_list_path):

        video = cv2.VideoCapture(str(video_file))
        if not video.isOpened():
            # Missing/unreadable input: skip it instead of writing an empty output file.
            print(f'Não foi possível abrir o vídeo: {video_file}')
            video.release()
            continue

        # Output name is numbered by position in `videos_list_path`, not by the input's own number.
        video_out_path = videos_path/f'videoOut0{index}.mp4'

        video_out = None
        try:
            # Output video settings
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec
            fps = video.get(cv2.CAP_PROP_FPS)         # frame rate of the source video
            if fps <= 0:
                fps = FALLBACK_FPS

            # The writer only writes: the file cannot be inspected while it is being
            # recorded. Its frame size must match the frames passed to write(), hence
            # the resize below.
            video_out = cv2.VideoWriter(str(video_out_path), fourcc, fps, (largura_video, altura_video))
            if not video_out.isOpened():
                print(f'Não foi possível criar o vídeo de saída: {video_out_path}')
                continue

            while True:

                ret, frame = video.read()

                if not ret:
                    break

                # Resize every frame to the writer's frame size
                frame = cv2.resize(frame, (largura_video, altura_video))

                result = best_model(frame)[0]

                # Use the typed accessors instead of unpacking raw rows: they do not
                # depend on the number of columns of the underlying tensor.
                boxes = result.boxes
                for (x1, y1, x2, y2), conf in zip(boxes.xyxy.tolist(), boxes.conf.tolist()):

                    # pixel coordinates must be integers for the drawing calls
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                    media_x = math.ceil((x1+x2)/2)
                    media_y = math.ceil((y1+y2)/2)
                    cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0),2)

                    label = f'Conf {conf:.2f}'

                    if y1 - 25 < 0:
                        # No room above the box: draw to its left at mid-height,
                        # clamped so the text never starts outside the frame.
                        label_origin = (max(0, x1-200), media_y)
                    else:
                        label_origin = (media_x, y1-5)

                    cv2.putText(frame, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX,
                                1, (0, 255, 0), 2, cv2.LINE_AA)

                cv2.imshow('Frame', frame)
                # ESC stops the current video only; the loop moves on to the next file.
                if cv2.waitKey(10) & 0xFF == 27:
                    break

                # ---------------------------
                # SAVE FRAME TO THE OUTPUT VIDEO
                # ---------------------------
                video_out.write(frame)
        finally:
            # Always release, even on an exception or kernel interrupt, so the
            # output file is finalised and the capture handle is freed.
            video.release()
            if video_out is not None:
                video_out.release()
finally:
    cv2.destroyAllWindows()


0: 384x640 1 alpaca, 145.4ms
Speed: 1.6ms preprocess, 145.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 alpacas, 111.8ms
Speed: 1.0ms preprocess, 111.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 alpacas, 97.5ms
Speed: 1.0ms preprocess, 97.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 alpacas, 119.4ms
Speed: 2.0ms preprocess, 119.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 alpacas, 105.2ms
Speed: 1.0ms preprocess, 105.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 2 alpacas, 118.3ms
Speed: 2.0ms preprocess, 118.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 94.0ms
Speed: 0.0ms preprocess, 94.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 137.0ms
Speed: 0.0ms preprocess, 137.0ms inference, 1.0ms postprocess per image a

In [15]:
import tkinter as tk

# A hidden Tk root is created only to query the screen size. It is withdrawn so
# no empty window pops up, and destroyed right away so nothing lingers in the kernel.
root = tk.Tk()
root.withdraw()
try:
    largura_tela = root.winfo_screenwidth()
    altura_tela = root.winfo_screenheight()
finally:
    root.destroy()

# Frames are displayed at half the screen size (rounded up).
altura_video = math.ceil(altura_tela/2)
largura_video = math.ceil(largura_tela/2)

videos_path = Path(r'C:\CodigoAlfredo\datasetObjectDetection\videosAlpacas')

videos_list_path = [str(videos_path/'video04.mp4')]

best_model = YOLO(r'C:\CodigoAlfredo\OpenCv\ObjectDetection\runs\detect\train2\weights\best.pt')

# Preview only: detections are drawn and shown on screen, nothing is written to disk.
try:
    for index, video_file in enumerate(videos_list_path):
        video = cv2.VideoCapture(str(video_file))
        try:
            if not video.isOpened():
                # Missing/unreadable input: report it and move on.
                print(f'Não foi possível abrir o vídeo: {video_file}')
                continue

            while True:
                ret, frame = video.read()

                if not ret:
                    break

                frame = cv2.resize(frame, (largura_video, altura_video))

                result = best_model(frame)[0]

                # Use the typed accessors instead of unpacking raw rows: they do not
                # depend on the number of columns of the underlying tensor.
                boxes = result.boxes
                for (x1, y1, x2, y2), conf in zip(boxes.xyxy.tolist(), boxes.conf.tolist()):

                    # pixel coordinates must be integers for the drawing calls
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                    media_x = math.ceil((x1+x2)/2)
                    media_y = math.ceil((y1+y2)/2)
                    cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0),2)

                    label = f'Conf {conf:.2f}'

                    if y1 - 25 < 0:
                        # No room above the box: draw to its left at mid-height,
                        # clamped so the text never starts outside the frame.
                        label_origin = (max(0, x1-200), media_y)
                    else:
                        label_origin = (media_x, y1-5)

                    cv2.putText(frame, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX,
                                1, (0, 255, 0), 2, cv2.LINE_AA)

                cv2.imshow('Frame', frame)
                if cv2.waitKey(40) & 0xFF == 27:  # ESC stops the current video
                    break
        finally:
            # Release every capture, not just the last one, and do so even when
            # an exception or kernel interrupt cuts the loop short.
            video.release()
finally:
    cv2.destroyAllWindows()


0: 384x640 1 alpaca, 124.1ms
Speed: 0.0ms preprocess, 124.1ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 121.4ms
Speed: 0.0ms preprocess, 121.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 243.0ms
Speed: 0.0ms preprocess, 243.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 105.8ms
Speed: 1.0ms preprocess, 105.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 104.6ms
Speed: 0.0ms preprocess, 104.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 99.2ms
Speed: 1.0ms preprocess, 99.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 101.8ms
Speed: 0.0ms preprocess, 101.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 1 alpaca, 93.0ms
Speed: 0.0ms preprocess, 93.0ms inference, 6.0ms postprocess per image at sha

In [20]:
# List the video files under `videos_path` (recursively). Restricting the
# pattern to '*.mp4' keeps directories and unrelated files out of `videos`, and
# sorting makes the order independent of the filesystem's enumeration order.
# Note: this also returns the generated 'videoOut*.mp4' files.
videos = sorted(videos_path.rglob('*.mp4'))
videos

[WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/video01.mp4'),
 WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/video02.mp4'),
 WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/video03.mp4'),
 WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/video04.mp4'),
 WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/videoOut00.mp4'),
 WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/videoOut01.mp4'),
 WindowsPath('C:/CodigoAlfredo/datasetObjectDetection/videosAlpacas/videoOut02.mp4')]

In [36]:
import tkinter as tk

# Query the screen resolution through a temporary Tk root. The root is hidden
# so no empty window appears, and destroyed afterwards so re-running the cell
# does not pile up Tk instances in the kernel.
root = tk.Tk()
root.withdraw()
try:
    largura_tela = root.winfo_screenwidth()
    altura_tela = root.winfo_screenheight()
finally:
    root.destroy()

print("Largura:", largura_tela)
print("Altura:", altura_tela)

Largura: 1920
Altura: 1080
