In [None]:
import time
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from ultralytics import YOLO

# =======================================
# SETTINGS
# =======================================
# Dataset description file handed to the trainer via `data=`.
DATASET_PATH = "../../LabelStudio/dataset/data.yaml"
IMAGE_SIZE = 640
EPOCHS = 3  # only a few epochs: the goal is to measure, not to converge
DEVICE = 0  # 0 = CUDA:0

# Lightweight model used for the benchmark
MODEL = "yolov5n.pt"

# Batch sizes to test: 1 through 16 inclusive
BATCH_SIZES = list(range(1, 17))

# Folder where benchmark artefacts are written (created on first run)
RESULTS_DIR = Path("benchmark_batchsize")
RESULTS_DIR.mkdir(exist_ok=True)

# =======================================
# FUNCTION: train and measure time + memory
# =======================================
def benchmark_batch(batch_size):
    """Train MODEL once with the given batch size and measure its cost.

    Args:
        batch_size: Value forwarded to ``model.train(batch=...)``.

    Returns:
        A ``(time_per_epoch, max_mem)`` tuple where

        * ``time_per_epoch`` is the wall-clock duration of the whole
          ``model.train()`` call in nanoseconds divided by ``EPOCHS``
          (a float). The timed window wraps the entire call, so anything
          that call does besides the epochs themselves is included.
        * ``max_mem`` is the peak memory allocated through PyTorch on
          ``DEVICE`` during the run, in bytes / 1024**2.
    """
    import gc  # local import: only needed for the pre-run cleanup below

    # Drop unreachable objects left over from a previous run so their
    # tensors do not raise the baseline of this measurement.
    gc.collect()
    torch.cuda.empty_cache()

    model = YOLO(MODEL)

    # Reset the peak counter on the same device that is queried below;
    # without an explicit device the call targets the *current* device.
    torch.cuda.reset_peak_memory_stats(device=DEVICE)

    start = time.perf_counter_ns()
    model.train(
        data=DATASET_PATH,
        epochs=EPOCHS,
        imgsz=IMAGE_SIZE,
        batch=batch_size,
        workers=1,
        device=DEVICE,
        # Early stopping must not shorten the run: the per-epoch figure
        # below divides by EPOCHS and therefore assumes every epoch ran.
        patience=EPOCHS,
        save=False,
        pretrained=True,
    )
    # CUDA work is asynchronous; wait for it before stopping the clock.
    torch.cuda.synchronize(device=DEVICE)
    total_time = time.perf_counter_ns() - start
    time_per_epoch = total_time / EPOCHS

    # Peak allocated memory since the reset above, converted from bytes.
    max_mem = torch.cuda.max_memory_allocated(device=DEVICE) / (1024**2)

    return time_per_epoch, max_mem

In [None]:
# =======================================
# MAIN LOOP
# =======================================
# One record per batch size; turned into a DataFrame in the next cell.
results_list = []

for bs in BATCH_SIZES:
    print(f"\n=== Benchmarking batch size {bs} ===")
    epoch_ns, peak_mb = benchmark_batch(bs)

    record = dict(
        batch_size=bs,
        time_per_epoch_ns=epoch_ns,
        max_memory_MB=peak_mb,
    )
    results_list.append(record)

In [None]:
# =======================================
# COLLECT RESULTS
# =======================================
df = pd.DataFrame(results_list)
print("\nResultados brutos:")
print(df)

# Persist the raw measurements as parquet
df.to_parquet(RESULTS_DIR / "yolov5_batchsize.parquet", index=False)

# =======================================
# PLOTS
# =======================================
sns.set(style="whitegrid")

# The timing column is named "time_per_epoch_ns" (not "time_per_epoch").
# Derive a seconds column on a copy so the axis is readable while `df`
# and the saved parquet keep their original schema.
plot_df = df.assign(time_per_epoch_s=df["time_per_epoch_ns"] / 1e9)

# Time per epoch
fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(data=plot_df, x="batch_size", y="time_per_epoch_s", marker="o", ax=ax)
ax.set_title("Tempo médio por época x Batch Size")
ax.set_xlabel("Batch size")
ax.set_ylabel("Tempo por época (s)")
plt.show()

# GPU memory
fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(data=plot_df, x="batch_size", y="max_memory_MB", marker="o", color="red", ax=ax)
ax.set_title("Consumo máximo de memória (MB) x Batch Size")
ax.set_xlabel("Batch size")
ax.set_ylabel("Memória máxima (MB)")
plt.show()
