## Importando variáveis de ambiente
Esse notebook prevê a existência de 2 variáveis de ambiente no arquivo .env desse projeto:
- DATA_FOLDER
- TRAINED_MODELS_FOLDER
- DATASET_FOLDER

In [8]:
from dotenv import load_dotenv
import os

load_dotenv(dotenv_path=".env", override=True)

DATA_FOLDER = os.getenv("DATA_FOLDER")
TRAINED_MODELS_FOLDER = os.getenv("TRAINED_MODELS_FOLDER")
DATASET_FOLDER = os.getenv("DATASET_FOLDER")

## Bibliotecas

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import models, transforms
from PIL import Image
import pandas as pd
import numpy as np

import sqlite3
import faiss

import matplotlib.pyplot as plt
from collections import defaultdict

## Configurações

In [10]:
model_path = os.path.join(TRAINED_MODELS_FOLDER, "best_resnet50.pth")  # caminho do modelo treinado

DB_PATH = os.path.join(DATA_FOLDER, "metadata.db")

device = "cuda" if torch.cuda.is_available() else "cpu"

TOP_K_CLASSES = 3
TOP_K_RESULTS = 3   # por classe

num_classes = 30

## Transform

In [11]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

## Carrega o modelo ResNet50 para classificação e extração de características

In [12]:
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

model.load_state_dict(torch.load(model_path, map_location=device))

model.to(device)
model.eval()

feature_extractor = nn.Sequential(*list(model.children())[:-1])
feature_extractor.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


## Função de classificação e busca por similariadade

In [13]:
import time

def classify_and_find_similar(img_path):
    timings = {
        "classificacao_ms": 0.0,
        "embedding_ms": 0.0,
        "faiss_por_classe_ms": {},  # {class_id: tempo_ms}
        "total_ms": 0.0
    }

    # =============================
    # Início da medição total
    # =============================
    t_total_start = time.perf_counter()

    # 1. Conectar ao banco
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    # 2. Carregar imagem
    img = Image.open(img_path).convert("RGB")
    img_tensor = transform(img).unsqueeze(0).to(device)

    # =============================
    # CLASSIFICAÇÃO
    # =============================
    t_class_start = time.perf_counter()

    with torch.no_grad():
        outputs = model(img_tensor)
        probs = torch.softmax(outputs, dim=1).cpu().numpy().flatten()

    t_class_end = time.perf_counter()

    # 3. Obter top-K classes
    top_classes_idx = probs.argsort()[-TOP_K_CLASSES:][::-1]
    top_classes = []

    for class_id in top_classes_idx:
        cursor.execute(f"SELECT name, index_path FROM class WHERE id = {class_id}")
        row = cursor.fetchone()

        if row is None:
            print(f"[DEBUG][WARN] Classe {class_id} não encontrada no banco!")
            continue

        class_name, index_path = row
        prob = float(probs[class_id])

        top_classes.append({
            "class_id": class_id,
            "class_name": class_name,
            "index_path": index_path,
            "probability": prob
        })

    # =============================
    # EXTRAÇÃO DE EMBEDDING
    # =============================
    t_emb_start = time.perf_counter()

    with torch.no_grad():
        feats = feature_extractor(img_tensor)
        feats = feats.view(feats.size(0), -1)
        feats = torch.nn.functional.normalize(feats, p=2, dim=1)
        feats = feats.cpu().numpy().astype("float32")

    t_emb_end = time.perf_counter()

    # =============================
    # CONSULTAS FAISS
    # =============================

    similar_images = []

    for cls in top_classes:
        class_id = cls["class_id"]

        t_faiss_start = time.perf_counter()

        faiss_index = faiss.read_index(cls["index_path"])
        distances, retrieved_ids = faiss_index.search(feats, TOP_K_RESULTS)

        t_faiss_end = time.perf_counter()
        timings["faiss_por_classe_ms"][class_id] = round((t_faiss_end - t_faiss_start) * 1000, 3)

        retrieved_ids = retrieved_ids[0]
        distances = distances[0]

        # Busca metadados no SQLite
        for img_id, dist in zip(retrieved_ids, distances):
            cursor.execute(f"SELECT image_path FROM image WHERE id = {int(img_id)}")
            result = cursor.fetchone()

            if result is None:
                print(f"[DEBUG][WARN] ID {img_id} não encontrado no banco.")
                continue
            
            image_path = result[0]

            similar_images.append({
                "query_to_class": cls["class_name"],
                "retrieved_image_id": int(img_id),
                "image_path": image_path,
                "distance": float(dist)
            })

    conn.close()

    # Ordena por distância
    similar_images = sorted(similar_images, key=lambda x: x["distance"])

    # =============================
    # Tempo total
    # =============================
    t_total_end = time.perf_counter()
    timings["classificacao_ms"] = round((t_class_end - t_class_start) * 1000, 3)
    timings["embedding_ms"] = round((t_emb_end - t_emb_start) * 1000, 3)
    timings["total_ms"] = round((t_total_end - t_total_start) * 1000, 3)

    # Retorna também as métricas
    return top_classes, similar_images, timings


## Teste de desempenho

In [17]:
# ---------------------------------------------------------
# CONFIG
# ---------------------------------------------------------
TEST_ROOT = DATASET_FOLDER   # raiz contendo subpastas por classe

valid_ext = (".jpg", ".jpeg", ".png", ".bmp", ".webp")

# ---------------------------------------------------------
# LISTAR TODAS AS IMAGENS (com subpastas)
# ---------------------------------------------------------
all_images = []

for root, dirs, files in os.walk(TEST_ROOT):
    for f in files:
        if f.lower().endswith(valid_ext):
            all_images.append(os.path.join(root, f))

print(f"Total de imagens detectadas: {len(all_images)}")


# ---------------------------------------------------------
# ESTRUTURAS PARA COLETA DE MÉTRICAS
# ---------------------------------------------------------
lat_total = []
lat_class = []
lat_emb = []
lat_faiss_total = []

lat_por_classe = defaultdict(list)
faiss_por_classe = defaultdict(list)

# ---------------------------------------------------------
# WARM UP DE GPU (IMPORTANTE!)
# ---------------------------------------------------------
print("Realizando warm-up...")
for _ in range(3):
    classify_and_find_similar(all_images[0])

print("Warm-up concluído.")


# ---------------------------------------------------------
# EXECUTAR BENCHMARK
# ---------------------------------------------------------
print("\nIniciando benchmark...")

start_wall = time.perf_counter()

for i, img_path in enumerate(all_images, 1):
    classes, results, timings = classify_and_find_similar(img_path)

    # LATÊNCIAS GLOBAIS
    lat_total.append(timings["total_ms"])
    lat_class.append(timings["classificacao_ms"])
    lat_emb.append(timings["embedding_ms"])

    # FAISS total por consulta
    faiss_sum = sum(timings["faiss_por_classe_ms"].values())
    lat_faiss_total.append(faiss_sum)

    # MÉTRICAS POR CLASSE PREVISTA (top-1)
    if classes:
        c = classes[0]["class_name"]
        lat_por_classe[c].append(timings["total_ms"])
        faiss_por_classe[c].append(faiss_sum)

end_wall = time.perf_counter()

print("Benchmark concluído.")


# ---------------------------------------------------------
# THROUGHPUT
# ---------------------------------------------------------
tempo_total_benchmark_s = end_wall - start_wall
throughput_qps = len(all_images) / tempo_total_benchmark_s


# ---------------------------------------------------------
# RESUMO GLOBAL
# ---------------------------------------------------------
def resumo(nome, arr):
    arr = np.array(arr)
    print(f"\n=== {nome} ===")
    print(f"Média:     {arr.mean():.2f} ms")
    print(f"Mediana:   {np.median(arr):.2f} ms")
    print(f"P90:       {np.percentile(arr, 90):.2f} ms")
    print(f"P95:       {np.percentile(arr, 95):.2f} ms")
    print(f"P99:       {np.percentile(arr, 99):.2f} ms")
    print(f"Mínimo:    {arr.min():.2f} ms")
    print(f"Máximo:    {arr.max():.2f} ms")


# ---------------------------------------------------------
# IMPRIMIR RESULTADOS
# ---------------------------------------------------------
print("\n===================================================")
print("                 RESULTADOS GERAIS                 ")
print("===================================================\n")

resumo("Latência TOTAL da query", lat_total)
resumo("Latência da classificação", lat_class)
resumo("Latência do embedding", lat_emb)
resumo("Latência FAISS (somada)", lat_faiss_total)

print("\n---------------------------------------------------")
print(f"THROUGHPUT: {throughput_qps:.2f} queries/segundo")
print("---------------------------------------------------")


# ---------------------------------------------------------
# RANKING DAS ETAPAS POR CUSTO MÉDIO
# ---------------------------------------------------------
print("\n\n=== Ranking das etapas mais custosas (média ms) ===")

etapas = {
    "Classificação": np.mean(lat_class),
    "Embedding": np.mean(lat_emb),
    "FAISS": np.mean(lat_faiss_total),
}

for etapa, valor in sorted(etapas.items(), key=lambda x: x[1], reverse=True):
    print(f"{etapa}: {valor:.2f} ms")


# ---------------------------------------------------------
# HISTOGRAMA DA LATÊNCIA TOTAL
# ---------------------------------------------------------
plt.figure(figsize=(10, 5))
plt.hist(lat_total, bins=40, alpha=0.75)
plt.title("Histograma da Latência Total")
plt.xlabel("Latência (ms)")
plt.ylabel("Frequência")
plt.grid(True)
plt.show()


# ---------------------------------------------------------
# LATÊNCIA POR CLASSE (TOP-1)
# ---------------------------------------------------------
media_por_classe = {cls: np.mean(vals) for cls, vals in lat_por_classe.items()}
ordenado = sorted(media_por_classe.items(), key=lambda x: x[1])

plt.figure(figsize=(12, 6))
plt.bar([c for c, _ in ordenado], [v for _, v in ordenado])
plt.xticks(rotation=80)
plt.title("Latência Média por Classe (TOP-1 prev)")
plt.ylabel("Latência (ms)")
plt.grid(True, axis="y")
plt.show()


Total de imagens detectadas: 7482
Realizando warm-up...
Warm-up concluído.

Iniciando benchmark...


KeyboardInterrupt: 