# Proyecto de Visión por Computadora - Etapas 1 a 4

Este notebook consolida todo el código del proyecto, incluyendo utilidades y scripts de cada etapa.


In [None]:
# Install the third-party packages imported below (FAISS, Gradio, Ultralytics).
# NOTE(review): onnx/onnxruntime are not used in the visible cells — presumably needed by a later stage; confirm.
!pip install faiss-cpu gradio ultralytics onnx onnxruntime



In [None]:
# Fetch the project repository; skip the clone when the checkout already
# exists so that re-running this cell does not fail.
![ -d tp-cv-orazi ] || git clone https://github.com/Roberto-Orazi/tp-cv-orazi.git
# -f/-n replace an existing link instead of failing (or nesting a new link
# inside the linked directory) when the cell is re-run.
!ln -sfn tp-cv-orazi/dataset dataset


In [None]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import models, transforms
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
import cv2
import faiss
import pickle
import json
import time
from pathlib import Path
from tqdm import tqdm
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import gradio as gr
from ultralytics import YOLO
from datetime import datetime
import torch.quantization

# Runtime configuration
# NOTE(review): this flag is normally read when the OpenMP runtime is loaded;
# it is set here after the imports above — confirm it still takes effect.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando dispositivo: {device}")

## Utilidades (Utils)


### dataset.py


In [None]:
class DogDataset(Dataset):
    """Image dataset described by a CSV index file.

    The CSV is read with pandas and must provide the columns ``filepaths``
    (image path relative to ``root_dir``) and ``labels`` (class name), plus
    ``data set`` (split name) when ``dataset_type`` is given.

    Class indices come from the sorted unique labels of the *whole* CSV, so
    every split shares the same label <-> index mapping.
    """

    def __init__(self, csv_file, root_dir, dataset_type=None, transform=None):
        frame = pd.read_csv(csv_file)

        # Keep only the requested split; a falsy dataset_type keeps every row.
        self.data = (
            frame[frame['data set'] == dataset_type].reset_index(drop=True)
            if dataset_type
            else frame
        )
        self.root_dir = Path(root_dir)
        self.transform = transform

        class_names = sorted(frame['labels'].unique())
        self.label_to_idx = {name: position for position, name in enumerate(class_names)}
        self.idx_to_label = {position: name for name, position in self.label_to_idx.items()}
        self.num_classes = len(self.label_to_idx)

    def __len__(self):
        """Number of rows in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, class_index, image_path_as_str)`` for row ``idx``.

        The image is opened with PIL, converted to RGB and passed through
        ``self.transform`` when one was supplied.
        """
        row = self.data.iloc[idx]
        img_path = self.root_dir / row['filepaths']
        image = Image.open(img_path).convert('RGB')
        label_idx = self.label_to_idx[row['labels']]

        if self.transform:
            image = self.transform(image)

        return image, label_idx, str(img_path)


### models.py


In [None]:
def get_resnet18(num_classes, pretrained=True):
    """Build a torchvision ResNet18 whose final layer outputs ``num_classes`` scores.

    With ``pretrained=True`` the network is created with the
    ``IMAGENET1K_V1`` weights, otherwise with ``weights=None``. In both cases
    the ``fc`` layer is replaced by a fresh ``nn.Linear``.
    """
    weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
    net = models.resnet18(weights=weights)
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

def get_resnet50(num_classes, pretrained=True):
    """Build a ResNet50 with its last child module removed.

    ``num_classes`` is accepted for signature parity with ``get_resnet18``
    but is not used: the returned ``nn.Sequential`` has no classification
    layer. ``pretrained`` selects the ``IMAGENET1K_V1`` weights or none.
    """
    weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
    backbone = models.resnet50(weights=weights)
    layers = list(backbone.children())
    return nn.Sequential(*layers[:-1])

def get_feature_extractor(model_name='resnet50'):
    """Return an ImageNet-pretrained backbone without its last child module.

    Args:
        model_name: ``'resnet50'`` or ``'resnet18'``.

    Returns:
        An ``nn.Sequential`` made of every child module of the chosen
        torchvision ResNet except the last one.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if model_name == 'resnet50':
        backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    elif model_name == 'resnet18':
        backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    else:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected 'resnet50' or 'resnet18'"
        )
    return nn.Sequential(*list(backbone.children())[:-1])


### transforms.py


In [None]:
def get_train_transform():
    """Training preprocessing: resize, random crop/flip/colour jitter, then normalise.

    Returns a ``transforms.Compose`` ending in ``ToTensor`` and ``Normalize``
    with the same mean/std as ``get_val_transform``.
    """
    augmentations = [
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    ]
    to_normalized_tensor = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(augmentations + to_normalized_tensor)

def get_val_transform():
    """Deterministic preprocessing: resize to 256, centre-crop 224, normalise.

    Returns a ``transforms.Compose`` ending in ``ToTensor`` and ``Normalize``
    with the same mean/std as ``get_train_transform``.
    """
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)


### metrics.py


In [None]:
def dcg_at_k(relevances, k):
    """Discounted cumulative gain over the first ``k`` relevance values.

    The value at 1-based rank ``r`` is divided by ``log2(r + 1)``.
    """
    top_k = np.array(relevances)[:k]
    discounts = np.log2(np.arange(2, len(top_k) + 2))
    return np.sum(top_k / discounts)

def ndcg_at_k(relevances, k):
    """Normalised DCG at ``k``.

    The ideal ordering is obtained by sorting the given ``relevances`` in
    descending order, so the score is relative to the best arrangement of
    the supplied list. Returns ``0.0`` when the ideal DCG is not positive.
    """
    actual = dcg_at_k(relevances, k)
    best_possible = dcg_at_k(sorted(relevances, reverse=True), k)
    if best_possible > 0:
        return actual / best_possible
    return 0.0

def calculate_metrics_per_class(y_true, y_pred, class_idx):
    """One-vs-rest confusion counts and derived metrics for ``class_idx``.

    Args:
        y_true: Sequence or array of true class indices.
        y_pred: Sequence or array of predicted class indices (same length).
        class_idx: The class treated as "positive".

    Returns:
        dict with the counts ``tp``, ``fp``, ``fn``, ``tn`` and the metrics
        ``sensibilidad`` (recall), ``especificidad`` (specificity),
        ``precision``, ``exactitud`` (accuracy) and ``f1``. Any metric whose
        denominator is zero is reported as ``0``.
    """
    # Accept plain lists too: comparing a list with an int yields one scalar
    # bool instead of an element-wise mask, which silently corrupts the counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    is_true = y_true == class_idx
    is_pred = y_pred == class_idx

    tp = int(np.sum(is_true & is_pred))
    fp = int(np.sum(~is_true & is_pred))
    fn = int(np.sum(is_true & ~is_pred))
    tn = int(np.sum(~is_true & ~is_pred))
    total = tp + tn + fp + fn

    sensibilidad = tp / (tp + fn) if (tp + fn) > 0 else 0
    especificidad = tn / (tn + fp) if (tn + fp) > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    # Guarded like the other ratios so empty inputs do not produce NaN.
    exactitud = (tp + tn) / total if total > 0 else 0
    f1 = 2 * (precision * sensibilidad) / (precision + sensibilidad) if (precision + sensibilidad) > 0 else 0

    return {
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'tn': tn,
        'sensibilidad': sensibilidad,
        'especificidad': especificidad,
        'precision': precision,
        'exactitud': exactitud,
        'f1': f1
    }


## Etapa 1: Extracción de Embeddings y Buscador


### extraer_embeddings.py


In [None]:
# Stage 1: build the FAISS similarity index from ResNet50 embeddings, or reuse
# the one cached in embeddings_data.pkl by an earlier run.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

transform = get_val_transform()
model = get_feature_extractor('resnet50').to(device)
model.eval()

if os.path.exists('embeddings_data.pkl'):
    print("Cargando embeddings previamente guardados...")
    with open('embeddings_data.pkl', 'rb') as f:
        data = pickle.load(f)
    index, labels, image_paths = data['index'], data['labels'], data['image_paths']
    print(f"Indice cargado con {index.ntotal} vectores")
else:
    print("Extrayendo embeddings del dataset...")
    # No split is passed, so every row of the CSV is embedded and indexed.
    dataset = DogDataset('dataset/dogs.csv', 'dataset', transform=transform)
    # Tuned for Colab: larger batch size.
    dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=2)

    embeddings, labels, image_paths = [], [], []

    with torch.no_grad():
        for i, (images, lbls, paths) in enumerate(dataloader):
            if i % 20 == 0:
                print(f"Procesando batch {i}/{len(dataloader)}")

            images = images.to(device)
            # flatten(1) keeps the batch axis even when the batch holds one image.
            features = model(images).flatten(1).cpu().numpy()

            embeddings.append(features)
            labels.extend(lbls.cpu().numpy())
            image_paths.extend(paths)

    embeddings = np.vstack(embeddings)
    print(f"Embeddings shape: {embeddings.shape}")

    # Unit-length vectors make the inner-product index rank by cosine similarity.
    faiss.normalize_L2(embeddings)

    dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)
    index.add(embeddings)

    print(f"Indice creado con {index.ntotal} vectores")

    # Store breed names rather than class indices alongside the index.
    dataset_labels = [dataset.idx_to_label[idx] for idx in labels]

    print("Guardando embeddings...")
    payload = {
        'index': index,
        'labels': dataset_labels,
        'image_paths': image_paths
    }
    with open('embeddings_data.pkl', 'wb') as f:
        pickle.dump(payload, f)
    print("Embeddings guardados en embeddings_data.pkl")

print("\nExtraccion de embeddings completada!")
print("Usa 'python app_etapa1.py' para lanzar la aplicacion Gradio")


### evaluar_ndcg.py


In [None]:
# NDCG evaluation setup: feature extractor plus the index saved by the
# Stage 1 extraction cell.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

transform = get_val_transform()
model = get_feature_extractor('resnet50').to(device)
model.eval()

print("Cargando embeddings...")
with open('embeddings_data.pkl', 'rb') as f:
    data = pickle.load(f)
index, labels, image_paths = data['index'], data['labels'], data['image_paths']

print(f"Indice cargado con {index.ntotal} vectores")

def search_similar_images(query_image, k=10):
    """Retrieve the ``k`` indexed images most similar to ``query_image``.

    Uses the module-level ``transform``, ``model``, ``device``, ``index``,
    ``image_paths`` and ``labels``.

    Args:
        query_image: Anything ``PIL.Image.open`` accepts (here: a file path).
        k: Number of neighbours requested from the FAISS index.

    Returns:
        ``(similar_images, similar_labels, predicted_breed)``. The two lists
        are parallel and ordered as returned by the index;
        ``predicted_breed`` is the most frequent label among them, or
        ``None`` when the index returned no neighbour at all.
    """
    img = Image.open(query_image).convert('RGB')
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        query_embedding = model(img_tensor)
        query_embedding = query_embedding.squeeze().cpu().numpy().reshape(1, -1)

    faiss.normalize_L2(query_embedding)
    _, indices = index.search(query_embedding, k)

    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # unfiltered, -1 would silently select the *last* stored image.
    hits = [int(idx) for idx in indices[0] if idx >= 0]
    similar_images = [image_paths[idx] for idx in hits]
    similar_labels = [labels[idx] for idx in hits]

    if not similar_labels:
        return similar_images, similar_labels, None

    predicted_breed = Counter(similar_labels).most_common(1)[0][0]
    return similar_images, similar_labels, predicted_breed

print("\nPreparando conjunto de prueba...")
df = pd.read_csv('dataset/dogs.csv')
test_df = df[df['data set'] == 'test']

breed_groups = test_df.groupby('labels')
test_samples = []
test_labels = []

# Tuned for Colab: 2 samples per breed instead of 5.
# A fixed seed keeps the evaluated subset (and so the reported metrics)
# identical from run to run.
for breed, group in breed_groups:
    samples = group.sample(min(2, len(group)), random_state=42)
    test_samples.extend(samples['filepaths'].tolist())
    # Keep each sample's own label instead of re-scanning the whole CSV later.
    test_labels.extend(samples['labels'].tolist())

print(f"Conjunto de prueba: {len(test_samples)} imagenes")

# NOTE(review): the Stage 1 extraction cell indexes every row of dogs.csv, so
# if that index is the one loaded here each query image is itself in the index
# and will likely be returned as its own nearest neighbour, inflating both
# metrics — confirm whether the index should be restricted to the train split.
print("\nCalculando NDCG@10...")
ndcg_scores = []
accuracy = 0

for i, (test_path, true_label) in enumerate(zip(test_samples, test_labels)):
    if i % 20 == 0:
        print(f"Evaluando imagen {i}/{len(test_samples)}")

    full_path = Path('dataset') / test_path

    similar_images, similar_labels, predicted_breed = search_similar_images(str(full_path), k=10)

    if predicted_breed == true_label:
        accuracy += 1

    # Binary relevance: a retrieved image is relevant when it shares the breed.
    relevances = [1 if label == true_label else 0 for label in similar_labels]
    ndcg = ndcg_at_k(relevances, 10)
    ndcg_scores.append(ndcg)

# Guard against an empty test split (no rows tagged 'test' in the CSV).
if test_samples:
    avg_ndcg = np.mean(ndcg_scores)
    accuracy_pct = (accuracy / len(test_samples)) * 100
else:
    avg_ndcg = 0.0
    accuracy_pct = 0.0

print(f"\n{'='*50}")
print(f"RESULTADOS DE EVALUACION")
print(f"{'='*50}")
print(f"NDCG@10 promedio: {avg_ndcg:.4f}")
print(f"Accuracy: {accuracy_pct:.2f}% ({accuracy}/{len(test_samples)})")
print(f"{'='*50}")


### app_etapa1.py


In [None]:
# Stage 1 app setup: feature extractor plus the index saved by the
# extraction cell.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

transform = get_val_transform()
model = get_feature_extractor('resnet50').to(device)
model.eval()

print("Cargando embeddings...")
with open('embeddings_data.pkl', 'rb') as f:
    data = pickle.load(f)
index, labels, image_paths = data['index'], data['labels'], data['image_paths']

print(f"Indice cargado con {index.ntotal} vectores")

def search_similar_images(query_image, k=10):
    """Retrieve the ``k`` indexed images most similar to ``query_image``.

    Uses the module-level ``transform``, ``model``, ``device``, ``index``,
    ``image_paths`` and ``labels``.

    Args:
        query_image: Anything ``PIL.Image.open`` accepts (here: a file path).
        k: Number of neighbours requested from the FAISS index.

    Returns:
        ``(similar_images, similar_labels, predicted_breed)``. The two lists
        are parallel and ordered as returned by the index;
        ``predicted_breed`` is the most frequent label among them, or
        ``None`` when the index returned no neighbour at all.
    """
    img = Image.open(query_image).convert('RGB')
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        query_embedding = model(img_tensor)
        query_embedding = query_embedding.squeeze().cpu().numpy().reshape(1, -1)

    faiss.normalize_L2(query_embedding)
    _, indices = index.search(query_embedding, k)

    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # unfiltered, -1 would silently select the *last* stored image.
    hits = [int(idx) for idx in indices[0] if idx >= 0]
    similar_images = [image_paths[idx] for idx in hits]
    similar_labels = [labels[idx] for idx in hits]

    if not similar_labels:
        return similar_images, similar_labels, None

    predicted_breed = Counter(similar_labels).most_common(1)[0][0]
    return similar_images, similar_labels, predicted_breed

def gradio_search(image):
    """Gradio callback: run the similarity search and format its output.

    Returns ``(message, gallery_items)`` where ``gallery_items`` is a list of
    ``(image_path, label)`` pairs. Any exception raised by the search is
    reported in the message instead of propagating to the UI.
    """
    if image is None:
        return "Por favor sube una imagen", []

    try:
        similar_images, similar_labels, predicted_breed = search_similar_images(image, k=10)
        gallery_images = list(zip(similar_images, similar_labels))
        return f"Raza predicha: {predicted_breed}", gallery_images
    except Exception as e:
        return f"Error: {str(e)}", []

# When this cell is re-run in a notebook, the app launched by the previous run
# (still bound to the global ``demo``) presumably keeps port 7860 busy; close
# it before building the new one so the fixed port is free again.
_previous_demo = globals().get('demo')
if isinstance(_previous_demo, gr.Blocks):
    _previous_demo.close()

with gr.Blocks() as demo:
    gr.Markdown("# Etapa 1: Buscador de Razas por Similitud")
    gr.Markdown("Modelo: **ResNet50** pre-entrenado (ImageNet)")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="filepath", label="Subir imagen")
            search_btn = gr.Button("Buscar")

        with gr.Column():
            result_text = gr.Textbox(label="Resultado")

    gallery = gr.Gallery(label="Imagenes similares", columns=5)

    # One click handler: query image in, prediction text + similar images out.
    search_btn.click(gradio_search, inputs=input_image, outputs=[result_text, gallery])

if __name__ == "__main__":
    print("\nLanzando aplicacion Gradio - Etapa 1...")
    # The gallery shows files straight from the dataset folder, so that folder
    # is whitelisted explicitly, exactly as the Stage 2 app does.
    dataset_path = os.path.abspath('dataset')
    demo.launch(
        share=False,
        server_name="127.0.0.1",
        server_port=7860,
        allowed_paths=[dataset_path]
    )


## Etapa 2: Entrenamiento y Evaluación (ResNet18)


### entrenar_resnet18.py


In [None]:
# Stage 2 training setup: datasets, loaders, model, loss, optimiser, scheduler.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

print("Cargando datasets...")
# Only the training split gets random augmentation.
train_dataset, val_dataset, test_dataset = (
    DogDataset('dataset/dogs.csv', 'dataset', split, transform=split_transform)
    for split, split_transform in (
        ('train', get_train_transform()),
        ('valid', get_val_transform()),
        ('test', get_val_transform()),
    )
)

num_classes = train_dataset.num_classes
print(f"Numero de clases: {num_classes}")

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=64, shuffle=False, num_workers=2)
    for split_dataset in (val_dataset, test_dataset)
)

print("\nCreando modelo ResNet18...")
model = get_resnet18(num_classes, pretrained=True).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Learning rate is multiplied by 0.1 every 3 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

def train_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader``.

    Batches are moved to the module-level ``device``. Returns
    ``(mean_batch_loss, accuracy_percent)`` where the loss is averaged over
    the number of batches and the accuracy over the number of samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for images, labels, _ in tqdm(loader, desc="Entrenando"):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        predicted = outputs.max(1)[1]
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

    return loss_sum / len(loader), 100. * n_correct / n_seen

def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Batches are moved to the module-level ``device``. Returns
    ``(mean_batch_loss, accuracy_percent)`` where the loss is averaged over
    the number of batches and the accuracy over the number of samples.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for images, labels, _ in tqdm(loader, desc="Validando"):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()

            predicted = outputs.max(1)[1]
            n_seen += labels.size(0)
            n_correct += predicted.eq(labels).sum().item()

    return loss_sum / len(loader), 100. * n_correct / n_seen

num_epochs = 5
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint; with 0.0 a run that never exceeds 0% would save nothing and the
# reload below would fail or pick up a stale file from an earlier run.
best_val_acc = float('-inf')
train_losses, val_losses = [], []
train_accs, val_accs = [], []

print(f"\nIniciando entrenamiento por {num_epochs} epochs...")

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, criterion)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # Keep only the weights with the best validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'resnet18_best.pth')
        print(f"Mejor modelo guardado con Val Acc: {val_acc:.2f}%")

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

print("\nGraficando resultados...")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

ax1.plot(train_losses, label='Train Loss')
ax1.plot(val_losses, label='Val Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.set_title('Loss durante entrenamiento')

ax2.plot(train_accs, label='Train Acc')
ax2.plot(val_accs, label='Val Acc')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy (%)')
ax2.legend()
ax2.set_title('Accuracy durante entrenamiento')

plt.tight_layout()
plt.savefig('training_curves.png')
print("Curvas guardadas en training_curves.png")

print("\nCargando mejor modelo para evaluacion en test...")
# map_location matches the other cells that load this checkpoint and keeps the
# reload independent of the device the weights were saved from.
model.load_state_dict(torch.load('resnet18_best.pth', map_location=device))

print("Evaluando en conjunto de test...")
test_loss, test_acc = validate(model, test_loader, criterion)
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

print("\nEntrenamiento completado!")
print(f"Mejor Val Acc: {best_val_acc:.2f}%")
print(f"Test Acc: {test_acc:.2f}%")


### extraer_embeddings_resnet18.py


In [None]:
# Stage 2: index the whole dataset with embeddings from the fine-tuned ResNet18.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

transform = get_val_transform()

print("Cargando modelo ResNet18 entrenado...")
dataset = DogDataset('dataset/dogs.csv', 'dataset', transform=transform)
num_classes = dataset.num_classes

model = get_resnet18(num_classes, pretrained=False)
model.load_state_dict(torch.load('resnet18_best.pth', map_location=device))
model = model.to(device)
model.eval()

# Everything up to and including the average pool, i.e. the network without
# its final ``fc`` layer; the modules are shared with ``model``.
backbone = nn.Sequential(*list(model.children())[:-1]).eval()

print("Extrayendo embeddings con ResNet18...")
dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=2)

embeddings, labels, image_paths = [], [], []

with torch.no_grad():
    for i, (images, lbls, paths) in enumerate(dataloader):
        if i % 20 == 0:
            print(f"Procesando batch {i}/{len(dataloader)}")

        images = images.to(device)
        # flatten(1) keeps the batch axis even when the batch holds one image.
        features = backbone(images).flatten(1).cpu().numpy()

        embeddings.append(features)
        labels.extend(lbls.cpu().numpy())
        image_paths.extend(paths)

embeddings = np.vstack(embeddings)
print(f"Embeddings shape: {embeddings.shape}")

# Unit-length vectors make the inner-product index rank by cosine similarity.
faiss.normalize_L2(embeddings)

dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

print(f"Indice creado con {index.ntotal} vectores")

# Store breed names rather than class indices alongside the index.
dataset_labels = [dataset.idx_to_label[idx] for idx in labels]

print("Guardando embeddings...")
payload = {
    'index': index,
    'labels': dataset_labels,
    'image_paths': image_paths
}
with open('embeddings_resnet18.pkl', 'wb') as f:
    pickle.dump(payload, f)

print("Embeddings de ResNet18 guardados en embeddings_resnet18.pkl")
print("Ahora podes usar 'python app_etapa2.py' para probar ambos modelos")


### evaluar_metricas.py


In [None]:
# Stage 2: detailed test-set metrics for the fine-tuned ResNet18.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando: {device}")

print("Cargando dataset de test...")
test_dataset = DogDataset('dataset/dogs.csv', 'dataset', 'test', transform=get_val_transform())
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

num_classes = test_dataset.num_classes
print(f"Numero de clases: {num_classes}")

print("\nCargando modelo ResNet18...")
model = get_resnet18(num_classes, pretrained=False)
# map_location lets a checkpoint saved from the GPU load on a CPU-only runtime,
# consistent with the other cells that read this file.
model.load_state_dict(torch.load('resnet18_best.pth', map_location=device))
model = model.to(device)
model.eval()

print("\nRealizando predicciones en conjunto de test...")
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

accuracy = 100. * np.sum(all_preds == all_labels) / len(all_labels)
print(f"\nAccuracy en test: {accuracy:.2f}%")

print("\nCalculando metricas detalladas...")

# Restrict the report to classes that actually occur as a label or prediction.
unique_labels = np.unique(np.concatenate([all_labels, all_preds]))
target_names = [test_dataset.idx_to_label[i] for i in unique_labels]

report = classification_report(
    all_labels,
    all_preds,
    labels=unique_labels,
    target_names=target_names,
    output_dict=True,
    zero_division=0
)

print("\n" + "="*80)
print("METRICAS POR CLASE (primeras 10 razas)")
print("="*80)

metrics_df = pd.DataFrame(report).transpose()
print(metrics_df.head(10).to_string())

print("\n" + "="*80)
print("METRICAS GLOBALES")
print("="*80)
print(f"Accuracy:        {report['accuracy']:.4f}")
print(f"Macro Avg:")
print(f"  Precision:     {report['macro avg']['precision']:.4f}")
print(f"  Recall:        {report['macro avg']['recall']:.4f}")
print(f"  F1-Score:      {report['macro avg']['f1-score']:.4f}")
print(f"Weighted Avg:")
print(f"  Precision:     {report['weighted avg']['precision']:.4f}")
print(f"  Recall:        {report['weighted avg']['recall']:.4f}")
print(f"  F1-Score:      {report['weighted avg']['f1-score']:.4f}")
print("="*80)

metrics_df.to_csv('metricas_detalladas.csv')
print("\nMetricas guardadas en metricas_detalladas.csv")

print("\nGenerando matriz de confusion...")
# Passing the same label set as the report keeps the matrix rows/columns
# aligned with target_names.
cm = confusion_matrix(all_labels, all_preds, labels=unique_labels)

plt.figure(figsize=(20, 20))
sns.heatmap(cm, annot=False, fmt='d', cmap='Blues',
            xticklabels=target_names, yticklabels=target_names)
plt.xlabel('Prediccion')
plt.ylabel('Real')
plt.title('Matriz de Confusion - ResNet18')
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.yticks(rotation=0, fontsize=8)
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150)
print("Matriz de confusion guardada en confusion_matrix.png")

print("\nCalculando metricas detalladas por clase...")

# Only the first five classes are printed in full detail.
for class_idx in unique_labels[:5]:
    class_name = test_dataset.idx_to_label[class_idx]
    metrics = calculate_metrics_per_class(all_labels, all_preds, class_idx)

    print(f"\n{class_name}:")
    print(f"  TP={metrics['tp']}, FP={metrics['fp']}, FN={metrics['fn']}, TN={metrics['tn']}")
    print(f"  Sensibilidad (Recall):    {metrics['sensibilidad']:.4f}")
    print(f"  Especificidad:            {metrics['especificidad']:.4f}")
    print(f"  Precision:                {metrics['precision']:.4f}")
    print(f"  Exactitud (Accuracy):     {metrics['exactitud']:.4f}")
    print(f"  F1-Score:                 {metrics['f1']:.4f}")

print("\nEvaluacion completada!")


### app_etapa2.py


In [None]:
# Stage 2 app setup: both feature extractors and their FAISS indexes.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando: {device}")

transform = get_val_transform()

print("Cargando modelos...")

model_resnet50 = get_feature_extractor('resnet50')
model_resnet50 = model_resnet50.to(device)
model_resnet50.eval()

csv_file = 'dataset/dogs.csv'
# Only needed to learn how many classes the fine-tuned head was built with.
temp_dataset = DogDataset(csv_file, 'dataset', 'train', transform=transform)
num_classes = temp_dataset.num_classes

model_resnet18 = get_resnet18(num_classes, pretrained=False)
if os.path.exists('resnet18_best.pth'):
    model_resnet18.load_state_dict(torch.load('resnet18_best.pth', map_location=device))
    model_resnet18 = model_resnet18.to(device)
    model_resnet18.eval()
    print("ResNet18 entrenado cargado")
else:
    model_resnet18 = None
    print("ResNet18 no encontrado. Ejecuta 'python entrenar_resnet18.py' primero")

print("Cargando embeddings ResNet50...")
embeddings_path_50 = 'etapa-1/embeddings_data.pkl'
# The Stage 1 cell of this notebook writes the file to the working directory,
# not to etapa-1/; fall back to that location so the ResNet50 search works
# here as well as in the original per-stage folder layout.
if not os.path.exists(embeddings_path_50) and os.path.exists('embeddings_data.pkl'):
    embeddings_path_50 = 'embeddings_data.pkl'
if os.path.exists(embeddings_path_50):
    with open(embeddings_path_50, 'rb') as f:
        data_50 = pickle.load(f)
        index_50 = data_50['index']
        labels_50 = data_50['labels']
        image_paths_50 = data_50['image_paths']
    print(f"Embeddings ResNet50 cargados: {index_50.ntotal} vectores")
else:
    # index_50 = None is the flag the search function checks before using it.
    index_50 = None
    print("Embeddings ResNet50 no encontrados")

embeddings_path_18 = 'embeddings_resnet18.pkl'
if os.path.exists(embeddings_path_18):
    with open(embeddings_path_18, 'rb') as f:
        data_18 = pickle.load(f)
        index_18 = data_18['index']
        labels_18 = data_18['labels']
        image_paths_18 = data_18['image_paths']
    print(f"Embeddings ResNet18 cargados: {index_18.ntotal} vectores")
else:
    index_18 = None
    print("Embeddings ResNet18 no encontrados. Se generaran al buscar")

def extract_features_resnet18(image_path):
    """Embed one image with the fine-tuned ResNet18, stopping before ``fc``.

    Returns a NumPy array reshaped to ``(1, -1)``, or ``None`` when the
    module-level ``model_resnet18`` is not available.
    """
    if model_resnet18 is None:
        return None

    picture = Image.open(image_path).convert('RGB')
    features = transform(picture).unsqueeze(0).to(device)

    # The network's stages applied in order, ending at the average pool.
    stage_names = (
        'conv1', 'bn1', 'relu', 'maxpool',
        'layer1', 'layer2', 'layer3', 'layer4',
        'avgpool',
    )
    with torch.no_grad():
        for stage_name in stage_names:
            features = getattr(model_resnet18, stage_name)(features)
        features = features.squeeze().cpu().numpy().reshape(1, -1)

    return features

def search_similar_images(query_image, model_name, k=10):
    """Retrieve the ``k`` most similar indexed images using the chosen model.

    Args:
        query_image: Anything ``PIL.Image.open`` accepts (here: a file path).
        model_name: ``"ResNet50 (Pre-entrenado)"`` or
            ``"ResNet18 (Fine-tuned)"``.
        k: Number of neighbours requested from the FAISS index.

    Returns:
        ``(similar_images, similar_labels, predicted_breed)`` on success.
        On failure the lists are empty and the third element is a message
        starting with ``"Error"``, which is what the Gradio callback checks.
    """
    if model_name == "ResNet50 (Pre-entrenado)":
        if index_50 is None:
            return [], [], "Error: Embeddings ResNet50 no encontrados"

        img = Image.open(query_image).convert('RGB')
        img_tensor = transform(img).unsqueeze(0).to(device)

        with torch.no_grad():
            query_embedding = model_resnet50(img_tensor)
            query_embedding = query_embedding.squeeze().cpu().numpy().reshape(1, -1)

        faiss_index, stored_paths, stored_labels = index_50, image_paths_50, labels_50

    elif model_name == "ResNet18 (Fine-tuned)":
        if model_resnet18 is None:
            return [], [], "Error: ResNet18 no encontrado"
        if index_18 is None:
            return [], [], "Error: Embeddings ResNet18 no encontrados"

        query_embedding = extract_features_resnet18(query_image)
        faiss_index, stored_paths, stored_labels = index_18, image_paths_18, labels_18

    else:
        # Prefixed with "Error" so the caller treats it as a failure instead
        # of displaying it as the predicted breed.
        return [], [], "Error: Modelo no disponible"

    faiss.normalize_L2(query_embedding)
    _, indices = faiss_index.search(query_embedding, k)

    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # unfiltered, -1 would silently select the *last* stored image.
    hits = [int(idx) for idx in indices[0] if idx >= 0]
    similar_images = [stored_paths[idx] for idx in hits]
    similar_labels = [stored_labels[idx] for idx in hits]

    if not similar_labels:
        return [], [], "Error: No se encontraron imagenes similares"

    predicted_breed = Counter(similar_labels).most_common(1)[0][0]
    return similar_images, similar_labels, predicted_breed

def gradio_search(image, model_name):
    """Gradio callback: search with the selected model and format the output.

    Returns ``(message, gallery_items)`` where ``gallery_items`` is a list of
    ``(image_path, label)`` pairs. A search result whose third element is a
    string starting with ``"Error"`` is shown as the message with an empty
    gallery; exceptions are reported the same way.
    """
    if image is None:
        return "Por favor sube una imagen", []

    try:
        similar_images, similar_labels, predicted_breed = search_similar_images(image, model_name, k=10)

        search_failed = isinstance(predicted_breed, str) and predicted_breed.startswith("Error")
        if search_failed:
            return predicted_breed, []

        gallery_images = list(zip(similar_images, similar_labels))
        return f"Modelo: {model_name}\nRaza predicha: {predicted_breed}", gallery_images
    except Exception as e:
        return f"Error: {str(e)}", []

# In a notebook the app launched by an earlier cell (still bound to the global
# ``demo``) presumably keeps port 7860 busy, and this app asks for the same
# fixed port; close the earlier app before building the new one.
_previous_demo = globals().get('demo')
if isinstance(_previous_demo, gr.Blocks):
    _previous_demo.close()

with gr.Blocks() as demo:
    gr.Markdown("# Etapa 2: Buscador con Selector de Modelos")
    gr.Markdown("Compara el rendimiento de diferentes modelos de extraccion de caracteristicas")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="filepath", label="Subir imagen")

            # The choice strings must match the ones search_similar_images compares against.
            model_selector = gr.Radio(
                choices=["ResNet50 (Pre-entrenado)", "ResNet18 (Fine-tuned)"],
                value="ResNet50 (Pre-entrenado)",
                label="Seleccionar Modelo"
            )

            search_btn = gr.Button("Buscar")

        with gr.Column():
            result_text = gr.Textbox(label="Resultado", lines=3)

    gallery = gr.Gallery(label="Imagenes similares", columns=5)

    search_btn.click(
        gradio_search,
        inputs=[input_image, model_selector],
        outputs=[result_text, gallery]
    )

if __name__ == "__main__":
    print("\nLanzando aplicacion Gradio - Etapa 2...")
    # The gallery shows files straight from the dataset folder, so that folder
    # is whitelisted explicitly.
    dataset_path = os.path.abspath('dataset')
    demo.launch(
        share=False,
        server_name="127.0.0.1",
        server_port=7860,
        allowed_paths=[dataset_path]
    )



## Etapa 3: Detección y Clasificación


### evaluar_pipeline.py


In [None]:
# Stage 3: evaluate the detect-then-classify pipeline on the test split.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando: {device}")

transform = get_val_transform()

print("Cargando modelos...")

yolo_model = YOLO('yolov8n.pt')
print("YOLO preentrenado cargado (clase 'dog' = 16 en COCO)")

csv_file = 'dataset/dogs.csv'
test_dataset = DogDataset(csv_file, 'dataset', 'test', transform=transform)
num_classes = test_dataset.num_classes

classifier = get_resnet18(num_classes, pretrained=False)
classifier_path = 'etapa-2/resnet18_best.pth'
# The Stage 2 training cell of this notebook saves the checkpoint in the
# working directory; fall back to it when the per-stage folder is absent.
if not os.path.exists(classifier_path) and os.path.exists('resnet18_best.pth'):
    classifier_path = 'resnet18_best.pth'
classifier.load_state_dict(torch.load(classifier_path, map_location=device))
classifier = classifier.to(device)
classifier.eval()

print(f"\nEvaluando pipeline en {len(test_dataset)} imagenes de test...")

total_images = 0
detected_images = 0
correct_classifications = 0
detection_confidences = []
classification_confidences = []

all_true_labels = []
all_pred_labels = []

for idx in tqdm(range(len(test_dataset))):
    # Read path and label straight from the dataset table: indexing the
    # dataset would decode and transform a full image that is never used.
    row = test_dataset.data.iloc[idx]
    img_path = test_dataset.root_dir / row['filepaths']
    true_label = test_dataset.label_to_idx[row['labels']]

    pil_image = Image.open(img_path).convert('RGB')

    results = yolo_model(pil_image, conf=0.25, verbose=False)

    dog_detected = False
    for result in results:
        boxes = result.boxes
        for box in boxes:
            cls = int(box.cls[0])
            conf = float(box.conf[0])

            # 16 is the class id this script treats as 'dog'.
            if cls == 16:
                dog_detected = True
                detection_confidences.append(conf)

                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                dog_crop = pil_image.crop((x1, y1, x2, y2))

                # Crops too small to classify are skipped; the next box is tried.
                if dog_crop.width < 10 or dog_crop.height < 10:
                    continue

                img_tensor_crop = transform(dog_crop).unsqueeze(0).to(device)

                with torch.no_grad():
                    outputs = classifier(img_tensor_crop)
                    _, predicted = outputs.max(1)
                    predicted_idx = predicted.item()

                    probabilities = torch.nn.functional.softmax(outputs, dim=1)
                    conf_class = probabilities[0][predicted_idx].item()
                    classification_confidences.append(conf_class)

                all_true_labels.append(true_label)
                all_pred_labels.append(predicted_idx)

                if predicted_idx == true_label:
                    correct_classifications += 1

                # One classified dog per image is enough.
                break

        if dog_detected:
            break

    total_images += 1
    if dog_detected:
        detected_images += 1

# An image can count as "detected" without being classified (crop too small),
# so accuracy is measured over the images that were actually classified.
classified_images = len(all_pred_labels)

detection_rate = 100. * detected_images / total_images if total_images > 0 else 0
classification_acc = 100. * correct_classifications / classified_images if classified_images > 0 else 0
avg_detection_conf = np.mean(detection_confidences) if detection_confidences else 0
avg_classification_conf = np.mean(classification_confidences) if classification_confidences else 0

print("\n" + "="*80)
print("RESULTADOS DEL PIPELINE COMPLETO")
print("="*80)
print(f"Imagenes procesadas:           {total_images}")
print(f"Perros detectados:             {detected_images} ({detection_rate:.2f}%)")
print(f"Clasificaciones correctas:     {correct_classifications}")
print(f"Accuracy de clasificacion:     {classification_acc:.2f}%")
print(f"Confianza promedio deteccion:  {avg_detection_conf:.4f}")
print(f"Confianza promedio clasificacion: {avg_classification_conf:.4f}")
print("="*80)

if len(all_true_labels) > 0:
    # Restrict the report to classes that occur as a label or a prediction.
    unique_labels = np.unique(np.concatenate([all_true_labels, all_pred_labels]))
    target_names = [test_dataset.idx_to_label[i] for i in unique_labels]

    report = classification_report(
        all_true_labels,
        all_pred_labels,
        labels=unique_labels,
        target_names=target_names,
        output_dict=True,
        zero_division=0
    )

    print("\nMETRICAS GLOBALES:")
    print(f"Accuracy:        {report['accuracy']:.4f}")
    print(f"Macro Avg F1:    {report['macro avg']['f1-score']:.4f}")
    print(f"Weighted Avg F1: {report['weighted avg']['f1-score']:.4f}")

    metrics_df = pd.DataFrame(report).transpose()
    metrics_df.to_csv('metricas_pipeline.csv')
    print("\nMetricas guardadas en metricas_pipeline.csv")

print("\nEvaluacion completada!")



### app_deteccion.py


In [None]:
# Runtime setup for the Gradio app: device, preprocessing, detector, classifier.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

# Preprocessing applied to every dog crop before classification.
transform = get_val_transform()

print("Cargando YOLO preentrenado...")
yolo_model = YOLO('yolov8n.pt')
print("YOLO cargado (clase 'dog' = 16 en COCO)")

print("Cargando clasificador ResNet18...")
csv_file = 'dataset/dogs.csv'
# The dataset object is only used for its label tables (class count and
# index -> breed name); no images are read from it here.
temp_dataset = DogDataset(csv_file, root_dir='dataset', dataset_type='train', transform=transform)
num_classes = temp_dataset.num_classes

classifier = get_resnet18(num_classes, pretrained=False)
classifier_path = 'etapa-2/resnet18_best.pth'
if not os.path.exists(classifier_path):
    # Without trained weights the app still starts; detect_and_classify
    # reports the problem when it sees classifier is None.
    classifier = None
    print("ERROR: ResNet18 no encontrado. Ejecuta 'python entrenar_resnet18.py' en etapa-2")
else:
    classifier.load_state_dict(
        torch.load(classifier_path, map_location=device)
    )
    classifier = classifier.to(device)
    classifier.eval()
    print("Clasificador ResNet18 cargado")

def detect_and_classify(image):
    """Detect dogs with YOLO and label each detection with its predicted breed.

    Args:
        image: Input picture as a PIL image or a NumPy array. ``None`` (no
            image supplied) is reported as an error instead of being passed
            to the detector.

    Returns:
        tuple: ``(annotated_image, report)``. ``annotated_image`` is a PIL
        image with one box and breed label per classified dog; it is the
        unmodified input when no dog is found and ``None`` when the
        classifier or the input image is missing. ``report`` is the text
        summary shown to the user.
    """
    if classifier is None:
        return None, "Error: Clasificador no encontrado"

    # The button can be pressed before any image has been uploaded.
    if image is None:
        return None, "Error: No se proporciono ninguna imagen"

    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    # Normalise to RGB like the other scripts in this file do, so grayscale or
    # RGBA inputs reach the detector and the crop transform with 3 channels.
    pil_image = pil_image.convert('RGB')

    dog_class_id = 16  # index of 'dog' in the COCO classes of the pretrained YOLO

    results = yolo_model(pil_image, conf=0.25)

    detections = []
    for result in results:
        for box in result.boxes:
            if int(box.cls[0]) != dog_class_id:
                continue
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            detections.append({
                'box': (int(x1), int(y1), int(x2), int(y2)),
                'confidence': float(box.conf[0])
            })

    if not detections:
        return pil_image, "No se detectaron perros en la imagen"

    output_image = pil_image.copy()
    draw = ImageDraw.Draw(output_image)

    try:
        font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 20)
    except (OSError, ImportError):
        # Font file missing (any non-macOS host) or FreeType support not
        # available: fall back to Pillow's built-in bitmap font.
        font = ImageFont.load_default()

    report_parts = [f"Detectados {len(detections)} perro(s):\n\n"]

    for i, detection in enumerate(detections):
        x1, y1, x2, y2 = detection['box']
        conf = detection['confidence']

        dog_crop = pil_image.crop((x1, y1, x2, y2))

        # Crops thinner than 10 px are skipped rather than classified.
        if dog_crop.width < 10 or dog_crop.height < 10:
            continue

        img_tensor = transform(dog_crop).unsqueeze(0).to(device)

        with torch.no_grad():
            outputs = classifier(img_tensor)
            _, predicted = outputs.max(1)
            predicted_idx = predicted.item()

            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            confidence = probabilities[0][predicted_idx].item()

        breed = temp_dataset.idx_to_label[predicted_idx]

        draw.rectangle([x1, y1, x2, y2], outline="green", width=3)

        label = f"{breed} ({confidence*100:.1f}%)"

        # Place the label just above the box, clamped so that it stays inside
        # the image when the box touches the top edge.
        label_y = max(y1 - 25, 0)
        bbox = draw.textbbox((x1, label_y), label, font=font)
        draw.rectangle(bbox, fill="green")
        draw.text((x1, label_y), label, fill="white", font=font)

        report_parts.append(f"Perro {i+1}:\n")
        report_parts.append(f"  Raza: {breed}\n")
        report_parts.append(f"  Confianza: {confidence*100:.1f}%\n")
        report_parts.append(f"  Deteccion: {conf*100:.1f}%\n\n")

    return output_image, "".join(report_parts)

# Gradio interface for stage 3: one row holding two columns, the first with
# the image input and the trigger button, the second with the annotated image
# and the text report.
with gr.Blocks() as demo:
    gr.Markdown("# Etapa 3: Deteccion y Clasificacion de Razas de Perros")
    gr.Markdown("Pipeline completo: YOLO detecta perros -> ResNet18 clasifica raza")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Subir imagen")
            detect_btn = gr.Button("Detectar y Clasificar")

        with gr.Column():
            output_image = gr.Image(type="pil", label="Detecciones")
            result_text = gr.Textbox(label="Resultados", lines=10)

    # The two values returned by detect_and_classify fill the output image and
    # the textbox, in that order.
    detect_btn.click(
        detect_and_classify,
        inputs=[input_image],
        outputs=[output_image, result_text]
    )

# Start the server only when this code runs as the main module.
if __name__ == "__main__":
    print("\nLanzando aplicacion Gradio - Etapa 3...")
    # Bound to the loopback address on port 7861, with share disabled.
    demo.launch(
        share=False,
        server_name="127.0.0.1",
        server_port=7861
    )



## Etapa 4: Optimización y Anotación


### anotar_automatico.py


In [None]:
# Model setup for the automatic annotation script.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

transform = get_val_transform()

print("Cargando modelos...")
yolo_model = YOLO('yolov8n.pt')

# The dataset object supplies the label tables (class count, index <-> breed).
csv_file = 'dataset/dogs.csv'
temp_dataset = DogDataset(csv_file, root_dir='dataset', dataset_type='train', transform=transform)
num_classes = temp_dataset.num_classes

# Rebuild the architecture, then restore the trained weights onto `device`.
classifier = get_resnet18(num_classes, pretrained=False)
classifier.load_state_dict(
    torch.load('etapa-2/resnet18_best.pth', map_location=device)
)
classifier = classifier.to(device)
classifier.eval()

input_folder = Path("dataset/test")

if not input_folder.exists():
    print(f"ERROR: La carpeta {input_folder} no existe")
    sys.exit(1)

# Sorted on purpose: COCO image ids below are assigned by position in this
# list, and directory listing order is not guaranteed to be stable, so an
# unsorted listing would give different ids from one run to the next.
image_files = sorted(
    path
    for pattern in ("*.jpg", "*.jpeg", "*.png")
    for path in input_folder.glob(pattern)
)

if not image_files:
    print("ERROR: No se encontraron imagenes en la carpeta")
    sys.exit(1)

print(f"\nEncontradas {len(image_files)} imagenes")

# YOLO-format labels are written as one .txt file per image in this folder.
output_yolo = Path('anotaciones_yolo')
output_yolo.mkdir(exist_ok=True)

# COCO category ids start at 1 and follow the iteration order of
# label_to_idx, i.e. category id == classifier index + 1.
breed_to_category_id = {
    breed: category_id
    for category_id, breed in enumerate(temp_dataset.label_to_idx, start=1)
}
# Same value the name held before: the next unused category id.
category_id_counter = len(breed_to_category_id) + 1

coco_output = {
    'info': {
        'description': 'Anotaciones automaticas de perros',
        'date_created': datetime.now().isoformat()
    },
    'images': [],
    'annotations': [],
    'categories': [
        {'id': category_id, 'name': breed, 'supercategory': 'dog'}
        for breed, category_id in breed_to_category_id.items()
    ]
}

# Running id for COCO annotations; incremented once per stored box.
annotation_id = 1

print("\nProcesando imagenes...")

# Annotate every image: YOLO proposes dog boxes, the classifier names the
# breed, and each box is stored in both YOLO and COCO conventions.
for img_idx, img_path in enumerate(image_files):
    image_id = img_idx + 1  # COCO image ids are 1-based
    print(f"Procesando {image_id}/{len(image_files)}: {img_path.name}")

    pil_image = Image.open(img_path).convert('RGB')
    img_width, img_height = pil_image.size

    coco_output['images'].append(dict(
        id=image_id,
        file_name=img_path.name,
        width=img_width,
        height=img_height,
    ))

    results = yolo_model(pil_image, conf=0.25, verbose=False)

    yolo_annotations = []

    for result in results:
        for box in result.boxes:
            # Only class 16 detections are annotated.
            if int(box.cls[0]) != 16:
                continue

            x1, y1, x2, y2 = (int(coord) for coord in box.xyxy[0].cpu().numpy())

            dog_crop = pil_image.crop((x1, y1, x2, y2))
            if min(dog_crop.width, dog_crop.height) < 10:
                continue

            img_tensor = transform(dog_crop).unsqueeze(0).to(device)

            with torch.no_grad():
                predicted_idx = classifier(img_tensor).max(1)[1].item()
            breed = temp_dataset.idx_to_label[predicted_idx]

            box_width = x2 - x1
            box_height = y2 - y1

            # YOLO line: class index, then box centre and size, all four
            # normalised by the image dimensions.
            x_center = ((x1 + x2) / 2) / img_width
            y_center = ((y1 + y2) / 2) / img_height
            width = box_width / img_width
            height = box_height / img_height
            yolo_annotations.append(
                f"{predicted_idx} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
            )

            # COCO entry: absolute [x, y, width, height] in pixels.
            coco_output['annotations'].append(dict(
                id=annotation_id,
                image_id=image_id,
                category_id=breed_to_category_id[breed],
                bbox=[x1, y1, box_width, box_height],
                area=box_width * box_height,
                iscrowd=0,
            ))
            annotation_id += 1

    # A label file is written for every image, even when it has no boxes.
    yolo_file = output_yolo / f"{img_path.stem}.txt"
    with open(yolo_file, 'w') as f:
        f.write('\n'.join(yolo_annotations))

print(f"\nAnotaciones YOLO guardadas en: {output_yolo}/")

# Persist the accumulated COCO structure as a single JSON document.
coco_file = 'anotaciones_coco.json'
with open(coco_file, 'w') as f:
    json.dump(coco_output, f, indent=2)

print(f"Anotaciones COCO guardadas en: {coco_file}")

# Final summary; annotation_id is one past the last id that was assigned.
for summary_line in (
    "\n" + "="*80,
    "RESUMEN",
    "="*80,
    f"Imagenes procesadas:       {len(image_files)}",
    f"Total de anotaciones:      {annotation_id - 1}",
    f"Clases detectadas:         {len({ann['category_id'] for ann in coco_output['annotations']})}",
    "="*80,
    "\nFormatos generados:",
    f"  - YOLOv5 (.txt):         {output_yolo}/",
    f"  - COCO (.json):          {coco_file}",
):
    print(summary_line)



### herramienta_anotar.py


In [None]:
# Placeholder cell: the annotation tool itself is not included here; the cell
# only prints a notice explaining why.
print("Esta herramienta requiere interfaz gráfica local y no puede ejecutarse en Colab.")


### optimizar_modelos.py


In [None]:
# Post-training static quantization of the ResNet18 classifier.
# Everything in this cell runs on the CPU.
device = torch.device("cpu")
print(f"Usando CPU para cuantizacion")

print("\nCargando modelo ResNet18 original...")
csv_file = 'dataset/dogs.csv'
test_dataset = DogDataset(csv_file, 'dataset', 'test', transform=get_val_transform())
num_classes = test_dataset.num_classes

# Float reference model, kept untouched for the later comparison.
model_original = get_resnet18(num_classes, pretrained=False)
model_original.load_state_dict(torch.load('etapa-2/resnet18_best.pth', map_location=device))
model_original = model_original.to(device)
model_original.eval()

# Second, independent copy of the same weights: this one gets quantized.
print("Creando modelo cuantizado...")
model_quantized = get_resnet18(num_classes, pretrained=False)
model_quantized.load_state_dict(torch.load('etapa-2/resnet18_best.pth', map_location=device))
model_quantized = model_quantized.to(device)
model_quantized.eval()

# Attach the default 'x86' qconfig and insert observers (returns a new model
# because inplace=False).
# NOTE(review): eager-mode prepare/convert normally needs the model to wrap
# its input/output in QuantStub/DeQuantStub and to handle residual additions
# in a quantization-aware way; get_resnet18 is defined elsewhere, so confirm
# it does, otherwise the converted model may fail on float inputs.
model_quantized.qconfig = torch.quantization.get_default_qconfig('x86')
model_quantized_prepared = torch.quantization.prepare(model_quantized, inplace=False)

print("Calibrando modelo con datos de validacion...")
val_dataset = DogDataset(csv_file, 'dataset', 'valid', transform=get_val_transform())
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

# Calibration: forward at most the first 5 validation batches through the
# prepared model; the outputs are discarded, only the observers matter.
with torch.no_grad():
    for i, (images, labels, _) in enumerate(val_loader):
        if i >= 5:
            break
        images = images.to(device)
        model_quantized_prepared(images)

print("Convirtiendo a modelo cuantizado...")
model_quantized_final = torch.quantization.convert(model_quantized_prepared, inplace=False)

# Only the state dict is saved; its file size is compared with the original
# checkpoint further down.
print("Guardando modelo cuantizado...")
torch.save(model_quantized_final.state_dict(), 'resnet18_quantized.pth')
print("Modelo cuantizado guardado en resnet18_quantized.pth")

print("\nEvaluando modelo original...")
# Loader over the test split, shared by both evaluations below.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

def evaluate_model(model, loader, device):
    """Measure top-1 accuracy and mean forward-pass time of ``model``.

    Args:
        model: Callable that maps a batch of images to per-class scores.
        loader: Iterable yielding ``(images, labels, extra)`` triples; the
            third element is ignored.
        device: Device the images and labels are moved to before inference.

    Returns:
        tuple: ``(accuracy, avg_inference_time)``: accuracy in percent and
        the mean duration of the forward call in milliseconds per batch.
        Both are ``0.0`` when ``loader`` yields no samples.
    """
    correct = 0
    total = 0
    inference_times = []

    with torch.no_grad():
        for images, labels, _ in loader:
            images = images.to(device)
            labels = labels.to(device)

            # Only the forward call is timed. perf_counter is monotonic and
            # high resolution, so it is safe for measuring short intervals.
            start_time = time.perf_counter()
            outputs = model(images)
            inference_times.append(time.perf_counter() - start_time)

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # An empty loader would otherwise divide by zero and average an empty list.
    if total == 0:
        return 0.0, 0.0

    accuracy = 100. * correct / total
    avg_inference_time = float(np.mean(inference_times)) * 1000
    return accuracy, avg_inference_time

# Evaluate the float model first, then the quantized one, on the same loader.
acc_original, time_original = evaluate_model(model_original, test_loader, device)
print(f"Accuracy: {acc_original:.2f}%")
print(f"Tiempo promedio de inferencia: {time_original:.2f}ms por batch")

print("\nEvaluando modelo cuantizado...")
acc_quantized, time_quantized = evaluate_model(model_quantized_final, test_loader, device)
print(f"Accuracy: {acc_quantized:.2f}%")
print(f"Tiempo promedio de inferencia: {time_quantized:.2f}ms por batch")

# Side-by-side table; the last column is quantized minus original.
print("\n" + "="*80)
print("COMPARACION DE MODELOS")
print("="*80)
print(f"{'Metrica':<30} {'Original':<20} {'Cuantizado':<20} {'Diferencia':<15}")
print("-"*80)
print(f"{'Accuracy (%)':<30} {acc_original:<20.2f} {acc_quantized:<20.2f} {acc_quantized - acc_original:<15.2f}")
print(f"{'Tiempo (ms/batch)':<30} {time_original:<20.2f} {time_quantized:<20.2f} {time_quantized - time_original:<15.2f}")
print(f"{'Speedup':<30} {'-':<20} {time_original/time_quantized:<20.2f}x {'-':<15}")

# Checkpoint sizes on disk, converted from bytes to mebibytes.
size_original = os.path.getsize('etapa-2/resnet18_best.pth') / 1024 ** 2
size_quantized = os.path.getsize('resnet18_quantized.pth') / 1024 ** 2
print(f"{'Tamano (MB)':<30} {size_original:<20.2f} {size_quantized:<20.2f} {size_quantized - size_original:<15.2f}")
print("="*80)

# Plain-float copy of every figure so that the structure is JSON-serialisable.
results = dict(
    original=dict(
        accuracy=float(acc_original),
        inference_time_ms=float(time_original),
        size_mb=float(size_original),
    ),
    quantized=dict(
        accuracy=float(acc_quantized),
        inference_time_ms=float(time_quantized),
        size_mb=float(size_quantized),
        speedup=float(time_original / time_quantized),
    ),
)

import json
with open('resultados_optimizacion.json', 'w') as f:
    json.dump(results, f, indent=2)

print("\nResultados guardados en resultados_optimizacion.json")

### evaluar_pipeline.py


In [None]:
# Setup for the pipeline evaluation against manually annotated images.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Usando: {device}")

transform = get_val_transform()

print("Cargando modelos...")
yolo_model = YOLO('yolov8n.pt')

# The dataset object supplies the label tables (class count, index -> breed).
csv_file = 'dataset/dogs.csv'
temp_dataset = DogDataset(csv_file, root_dir='dataset', dataset_type='train', transform=transform)
num_classes = temp_dataset.num_classes

classifier = get_resnet18(num_classes, pretrained=False)
classifier.load_state_dict(
    torch.load('etapa-2/resnet18_best.pth', map_location=device)
)
classifier = classifier.to(device)
classifier.eval()

print("\nCargando anotaciones manuales...")
annotations_file = 'anotaciones_manuales.json'

# Without the ground-truth file nothing can be evaluated: explain the
# expected JSON layout and stop.
if not os.path.exists(annotations_file):
    expected_format = """
{
  "images": [
    {
      "file": "ruta/imagen1.jpg",
      "annotations": [
        {
          "bbox": [x1, y1, x2, y2],
          "breed": "nombre_raza"
        }
      ]
    }
  ]
}
    """
    print(f"\nERROR: No se encontro {annotations_file}")
    print("Por favor crea el archivo con las anotaciones manuales de 10 imagenes complejas.")
    print("Formato esperado:")
    print(expected_format)
    sys.exit(1)

with open(annotations_file, 'r') as f:
    ground_truth = json.load(f)

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is a sequence ``(x_min, y_min, x_max, y_max)``. The result is
    ``0.0`` when the boxes do not overlap or when the union has no area.
    """
    a_left, a_top, a_right, a_bottom = box1
    b_left, b_top, b_right, b_bottom = box2

    # Corners of the overlap rectangle (only meaningful if the boxes overlap).
    left, top = max(a_left, b_left), max(a_top, b_top)
    right, bottom = min(a_right, b_right), min(a_bottom, b_bottom)

    if right < left or bottom < top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    area_a = (a_right - a_left) * (a_bottom - a_top)
    area_b = (b_right - b_left) * (b_bottom - b_top)
    union = area_a + area_b - overlap

    if union > 0:
        return overlap / union
    return 0.0

print("\nEvaluando pipeline en imagenes anotadas manualmente...")

# Counters accumulated over all annotated images.
total_gt_boxes = 0
total_pred_boxes = 0
true_positives = 0
false_positives = 0
false_negatives = 0

iou_threshold = 0.5
ious = []
classification_correct = 0
classification_total = 0

for image_data in ground_truth['images']:
    img_path = image_data['file']
    gt_annotations = image_data['annotations']

    total_gt_boxes += len(gt_annotations)

    pil_image = Image.open(img_path).convert('RGB')
    results = yolo_model(pil_image, conf=0.25, verbose=False)

    # Step 1: collect class-16 detections and classify each crop.
    pred_boxes = []
    for result in results:
        for box in result.boxes:
            if int(box.cls[0]) != 16:
                continue

            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = float(box.conf[0])
            pixel_box = [int(x1), int(y1), int(x2), int(y2)]

            dog_crop = pil_image.crop(tuple(pixel_box))
            if min(dog_crop.width, dog_crop.height) < 10:
                continue

            img_tensor = transform(dog_crop).unsqueeze(0).to(device)

            with torch.no_grad():
                predicted_idx = classifier(img_tensor).max(1)[1].item()
            breed = temp_dataset.idx_to_label[predicted_idx]

            pred_boxes.append(dict(bbox=pixel_box, breed=breed, conf=conf, matched=False))

    total_pred_boxes += len(pred_boxes)

    # Step 2: give each ground-truth box the still-unmatched prediction with
    # the highest IoU; a prediction can be used for one box only.
    for gt_ann in gt_annotations:
        gt_box = gt_ann['bbox']
        gt_breed = gt_ann['breed']
        best_iou, best_match = 0.0, None

        for pred in pred_boxes:
            if pred['matched']:
                continue
            iou = calculate_iou(gt_box, pred['bbox'])
            if iou > best_iou:
                best_iou, best_match = iou, pred

        is_hit = best_match is not None and best_iou >= iou_threshold
        if not is_hit:
            false_negatives += 1
            continue

        true_positives += 1
        best_match['matched'] = True
        ious.append(best_iou)

        # Breed accuracy is measured on matched boxes only.
        classification_total += 1
        if best_match['breed'] == gt_breed:
            classification_correct += 1

    # Step 3: whatever is left unmatched is a false positive.
    false_positives += sum(1 for pred in pred_boxes if not pred['matched'])

# Detection metrics; every ratio falls back to 0 when its denominator is 0.
detected_total = true_positives + false_positives
expected_total = true_positives + false_negatives
precision = true_positives / detected_total if detected_total > 0 else 0
recall = true_positives / expected_total if expected_total > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
avg_iou = np.mean(ious) if ious else 0
classification_acc = classification_correct / classification_total if classification_total > 0 else 0

def calculate_ap(precisions, recalls):
    """Return the area under the precision envelope of a precision/recall curve.

    ``precisions`` and ``recalls`` are equal-length lists ordered along the
    curve. The curve is padded with (recall 0, precision 0) at the start and
    (recall 1, precision 0) at the end before integrating.
    """
    envelope = np.array([0] + precisions + [0])
    padded_recalls = np.array([0] + recalls + [1])

    # Walk right to left so that each point carries the best precision
    # reached at any later position.
    for pos in reversed(range(len(envelope) - 1)):
        if envelope[pos + 1] > envelope[pos]:
            envelope[pos] = envelope[pos + 1]

    # Only positions where recall changes contribute area.
    steps = np.where(padded_recalls[1:] != padded_recalls[:-1])[0] + 1
    recall_deltas = padded_recalls[steps] - padded_recalls[steps - 1]
    ap = np.sum(recall_deltas * envelope[steps])
    return ap

# --- Average precision over all annotated images (single class) ---
# Collect every class-16 detection together with its confidence and image.
all_predictions = []
for image_data in ground_truth['images']:
    img_path = image_data['file']
    pil_image = Image.open(img_path).convert('RGB')
    results = yolo_model(pil_image, conf=0.25, verbose=False)

    for result in results:
        for box in result.boxes:
            if int(box.cls[0]) != 16:
                continue
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
            conf = float(box.conf[0])
            all_predictions.append((conf, img_path, [int(x1), int(y1), int(x2), int(y2)]))

# Rank detections from most to least confident.
all_predictions.sort(key=lambda x: x[0], reverse=True)

# Index the ground truth by file once instead of scanning the list for every
# prediction; the first entry wins if a file is listed twice.
gt_by_file = {}
for image_data in ground_truth['images']:
    gt_by_file.setdefault(image_data['file'], image_data['annotations'])

# Ground-truth boxes already claimed by a higher-confidence detection, per
# image. A box may produce only one true positive: further detections of the
# same box are false positives, otherwise duplicates inflate AP and recall
# can exceed 1.
claimed_gt = {img_file: set() for img_file in gt_by_file}

tp_list = []
fp_list = []
for conf, img_path, pred_box in all_predictions:
    # Find the ground-truth box this detection overlaps most.
    best_iou = 0.0
    best_gt_idx = None
    for gt_idx, gt_ann in enumerate(gt_by_file.get(img_path, [])):
        iou = calculate_iou(gt_ann['bbox'], pred_box)
        if iou > best_iou:
            best_iou, best_gt_idx = iou, gt_idx

    is_true_positive = (
        best_gt_idx is not None
        and best_iou >= iou_threshold
        and best_gt_idx not in claimed_gt[img_path]
    )
    if is_true_positive:
        claimed_gt[img_path].add(best_gt_idx)

    tp_list.append(1 if is_true_positive else 0)
    fp_list.append(0 if is_true_positive else 1)

tp_cumsum = np.cumsum(tp_list)
fp_cumsum = np.cumsum(fp_list)

# The small epsilon keeps the division defined at every position.
precisions_curve = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-10)
recalls_curve = tp_cumsum / total_gt_boxes if total_gt_boxes > 0 else np.zeros_like(tp_cumsum, dtype=float)

ap = calculate_ap(precisions_curve.tolist(), recalls_curve.tolist())

# Console report of every figure computed above.
for report_line in (
    "\n" + "="*80,
    "RESULTADOS DE EVALUACION DEL PIPELINE",
    "="*80,
    f"Imagenes evaluadas:              {len(ground_truth['images'])}",
    f"Total GT boxes:                  {total_gt_boxes}",
    f"Total predicciones:              {total_pred_boxes}",
    f"True Positives:                  {true_positives}",
    f"False Positives:                 {false_positives}",
    f"False Negatives:                 {false_negatives}",
    f"\nPrecision:                       {precision:.4f}",
    f"Recall:                          {recall:.4f}",
    f"F1-Score:                        {f1_score:.4f}",
    f"Average IoU:                     {avg_iou:.4f}",
    f"mAP@0.5:                         {ap:.4f}",
    f"\nClassification Accuracy:         {classification_acc:.4f} ({classification_correct}/{classification_total})",
    "="*80,
):
    print(report_line)

# Same figures as a JSON document: counts are stored as they are, ratios are
# converted to plain floats first.
results_dict = {
    'num_images': len(ground_truth['images']),
    'total_gt_boxes': total_gt_boxes,
    'total_predictions': total_pred_boxes,
    'true_positives': true_positives,
    'false_positives': false_positives,
    'false_negatives': false_negatives,
}
results_dict.update({
    'precision': float(precision),
    'recall': float(recall),
    'f1_score': float(f1_score),
    'average_iou': float(avg_iou),
    'mAP@0.5': float(ap),
    'classification_accuracy': float(classification_acc),
})

with open('resultados_evaluacion.json', 'w') as f:
    json.dump(results_dict, f, indent=2)

print("\nResultados guardados en resultados_evaluacion.json")

