In [1]:
import os
import shutil
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm
from pycocotools.coco import COCO

In [2]:
# Initial settings
num_clusters = 80  # Number of COCO classes (also used as the K-Means cluster count)
# Prefer the GPU whenever CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# COCO dataset locations: training images folder and the instances annotation file
img_dir, ann_file = (
    os.path.join("F:/COCO-Dataset/", "train2017/train2017/"),
    os.path.join("F:/COCO-Dataset/", "annotations2017/annotations2017/instances_train2017.json"),
)

In [4]:
# Output folders, keyed by image group.
# "raizes" (cluster roots) and "fronteiras" (cluster frontiers) both point at the
# train folders; "restante" (everything else) points at the pool folders.
output_dirs = dict(
    raizes=dict(
        images="F:/COCO-Dataset/train2017/clustering/train/images",
        labels="F:/COCO-Dataset/train2017/clustering/train/labels",
    ),
    fronteiras=dict(
        images="F:/COCO-Dataset/train2017/clustering/train/images",
        labels="F:/COCO-Dataset/train2017/clustering/train/labels",
    ),
    restante=dict(
        images="F:/COCO-Dataset/train2017/clustering/pool/images",
        labels="F:/COCO-Dataset/train2017/clustering/pool/labels",
    ),
)


In [5]:
# Create the output directory tree (no error if the folders already exist)
for split_dirs in output_dirs.values():
    for kind in ("images", "labels"):
        os.makedirs(split_dirs[kind], exist_ok=True)

# 1. Load the COCO dataset
coco = COCO(ann_file)
img_ids = coco.getImgIds()

# Map each COCO category id to a 0-based YOLO class index, ordered by category id
categories = sorted(coco.loadCats(coco.getCatIds()), key=lambda cat: cat['id'])
coco_id_to_yolo_id = dict((cat['id'], position) for position, cat in enumerate(categories))

print(f"Total de imagens: {len(img_ids)}")
print(f"Total de categorias: {len(categories)}")

# 2. Load ResNet-18 and keep every child module except the last one,
#    so the network outputs features instead of class scores
feature_layers = list(models.resnet18(pretrained=True).children())[:-1]
model = nn.Sequential(*feature_layers)
model = model.to(device)
model.eval()

# Image preprocessing applied before the forward pass
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

loading annotations into memory...
Done (t=11.53s)
creating index...
index created!
Total de imagens: 118287
Total de categorias: 80




In [6]:
# 3. Embedding extraction
def extract_embeddings(img_paths):
    """Compute one feature vector per image with the module-level ``model``.

    Each image is opened, converted to RGB, run through the module-level
    ``transform`` and forwarded through ``model`` as a batch of one with
    gradients disabled. Images that fail at any step are reported on stdout
    and skipped (best effort), so the result can be shorter than the input.

    Args:
        img_paths: sequence of image file paths.

    Returns:
        A tuple ``(embeddings, valid_indices)``:
        ``embeddings`` is a NumPy array stacking the flattened feature vector of
        every successfully processed image, in input order;
        ``valid_indices`` lists the positions in ``img_paths`` those rows came
        from, so callers can realign their own per-image lists.
    """
    embeddings = []
    valid_indices = []
    # Make inference mode explicit rather than relying on an earlier cell having
    # called .eval(): train-mode layers would behave differently per batch.
    model.to(device).eval()
    for idx, path in enumerate(tqdm(img_paths, desc="Extraindo embeddings")):
        try:
            # The context manager releases the file handle deterministically,
            # including when decoding or conversion raises.
            with Image.open(path) as img:
                img_tensor = transform(img.convert('RGB')).unsqueeze(0).to(device)

            with torch.no_grad():
                features = model(img_tensor)

            embeddings.append(features.squeeze().cpu().numpy().flatten())
            valid_indices.append(idx)
        except Exception as e:
            # Deliberate best effort: one unreadable image must not abort a long run.
            print(f"Erro ao processar {path}: {str(e)}")

    return np.array(embeddings), valid_indices

In [7]:
# 4. Resolve the on-disk path of every COCO image and keep only the files that exist
candidate_files = [
    (img_id, os.path.join(img_dir, coco.loadImgs(img_id)[0]["file_name"]))
    for img_id in img_ids
]
existing_files = [(img_id, path) for img_id, path in candidate_files if os.path.exists(path)]

# Two parallel lists: position k in one corresponds to position k in the other
valid_img_ids = [img_id for img_id, _ in existing_files]
img_paths = [path for _, path in existing_files]

print(f"Imagens válidas: {len(img_paths)}/{len(img_ids)}")

Imagens válidas: 118287/118287


In [8]:
# 5. Extract the embeddings (runs on the GPU when one is available)
embeddings, valid_indices = extract_embeddings(img_paths)

# Drop the images that failed so both lists stay aligned with the rows of `embeddings`
valid_img_ids, img_paths = (
    [valid_img_ids[pos] for pos in valid_indices],
    [img_paths[pos] for pos in valid_indices],
)

Extraindo embeddings: 100%|██████████| 118287/118287 [47:16<00:00, 41.71it/s] 


In [9]:
# 6. K-Means clustering of the embeddings
kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
kmeans.fit(embeddings)
clusters = kmeans.labels_            # cluster label assigned to each embedding row
centroids = kmeans.cluster_centers_  # one centre per cluster

In [10]:
# 7. Identify the root and frontier images of every cluster
#
# For each cluster:
#   * root      = the member closest to the cluster's own centroid
#   * frontiers = the fraction of members with the largest ratio
#                 (distance to own centroid) / (distance to nearest other centroid)
#   * rest      = every other member
# All stored values are row indices into `embeddings`.
frontier_fraction = 0.2  # share of each cluster labelled as frontier

raizes = {}
fronteiras = {i: [] for i in range(num_clusters)}
resto = []

for cluster_id in range(num_clusters):
    # Rows of `embeddings` assigned to this cluster
    indices = np.where(clusters == cluster_id)[0]

    if len(indices) == 0:
        continue

    # Distance of every member to its own centroid
    cluster_embeddings = embeddings[indices]
    distancias = np.linalg.norm(cluster_embeddings - centroids[cluster_id], axis=1)

    # 7.1 Root: member closest to the centroid
    raiz_idx = indices[np.argmin(distancias)]
    raizes[cluster_id] = raiz_idx

    # 7.2 Distance of every member to the nearest *other* centroid.
    # A running minimum avoids holding one array per centroid, and starting from
    # +inf keeps this well defined when there is only one cluster.
    min_outras_distancias = np.full(len(indices), np.inf, dtype=distancias.dtype)
    for other_id in range(num_clusters):
        if other_id != cluster_id:
            dist = np.linalg.norm(cluster_embeddings - centroids[other_id], axis=1)
            np.minimum(min_outras_distancias, dist, out=min_outras_distancias)

    # 7.3 Frontier score: a higher ratio means the member sits closer to a
    # neighbouring cluster. A zero denominator yields inf; 0/0 yields NaN and is
    # mapped to inf, so such points still rank as frontier, without warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        razoes = distancias / min_outras_distancias
    razoes[np.isnan(razoes)] = np.inf

    # Mark the members with the highest ratios (at least one per cluster)
    n_fronteira = max(1, int(frontier_fraction * len(indices)))
    is_fronteira = np.zeros(len(indices), dtype=bool)
    is_fronteira[razoes.argsort()[-n_fronteira:]] = True

    # 7.4 Classify every member. The boolean mask gives O(1) membership tests
    # instead of scanning an index array for each image.
    for pos, idx in enumerate(indices):
        if idx == raiz_idx:
            continue  # The root has already been recorded
        elif is_fronteira[pos]:
            fronteiras[cluster_id].append(idx)
        else:
            resto.append(idx)

In [11]:
# 8. Export a group of images together with their YOLO label files
def _coco_bbox_to_yolo(bbox, img_width, img_height):
    """Turn an ``[x, y, w, h]`` corner-based box into normalised ``(xc, yc, w, h)``."""
    x, y, w, h = bbox
    return (
        (x + w / 2) / img_width,
        (y + h / 2) / img_height,
        w / img_width,
        h / img_height,
    )


def process_group(img_indices, group_name):
    """Copy the images of one group and write one YOLO label file per image.

    For every index the image is copied from the module-level ``img_dir`` into
    the group's ``images`` folder, and a ``.txt`` file with the same base name
    is written into the group's ``labels`` folder. Each label row has the form
    ``class x_center y_center width height``, with coordinates divided by the
    image width/height recorded in the COCO metadata.

    Annotations are skipped when they have no ``bbox``, a non-positive ``area``,
    a category missing from ``coco_id_to_yolo_id``, or a box whose width or
    height is not positive. An image with no usable annotation still gets an
    (empty) label file.

    Args:
        img_indices: iterable of positions into the module-level ``valid_img_ids``.
        group_name: key of ``output_dirs`` selecting the destination folders.
    """
    image_dir = output_dirs[group_name]["images"]
    label_dir = output_dirs[group_name]["labels"]

    for idx in tqdm(img_indices, desc=f"Processando {group_name}"):
        img_id = valid_img_ids[idx]
        img_info = coco.loadImgs(img_id)[0]
        img_file = img_info["file_name"]
        # Image dimensions are constant per image, so read them once here
        img_width = img_info["width"]
        img_height = img_info["height"]

        # Copy the image
        shutil.copy(os.path.join(img_dir, img_file), os.path.join(image_dir, img_file))

        # Derive the label name from the base name. str.replace(".jpg", ".txt")
        # would leave other extensions untouched and would also rewrite a ".jpg"
        # occurring in the middle of a file name.
        label_file = os.path.splitext(img_file)[0] + ".txt"
        label_path = os.path.join(label_dir, label_file)

        annotations = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

        with open(label_path, "w") as f:
            for ann in annotations:
                if "bbox" not in ann or ann["area"] <= 0:
                    continue

                # Convert the COCO category id to the YOLO class index
                yolo_class_id = coco_id_to_yolo_id.get(ann['category_id'])
                if yolo_class_id is None:
                    continue

                x_center, y_center, w_norm, h_norm = _coco_bbox_to_yolo(
                    ann["bbox"], img_width, img_height
                )
                # A box with no extent is not a usable detection target
                if w_norm <= 0 or h_norm <= 0:
                    continue

                # Write the row in YOLO format
                f.write(f"{yolo_class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n")

In [12]:
# 9. Export every group
# Frontiers are stored per cluster; merge them into one flat list first
all_fronteiras = []
for cluster_members in fronteiras.values():
    all_fronteiras.extend(cluster_members)

# Roots (one per non-empty cluster)
process_group(raizes.values(), "raizes")

# Frontiers (all clusters together)
process_group(all_fronteiras, "fronteiras")

# Everything else
process_group(resto, "restante")

Processando raizes: 100%|██████████| 80/80 [00:01<00:00, 68.87it/s]
Processando fronteiras: 100%|██████████| 23624/23624 [08:40<00:00, 45.36it/s] 
Processando restante: 100%|██████████| 94583/94583 [38:55<00:00, 40.50it/s]  
