## Treinamento do Modelo - 15 Épocas

In [None]:
import os
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import v2 as T
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import json
from sklearn.model_selection import train_test_split

print(f"Versão do PyTorch: {torch.__version__}")
print(f"Versão do Torchvision: {torchvision.__version__}")

# --- CARREGANDO O DATASET ---
class ArthropodDetectionDataset(Dataset):
    """Object-detection dataset pairing each image with a JSON annotation file.

    Each item is ``(image, target)`` where ``target`` holds ``boxes``
    (XYXY, float32), ``labels`` (int64) and ``image_id``.

    Args:
        image_paths: Sequence of image file paths.
        annotation_paths: Sequence of annotation JSON paths, aligned with
            ``image_paths`` by index.
        transforms: Callable applied as ``transforms(img, target)``, or a
            falsy value to skip transformation.
    """

    def __init__(self, image_paths, annotation_paths, transforms):
        self.transforms = transforms
        self.image_paths = image_paths
        self.annotation_paths = annotation_paths
        # Class ids start at 1; id 0 is presumably left for the detector's
        # background class (num_classes is 8 elsewhere in this file).
        self.class_map = {'Araneae': 1, 'Coleoptera': 2, 'Diptera': 3, 'Hemiptera': 4, 'Hymenoptera': 5, 'Lepidoptera': 6, 'Odonata': 7}

    def __getitem__(self, idx):
        """Load image ``idx`` and build its detection target."""
        # Function-scope import so the class does not depend on a new
        # module-level import being present in the notebook cell.
        from torchvision import tv_tensors

        img_path = self.image_paths[idx]
        annotation_path = self.annotation_paths[idx]
        img = Image.open(img_path).convert("RGB")
        boxes, labels = self.parse_annotation(annotation_path)

        if not boxes:
            # Keep the (0, 4) shape so images without objects are still valid.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros(0, dtype=torch.int64)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

        # v2 transforms only update boxes that are tagged as BoundingBoxes;
        # a plain tensor is passed through untouched, which would leave the
        # boxes un-flipped whenever a geometric transform flips the image.
        width, height = img.size
        boxes = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(height, width))

        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}

        if self.transforms:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_paths)

    def parse_annotation(self, path):
        """Read one annotation JSON and return ``(boxes, labels)`` lists.

        Boxes are ``[xmin, ymin, xmax, ymax]`` built from each region's
        ``left/top/width/height``. Regions that are malformed, carry a tag
        missing from ``class_map``, or have a non-positive width/height are
        skipped individually so one bad region does not discard the rest.
        An unreadable file yields two empty lists and emits a warning.
        """
        import warnings

        boxes, labels = [], []
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
            regions = list(data['regions'])
        except (OSError, ValueError, KeyError, TypeError) as err:
            warnings.warn(f"Could not read annotation '{path}': {err!r}")
            return boxes, labels

        for region in regions:
            try:
                label_id = self.class_map.get(region['tags'][0])
                bbox_data = region['boundingBox']
                xmin, ymin = bbox_data['left'], bbox_data['top']
                xmax, ymax = xmin + bbox_data['width'], ymin + bbox_data['height']
                degenerate = xmax <= xmin or ymax <= ymin
            except (KeyError, IndexError, TypeError):
                continue
            if label_id is None or degenerate:
                continue
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label_id)
        return boxes, labels

def get_transform(train):
    """Build the preprocessing pipeline.

    The image is converted to a tensor image and scaled to float; when
    ``train`` is truthy a random horizontal flip (p=0.5) is appended as the
    only augmentation.
    """
    pipeline = [T.ToImage(), T.ToDtype(torch.float, scale=True)]
    if train:
        pipeline.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(pipeline)

def get_model(num_classes):
    """Return a Faster R-CNN (MobileNetV3-Large FPN) with a fresh box head.

    The detector is created with ``weights="DEFAULT"`` and its box predictor
    is replaced by a ``FastRCNNPredictor`` sized for ``num_classes`` outputs.
    """
    detector = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")
    roi_heads = detector.roi_heads
    # Read the feature width of the existing head before swapping it out.
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list like ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; no stacking is performed.
    """
    columns = zip(*batch)
    return tuple(columns)

def train_one_epoch(model, optimizer, data_loader, device, epoch_num):
    """Run one training epoch over ``data_loader``.

    For every batch the images and target tensors are moved to ``device``,
    the model's loss dictionary is summed into a single loss, and one
    optimizer step is taken. Progress is printed every 50 batches;
    ``epoch_num`` is only used in that message.
    """
    log_every = 50
    model.train()
    for step, (images, targets) in enumerate(data_loader, start=1):
        images = [img.to(device) for img in images]
        targets = [{key: value.to(device) for key, value in tgt.items()} for tgt in targets]

        # In training mode the model is called with targets and returns a
        # dict of loss terms; their sum is what gets back-propagated.
        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if step % log_every != 0:
            continue
        print(f"Época [{epoch_num}] | Batch [{step}/{len(data_loader)}] | Loss: {total_loss.item():.4f}")

# --- MAIN PARAMETERS ---
# Root folder of the dataset; each sub-folder is scanned for images and an
# 'annotations' directory by the mapping loop below.
DATASET_PATH = '/kaggle/input/arthropod-taxonomy-orders-object-detection-dataset/ArTaxOr'

# Directory where a checkpoint is written after every epoch.
CHECKPOINT_DIR = '/kaggle/working/'
# Path of a checkpoint to resume from; None starts training from scratch.
ultimo_checkpoint_salvo = None
num_epochs = 15
batch_size = 4

all_image_paths = []
all_annotation_paths = []
# Count annotation files that could not be used instead of hiding them.
skipped_annotations = 0

print("Mapeando todos os arquivos do dataset...")
for class_folder in sorted(os.listdir(DATASET_PATH)):
    class_path = os.path.join(DATASET_PATH, class_folder)
    if not os.path.isdir(class_path):
        continue
    annotation_folder_path = os.path.join(class_path, 'annotations')
    if not os.path.isdir(annotation_folder_path):
        continue

    # Map image file name (taken from the JSON 'asset.name' field) to the
    # annotation file that describes it.
    annotation_map = {}
    for json_file in os.listdir(annotation_folder_path):
        json_path = os.path.join(annotation_folder_path, json_file)
        try:
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
            asset_name = data['asset']['name']
            annotation_map[asset_name] = json_path
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable or malformed annotation: skip it, but keep count.
            skipped_annotations += 1

    # NOTE(review): os.listdir order is not guaranteed, and the train/test
    # split below depends on list order. Left unsorted on purpose so the
    # order matches the other cells that rebuild this same list.
    for img_file in os.listdir(class_path):
        if img_file.lower().endswith(('.jpg', '.jpeg', '.png')) and img_file in annotation_map:
            all_image_paths.append(os.path.join(class_path, img_file))
            all_annotation_paths.append(annotation_map[img_file])

if skipped_annotations:
    print(f"Aviso: {skipped_annotations} arquivo(s) de anotação ignorado(s) por erro de leitura.")
print(f"Mapeamento concluído! Total de {len(all_image_paths)} imagens encontradas.")

# Split the (image, annotation) path pairs: 90% train, 10% test.
# Pairs are zipped first so each image stays with its annotation.
paths = list(zip(all_image_paths, all_annotation_paths))
# Fixed random_state so the same input list yields the same split.
train_paths, test_paths = train_test_split(paths, test_size=0.1, random_state=42)
train_img_paths, train_ann_paths = zip(*train_paths)
test_img_paths, test_ann_paths = zip(*test_paths)

print(f"Dados divididos em: {len(train_img_paths)} para treino e {len(test_img_paths)} para teste.")

# --- DATASET AND DATALOADER SETUP ---
# Training split: get_transform(train=True) adds the random horizontal flip.
dataset_train = ArthropodDetectionDataset(
    image_paths=list(train_img_paths),
    annotation_paths=list(train_ann_paths),
    transforms=get_transform(train=True)
)
# Test split: tensor conversion only, no augmentation.
dataset_test = ArthropodDetectionDataset(
    image_paths=list(test_img_paths),
    annotation_paths=list(test_ann_paths),
    transforms=get_transform(train=False)
)

# collate_fn keeps images and targets as tuples instead of stacking them.
data_loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn, num_workers=2)
# The test loader is built here but not consumed by the training loop below.
data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=2)

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Usando o dispositivo: {device}")

# --- MODEL AND OPTIMIZER CREATION ---
# 8 outputs: the 7 ids in the dataset's class_map plus id 0, which the map
# leaves unused (presumably the background class).
num_classes = 8
model = get_model(num_classes).to(device)
# Only parameters that require gradients are handed to the optimizer.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# --- CHECKPOINT LOADING ---
# Resume from `ultimo_checkpoint_salvo` when it points to an existing file;
# otherwise training starts at epoch index 0.
start_epoch = 0
if ultimo_checkpoint_salvo and os.path.exists(ultimo_checkpoint_salvo):
    print(f"Carregando checkpoint de '{ultimo_checkpoint_salvo}'...")
    # map_location lets a checkpoint saved on GPU be restored in a CPU-only
    # session (and vice versa), matching how the other cells load checkpoints.
    checkpoint = torch.load(ultimo_checkpoint_salvo, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # The stored value is the 1-based number of the last finished epoch, so
    # it is also the 0-based index of the next epoch to run.
    start_epoch = checkpoint['epoch']
    print(f"Checkpoint carregado. Recomeçando o treinamento da época {start_epoch + 1}.")
else:
    print("Nenhum checkpoint válido encontrado. Começando o treinamento do zero.")

# --- TRAINING LOOP ---
print("\n--- INICIANDO O TREINAMENTO ---")
for epoch in range(start_epoch, num_epochs):
    # `epoch` is the 0-based index; `epoch_num` is the 1-based number used in
    # log messages and checkpoint file names.
    epoch_num = epoch + 1
    train_one_epoch(model, optimizer, data_loader_train, device, epoch_num)
    print(f"--- FIM DA ÉPOCA {epoch_num} ---\n")

    # Save model and optimizer state after every epoch so training can resume.
    checkpoint_save_path = os.path.join(CHECKPOINT_DIR, f'modelo_epoca_{epoch_num}.pth')
    torch.save({
        'epoch': epoch_num,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, checkpoint_save_path)
    print(f"Checkpoint da época {epoch_num} salvo em: {checkpoint_save_path}")

print("--- TREINAMENTO CONCLUÍDO! ---")

## Teste do Modelo - Modelo 6

In [None]:
import os
import torch
import torchvision
import numpy as np
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import v2 as T
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import json
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torchvision.ops import box_iou, nms

print(f"Versão do PyTorch: {torch.__version__}")
print(f"Versão do Torchvision: {torchvision.__version__}")

class ArthropodDetectionDataset(Dataset):
    """Object-detection dataset pairing each image with a JSON annotation file.

    Each item is ``(image, target)`` where ``target`` holds ``boxes``
    (XYXY, float32), ``labels`` (int64) and ``image_id``.

    Args:
        image_paths: Sequence of image file paths.
        annotation_paths: Sequence of annotation JSON paths, aligned with
            ``image_paths`` by index.
        transforms: Callable applied as ``transforms(img, target)``, or a
            falsy value to skip transformation.
    """

    def __init__(self, image_paths, annotation_paths, transforms):
        self.transforms = transforms
        self.image_paths = image_paths
        self.annotation_paths = annotation_paths
        # Class ids start at 1; id 0 is presumably left for the detector's
        # background class (num_classes is 8 elsewhere in this file).
        self.class_map = {'Araneae': 1, 'Coleoptera': 2, 'Diptera': 3, 'Hemiptera': 4, 'Hymenoptera': 5, 'Lepidoptera': 6, 'Odonata': 7}

    def __getitem__(self, idx):
        """Load image ``idx`` and build its detection target."""
        # Function-scope import so the class does not depend on a new
        # module-level import being present in the notebook cell.
        from torchvision import tv_tensors

        img_path = self.image_paths[idx]
        annotation_path = self.annotation_paths[idx]
        img = Image.open(img_path).convert("RGB")
        boxes, labels = self.parse_annotation(annotation_path)

        if not boxes:
            # Keep the (0, 4) shape so images without objects are still valid.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros(0, dtype=torch.int64)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

        # v2 transforms only update boxes that are tagged as BoundingBoxes;
        # a plain tensor is passed through untouched, so tagging keeps boxes
        # and image consistent if a geometric transform is ever applied.
        width, height = img.size
        boxes = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(height, width))

        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([idx])}

        if self.transforms:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_paths)

    def parse_annotation(self, path):
        """Read one annotation JSON and return ``(boxes, labels)`` lists.

        Boxes are ``[xmin, ymin, xmax, ymax]`` built from each region's
        ``left/top/width/height``. Regions that are malformed, carry a tag
        missing from ``class_map``, or have a non-positive width/height are
        skipped individually so one bad region does not discard the rest.
        An unreadable file yields two empty lists and emits a warning.
        """
        import warnings

        boxes, labels = [], []
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
            regions = list(data['regions'])
        except (OSError, ValueError, KeyError, TypeError) as err:
            warnings.warn(f"Could not read annotation '{path}': {err!r}")
            return boxes, labels

        for region in regions:
            try:
                label_id = self.class_map.get(region['tags'][0])
                bbox_data = region['boundingBox']
                xmin, ymin = bbox_data['left'], bbox_data['top']
                xmax, ymax = xmin + bbox_data['width'], ymin + bbox_data['height']
                degenerate = xmax <= xmin or ymax <= ymin
            except (KeyError, IndexError, TypeError):
                continue
            if label_id is None or degenerate:
                continue
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label_id)
        return boxes, labels

def get_transform(train):
    """Build the evaluation preprocessing pipeline.

    Converts the input to a tensor image and scales it to float. ``train``
    is accepted for signature compatibility but is not used here.
    """
    return T.Compose([T.ToImage(), T.ToDtype(torch.float, scale=True)])

def get_model(num_classes):
    """Return an uninitialised Faster R-CNN (MobileNetV3-Large FPN).

    Built with ``weights=None``; the box predictor is replaced by a
    ``FastRCNNPredictor`` sized for ``num_classes`` outputs so a saved
    state dict with that head can be loaded afterwards.
    """
    detector = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=None)
    roi_heads = detector.roi_heads
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list like ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; no stacking is performed.
    """
    columns = zip(*batch)
    return tuple(columns)

# Root folder of the dataset; each sub-folder is scanned for images and an
# 'annotations' directory.
DATASET_PATH = '/kaggle/input/arthropod-taxonomy-orders-object-detection-dataset/ArTaxOr'

all_image_paths = []
all_annotation_paths = []
# Count annotation files that could not be used instead of hiding them.
skipped_annotations = 0
print("Mapeando todos os arquivos do dataset...")
for class_folder in sorted(os.listdir(DATASET_PATH)):
    class_path = os.path.join(DATASET_PATH, class_folder)
    if not os.path.isdir(class_path):
        continue
    annotation_folder_path = os.path.join(class_path, 'annotations')
    if not os.path.isdir(annotation_folder_path):
        continue
    # Map image file name (taken from the JSON 'asset.name' field) to the
    # annotation file that describes it.
    annotation_map = {}
    for json_file in os.listdir(annotation_folder_path):
        json_path = os.path.join(annotation_folder_path, json_file)
        try:
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
            asset_name = data['asset']['name']
            annotation_map[asset_name] = json_path
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable or malformed annotation: skip it, but keep count.
            skipped_annotations += 1
    # NOTE(review): os.listdir order is not guaranteed, and the split that
    # follows depends on list order. Left unsorted on purpose so the order
    # matches the training cell that built the original split.
    for img_file in os.listdir(class_path):
        if img_file.lower().endswith(('.jpg', '.jpeg', '.png')) and img_file in annotation_map:
            all_image_paths.append(os.path.join(class_path, img_file))
            all_annotation_paths.append(annotation_map[img_file])

if skipped_annotations:
    print(f"Aviso: {skipped_annotations} arquivo(s) de anotação ignorado(s) por erro de leitura.")

# Rebuild the train/test split with the same test_size and random_state used
# by the training cell; only the test part is kept.
# NOTE(review): this reproduces the training-time split only if
# all_image_paths is built in the same order as it was there.
print("Recriando a divisão de treino/teste...")
paths = list(zip(all_image_paths, all_annotation_paths))
_, test_paths = train_test_split(paths, test_size=0.1, random_state=42)
test_img_paths, test_ann_paths = zip(*test_paths)

dataset_test = ArthropodDetectionDataset(
    image_paths=list(test_img_paths),
    annotation_paths=list(test_ann_paths),
    transforms=get_transform(train=False)
)
# One image per batch, in fixed order.
data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn, num_workers=2)
print(f"Conjunto de teste carregado com {len(dataset_test)} imagens.")

# Checkpoint to evaluate (the file name indicates epoch 6).
CHECKPOINT_PATH = '/kaggle/input/modelo-6/modelo_epoca_6.pth'

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 8

model = get_model(num_classes)
model.to(device)

# Restore the trained weights; a missing checkpoint is a hard error because
# evaluating an untrained model would be meaningless.
if os.path.exists(CHECKPOINT_PATH):
    print(f"\nCarregando checkpoint de '{CHECKPOINT_PATH}'...")
    # map_location remaps the saved tensors onto the current device.
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print("Modelo da época 6 carregado com sucesso!")
else:
    raise FileNotFoundError(f"ERRO: Checkpoint '{CHECKPOINT_PATH}' não encontrado. Verifique o caminho no painel 'Input'.")

def evaluate_model_refined(model, data_loader, device, score_threshold=0.5, iou_thresholds=np.arange(0.5, 1.0, 0.05)):
    """Score a detector by per-image precision at several IoU thresholds.

    For each image, predictions with score below ``score_threshold`` are
    dropped and NMS (IoU 0.5) is applied. Each remaining prediction is
    matched to the same-class ground-truth box it overlaps most; it counts
    as a true positive when that IoU reaches the threshold and the box has
    not already been claimed by an earlier prediction. Per-image precision
    is ``tp / number_of_predictions``.

    NOTE: the returned keys keep their historical "mAP" names, but the
    values are means of per-image precision at a fixed score threshold, not
    areas under a precision-recall curve.

    Args:
        model: Detection model returning dicts with ``boxes``, ``labels``
            and ``scores`` in eval mode.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images are moved to before inference.
        score_threshold: Minimum score for a prediction to be kept.
        iou_thresholds: IoU thresholds to evaluate.

    Returns:
        Dict with ``"mAP@50"`` (mean precision at the threshold equal to
        0.5, or 0 if absent or empty) and ``"mAP@50:95"`` (mean over all
        thresholds that received at least one image, or 0 if none).
    """
    model.eval()
    iou_thresholds = list(iou_thresholds)
    precisions_per_threshold = {thr: [] for thr in iou_thresholds}

    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Avaliando modelo refinado", total=len(data_loader)):
            images = [img.to(device) for img in images]
            outputs = model(images)
            outputs = [{k: v.to("cpu") for k, v in t.items()} for t in outputs]
            targets = [{k: v.to("cpu") for k, v in t.items()} for t in targets]

            for output, target in zip(outputs, targets):
                gt_boxes = target["boxes"]
                gt_labels = target["labels"]

                keep = output["scores"] >= score_threshold
                pred_boxes = output["boxes"][keep]
                pred_labels = output["labels"][keep]
                scores = output["scores"][keep]

                if len(pred_boxes) > 0:
                    # nms returns indices sorted by decreasing score, so the
                    # greedy matching below visits confident boxes first.
                    keep_idx = nms(pred_boxes, scores, 0.5)
                    pred_boxes = pred_boxes[keep_idx]
                    pred_labels = pred_labels[keep_idx]

                # Nothing to detect and nothing detected: image is not scored.
                if len(gt_boxes) == 0 and len(pred_boxes) == 0:
                    continue

                # Only false positives, or only misses: precision counts as 0.
                if len(pred_boxes) == 0 or len(gt_boxes) == 0:
                    for thr in iou_thresholds:
                        precisions_per_threshold[thr].append(0)
                    continue

                # The IoU matrix does not depend on the threshold, so compute
                # it once. Pairs with different classes are masked to -1 so a
                # wrong-class detection can never count as a true positive.
                ious = box_iou(pred_boxes, gt_boxes)
                same_class = pred_labels[:, None] == gt_labels[None, :]
                ious = ious.masked_fill(~same_class, -1.0)
                best_ious, best_gt = ious.max(dim=1)
                best_ious = best_ious.tolist()
                best_gt = best_gt.tolist()

                for thr in iou_thresholds:
                    matched_gt = set()
                    tp = 0
                    for iou, gt_idx in zip(best_ious, best_gt):
                        if iou >= thr and gt_idx not in matched_gt:
                            tp += 1
                            matched_gt.add(gt_idx)
                    # Every kept prediction is either a TP or an FP, so the
                    # denominator is the (non-zero) prediction count.
                    precisions_per_threshold[thr].append(tp / len(pred_boxes))

    # Locate the 0.5 threshold with a tolerance rather than an exact float key.
    at_50 = [
        np.mean(values)
        for thr, values in precisions_per_threshold.items()
        if values and np.isclose(thr, 0.5)
    ]
    per_threshold_means = [np.mean(values) for values in precisions_per_threshold.values() if values]

    results = {
        "mAP@50": at_50[0] if at_50 else 0,
        "mAP@50:95": np.mean(per_threshold_means) if per_threshold_means else 0
    }
    return results

# Evaluate the loaded checkpoint on the test loader, keeping predictions with
# score >= 0.5, then print the two summary values.
refined_results = evaluate_model_refined(model, data_loader_test, device, score_threshold=0.5)

print("\n==================== RESULTADOS REFINADOS (Época 6) ====================")
print(f"mAP@50:     {refined_results['mAP@50']:.4f}")
print(f"mAP@50:95:  {refined_results['mAP@50:95']:.4f}")
print("=======================================================================")

## Exemplo do Modelo - Modelo 6

In [None]:
import os
import torch
import torchvision
import random
import numpy as np
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import v2 as T
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import json
from torchvision.utils import draw_bounding_boxes
import matplotlib.font_manager as fm
import matplotlib.patches as patches
print(f"Versão do PyTorch: {torch.__version__}")
print(f"Versão do Torchvision: {torchvision.__version__}")

class SimpleImageDataset(torch.utils.data.Dataset):
    """Image-only dataset that yields ``(image, path)`` pairs.

    Args:
        image_paths: Sequence of image file paths.
        transforms: Optional callable invoked as ``transforms(img, None)``;
            only its first return value is kept.
    """

    def __init__(self, image_paths, transforms=None):
        self.transforms = transforms
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        picture = Image.open(path).convert("RGB")
        if not self.transforms:
            return picture, path
        # The pipeline is called with a placeholder target and returns a pair.
        picture, _ = self.transforms(picture, None)
        return picture, path

def get_transform(train):
    """Build the inference preprocessing pipeline.

    Converts the input to a tensor image and scales it to float. ``train``
    is accepted for signature compatibility but is not used here.
    """
    return T.Compose([T.ToImage(), T.ToDtype(torch.float, scale=True)])

def get_model(num_classes):
    """Return an uninitialised Faster R-CNN (MobileNetV3-Large FPN).

    Built with ``weights=None``; the box predictor is replaced by a
    ``FastRCNNPredictor`` sized for ``num_classes`` outputs so a saved
    state dict with that head can be loaded afterwards.
    """
    detector = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=None)
    roi_heads = detector.roi_heads
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_dim, num_classes)
    return detector

# Root folder of the dataset and the checkpoint used for the demo.
DATASET_PATH = '/kaggle/input/arthropod-taxonomy-orders-object-detection-dataset/ArTaxOr'

CHECKPOINT_PATH = '/kaggle/input/modelo-6/modelo_epoca_6.pth'

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 8

# Build the image list the same way the training and evaluation cells do:
# only images that have a matching annotation file are kept. Without this
# filter the list (and therefore the split below) could differ from the one
# used for training, and a "test" image could really be a training image.
all_image_paths = []
for class_folder in sorted(os.listdir(DATASET_PATH)):
    class_path = os.path.join(DATASET_PATH, class_folder)
    if not os.path.isdir(class_path):
        continue
    annotation_folder_path = os.path.join(class_path, 'annotations')
    if not os.path.isdir(annotation_folder_path):
        continue

    # Names of the images described by a readable annotation file.
    annotated_names = set()
    for json_file in os.listdir(annotation_folder_path):
        try:
            with open(os.path.join(annotation_folder_path, json_file), encoding="utf-8") as f:
                annotated_names.add(json.load(f)['asset']['name'])
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable annotation: its image is excluded, as in the other cells.
            continue

    # NOTE(review): os.listdir order is not guaranteed; left unsorted so the
    # order matches the cells that built the original split.
    for img_file in os.listdir(class_path):
        if img_file.lower().endswith(('.jpg', '.jpeg', '.png')) and img_file in annotated_names:
            all_image_paths.append(os.path.join(class_path, img_file))

# Same test_size and random_state as the training cell; only the test part is kept.
_, test_img_paths = train_test_split(all_image_paths, test_size=0.1, random_state=42)

model = get_model(num_classes)
model.to(device)

# Restore the trained weights; a missing checkpoint is a hard error.
if os.path.exists(CHECKPOINT_PATH):
    print(f"Carregando checkpoint de '{CHECKPOINT_PATH}'...")
    # map_location remaps the saved tensors onto the current device.
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval() # Put the model in evaluation mode!
    print("Modelo carregado com sucesso!")
else:
    raise FileNotFoundError(f"ERRO: Checkpoint '{CHECKPOINT_PATH}' não encontrado.")

# Inverse of the dataset's class map: label id -> class name.
class_map_inv = {1: 'Araneae', 2: 'Coleoptera', 3: 'Diptera', 4: 'Hemiptera', 5: 'Hymenoptera', 6: 'Lepidoptera', 7: 'Odonata'}

# Pick one image at random from the test split.
random_image_path = random.choice(test_img_paths)
print(f"\nImagem selecionada para detecção: {os.path.basename(random_image_path)}")

# Keep the PIL image for display; the tensor copy is what the model sees.
original_image = Image.open(random_image_path).convert("RGB")
transform = get_transform(train=False)
image_tensor, _ = transform(original_image, None)
image_tensor = image_tensor.to(device)


# Inference only, so gradients are not tracked.
with torch.no_grad():
    prediction = model([image_tensor])

# The model returns one result dict per input image; move its tensors to CPU.
pred_boxes = prediction[0]['boxes'].cpu()
pred_labels = prediction[0]['labels'].cpu()
pred_scores = prediction[0]['scores'].cpu()

# Keep only detections whose score is strictly above the threshold.
score_threshold = 0.5
keep = pred_scores > score_threshold
final_boxes = pred_boxes[keep]
final_labels = pred_labels[keep]
final_scores = pred_scores[keep]

# Caption per detection: "<class name>: <score>"; ids missing from the map
# fall back to 'Desconhecido'.
labels_with_scores = [
    f"{class_map_inv.get(label.item(), 'Desconhecido')}: {score.item():.2f}"
    for label, score in zip(final_labels, final_scores)
]

# Draw the original image with one rectangle and caption per kept detection.
fig, ax = plt.subplots(figsize=(12, 12))

ax.imshow(original_image)

if len(final_boxes) > 0:
    for box, label in zip(final_boxes, labels_with_scores):
        # Boxes are unpacked as (xmin, ymin, xmax, ymax).
        xmin, ymin, xmax, ymax = box
        
        # Rectangle takes an anchor corner plus width/height, not two corners.
        width = xmax - xmin
        height = ymax - ymin
        
        rect = patches.Rectangle(
            (xmin, ymin), 
            width, 
            height, 
            linewidth=2, 
            edgecolor='blue', 
            facecolor='none'
        )
        
        ax.add_patch(rect)
        
        # Caption sits just above the box's top-left corner.
        ax.text(
            xmin,
            ymin,
            label,
            color='white',
            fontsize=12,
            bbox=dict(facecolor='blue', alpha=0.6, pad=1),
            verticalalignment='bottom'
        )
else:
    print("Nenhum artrópode detectado com confiança >", score_threshold)

ax.axis('off')
plt.title(f"Detecções em: {os.path.basename(random_image_path)}")
plt.tight_layout()
plt.show()