<a href="https://colab.research.google.com/github/FernandoSimon22/vision_artificial/blob/main/Faster_CBAM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 📘 Faster R-CNN + CBAM con 1 clase ("def")
Entrenamiento completo con dataset descargado desde Roboflow.

In [1]:
!pip install torch torchvision pycocotools opencv-python matplotlib tqdm roboflow -q

import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
from torchvision import transforms as T
import matplotlib.pyplot as plt
from PIL import Image

!pip install pycocotools



[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m105.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m104.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
import torch.nn as nn

# 🔧 CBAM Módulo
class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(
            nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        return self.sigmoid(self.fc(self.avg_pool(x)) + self.fc(self.max_pool(x))) * x

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x_cat = torch.cat([avg_out, max_out], dim=1)
        return self.sigmoid(self.conv(x_cat)) * x

class CBAM(nn.Module):
    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)
    def forward(self, x):
        return self.sa(self.ca(x))

# ⚙️ Cargar backbone con CBAM insertado correctamente
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
import types

def insert_cbam_in_layer2(backbone, device):
    original_layer2 = backbone.body.layer2
    cbam = CBAM(512).to(device)

    def new_forward(self, x):
        x = original_layer2(x)
        x = cbam(x)
        return x

    backbone.body.layer2.forward = types.MethodType(new_forward, backbone.body.layer2)
    return backbone

# 📦 Crea el modelo con CBAM en layer2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
backbone = resnet_fpn_backbone('resnet50', pretrained=True)
backbone = insert_cbam_in_layer2(backbone, device)
model = FasterRCNN(backbone, num_classes=2)
model.to(device)

print("✅ Modelo Faster R-CNN con CBAM integrado en layer2")



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 233MB/s]


✅ Modelo Faster R-CNN con CBAM integrado en layer2


In [12]:
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection import FasterRCNN
import torch.nn as nn

backbone = resnet_fpn_backbone('resnet50', pretrained=True)
backbone.body.layer2 = nn.Sequential(
    backbone.body.layer2,
    CBAM(512)
)

Using 'backbone_name' as positional parameter(s) is deprecated since 0.13 and may be removed in the future. Please use keyword parameter(s) instead.
The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.


In [13]:
model = FasterRCNN(backbone, num_classes=2)
print("✅ Modelo con CBAM cargado")

✅ Modelo con CBAM cargado


In [11]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="lX9sE2RPQOCv2R7cn0io")
project = rf.workspace("jota22").project("am_boundingbox")
version = project.version(14)
dataset = version.download("coco")

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

loading Roboflow workspace...
loading Roboflow project...


In [14]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    epoch_loss = 0.0

    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()

    print(f"📘 Epoch [{epoch+1}] | Loss: {epoch_loss:.4f}")



In [15]:
class CocoDetectionFasterRCNN(CocoDetection):
    def __getitem__(self, idx):
        img, target = super().__getitem__(idx)
        boxes = []
        labels = []
        for obj in target:
            x, y, w, h = obj['bbox']
            if w > 0 and h > 0:
                boxes.append([x, y, x + w, y + h])
                labels.append(obj['category_id'])
        if len(boxes) == 0:
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.tensor([], dtype=torch.int64)
        else:
            boxes = torch.tensor(boxes, dtype=torch.float32)
            labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((len(labels),), dtype=torch.int64)
        }
        return img, target

class SimpleTransform:
    def __call__(self, image, target=None):
        return T.ToTensor()(image), target

def get_coco_dataset(img_dir, ann_file):
    return CocoDetectionFasterRCNN(
        root=img_dir,
        annFile=ann_file,
        transforms=SimpleTransform()
    )

train_dataset = get_coco_dataset(
    img_dir="/content/AM_BoundingBox-14/train",
    ann_file="/content/AM_BoundingBox-14/train/_annotations.coco.json"
)

val_dataset = get_coco_dataset(
    img_dir="/content/AM_BoundingBox-14/valid",
    ann_file="/content/AM_BoundingBox-14/valid/_annotations.coco.json"
)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

loading annotations into memory...
Done (t=0.20s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [16]:
num_epochs = 150
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()
    if (epoch + 1) % 15 == 0:
        model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
        torch.save(model.state_dict(), model_path)
        print(f"✅ Model saved: {model_path}")


📘 Epoch [1] | Loss: 35.6812
📘 Epoch [2] | Loss: 35.6715
📘 Epoch [3] | Loss: 35.6869
📘 Epoch [4] | Loss: 35.7342
📘 Epoch [5] | Loss: 35.6432
📘 Epoch [6] | Loss: 35.6836
📘 Epoch [7] | Loss: 35.7144
📘 Epoch [8] | Loss: 35.7020
📘 Epoch [9] | Loss: 35.6646
📘 Epoch [10] | Loss: 35.7124
📘 Epoch [11] | Loss: 35.6926
📘 Epoch [12] | Loss: 35.6798
📘 Epoch [13] | Loss: 35.6358
📘 Epoch [14] | Loss: 35.7107
📘 Epoch [15] | Loss: 35.6733
✅ Model saved: fasterrcnn_resnet50_epoch_15.pth
📘 Epoch [16] | Loss: 35.6559
📘 Epoch [17] | Loss: 35.6570
📘 Epoch [18] | Loss: 35.6936
📘 Epoch [19] | Loss: 35.6683
📘 Epoch [20] | Loss: 35.6808
📘 Epoch [21] | Loss: 35.6924
📘 Epoch [22] | Loss: 35.6406
📘 Epoch [23] | Loss: 35.6729
📘 Epoch [24] | Loss: 35.6740
📘 Epoch [25] | Loss: 35.6699
📘 Epoch [26] | Loss: 35.6384
📘 Epoch [27] | Loss: 35.6607
📘 Epoch [28] | Loss: 35.6895
📘 Epoch [29] | Loss: 35.6609
📘 Epoch [30] | Loss: 35.7007
✅ Model saved: fasterrcnn_resnet50_epoch_30.pth
📘 Epoch [31] | Loss: 35.7333
📘 Epoch [32] |

In [17]:
# 🔍 Evaluación con pycocotools sobre el conjunto de test
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json
import os

# Cargar dataset de test
from torchvision.datasets import CocoDetection

test_img_dir = "/content/AM_BoundingBox-14/test"
test_ann_file = "/content/AM_BoundingBox-14/test/_annotations.coco.json"

class CocoDetectionTest(CocoDetection):
    def __getitem__(self, idx):
        img, target = super().__getitem__(idx)
        image_id = target[0]['image_id'] if target else idx
        return img, image_id

test_dataset = CocoDetectionTest(test_img_dir, test_ann_file, transform=T.ToTensor())
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model.eval()
results = []

with torch.no_grad():
    for images, image_ids in test_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)

        for output, image_id in zip(outputs, image_ids):
            boxes = output['boxes'].cpu().numpy()
            scores = output['scores'].cpu().numpy()
            labels = output['labels'].cpu().numpy()

            for box, score, label in zip(boxes, scores, labels):
                x_min, y_min, x_max, y_max = box
                results.append({
                    "image_id": int(image_id),
                    "category_id": int(label),
                    "bbox": [float(x_min), float(y_min), float(x_max - x_min), float(y_max - y_min)],
                    "score": float(score)
                })

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [18]:
# Guardar resultados en JSON para COCOeval
with open("results_coco.json", "w") as f:
    json.dump(results, f)

coco_gt = COCO(test_ann_file)
coco_dt = coco_gt.loadRes("results_coco.json")

coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=1.74s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet