<a href="https://colab.research.google.com/github/FernandoSimon22/vision_artificial/blob/main/faster_r_cnn_implementacion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="lX9sE2RPQOCv2R7cn0io")
project = rf.workspace("jota22").project("am_boundingbox")
version = project.version(14)
dataset = version.download("coco")


Collecting roboflow
  Downloading roboflow-1.1.58-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pillow-heif>=0.18.0 (from roboflow)
  Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.58-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.5/84.5 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Downloading Dataset Version Zip in AM_BoundingBox-14 to coco:: 100%|██████████| 6817/6817 [00:00<00:00, 7276.88it/s]





Extracting Dataset Version Zip to AM_BoundingBox-14 in coco:: 100%|██████████| 198/198 [00:00<00:00, 5691.95it/s]


In [None]:
# Define transformations
class CocoTransform:
    """Callable applied jointly to an ``(image, target)`` pair.

    Only the image is changed (converted to a tensor); the target is handed
    back exactly as it was received.
    """

    def __call__(self, image, target):
        # Convert the PIL image to a tensor; annotations pass through as-is.
        return F.to_tensor(image), target

In [None]:
# Dataset class
def get_coco_dataset(img_dir, ann_file):
    """Create a ``CocoDetection`` dataset whose samples go through ``CocoTransform``.

    Args:
        img_dir: Directory holding the image files.
        ann_file: Path to the COCO-format annotation JSON.

    Returns:
        The constructed ``CocoDetection`` instance.
    """
    joint_transform = CocoTransform()
    coco_dataset = CocoDetection(
        root=img_dir, annFile=ann_file, transforms=joint_transform
    )
    return coco_dataset

# Build the train and validation datasets from the downloaded COCO export.
train_dataset = get_coco_dataset(
    "/content/AM_BoundingBox-14/train",
    "/content/AM_BoundingBox-14/train/_annotations.coco.json",
)
val_dataset = get_coco_dataset(
    "/content/AM_BoundingBox-14/valid",
    "/content/AM_BoundingBox-14/valid/_annotations.coco.json",
)

# Batches of four samples. The collate function transposes a list of
# (image, target) pairs into an (images, targets) pair of tuples instead of
# stacking them. Only the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=lambda batch: tuple(zip(*batch)),
)
val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [None]:
# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box head.

    The network is created from torchvision's COCO-pretrained checkpoint and
    its box predictor is replaced by a fresh ``FastRCNNPredictor``.

    Args:
        num_classes: Number of outputs of the new box predictor. Callers in
            this notebook include the background class in this count.

    Returns:
        The model with the replaced ``roi_heads.box_predictor``.
    """
    # `pretrained=True` is deprecated since torchvision 0.13. Per the
    # deprecation warning it is equivalent to the COCO_V1 weights, so passing
    # the enum explicitly keeps the same checkpoint without the warning.
    weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

    # The new head must accept the same feature size the old classifier did.
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with one sized for `num_classes`.
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [None]:
# Build the detector with two outputs: background plus the single "def" class.
num_classes = 2
model = get_model(num_classes=num_classes)

The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 187MB/s]


In [None]:
# Use CUDA when it is available, otherwise the CPU, and move the model there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# SGD over the parameters that require gradients.
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# NOTE(review): StepLR multiplies the learning rate by 0.1 every 3 scheduler
# steps. The training loop in this notebook steps it once per epoch for 150
# epochs, so the rate drops below 1e-8 after 18 epochs -- confirm this decay
# is intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
def _build_detection_target(annotations, device):
    """Turn one image's COCO annotations into a ``{"boxes", "labels"}`` target.

    COCO boxes are ``[x, y, width, height]``; they are converted to
    ``[x_min, y_min, x_max, y_max]``. Annotations whose width or height is not
    strictly positive are dropped.

    Returns:
        A dict of tensors on ``device``, or ``None`` when no usable box remains.
    """
    boxes = []
    labels = []
    for obj in annotations:
        x, y, w, h = obj["bbox"]
        if w > 0 and h > 0:
            boxes.append([x, y, x + w, y + h])
            labels.append(obj["category_id"])

    if not boxes:
        return None
    return {
        "boxes": torch.tensor(boxes, dtype=torch.float32, device=device),
        "labels": torch.tensor(labels, dtype=torch.int64, device=device),
    }


def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one optimisation pass over ``data_loader`` and report the mean loss.

    Images without any valid bounding box are left out of their batch, and a
    batch left with no images is skipped entirely.

    Args:
        model: Detection model that, in training mode, maps
            ``(images, targets)`` to a dict of loss tensors.
        optimizer: Optimizer stepping the model's parameters.
        data_loader: Iterable of ``(images, targets)`` batches, where each
            target is a list of COCO annotation dicts with ``"bbox"`` and
            ``"category_id"`` keys.
        device: Device the images and targets are moved to.
        epoch: Epoch index, used only in the progress message.

    Returns:
        The loss averaged over the batches that were actually trained on, or
        ``None`` when no batch contained a valid box.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for images, targets in data_loader:
        batch_images = []
        batch_targets = []
        for image, annotations in zip(images, targets):
            target = _build_detection_target(annotations, device)
            if target is None:
                continue
            # Move only the images that are kept, so images and targets stay
            # aligned and nothing is transferred needlessly.
            batch_images.append(image.to(device))
            batch_targets.append(target)

        # Skip the iteration if no image in the batch has a valid target.
        if not batch_targets:
            continue

        # Forward pass: the model returns its individual loss terms.
        loss_dict = model(batch_images, batch_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()
        num_batches += 1

    # Without this guard the summary below would reference a loss that was
    # never computed when every batch was skipped or the loader was empty.
    if num_batches == 0:
        print(f"Epoch [{epoch}] skipped: no batch contained a valid bounding box")
        return None

    # Report the epoch average rather than the (noisy) loss of the last batch.
    mean_loss = total_loss / num_batches
    print(f"Epoch [{epoch}] Loss: {mean_loss:.4f}")
    return mean_loss

In [None]:
# Training loop
num_epochs = 150
checkpoint_every = 15  # number of epochs between saved checkpoints

for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    # The scheduler is stepped once per epoch, after the optimizer updates.
    lr_scheduler.step()

    # Save the model's state dictionary every `checkpoint_every` epochs.
    # The file name carries the 1-based epoch count (15, 30, ...).
    if (epoch + 1) % checkpoint_every == 0:
        model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
        torch.save(model.state_dict(), model_path)
        print(f"✅ Model saved: {model_path}")


Epoch [0] Loss: 0.9778
Epoch [1] Loss: 1.0183
Epoch [2] Loss: 0.9870
Epoch [3] Loss: 0.9042
Epoch [4] Loss: 0.8954
Epoch [5] Loss: 1.0818
Epoch [6] Loss: 0.9317
Epoch [7] Loss: 1.0317
Epoch [8] Loss: 1.2131
Epoch [9] Loss: 0.9586
Epoch [10] Loss: 1.1286
Epoch [11] Loss: 0.9955
Epoch [12] Loss: 1.0205
Epoch [13] Loss: 0.9911
Epoch [14] Loss: 0.9473
✅ Model saved: fasterrcnn_resnet50_epoch_15.pth
Epoch [15] Loss: 0.9657
Epoch [16] Loss: 1.0088
Epoch [17] Loss: 0.9860
Epoch [18] Loss: 0.9864
Epoch [19] Loss: 0.8537
Epoch [20] Loss: 0.6659
Epoch [21] Loss: 0.8754
Epoch [22] Loss: 0.9667
Epoch [23] Loss: 1.1067
Epoch [24] Loss: 1.1261
Epoch [25] Loss: 0.9425
Epoch [26] Loss: 1.0165
Epoch [27] Loss: 1.0123
Epoch [28] Loss: 1.0575
Epoch [29] Loss: 0.9797
✅ Model saved: fasterrcnn_resnet50_epoch_30.pth
Epoch [30] Loss: 0.9357
Epoch [31] Loss: 1.0195
Epoch [32] Loss: 1.0534
Epoch [33] Loss: 1.2621
Epoch [34] Loss: 0.9765
Epoch [35] Loss: 1.0557
Epoch [36] Loss: 1.0910
Epoch [37] Loss: 1.0311
Ep

In [None]:
# --- Improved: run inference on every image of the test split and save the annotated predictions ---

import os
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from tqdm import tqdm

# --- Configuration ---
# Two outputs: one object class plus background.
num_classes = 2
# Checkpoint to evaluate.
model_path = "/content/fasterrcnn_resnet50_epoch_90.pth"
# Images to run inference on; change this if necessary.
img_dir = "/content/AM_BoundingBox-14/test"
# The annotation file sits inside the image directory.
ann_file = f"{img_dir}/_annotations.coco.json"

# --- Load model ---
def get_model(num_classes):
    """Rebuild the Faster R-CNN (ResNet-50 FPN) architecture used for training.

    The box predictor is replaced by a ``FastRCNNPredictor`` with
    ``num_classes`` outputs so that the fine-tuned checkpoint can be loaded
    into it.

    NOTE(review): this redefines the ``get_model`` from the training section
    of the notebook. The pretrained weights fetched here are overwritten by
    the ``load_state_dict`` call that follows.

    Args:
        num_classes: Number of outputs of the box predictor.

    Returns:
        The model with the replaced ``roi_heads.box_predictor``.
    """
    # `pretrained=True` is deprecated since torchvision 0.13. The COCO_V1 enum
    # selects the same checkpoint the deprecated flag resolved to.
    weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

# Choose the device, rebuild the architecture, restore the fine-tuned weights
# from the checkpoint, then switch the model to evaluation mode.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = get_model(num_classes)
model.load_state_dict(
    torch.load(model_path, map_location=device)
)
model = model.to(device).eval()

# --- COCO dataset & loader ---
def get_coco_dataset(img_dir, ann_file):
    """Create a ``CocoDetection`` dataset over ``img_dir`` and ``ann_file``.

    NOTE(review): this redefines the ``get_coco_dataset`` from the training
    section and, unlike that one, passes no transform to ``CocoDetection``.

    Args:
        img_dir: Directory holding the image files.
        ann_file: Path to the COCO-format annotation JSON.

    Returns:
        The constructed ``CocoDetection`` instance.
    """
    coco_dataset = CocoDetection(img_dir, ann_file)
    return coco_dataset

# NOTE(review): `dataset` rebinds the name that held the Roboflow download
# object in the first cell.
dataset = get_coco_dataset(img_dir=img_dir, ann_file=ann_file)
# One sample per batch and no shuffling, so the loader yields samples in
# dataset order; the inference loop indexes `dataset.ids` by batch position.
val_loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

# --- Drawing helpers ---
# Class id -> display name for the two outputs of the box predictor.
COCO_CLASSES = dict(enumerate(("background", "def")))

def get_class_name(class_id):
    """Return the display name for ``class_id``, or ``"unknown"`` if it is not listed."""
    if class_id in COCO_CLASSES:
        return COCO_CLASSES[class_id]
    return "unknown"

def draw_boxes(image, prediction, threshold=0.5):
    """Draw every detection scoring at least ``threshold`` onto ``image``.

    Each kept detection is drawn as a red rectangle with a
    ``"<class name> (<score>)"`` caption at its top-left corner.

    Args:
        image: PIL image to draw on; it is modified in place.
        prediction: Mapping with ``"boxes"``, ``"labels"`` and ``"scores"``
            tensors, one row/entry per detection; each box has four
            coordinates ``x_min, y_min, x_max, y_max``.
        threshold: Minimum score a detection needs to be drawn.

    Returns:
        The same ``image`` object, after drawing.
    """
    draw = ImageDraw.Draw(image)
    # detach() first: Tensor.numpy() raises on tensors that require grad,
    # which happens when the prediction was not produced under torch.no_grad().
    boxes = prediction["boxes"].detach().cpu().numpy()
    labels = prediction["labels"].detach().cpu().numpy()
    scores = prediction["scores"].detach().cpu().numpy()

    for box, label, score in zip(boxes, labels, scores):
        if score < threshold:
            continue
        # Hand PIL and the name lookup plain Python numbers, not numpy scalars.
        x_min, y_min, x_max, y_max = box.tolist()
        draw.rectangle([x_min, y_min, x_max, y_max], outline="red", width=2)
        draw.text((x_min, y_min), f"{get_class_name(int(label))} ({score:.2f})", fill="red")
    return image

# --- Run inference on every image and save the annotated results ---
out_dir = "detections"
os.makedirs(out_dir, exist_ok=True)

with torch.no_grad():
    for idx, (images, targets) in enumerate(tqdm(val_loader, desc="Evaluando")):
        # The evaluation dataset is built without a transform, so the loader
        # can hand back a PIL image, which has no `.to()`. Convert it to a
        # tensor unless it already is one.
        sample = images[0]
        if not torch.is_tensor(sample):
            sample = F.to_tensor(sample)
        image_tensor = sample.to(device)
        output = model([image_tensor])[0]

        # Load the original image for drawing. `dataset.ids[idx]` is the COCO
        # image id of the idx-th sample, which matches the loader because it
        # uses batch_size=1 without shuffling.
        image_id = dataset.ids[idx]
        image_file = dataset.coco.loadImgs(image_id)[0]['file_name']
        with Image.open(os.path.join(img_dir, image_file)) as source_image:
            image = source_image.convert("RGB")

        # Draw the predictions on a copy, leaving `image` untouched.
        result_img = draw_boxes(image.copy(), output)

        # Save under a 1-based running index.
        result_img.save(os.path.join(out_dir, f"prediction_{idx+1}.jpg"))