In [1]:
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel.
%pip install -q torch torchvision torchaudio
%pip install -q requests pydantic opencv-python gdown
# Pin Detectron2 to the commit this notebook was last run against so that a
# fresh run reproduces the same library code.
%pip install -q git+https://github.com/facebookresearch/detectron2.git@ebe8b45437f86395352ab13402ba45b75b4d1ddb

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-bujjdd4l
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-bujjdd4l
  Resolved https://github.com/facebookresearch/detectron2.git to commit ebe8b45437f86395352ab13402ba45b75b4d1ddb
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [2]:
from google.colab import drive

# Mount Google Drive inside the Colab VM so files can be read from / written to it.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [3]:
import os
import subprocess
import torch
import json
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2 import model_zoo


In [4]:
class FruitDetector:
    """
    Downloads, filters, and trains a fruit detection model using Detectron2.

    Constructing an instance has side effects: it downloads the dataset when
    either dataset directory is missing, rewrites the COCO annotation files in
    place, deletes image files that carry no annotation of a kept class,
    registers both splits with Detectron2, and builds the training config.

    Attributes:
    -----------
    train_dataset_path : str
        The path to the training dataset directory.
    test_dataset_path : str
        The path to the testing dataset directory.
    output_dir : str
        The directory where the model and results will be saved.
    num_classes : int
        Number of target classes for detection.
    base_lr : float
        The base learning rate for the model.
    max_iter : int
        Maximum number of iterations for training.
    batch_size : int
        The batch size used for training.
    num_workers : int
        Number of worker threads used for loading data.
    device : str
        Device used for training and inference (CPU/GPU).
    classes_to_keep : list
        List of classes to keep from the dataset.
    cfg : CfgNode
        The Detectron2 configuration built by ``_setup_cfg``.
    """

    # NOTE(review): this export URL embeds an access key. Consider loading it
    # from an environment variable or a secret store instead of committing it.
    DATASET_URL = "https://universe.roboflow.com/ds/GcqspXimqf?key=CjGlzoKQxq"
    DATASET_ARCHIVE = "roboflow.zip"
    ANNOTATIONS_FILENAME = "_annotations.coco.json"
    TRAIN_DATASET_NAME = "fruit_dataset_train"
    TEST_DATASET_NAME = "fruit_dataset_test"

    def __init__(self, train_dataset_path, test_dataset_path,
                 output_dir="frutas_deteccion_objetos", num_classes=4, base_lr=0.0025,
                 max_iter=2500, batch_size=2, num_workers=2, device=None):
        """
        Initializes the FruitDetector class with dataset paths, training parameters, and filtering setup.

        Parameters:
        -----------
        train_dataset_path : str
            The path to the training dataset directory.
        test_dataset_path : str
            The path to the testing dataset directory.
        output_dir : str
            The directory where the model and results will be saved.
        num_classes : int
            Number of target classes for detection.
        base_lr : float
            The base learning rate for the model.
        max_iter : int
            Maximum number of iterations for training.
        batch_size : int
            The batch size used for training.
        num_workers : int
            Number of worker threads used for loading data.
        device : str
            Device used for training and inference (CPU/GPU). If None, automatically detects GPU or CPU.
        """
        self.train_dataset_path = train_dataset_path
        self.test_dataset_path = test_dataset_path
        self.num_classes = num_classes
        self.output_dir = output_dir
        self.base_lr = base_lr
        self.max_iter = max_iter
        self.batch_size = batch_size
        self.num_workers = num_workers

        # Detect device if not manually specified
        if device:
            self.device = device
        else:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"

        print(f"Using device: {self.device}")

        self.classes_to_keep = ['apple', 'banana', 'orange', 'pear']

        self._download_dataset()
        self._filter_dataset()
        self._register_datasets()
        self.cfg = self._setup_cfg()

    def _annotations_path(self, dataset_path):
        """Returns the path of the COCO annotation file inside ``dataset_path``."""
        return f"{dataset_path}/{self.ANNOTATIONS_FILENAME}"

    def _download_dataset(self):
        """
        Downloads and extracts the dataset if either dataset directory is missing.

        The archive is fetched with ``curl`` and extracted into the current
        working directory; the archive file is removed afterwards.

        Raises:
        -------
        subprocess.CalledProcessError
            If ``curl`` exits with a non-zero status (including HTTP errors).
        zipfile.BadZipFile
            If the downloaded file is not a valid zip archive.
        """
        import zipfile

        if os.path.exists(self.train_dataset_path) and os.path.exists(self.test_dataset_path):
            print("El dataset ya está disponible.")
            return

        print("Descargando el dataset desde Roboflow...")
        try:
            # --fail makes curl exit non-zero on HTTP errors, so check=True
            # surfaces a failed download instead of extracting an error page.
            subprocess.run(
                ["curl", "--fail", "-L", self.DATASET_URL, "-o", self.DATASET_ARCHIVE],
                check=True)
            with zipfile.ZipFile(self.DATASET_ARCHIVE) as archive:
                archive.extractall()
        finally:
            # Never leave a partial or already-extracted archive behind.
            if os.path.exists(self.DATASET_ARCHIVE):
                os.remove(self.DATASET_ARCHIVE)
        print("Dataset descargado, extraído y el archivo .zip eliminado.")

    def _filter_annotations(self, json_path, dataset_path):
        """
        Rewrites one COCO annotation file so it only contains the kept classes.

        Annotations and categories whose class name is not in
        ``self.classes_to_keep`` are dropped. Images left without any kept
        annotation are removed from the JSON and their files are deleted from
        ``dataset_path``.

        Parameters:
        -----------
        json_path : str
            Path to the COCO annotation file; it is overwritten in place.
        dataset_path : str
            Directory that contains the image files referenced by the JSON.
        """
        with open(json_path, 'r') as f:
            data = json.load(f)

        kept_categories = [c for c in data['categories'] if c['name'] in self.classes_to_keep]
        # Annotations reference a category by its 'id' field, which is not
        # necessarily its position in the 'categories' list.
        kept_category_ids = {c['id'] for c in kept_categories}

        kept_annotations = [a for a in data['annotations'] if a['category_id'] in kept_category_ids]
        annotated_image_ids = {a['image_id'] for a in kept_annotations}

        kept_images = []
        for image in data['images']:
            if image['id'] in annotated_image_ids:
                kept_images.append(image)
                continue
            # The image has no annotation of a kept class: delete its file.
            image_path = os.path.join(dataset_path, image['file_name'])
            if os.path.exists(image_path):
                os.remove(image_path)

        data['images'] = kept_images
        data['annotations'] = kept_annotations
        data['categories'] = kept_categories

        with open(json_path, 'w') as f:
            json.dump(data, f)

    def _filter_dataset(self):
        """
        Filters the dataset annotations and removes any images that do not contain the relevant classes.
        """
        for dataset_path in (self.train_dataset_path, self.test_dataset_path):
            self._filter_annotations(self._annotations_path(dataset_path), dataset_path)

    def _register_datasets(self):
        """
        Registers the filtered datasets for training and testing in the Detectron2 framework.

        Any earlier registration under the same name is removed first, so a
        second instance can be created in the same Python process.
        """
        from detectron2.data import DatasetCatalog, MetadataCatalog

        splits = (
            (self.TRAIN_DATASET_NAME, self.train_dataset_path),
            (self.TEST_DATASET_NAME, self.test_dataset_path),
        )
        for name, dataset_path in splits:
            if name in DatasetCatalog.list():
                DatasetCatalog.remove(name)
            if name in MetadataCatalog.list():
                MetadataCatalog.remove(name)
            register_coco_instances(name, {}, self._annotations_path(dataset_path), dataset_path)

    def _setup_cfg(self):
        """
        Sets up the configuration for the Detectron2 model and training parameters.

        Starts from the model-zoo Faster R-CNN R50-FPN 3x config and its
        checkpoint, then applies the datasets and hyper-parameters stored on
        this instance. Also creates ``self.output_dir`` if it does not exist.

        Returns:
        --------
        The populated Detectron2 config object.
        """
        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
        cfg.DATASETS.TRAIN = (self.TRAIN_DATASET_NAME,)
        cfg.DATASETS.TEST = (self.TEST_DATASET_NAME,)
        cfg.DATALOADER.NUM_WORKERS = self.num_workers
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
        cfg.SOLVER.IMS_PER_BATCH = self.batch_size
        cfg.SOLVER.BASE_LR = self.base_lr
        cfg.SOLVER.MAX_ITER = self.max_iter
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = self.num_classes
        cfg.OUTPUT_DIR = self.output_dir
        cfg.MODEL.DEVICE = self.device
        os.makedirs(self.output_dir, exist_ok=True)
        return cfg

    def train(self, resume=False):
        """
        Starts the training process for the model.

        Parameters:
        -----------
        resume : bool
            Whether to resume training from the last checkpoint.
        """
        trainer = DefaultTrainer(self.cfg)
        trainer.resume_or_load(resume=resume)
        trainer.train()


Entrenar el modelo (ajuste fino)


In [5]:
# Dataset directories, relative to the current working directory.
train_dataset_path, test_dataset_path = "train", "test"

# Constructing the detector downloads (if needed), filters and registers the
# datasets and builds the training config; train() then runs the fine-tuning.
fruit_detector = FruitDetector(
    train_dataset_path=train_dataset_path,
    test_dataset_path=test_dataset_path,
)
fruit_detector.train(resume=False)

Using device: cuda
Descargando el dataset desde Roboflow...
Dataset descargado, extraído y el archivo .zip eliminado.
[10/03 16:08:12 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), p

model_final_280758.pkl: 167MB [00:00, 200MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}


[10/03 16:08:13 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[10/03 16:08:25 d2.utils.events]:  eta: 0:13:14  iter: 19  total_loss: 2.32  loss_cls: 1.444  loss_box_reg: 0.9197  loss_rpn_cls: 0.004708  loss_rpn_loc: 0.005475    time: 0.3215  last_time: 0.3289  data_time: 0.0205  last_data_time: 0.0049   lr: 4.9952e-05  max_mem: 1736M
[10/03 16:08:35 d2.utils.events]:  eta: 0:13:08  iter: 39  total_loss: 1.825  loss_cls: 0.8976  loss_box_reg: 0.9224  loss_rpn_cls: 0.003367  loss_rpn_loc: 0.007422    time: 0.3206  last_time: 0.3202  data_time: 0.0147  last_data_time: 0.0056   lr: 9.9902e-05  max_mem: 1737M
[10/03 16:08:40 d2.utils.events]:  eta: 0:12:38  iter: 59  total_loss: 1.552  loss_cls: 0.608  loss_box_reg: 0.9164  loss_rpn_cls: 0.008533  loss_rpn_loc: 0.007636    time: 0.3098  last_time: 0.2604  data_time: 0.0103  last_data_time: 0.0130   lr: 0.00014985  max_mem: 1737M
[10/03 16:08:46 d2.utils.events]:  eta: 0:11:57  iter: 79  total_loss: 1.32  loss_cls: 0.4489  loss_box_reg: 0.8661  loss_rpn_cls: 0.004691  loss_rpn_loc: 0.005581    time: 0.

In [15]:
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Build a Detectron2 config for evaluation.
# The "fruit_dataset_test" dataset must already be registered; FruitDetector
# registers it when it is constructed.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TEST = ("fruit_dataset_test",)  # Test dataset
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = "frutas_deteccion_objetos/model_final.pth"  # Path to the saved fine-tuned weights
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4  # Number of classes
# Fall back to CPU when no GPU is available instead of failing on a hard-coded "cuda".
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the trained model without starting training again.
model = DefaultTrainer.build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

# Create the evaluator and the test data loader.
# Passing the config object in the `tasks` position is deprecated; with
# `tasks` left unset the evaluator infers the tasks from the predictions.
evaluator = COCOEvaluator("fruit_dataset_test", distributed=False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "fruit_dataset_test")

# Run the evaluation and collect the metrics.
metrics = inference_on_dataset(model, val_loader, evaluator)
print(metrics)


[10/03 16:53:54 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

  return torch.load(f, map_location=torch.device("cpu"))


Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[10/03 16:53:55 d2.data.datasets.coco]: Loaded 113 images in COCO format from test/_annotations.coco.json
[10/03 16:53:55 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[10/03 16:53:55 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[10/03 16:53:55 d2.data.common]: Serializing 113 elements to byte tensors and concatenating them all ...
[10/03 16:53:55 d2.data.common]: Serialized dataset takes 0.04 MiB
[10/03 16:53:55 d2.evaluation.evaluator]: Start inference on 113 batches
[10/03 16:53:56 d2.evaluation.evaluator]: Inference done 11/113. Dataloading: 0.0014 s/iter. Inference: 0.0772 s/iter. Eval: 0.0002 s/iter. Total: 0.0788 s/iter. ETA=0:00:08
[10/03 16:54:01 d2.evaluation.evaluator]: Inference done 72/113. Dataloading: 0.0043 s/it

Guardar el modelo en Google Drive

In [20]:
import shutil

# Copy the training output directory (checkpoints, logs) to Google Drive.
src = '/content/frutas_deteccion_objetos'
dst = '/content/drive/MyDrive/modelos_entrenados/frutas_deteccion_objetos'
# dirs_exist_ok=True lets this cell be re-run after a previous copy instead of
# raising FileExistsError; existing files in dst are overwritten.
shutil.copytree(src, dst, dirs_exist_ok=True)


'/content/drive/MyDrive/modelos_entrenados/frutas_deteccion_objetos'