In [1]:
# Instalación de paquetes necesarios
!pip install scikit-learn torch torchvision pycocotools pillow



In [2]:
# Importación de librerías
import os
from sklearn.model_selection import train_test_split
from pycocotools.coco import COCO
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch
import torch.optim as optim
from torchvision.models.detection import maskrcnn_resnet50_fpn
import torch.optim as optim
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torch.utils.data import DataLoader
import torchvision

In [3]:
# Definición de rutas
coco_file_path = r'D:\Proyecto_Semillas\Germinacion\Semillas_Ger\result.json'
image_dir = r'D:\Proyecto_Semillas\Germinacion\Semillas_Ger\images'

In [4]:
# Cargar las anotaciones COCO
coco = COCO(coco_file_path)

loading annotations into memory...
Done (t=0.42s)
creating index...
index created!


In [5]:
# Obtener todos los IDs de las imágenes
img_ids = list(coco.imgs.keys())

In [6]:
# Dividir los datos: 80% entrenamiento, 20% evaluación
train_ids, val_ids = train_test_split(img_ids, test_size=0.2, random_state=42)

print(f"Total imágenes: {len(img_ids)}")
print(f"Imágenes de entrenamiento: {len(train_ids)}")
print(f"Imágenes de validación: {len(val_ids)}")

Total imágenes: 159
Imágenes de entrenamiento: 127
Imágenes de validación: 32


In [8]:
# Definición de la clase SeedsDataset
class SeedsDataset(Dataset):
    def __init__(self, annotation_file, image_dir, img_ids, transforms=None):
        self.coco = COCO(annotation_file)
        self.image_dir = image_dir
        self.transforms = transforms
        self.ids = img_ids

    def __getitem__(self, index):
        img_id = self.ids[index]
        
        img_path = os.path.join(self.image_dir, self.coco.loadImgs(img_id)[0]['file_name'])
        img = Image.open(img_path).convert("RGB")
        
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)
        
        boxes = []
        masks = []
        labels = []
        for ann in anns:
            xmin, ymin, width, height = ann['bbox']
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            masks.append(self.coco.annToMask(ann))
            labels.append(ann['category_id'])
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        target = {
            'boxes': boxes,
            'labels': labels,
            'masks': masks
        }
        
        if self.transforms:
            img, target = self.transforms(img, target)
        
        return img, target

    def __len__(self):
        return len(self.ids)

In [9]:
# Crear datasets para entrenamiento y validación
train_dataset = SeedsDataset(annotation_file=coco_file_path, image_dir=image_dir, img_ids=train_ids)
val_dataset = SeedsDataset(annotation_file=coco_file_path, image_dir=image_dir, img_ids=val_ids)

loading annotations into memory...
Done (t=0.40s)
creating index...
index created!
loading annotations into memory...
Done (t=0.37s)
creating index...
index created!


In [10]:
# Crear dataloaders para entrenamiento y validación
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=4)

In [11]:
# Cargar las categorías (clases)
categories = coco.loadCats(coco.getCatIds())
print(f"\nCategorías en el dataset: {[cat['name'] for cat in categories]}")


Categorías en el dataset: ['qrcode', 'root', 'scale1cm', 'seed', 'shoot']


In [12]:
# Detalles de cada categoría
for cat in categories:
    print(f"ID: {cat['id']}, Nombre: {cat['name']}")

ID: 0, Nombre: qrcode
ID: 1, Nombre: root
ID: 2, Nombre: scale1cm
ID: 3, Nombre: seed
ID: 4, Nombre: shoot


In [13]:
# Cargar el modelo preentrenado
model = maskrcnn_resnet50_fpn(pretrained=True)



In [14]:
# Definir el número de clases (incluyendo el fondo)
num_classes = 6  # Fondo + ['qrcode', 'root', 'scale1cm', 'seed', 'shoot']
print(f"Número de clases: {num_classes}")

Número de clases: 6


In [15]:
# Ajustar la cabeza del modelo para las clases de tu dataset
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

In [16]:
# Ajustar también el predictor de máscaras si es necesario
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

In [17]:
# Definir optimizador
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

In [18]:
# Verificar la estructura del modelo ##Ver si se corrio el codigo correctamente
print(model)

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [19]:
# Comprobar el número de clases en el predictor de cajas
print(f"Número de clases en el predictor de cajas: {model.roi_heads.box_predictor.cls_score.out_features}")

Número de clases en el predictor de cajas: 6


In [20]:
# Comprobar el número de clases en el predictor de máscaras
print(f"Número de clases en el predictor de máscaras: {model.roi_heads.mask_predictor.mask_fcn_logits.out_channels}")

Número de clases en el predictor de máscaras: 6


In [21]:
# Verificar que el optimizador está configurado correctamente
print(f"Optimizador: {optimizer}")

Optimizador: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.005
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0.0005
)


In [23]:
# Intentar hacer una predicción de prueba
dummy_input = torch.rand(1, 3, 224, 224)  # Imagen de prueba
model.eval()
with torch.no_grad():
    outputs = model(dummy_input)

print("Predicción de prueba realizada con éxito.")
print(f"Tipos de salidas: {outputs}")

Predicción de prueba realizada con éxito.
Tipos de salidas: [{'boxes': tensor([[  6.6281,   3.0637, 216.3106, 112.4733],
        [148.4702,  79.5544, 151.7088,  83.1525],
        [ 67.0124,  20.5115, 206.4914, 133.8413],
        [138.6029, 125.2381, 141.7133, 128.4921],
        [173.6647, 113.5426, 176.3110, 116.6722],
        [ 50.6243,  96.7417,  53.6109, 100.6298],
        [ 67.9814, 146.7273,  71.6175, 150.5229],
        [ 66.1415, 142.0909,  71.3698, 145.5857],
        [141.6255, 117.6756, 144.6655, 119.9425],
        [132.9279, 111.9669, 137.4594, 115.0456],
        [147.9505,  80.3360, 150.9344,  83.8356],
        [ 46.4352, 156.7977,  50.0164, 160.4601],
        [ 62.4205, 140.4106,  65.6724, 143.7223],
        [174.3944, 113.1815, 177.4140, 116.4763],
        [146.2512, 118.6746, 149.3932, 121.2890],
        [146.5798,  79.3680, 149.6487,  82.8010],
        [ 65.0748, 141.6740,  74.0282, 150.4204],
        [174.2261, 151.4905, 177.2943, 154.3372],
        [126.5326, 116.6762, 

In [24]:
# Verificar si el modelo está en el dispositivo correcto (CPU o GPU)
print(f"Dispositivo del modelo: {next(model.parameters()).device}")

Dispositivo del modelo: cpu


In [None]:
# Ciclo de entrenamiento ## Continuar
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, targets in train_loader:
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
        running_loss += losses.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")  