In [1]:
import torchvision
import os
from transformers import DetrConfig, DetrForObjectDetection, DetrFeatureExtractor

class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder,  train_json_path, test_json_path, feature_extractor, train=True):
        #ann_file = os.path.join(img_folder, "custom_train.json" if train else "custom_val.json")
        if train:
            ann_file = train_json_path
        else:
            ann_file = test_json_path
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        # read in PIL image and target in COCO format
        img, target = super(CocoDetection, self).__getitem__(idx)
        
        # preprocess image and target (converting target to DETR format, resizing + normalization of both image and target)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.feature_extractor(images=img, annotations=target, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze() # remove batch dimension
        target = encoding["labels"][0] # remove batch dimension

        return pixel_values, 

  warn(
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_img_path = "..\\data\\Zona\\Dataset\\images\\"
train_path = "..\\data\\Zona\\Dataset\\annotations\\annotations.json"
test_path = ""
feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
train_dataset = CocoDetection(train_img_path, train_path, test_path, feature_extractor = feature_extractor)



loading annotations into memory...
Done (t=0.24s)
creating index...
index created!


In [3]:
train_dataset

Dataset CocoDetection
    Number of datapoints: 526
    Root location: ..\data\Zona\Dataset\images\

In [4]:
import pytorch_lightning as pl
from transformers import DetrConfig, DetrForObjectDetection
import torch

class Detr(pl.LightningModule):

     def __init__(self, lr, lr_backbone, weight_decay, id2label, train_dataloader, val_dataloader):
         super().__init__()
         # replace COCO classification head with custom head
         self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", 
                                                             num_labels=len(id2label),
                                                             ignore_mismatched_sizes=True)
         # see https://github.com/PyTorchLightning/pytorch-lightning/pull/1896
         self.lr = lr
         self.lr_backbone = lr_backbone
         self.weight_decay = weight_decay
         self.t_dataloader = train_dataloader
         self.v_dataloader = val_dataloader

     def forward(self, pixel_values, pixel_mask):
         
       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

       return outputs
     
     def common_step(self, batch, batch_idx):
         
       pixel_values = batch["pixel_values"]
       pixel_mask = batch["pixel_mask"]
       labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]

       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

       loss = outputs.loss
       loss_dict = outputs.loss_dict

       return loss, loss_dict

     def training_step(self, batch, batch_idx):
         
        loss, loss_dict = self.common_step(batch, batch_idx)     
        # logs metrics for each training_step,
        # and the average across the epoch
        self.log("training_loss", loss)
        for k,v in loss_dict.items():
          self.log("train_" + k, v.item())

        return loss

     def validation_step(self, batch, batch_idx):
         
        loss, loss_dict = self.common_step(batch, batch_idx)     
        self.log("validation_loss", loss)
        for k,v in loss_dict.items():
          self.log("validation_" + k, v.item())

        return loss

     def configure_optimizers(self):
         
        param_dicts = [
              {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
              {
                  "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                  "lr": self.lr_backbone,
              },
        ]
        optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
                                  weight_decay=self.weight_decay)
        
        return optimizer

     def train_dataloader(self):
         
        return self.t_dataloader

     def val_dataloader(self):
         
        return self.v_dataloader

In [5]:
from torch.utils.data import DataLoader
from transformers import DetrFeatureExtractor
from pytorch_lightning import Trainer
import argparse
import os
import json
import torch
from datasets_helper import get_coco_api_from_dataset
from datasets_helper.coco_eval import CocoEvaluator
from tqdm import tqdm

class Train:
    def __init__(self, train_img_path, test_img_path):
        self.train_img_path = train_img_path
        self.test_img_path = test_img_path
        self.feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
        self.train_batch_size = 4
        self.test_batch_size = 2
        self.gpus = 0
        self.lr = 0.0001
        self.lr_backbone = 1e-05
        self.weight_decay = 0.0001
        self.max_steps = 6000
        self.gradient_clip_val = 0.1
        
    
    @staticmethod
    def collate_fn(batch):
        
        feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
        pixel_values = [item[0] for item in batch]
        encoding = feature_extractor.pad_and_create_pixel_mask(pixel_values, return_tensors="pt")

        labels = [0 for item in batch]
        batch = {}
        batch['pixel_values'] = encoding['pixel_values']
        batch['pixel_mask'] = encoding['pixel_mask']
        batch['labels'] = labels
        
        return batch
    
    def create_dataset(self, train_path, test_path):

        #feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
        train_dataset = CocoDetection(self.train_img_path, train_path, test_path, feature_extractor = self.feature_extractor)
        val_dataset = CocoDetection(self.test_img_path, train_path, test_path, feature_extractor = self.feature_extractor, train=False)
        
        return train_dataset, val_dataset
    
    def evaluation(self, val_dataset, val_dataloader, model):

        base_ds = get_coco_api_from_dataset(val_dataset)
        iou_types = ['bbox']
        coco_evaluator = CocoEvaluator(base_ds, iou_types) # initialize evaluator with ground truths

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model.to(device)
        model.eval()

        print("Running evaluation...")

        for idx, batch in enumerate(tqdm(val_dataloader)):
            # get the inputs
            pixel_values = batch["pixel_values"].to(device)
            pixel_mask = batch["pixel_mask"].to(device)
            labels = [{k: v.to(device) for k, v in t.items()} for t in batch["labels"]] # these are in DETR format, resized + normalized

            # forward pass
            outputs = model.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

            orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0)
            results = self.feature_extractor.post_process(outputs, orig_target_sizes) # convert outputs of model to COCO api
            res = {target['image_id'].item(): output for target, output in zip(labels, results)}
            coco_evaluator.update(res)

        coco_evaluator.synchronize_between_processes()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    
    
    def train(self, train_dataset, val_dataset):

        train_dataloader = DataLoader(train_dataset, collate_fn=Train.collate_fn, batch_size = self.train_batch_size, shuffle=True)
        val_dataloader = DataLoader(val_dataset, collate_fn=Train.collate_fn, batch_size = self.test_batch_size)
        #batch = next(iter(train_dataloader))
        cats = train_dataset.coco.cats
        id2label = {k: v['name'] for k,v in cats.items()}
        model = Detr(lr=self.lr, lr_backbone=self.lr_backbone, weight_decay=self.weight_decay, id2label = id2label, train_dataloader = train_dataloader, val_dataloader = val_dataloader)
        #PATH = '/Users/.../aa.ckpt'
        #model = model.load_from_checkpoint(PATH,lr=self.lr, lr_backbone=self.lr_backbone, weight_decay=self.weight_decay, id2label = id2label, train_dataloader = train_dataloader, val_dataloader = val_dataloader)
        trainer = Trainer( max_steps = self.max_steps, gradient_clip_val = self.gradient_clip_val)
        trainer.fit(model)

        #-----
        self.evaluation(val_dataset, val_dataloader, model)
        
        return model, trainer
        
    def main(self):
        
        train_path = "..\\data\\Zona\\Dataset\\annotations\\annotations.json"
        test_path = "..\\data\\Zona\\Dataset\\annotations\\annotations.json"
        model_path = "D:\Clase\TFG\model\\firt_model.ckpt"
        train_dataset, test_dataset = self.create_dataset(train_path, test_path)
        _, trainer = self.train(train_dataset, test_dataset)
        trainer.save_checkpoint(model_path)
        
        return

train_img_path = "..\\data\\Zona\\Dataset\\images\\"
Train(train_img_path, "").main()



loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type                   | Params
-------------------------------------------------
0 | model | DetrForObjectDetection | 41.5 M
-------------------------------------------------
41.3 M    Trainable params
222 K     Non-trainable params
41.5 M    Total params
166.037   Total estimated model param

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]



AttributeError: 'int' object has no attribute 'items'

In [1]:
import torch

In [2]:
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)

Using cache found in C:\Users\rober/.cache\torch\hub\facebookresearch_detr_main
  warn(


In [None]:
import argparse
import datetime
import json
import random
import time
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler

import datasets
import util.misc as utils
from datasets import build_dataset, get_coco_api_from_dataset
from engine import evaluate, train_one_epoch
from models import build_model

In [34]:
def build_model(args):
    model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
    
    return model, None, None

def main(args):

    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = 1000
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessors = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    param_dicts = [
        {"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" not in n and p.requires_grad]},
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if "backbone" in n and p.requires_grad],
            "lr": args.lr_backbone,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                  weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)

    dataset_train = build_dataset(image_set='train', args=args)
    dataset_val = build_dataset(image_set='val', args=args)

    if args.distributed:
        sampler_train = DistributedSampler(dataset_train)
        sampler_val = DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)

    data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=args.num_workers)
    data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val,
                                 drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers)

    if args.dataset_file == "coco_panoptic":
        # We also evaluate AP during panoptic training, on original coco DS
        coco_val = datasets.coco.build("val", args)
        base_ds = get_coco_api_from_dataset(coco_val)
    else:
        base_ds = get_coco_api_from_dataset(dataset_val)

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location='cpu')
        model_without_ddp.detr.load_state_dict(checkpoint['model'])

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            args.start_epoch = checkpoint['epoch'] + 1

    if args.eval:
        test_stats, coco_evaluator = evaluate(model, criterion, postprocessors,
                                              data_loader_val, base_ds, device, args.output_dir)
        if args.output_dir:
            utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, output_dir / "eval.pth")
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every 100 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 100 == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master({
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'args': args,
                }, checkpoint_path)

        test_stats, coco_evaluator = evaluate(
            model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir
        )

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     **{f'test_{k}': v for k, v in test_stats.items()},
                     'epoch': epoch,
                     'n_parameters': n_parameters}

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\n")

            # for evaluation logs
            if coco_evaluator is not None:
                (output_dir / 'eval').mkdir(exist_ok=True)
                if "bbox" in coco_evaluator.coco_eval:
                    filenames = ['latest.pth']
                    if epoch % 50 == 0:
                        filenames.append(f'{epoch:03}.pth')
                    for name in filenames:
                        torch.save(coco_evaluator.coco_eval["bbox"].eval,
                                   output_dir / "eval" / name)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

DETR(
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-5): 6 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
          )
          (linear1): Linear(in_features=256, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=256, bias=True)
          (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
        (0-5): 6 x TransformerDecoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=256, ou

# Otro intento

In [1]:
import torch
import detectron2
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog
from detectron2.data import DatasetCatalog
from detectron2.data import transforms as T
from detectron2.structures import BoxMode
from detectron2.modeling import build_model

# Registra los datasets
register_coco_instances("piscinas_entrenamiento", {}, "..\\data\Zona\Dataset\\annotations.json", "..\\data\Zona\Dataset\images")
register_coco_instances("piscinas_validacion", {}, "..\\data\Zona\Dataset\\annotations.json", "..\\data\Zona\Dataset\images")

  warn(


In [2]:
# Define los metadatos de las clases
classes = ["piscina"]
metadata_train = MetadataCatalog.get("piscinas_entrenamiento")
metadata_train.thing_classes = classes
metadata_test = MetadataCatalog.get("piscinas_validacion")
metadata_test.thing_classes = classes

In [3]:
# Función para convertir las anotaciones del dataset en el formato necesario para DETR
def transform_annotations(dataset_dict):
    record = {}
    record["file_name"] = dataset_dict["file_name"]
    record["image_id"] = dataset_dict["image_id"]
    record["height"] = dataset_dict["height"]
    record["width"] = dataset_dict["width"]
    annos = dataset_dict["annotations"]
    objs = []
    for anno in annos:
        obj = {
            "bbox": anno["bbox"],
            "bbox_mode": BoxMode.XYWH_ABS,
            "category_id": 0,
        }
        objs.append(obj)
    record["annotations"] = objs
    return record

In [4]:
# Transformación de datos para entrenamiento
def get_transforms(train=True):
    transforms = []
    transforms.append(T.Resize((800, 800)))
    if train:
        transforms.append(T.RandomFlip(prob=0.5, horizontal=True, vertical=False))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)

In [5]:
# Carga de datasets y transformación de anotaciones
DatasetCatalog.clear()
for d in ["piscinas_entrenamiento", "piscinas_validacion"]:
    DatasetCatalog.register(d, lambda d=d: transform_annotations(detectron2.data.get_detection_dataset_dicts([d])[0]))
    MetadataCatalog.get(d).set(thing_classes=["piscina"])

In [37]:
import torch.nn as nn
import torchvision

class PoolDetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(PoolDetectionModel, self).__init__()
        
        # Cargamos el modelo pre-entrenado
        self.model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=True)
        
        # Deshabilitamos la clasificación de objetos
        self.model.class_embed = nn.Identity()
        
        # Cambiamos el número de clases de salida
        num_query = self.model.transformer.d_model
        self.model.num_classes = num_classes
        self.model.query_embed = nn.Embedding(num_classes, num_query)
        
        # Cambiamos el número de capas de la cabeza de detección
        #self.model.bbox_embed = nn.Sequential(nn.Linear(num_query, num_query), nn.ReLU(), nn.Linear(num_query, num_query), nn.ReLU(), nn.Linear(num_query, 4))
        
    def forward(self, x):
        # Pasamos las imágenes por el modelo pre-entrenado
        outputs = self.model(x)
        
        # Obtenemos las cajas y las etiquetas de las predicciones
        boxes = outputs['pred_boxes']
        labels = outputs['pred_logits'].softmax(-1)
        
        # Devolvemos las cajas y las etiquetas
        return boxes, labels

In [1]:
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF

def train(model, optimizer, data_loader, device):
    model.train()
    for name, param in model.named_parameters():
        print(name)
        if "linear" not in name:
            param.requires_grad = False

    for images, targets in data_loader:
        images = [TF.to_tensor(image).to(device) for image in images]
        targets = [{k: v for k, v in t.items()} for t in targets]
        output = model(images)
        #loss_dict = output['loss_dict']
        losses = sum(sum(sum(output[1])))
        print(losses)
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
    return losses.item()

  warn(


In [104]:
import torch
from torchvision.transforms.functional import to_tensor
from torchvision.transforms import Resize


def collate_fn(batch):
    images = []
    targets = []
    new_size = (1000, 1000)
    transform = Resize(new_size)
    for sample in batch:
        image = Image.open(sample['file_name']).convert('RGB')
        width, height = image.size
        # Transformar la imagen
        image = transform(image)
        # Adaptar las anotaciones
        annotations = sample['annotations']
        for ann in annotations:
            bbox = ann['bbox']
            x_original, y_original, w_original, h_original = bbox
            x_new = x_original * new_size[0] / width
            y_new = y_original * new_size[1] / height
            w_new = w_original * new_size[0] / width
            h_new = h_original * new_size[1] / height
            ann['bbox'] = [x_new, y_new, w_new, h_new]
        # Añadir la imagen y las anotaciones a la lista
        images.append(image)
        targets.append({'boxes': [ann['bbox'] for ann in annotations], 'labels': [ann['category_id'] for ann in annotations]})
    return images, targets

In [105]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader

DatasetCatalog.clear()
MetadataCatalog.clear()

# Declarar el modelo
model = PoolDetectionModel(num_classes=1)

# Declarar el optimizador
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Registrar instancia de COCO
register_coco_instances("piscinas_entrenamiento", {}, "..\\data\Zona\Dataset\\annotations.json", "..\\data\Zona\Dataset\images")

# Cargar la instancia de COCO como un objeto Dataset
dataset_name = "piscinas_entrenamiento"
dataset = DatasetCatalog.get(dataset_name)

# Obtener metadatos del dataset
metadata = MetadataCatalog.get(dataset_name)

# Declarar el DataLoader
data_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)


# Declarar el dispositivo
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Mover el modelo y el DataLoader al dispositivo
model.to(device)
train(model, optimizer, data_loader, device)

Using cache found in C:\Users\rober/.cache\torch\hub\facebookresearch_detr_main

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.



tensor(4.0000, grad_fn=<AddBackward0>)
tensor(4.0000, grad_fn=<AddBackward0>)
tensor(4.0000, grad_fn=<AddBackward0>)


KeyboardInterrupt: 

# Funciona

In [None]:
import torch
from torchvision.transforms.functional import to_tensor
from torchvision.transforms import Resize
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torchvision.transforms.functional as TF
import torch.nn as nn
import torchvision
import torch
import detectron2
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog
from detectron2.data import DatasetCatalog
from detectron2.data import transforms as T
from detectron2.structures import BoxMode
from detectron2.modeling import build_model
from PIL import Image, ImageDraw

# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# # import some common libraries
# import numpy as np
# import cv2
# import random
# from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

import torch
from scipy.optimize import linear_sum_assignment
import torch.nn.functional as F

In [None]:
class PoolDetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(PoolDetectionModel, self).__init__()

        num_predictions = 10
        
        # Cargamos el modelo pre-entrenado
        self.model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=True)

        # Agregamos una nueva capa class_embed
        hidden_dim = self.model.transformer.d_model
        self.model.class_embed = nn.Linear(hidden_dim, num_classes)
        
        # Cambiamos el número de clases de salida
        num_query = self.model.transformer.d_model
        self.model.num_classes = num_classes
        self.model.query_embed = nn.Embedding(num_predictions, num_query)
        
    def forward(self, x):
        # Pasamos las imágenes por el modelo pre-entrenado
        outputs = self.model(x)
        
        # Obtenemos las cajas y las etiquetas de las predicciones
        boxes = outputs['pred_boxes']
        labels = outputs['pred_logits'].softmax(-1)
        
        # Devolvemos las cajas y las etiquetas
        return boxes, labels

In [None]:
def hungarian_loss(outputs, targets):
    boxes, labels = outputs
    true_boxes, true_labels = torch.FloatTensor(targets['boxes']).to(device), torch.FloatTensor(targets['labels']).to(device)
    true_labels = torch.FloatTensor([[label] for label in true_labels]).to(device)

    # Calculamos el costo de emparejamiento entre las cajas predichas y verdaderas
    cost_boxes = torch.cdist(boxes, true_boxes, p=1)
    
    # Calculamos el costo de emparejamiento entre las etiquetas predichas y verdaderas
    cost_labels = torch.cdist(labels, true_labels, p=1)
    
    # Combinamos los costos
    cost = cost_boxes + cost_labels
    
    # Resolvemos el problema de asignación lineal
    row_ind, col_ind = linear_sum_assignment(cost.cpu().detach().numpy())
    
    # Obtenemos las cajas y etiquetas predichas emparejadas
    boxes = boxes[row_ind]
    labels = labels[row_ind]
    
    # Obtenemos las cajas y etiquetas verdaderas emparejadas
    true_boxes = true_boxes[col_ind]
    true_labels = true_labels[col_ind]

    # Calculamos la pérdida de IoU para las cajas
    box_loss = F.mse_loss(boxes, true_boxes)
    
    # Calculamos la entropía cruzada binaria para las etiquetas
    label_loss = F.binary_cross_entropy(labels, true_labels)
    
    # Combinamos las pérdidas
    loss = box_loss + label_loss
    
    return loss

In [None]:
def train(model, optimizer, data_loader, device, epochs=10):
    model.train()
    for name, param in model.named_parameters():
        if "query_embed" not in name and "bbox_embed" not in name:
            param.requires_grad = False
    for epoch in range(epochs):
      for images, targets in data_loader:
          images = [TF.to_tensor(image).to(device) for image in images]
          targets = [{k: v for k, v in t.items()} for t in targets]
          output = model(images)
          losses = 0
          output = [(prob, targ) for prob, targ in zip(output[0], output[1])]
          for o, t in zip(output, targets):
            losses += hungarian_loss(o, t)
          # losses = object_detection_loss(output, targets)
          print(losses)
          optimizer.zero_grad()
          losses.backward()
          optimizer.step()
          
    return losses.item()

In [None]:
def collate_fn(batch):
    images = []
    targets = []
    new_size = (100, 100)
    transform = Resize(new_size)
    for sample in batch:
        image = Image.open(sample['file_name']).convert('RGB')
        width, height = image.size
        # Transformar la imagen
        image = transform(image)
        # Adaptar las anotaciones
        annotations = sample['annotations']
        for ann in annotations:
            bbox = ann['bbox']
            x_original, y_original, w_original, h_original = bbox
            # x_new = x_original * new_size[0] / width
            # y_new = y_original * new_size[1] / height
            # w_new = w_original * new_size[0] / width
            # h_new = h_original * new_size[1] / height

            x_new = x_original / width
            y_new = y_original / height
            w_new = w_original / width
            h_new = h_original / height
            ann['bbox'] = [x_new, y_new, w_new, h_new]
        
        # Añadir la imagen y las anotaciones a la lista
        images.append(image)
        targets.append({'boxes': [ann['bbox'] for ann in annotations], 'labels': [1 for ann in annotations if ann['category_id'] == 0]})
        targets.append({'boxes': [[0, 0, 0, 0]], 'labels': [0]})
    return images, targets

In [None]:
DatasetCatalog.clear()
MetadataCatalog.clear()

# Declarar el modelo
model = PoolDetectionModel(num_classes=1)

# Declarar el optimizador
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Registrar instancia de COCO
register_coco_instances("piscinas_entrenamiento", {}, "/content/drive/MyDrive/TFG_999/annotations.json", "/content/drive/MyDrive/TFG_999/images")

# Cargar la instancia de COCO como un objeto Dataset
dataset_name = "piscinas_entrenamiento"
dataset = DatasetCatalog.get(dataset_name)

# Obtener metadatos del dataset
metadata = MetadataCatalog.get(dataset_name)

# Declarar el DataLoader
data_loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

# Declarar el dispositivo
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Mover el modelo y el DataLoader al dispositivo
model.to(device)
train(model, optimizer, data_loader, device, epochs = 1)

In [None]:
torch.save(model, "/content/drive/MyDrive/TFG_999/model.pt")