In [1]:
import os
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.datasets import CocoDetection
from torch.utils.data import DataLoader
from torch import optim
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from pycocotools.coco import COCO
import zipfile
import urllib.request
import torchvision.transforms as transforms

os.environ['KMP_DUPLICATE_LIB_OK']='True'

torch.cuda.empty_cache()

In [67]:
# Carga las anotaciones de COCO
coco = COCO('COCO/annotations/instances_train2017.json')

# Obtén todas las categorías
cats = coco.loadCats(coco.getCatIds())

# Imprime los nombres de todas las categorías
for i, cat in enumerate(cats):
    print(i, cat['name'])

loading annotations into memory...
Done (t=11.87s)
creating index...
index created!
0 person
1 bicycle
2 car
3 motorcycle
4 airplane
5 bus
6 train
7 truck
8 boat
9 traffic light
10 fire hydrant
11 stop sign
12 parking meter
13 bench
14 bird
15 cat
16 dog
17 horse
18 sheep
19 cow
20 elephant
21 bear
22 zebra
23 giraffe
24 backpack
25 umbrella
26 handbag
27 tie
28 suitcase
29 frisbee
30 skis
31 snowboard
32 sports ball
33 kite
34 baseball bat
35 baseball glove
36 skateboard
37 surfboard
38 tennis racket
39 bottle
40 wine glass
41 cup
42 fork
43 knife
44 spoon
45 bowl
46 banana
47 apple
48 sandwich
49 orange
50 broccoli
51 carrot
52 hot dog
53 pizza
54 donut
55 cake
56 chair
57 couch
58 potted plant
59 bed
60 dining table
61 toilet
62 tv
63 laptop
64 mouse
65 remote
66 keyboard
67 cell phone
68 microwave
69 oven
70 toaster
71 sink
72 refrigerator
73 book
74 clock
75 vase
76 scissors
77 teddy bear
78 hair drier
79 toothbrush


In [60]:
class MyTransform:
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, image, target):
        return self.transform(image), target


In [61]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [68]:
def main():
    
    transform = MyTransform(transforms.Compose([
                transforms.Resize((800,800)),
                transforms.ToTensor(), 
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                ]))


    # Load the COCO dataset
    train_dataset = CocoDetection(root='COCO/train2017',
                                  annFile='COCO/annotations/instances_train2017.json', transforms=transform)
    val_dataset = CocoDetection(root='COCO/val2017',
                                annFile='COCO/annotations/instances_val2017.json', transforms=transform)


    # Create data loaders
    train_loader = torch.utils.data.DataLoader(
    train_dataset, 
    batch_size=2, 
    shuffle=False, 
    num_workers=0,
    collate_fn=collate_fn
    )

    val_loader = torch.utils.data.DataLoader(
    val_dataset, 
    batch_size=1, 
    shuffle=False, 
    num_workers=0,
    collate_fn=collate_fn
    )

    # Define the model
    model = fasterrcnn_resnet50_fpn(pretrained=True)

    # Move model to gpu if available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # Set the optimizer and the loss function
    optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
    criterion = nn.MSELoss()

    # Checkpoint saving path
    ckpt_path = 'checkpoints/model_ckpt.pt'
    start_epoch = 0
    # Load the checkpoint if it exists
    if os.path.exists(ckpt_path):
        checkpoint = torch.load(ckpt_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        print(f'Loaded checkpoint from epoch {start_epoch}')
    else:
        print("No checkpoint found, starting from scratch.")

    # Entrenar la red y guardar las pérdidas para la visualización
    train_losses = []
    val_losses = []
    val_accuracies = []
    best_val_loss = float("inf")
    epochs_no_improve = 0
    n_epochs_stop = 15

    for epoch in range(start_epoch, 20000):
        running_loss = 0.0
        for i, (images, targets) in enumerate(train_loader):
            images = list(image.to(device) for image in images)
            for t in targets:
                print(t)
            targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]

            # Forward pass
            output = model(images)

            # Calculate the loss
            loss = criterion(output, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 100 == 99 or i == len(train_loader) - 1:  # Asume que la indexación comienza en 0
                avg_loss = running_loss / (i+1)
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, avg_loss))
                train_losses.append(avg_loss)
                writer.add_scalar('training loss', avg_loss, epoch * len(train_loader) + i)
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for images, targets in val_loader:
                images = list(image.to(device) for image in images)
                targets = [{k: torch.tensor(v).to(device) for k, v in t.items()} for t in targets]

                outputs = model(images)
                loss = criterion(outputs, targets)
                val_loss += loss.item()

                # Calculate validation accuracy
                # This may not be the right way to calculate accuracy for object detection
                # as it's more complicated than classification
                predicted_boxes = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
                target_boxes = [{k: v.to('cpu') for k, v in t.items()} for t in targets]
                total += len(target_boxes)
                for predicted, target in zip(predicted_boxes, target_boxes):
                    if predicted['boxes'].equal(target['boxes']):
                        correct += 1

        avg_val_loss = val_loss / total
        val_accuracy = correct / total
        print(f'Validation loss: {avg_val_loss:.3f}, Validation accuracy: {val_accuracy:.3f}')
        val_losses.append(avg_val_loss)
        val_accuracies.append(val_accuracy)

        writer.add_scalar('validation loss', avg_val_loss, epoch)
        writer.add_scalar('validation accuracy', val_accuracy, epoch)

        if epoch % 10 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
            }, f'checkpoint{epoch}.pth')
            print(f'Saved checkpoint at epoch {epoch}')

        # Save the last checkpoint
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
        }, 'latest_checkpoint.pth')

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve == n_epochs_stop:
                print('Early stopping!')
                model.load_state_dict(torch.load('best_model.pth'))
                break

        model.train()

    print('Finished Training')

    # Plotting the training and validation loss
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # Plotting the validation accuracy
    plt.figure(figsize=(10, 5))
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    writer.close()
     
        
if __name__ == "__main__":
    main()

loading annotations into memory...
Done (t=10.20s)
creating index...
index created!
loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
No checkpoint found, starting from scratch.
[{'segmentation': [[500.49, 473.53, 599.73, 419.6, 612.67, 375.37, 608.36, 354.88, 528.54, 269.66, 457.35, 201.71, 420.67, 187.69, 389.39, 192.0, 19.42, 360.27, 1.08, 389.39, 2.16, 427.15, 20.49, 473.53]], 'area': 120057.13925, 'iscrowd': 0, 'image_id': 9, 'bbox': [1.08, 187.69, 611.59, 285.84], 'category_id': 51, 'id': 1038967}, {'segmentation': [[357.03, 69.03, 311.73, 15.1, 550.11, 4.31, 631.01, 62.56, 629.93, 88.45, 595.42, 185.53, 513.44, 230.83, 488.63, 232.99, 437.93, 190.92, 429.3, 189.84, 434.7, 148.85, 410.97, 121.89, 359.19, 74.43, 358.11, 65.8]], 'area': 44434.751099999994, 'iscrowd': 0, 'image_id': 9, 'bbox': [311.73, 4.31, 319.28, 228.68], 'category_id': 51, 'id': 1039564}, {'segmentation': [[249.6, 348.99, 267.67, 311.72, 291.39, 294.78, 304.94, 294.78, 326.4, 283

AttributeError: 'list' object has no attribute 'items'