In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
sys.path.append("../")

from src.constants import *
from src.training_utils.dataset import *
from src.training_utils.training import train_model, get_model_instance_segmentation

In [3]:
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image

import torchvision
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [4]:
# Locations of the annotation CSVs for the two splits.
TRAIN_CSV = "/home/nacho/TFI-Cazcarra/data/csv/augmented_train_diagramas.csv"
TEST_CSV = "/home/nacho/TFI-Cazcarra/data/csv/augmented_test_diagramas.csv"

# Column names assigned to the training annotations after loading.
ANNOTATION_COLUMNS = ['image_path', 'xmin', 'ymin', 'xmax', 'ymax', 'label']

# The training CSV is read without a header row, so its columns are named explicitly.
train_df = pd.read_csv(TRAIN_CSV, header=None)
train_df.columns = ANNOTATION_COLUMNS

# The test CSV is read with pandas' default header handling (first row = column names).
# NOTE(review): this differs from the training CSV above — confirm the test file
# really starts with a header row that includes a 'label' column.
test_df = pd.read_csv(TEST_CSV)

In [5]:
# Build the label encoder: a dict from class name to integer id (see the cell output).
le_dict = get_encoder_dict(CLASSES_CSV)
# Bare last expression so the notebook displays the mapping.
le_dict

{'tabla': 3,
 'muchos_opcional': 2,
 'muchos_obligatorio': 1,
 'uno_opcional': 5,
 'uno_obligatorio': 4}

In [6]:
# Add the integer class id next to every textual label in both splits.
# A label that is missing from le_dict raises KeyError.
for annotations in (train_df, test_df):
    annotations['label_transformed'] = annotations['label'].apply(
        lambda class_name: le_dict[class_name]
    )

In [7]:
def get_custom_transform(train):
    """Build the image transform pipeline handed to the dataset.

    Args:
        train: Whether the pipeline is meant for the training split. Kept so
            existing callers keep working; it currently does not change the
            returned pipeline (see the note in the body).

    Returns:
        A ``torchvision.transforms.Compose`` that converts the input image to
        a tensor with ``ToTensor``.
    """
    # The classes in torchvision.transforms receive the image only. A
    # RandomHorizontalFlip / RandomVerticalFlip placed here would mirror the
    # pixels while the bounding boxes in the target keep their original
    # coordinates, leaving the training boxes misaligned with the objects.
    # The random flips were therefore removed from this pipeline.
    # NOTE(review): assumes the dataset calls `transforms(image)` without the
    # target — confirm in PennFudanDataset. To bring flip augmentation back,
    # use a box-aware transform that updates image and boxes together.
    transforms = [T.ToTensor()]
    return T.Compose(transforms)

In [8]:
# train on the GPU or on the CPU, if a GPU is not available
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One class per encoded label plus one extra.
# NOTE(review): the extra slot is presumably the background class (the encoded
# ids shown above start at 1) — confirm against the model builder.
num_classes = 1 + len(le_dict)

# Training split: built with the train=True transform pipeline.
dataset = PennFudanDataset(
    csv=train_df,
    images_dir=IMAGES_DIR,
    transforms=get_custom_transform(train=True),
)
# Test split: built with the train=False transform pipeline.
dataset_test = PennFudanDataset(
    csv=test_df,
    images_dir=IMAGES_DIR,
    transforms=get_custom_transform(train=False),
)

In [9]:
# Training loader: batches of two samples, shuffling enabled.
data_loader = get_dataloader(dataset, shuffle=True, batch_size=2)
# Test loader: one sample per batch, shuffling disabled.
data_loader_test = get_dataloader(dataset_test, shuffle=False, batch_size=1)

## Training model

In [10]:
# Gate for the training cell below: training only runs when this is truthy.
train = True
# Number of epochs passed to train_model as num_epochs.
epochs = 100

In [11]:
# Instantiate the detector with model_type="retinanet" and num_classes output classes.
model = get_model_instance_segmentation(num_classes=num_classes, model_type="retinanet")
# Move the model to the selected device; as the cell's last expression this
# also echoes the module structure in the output.
model.to(device)



Instancing model retinanet. Trainable parameters: 33792599


RetinaNet(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256, eps=0.0)


In [12]:
# Collect only the parameters that have gradients enabled; these are handed to train_model.
params = list(filter(lambda tensor: tensor.requires_grad, model.parameters()))
# override_path = f"{PATH}/data/models/model_best_test.pt"

In [None]:
# Training is skipped entirely unless the `train` flag is set.
if train:
    train_model(
        model=model,
        data_loader=data_loader,
        data_loader_test=data_loader_test,
        num_epochs=epochs,
        device=device,
        params=params,
    )

Epoch: [0]  [ 0/14]  eta: 0:04:25  lr: 0.000389  loss: 2.5516 (2.5516)  classification: 1.7330 (1.7330)  bbox_regression: 0.8186 (0.8186)  time: 18.9361  data: 0.0577
Epoch: [0]  [10/14]  eta: 0:01:15  lr: 0.004232  loss: 1.6950 (1.9680)  classification: 0.9899 (1.2402)  bbox_regression: 0.7334 (0.7277)  time: 18.7758  data: 0.0713
Epoch: [0]  [13/14]  eta: 0:00:18  lr: 0.005000  loss: 1.6222 (1.8724)  classification: 0.9772 (1.1840)  bbox_regression: 0.6451 (0.6885)  time: 18.4050  data: 0.0720
Epoch: [0] Total time: 0:04:17 (18.4054 s / it)
creating index...
index created!
Test:  [0/6]  eta: 0:00:37  model_time: 6.2376 (6.2376)  evaluator_time: 0.0231 (0.0231)  time: 6.2822  data: 0.0216
Test:  [5/6]  eta: 0:00:06  model_time: 6.2376 (6.1263)  evaluator_time: 0.0168 (0.0191)  time: 6.1641  data: 0.0187
Test: Total time: 0:00:36 (6.1642 s / it)
Averaged stats: model_time: 6.2376 (6.1263)  evaluator_time: 0.0168 (0.0191)
Accumulating evaluation results...
DONE (t=0.02s).
IoU metric: bb

Modelo guardado en /home/nacho/TFI-Cazcarra/data/models/best_model_retinanet.pt
Epoch: [4]  [ 0/14]  eta: 0:04:27  lr: 0.005000  loss: 1.3308 (1.3308)  classification: 0.7548 (0.7548)  bbox_regression: 0.5759 (0.5759)  time: 19.1151  data: 0.0757
Epoch: [4]  [10/14]  eta: 0:01:17  lr: 0.005000  loss: 1.1997 (1.0494)  classification: 0.6639 (0.6001)  bbox_regression: 0.4659 (0.4493)  time: 19.4094  data: 0.0874
Epoch: [4]  [13/14]  eta: 0:00:19  lr: 0.005000  loss: 1.1108 (1.0286)  classification: 0.6327 (0.5871)  bbox_regression: 0.4659 (0.4415)  time: 19.4526  data: 0.0809
Epoch: [4] Total time: 0:04:32 (19.4535 s / it)
creating index...
index created!
Test:  [0/6]  eta: 0:00:37  model_time: 6.2453 (6.2453)  evaluator_time: 0.0396 (0.0396)  time: 6.3063  data: 0.0214
Test:  [5/6]  eta: 0:00:06  model_time: 6.2453 (6.0490)  evaluator_time: 0.0154 (0.0202)  time: 6.0921  data: 0.0228
Test: Total time: 0:00:36 (6.0923 s / it)
Averaged stats: model_time: 6.2453 (6.0490)  evaluator_time: 0

Epoch: [8]  [13/14]  eta: 0:00:18  lr: 0.000500  loss: 1.0213 (0.9750)  classification: 0.5835 (0.5504)  bbox_regression: 0.4259 (0.4246)  time: 18.6902  data: 0.0789
Epoch: [8] Total time: 0:04:21 (18.6923 s / it)
creating index...
index created!
Test:  [0/6]  eta: 0:00:37  model_time: 6.2161 (6.2161)  evaluator_time: 0.0227 (0.0227)  time: 6.2621  data: 0.0233
Test:  [5/6]  eta: 0:00:06  model_time: 6.2161 (6.1017)  evaluator_time: 0.0137 (0.0172)  time: 6.1380  data: 0.0191
Test: Total time: 0:00:36 (6.1382 s / it)
Averaged stats: model_time: 6.2161 (6.1017)  evaluator_time: 0.0137 (0.0172)
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.361
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.554
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.322
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.240
 Average Pr

## Save model
Reference: [Saving and loading models (PyTorch tutorial)](https://pytorch.org/tutorials/beginner/saving_loading_models.html)

In [None]:
# Disabled cell: manual save of the final checkpoint, kept for reference only.
# NOTE(review): the call below passes `path_to_save`, `epoch` and `loss_value`,
# none of which are assigned in the visible cells; it presumably should pass
# PATH_TO_SAVE_MODEL. Fix before re-enabling. The loading cell further down
# expects a file named model_<model_name>_final.pt, which is the name this
# cell would produce.
# model_name = model.__class__.__name__.lower()
# PATH_TO_SAVE_MODEL = f"{PATH}/data/models/model_{model_name}_final.pt"

# save_model(path_to_save, model, epoch, loss_value)

## Testing

In [None]:
import cv2
import PIL
from IPython.display import display

def get_class_name(num_label, le_dict):
    """Return the class name whose encoded id equals ``num_label``.

    Args:
        num_label: Encoded id to look up.
        le_dict: Mapping from class name to encoded id.

    Returns:
        The class name stored under ``num_label``. If several names share the
        same id, the one that appears last in ``le_dict`` wins.

    Raises:
        KeyError: If no entry of ``le_dict`` has the id ``num_label``.
    """
    # Invert the mapping: ids become keys, names become values.
    id_to_name = dict(zip(le_dict.values(), le_dict.keys()))
    return id_to_name[num_label]

def draw_bbox(img, xmin, ymin, xmax, ymax, score, label):
    """Draw one captioned detection box on ``img`` with OpenCV.

    The caption is the class name for ``label`` (looked up in the notebook's
    global ``le_dict``), a space, and ``score``. It is written at the box's
    top-left corner; the rectangle spans (xmin, ymin) to (xmax, ymax).

    Args:
        img: Image array accepted by ``cv2.putText`` / ``cv2.rectangle``.
        xmin, ymin, xmax, ymax: Box corners; each is truncated with ``int``.
        score: Value appended to the caption via ``str``.
        label: Encoded class id of the detection.

    Returns:
        The value returned by ``cv2.rectangle`` after both drawing calls.
    """
    caption = ' '.join((get_class_name(label, le_dict), str(score)))
    top_left = (int(xmin), int(ymin))
    img = cv2.putText(img, caption, top_left,
                      cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,255), 1)

    bottom_right = (int(xmax), int(ymax))
    return cv2.rectangle(img, (int(xmin), int(ymin)), bottom_right,
                         (255,0,0), 1)

In [None]:
# Switch the model to evaluation mode before running inference on the test images.
model.eval()

In [None]:
# Show every test image with the boxes the model predicts on it.
SCORE_THRESHOLD = 0.5  # detections scoring below this are not drawn
to_pil = T.ToPILImage()  # built once instead of on every iteration
# Feed inputs on whatever device the model currently lives on.
model_device = next(model.parameters()).device

# Inference only: no_grad avoids building the autograd graph and keeps memory low.
with torch.no_grad():
    for i in range(len(dataset_test)):
        tensor_image = dataset_test[i][0]
        # Convert to a numpy array up front so OpenCV can draw on it and
        # Image.fromarray always receives an array, even with zero detections.
        image = np.array(to_pil(tensor_image))
        predictions = model([tensor_image.to(model_device)])
        for prediction in predictions:
            for box, score, label in zip(prediction['boxes'], prediction['scores'], prediction['labels']):
                score = round(score.item(), 3)
                # Skip (rather than stop at) weak detections so the result
                # does not depend on the detections being sorted by score.
                if score < SCORE_THRESHOLD:
                    continue
                xmin, ymin, xmax, ymax = box.tolist()
                print(xmin, ymin, xmax, ymax)
                image = draw_bbox(image, xmin, ymin, xmax, ymax, score, label.item())
            display(Image.fromarray(image))

## Load the two final models & calculate AP for them
- https://torchmetrics.readthedocs.io/en/stable/classification/average_precision.html
- https://torchmetrics.readthedocs.io/en/stable/retrieval/map.html

In [None]:
# Rebuild the architecture, then restore the weights of the saved final checkpoint.
model = get_model_instance_segmentation(len(le_dict)+1, "retinanet")
model_name = model.__class__.__name__.lower()
PATH_TO_LOAD_MODEL = f"/home/nacho/TFI-Cazcarra/data/models/model_{model_name}_final.pt"

# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on
# a machine without CUDA. The freshly built model is on the CPU as well, so
# the weights end up where the rest of this notebook expects them.
model_obj = torch.load(PATH_TO_LOAD_MODEL, map_location="cpu")
# The checkpoint is a dict; the weights are stored under 'model_state_dict'.
model.load_state_dict(model_obj['model_state_dict'])

In [None]:
# Put the reloaded model in evaluation mode before computing metrics.
model.eval()

In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from pprint import pprint

In [None]:
# Running the test set as a batch kills the kernel, so images are evaluated one by one.
predictions = []
targets = []
# Inference only: no_grad stops autograd from keeping intermediate activations
# alive for every stored prediction.
with torch.no_grad():
    for i in range(len(dataset_test)):
        # Fetch the sample once; element 0 is the image, element 1 its target.
        sample = dataset_test[i]
        # The model returns a list (one entry per input image); it is stored
        # as-is and unwrapped in the next cell.
        predictions.append(model([sample[0]]))
        targets.append(sample[1])

In [None]:
# Each stored prediction is a one-element list (one image per model call);
# unwrap it to the per-image dict. The isinstance guard makes the cell safe to
# re-run: entries that are already unwrapped are left untouched.
predictions = [p[0] if isinstance(p, list) else p for p in predictions]

In [None]:
# Mean average precision over the collected per-image predictions and targets.
# NOTE(review): some torchmetrics releases may require `max_detection_thresholds`
# to contain three values — confirm against the installed version.
metric = MeanAveragePrecision(
    box_format="xyxy",
    iou_type="bbox",
    max_detection_thresholds=[100],
    class_metrics=False,
)
metric.update(predictions, targets)
map_results = metric.compute()
pprint(map_results)