## Made with guiding "Finetuning from a pretrained model"
https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html#finetuning-from-a-pretrained-model

In [1]:
import sys
sys.path.append("../")

from src.detection.engine import train_one_epoch, evaluate

In [2]:
import os
import numpy as np
import pandas as pd
import random
import torch
from PIL import Image
from typing import Sequence
import torch
import torchvision
from torchvision.models.detection import RetinaNet
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
from torchvision.transforms import ToTensor, RandomRotation, functional, Compose
from torchvision.ops.feature_pyramid_network import LastLevelP6P7

import logging

In [3]:
PATH = 'C:\\Users\\Diego\\Desktop\\TFI-Cazcarra'
ELEMENT_TO_TRAIN = "diagramas"

IMAGES_DIR = f"{PATH}\\data\\imagenes_diagramas"
CLASSES_CSV = f"{PATH}/data/csv/classes_{ELEMENT_TO_TRAIN}.csv"
TRAIN_CSV = f"{PATH}/data/csv/train_{ELEMENT_TO_TRAIN}.csv"
TEST_CSV = f"{PATH}/data/csv/test_{ELEMENT_TO_TRAIN}.csv"
VAL_CSV = f"{PATH}/data/csv/val_{ELEMENT_TO_TRAIN}.csv"

In [4]:
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)
val_df = pd.read_csv(VAL_CSV)

In [5]:
from sklearn.preprocessing import LabelEncoder

classes = pd.read_csv(CLASSES_CSV)
le = LabelEncoder()
le.fit(classes.nombre)

le_num_arr = le.transform(classes.nombre) + 1 #Empezamos por 1 para dejarle el 0 a la 'background class'
le_label_arr = classes.nombre.values

le_dict = {k:v for k,v in zip(le_label_arr,le_num_arr)}

In [6]:
class DiagramasDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, csv_path, le_dict, transform=None, target_transform=None):
        self.images_dir = images_dir
        self.csv = pd.read_csv(csv_path)
        self.images = [i for i in os.listdir(self.images_dir) \
                       if os.path.join(self.images_dir, i) in self.csv['image_path'].unique()]
        self.le_dict = le_dict
        self.transform = transform
        self.target_transform = target_transform
                
    def __getitem__(self, idx):
        image_path = self.images[idx]
        full_image_path = os.path.join(self.images_dir, image_path)
        df = self.csv
        filtered_df = df[df['image_path']==full_image_path]
        number_of_objects = filtered_df.shape[0]
        
        image = Image.open(full_image_path).convert("RGB")
        
        boxes = []
        for xmin, ymin, xmax, ymax in zip(filtered_df['xmin'], filtered_df['ymin'], filtered_df['xmax'], filtered_df['ymax']):
            boxes.append([xmin, ymin, xmax, ymax])       
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        labels = [self.le_dict[label] for label in filtered_df['label'].values]
#         area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = torch.as_tensor(labels, dtype=torch.int64)
#         target['image_id'] = torch.tensor([idx])
#         target['area'] = area
#         target['iscrowd'] = torch.zeros((number_of_objects,), dtype=torch.int64)

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            target = self.target_transform(target)
        
        return image, target
                
    def __len__(self):
        return len(self.images)

In [7]:
class CustomRotateTransform:
    def __init__(self, angles: Sequence[int]):
        self.angles = angles

    def __call__(self, x):
        angle = random.choice(self.angles)
        return functional.rotate(x, int(angle))

In [8]:
def transform_compose(train):
    transforms = []
    transforms.append(ToTensor())
    if train:
        angles = np.arange(0,360,90)
        transforms.append(CustomRotateTransform(angles=angles))
    return Compose(transforms)

In [9]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [10]:
train_dataset = DiagramasDataset(images_dir=IMAGES_DIR, csv_path=TRAIN_CSV, le_dict=le_dict, \
                                 transform=transform_compose(train=True))

BATCH_SIZE = 16
SHUFFLE = True

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, \
                                          shuffle=SHUFFLE, num_workers=0,\
                                          collate_fn=collate_fn)

In [11]:
test_dataset = DiagramasDataset(images_dir=IMAGES_DIR, csv_path=TEST_CSV, le_dict=le_dict, \
                                 transform=transform_compose(train=False))

BATCH_SIZE_TEST = 32

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE_TEST, \
                                          shuffle=SHUFFLE, num_workers=0,\
                                          collate_fn=collate_fn)

In [12]:
backbone = torchvision.models.resnet50(pretrained=True)

trainable_backbone_layers = None

trainable_backbone_layers = _validate_trainable_layers(
        True, trainable_backbone_layers, 5, 3
    )

backbone = _resnet_fpn_extractor(
        backbone, trainable_backbone_layers, returned_layers=[2, 3, 4], extra_blocks=LastLevelP6P7(256, 256)
    )

In [13]:
#Cuando se usa RPN, size y aspect_ratios no tienen que estar "double-nested" -> ((x,y,z,w),)
anchor_generator = AnchorGenerator(sizes=(32, 64, 128, 256, 512),\
                                   aspect_ratios=(0.5, 1.0, 2.0)
                                  )

In [14]:
minimum_size = 800
for img in os.listdir(IMAGES_DIR):
    image = np.asarray(Image.open(os.path.join(IMAGES_DIR, img)))
    minimum_size = min(minimum_size, image.shape[0])
    minimum_size = min(minimum_size, image.shape[1])

In [15]:
# Segun la documentacion, 'num_classes (int): number of output classes of the model (including the background).'
model = RetinaNet(backbone, \
                  num_classes=len(classes)+1, \
                  anchor_generator=anchor_generator,  \
                  min_size=minimum_size)

In [16]:
LEARNING_RATE = 1e-4
EPOCHS = 40
DEVICE = 'cpu'
PLATEAU_PATIENCE = 5

In [17]:
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=PLATEAU_PATIENCE)

In [18]:
for epoch in range(1, EPOCHS+1):
    _, losses_reduced = train_one_epoch(model, optimizer, train_dataloader, DEVICE, epoch, print_freq=10)
    lr_scheduler.step(losses_reduced)
    evaluate(model, test_dataloader, device=DEVICE)



Epoch: [1]  [0/2]  eta: 0:00:35  lr: 0.000100  loss: 3.0830 (3.0830)  classification: 1.1280 (1.1280)  bbox_regression: 1.9550 (1.9550)  time: 17.5942  data: 1.6146
Epoch: [1]  [1/2]  eta: 0:00:16  lr: 0.000100  loss: 3.0221 (3.0525)  classification: 1.1280 (1.1281)  bbox_regression: 1.8939 (1.9245)  time: 16.5487  data: 1.1576
Epoch: [1] Total time: 0:00:33 (16.5487 s / it)


KeyError: 'image_id'

In [None]:
# for epoch in range(1, EPOCHS+1):
#     model.train()
#     header = f"Epoch: [{epoch}]"
#     for num, (images, targets) in enumerate(train_dataloader):
#         images = list(image.to(DEVICE) for image in images)
#         targets = [{k: v.to(DEVICE) for k, v in list(t.items())} for t in targets]
        
#         loss_dict = model(images, targets)
#         losses = sum(loss for loss in list(loss_dict.values()))
        
#         optimizer.zero_grad()
#         losses.backward()
#         optimizer.step()

#         lr_scheduler.step(losses)
#         if num % 10 == 0:
#             print(header, f" [] Focal Loss: {losses}")
#         num+=1