In [1]:
import torch
import torch.nn as nn 
import torchvision as tv
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import cv2 
import os 
import torch.utils.data
from tqdm import tqdm
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import albumentations as A 
from albumentations.pytorch import ToTensorV2
import pycocotools
from random import shuffle
import csv
from collections import defaultdict
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device('cpu')
from torchmetrics.utilities.imports import _TORCHVISION_GREATER_EQUAL_0_8
# When True, later cells restore model and optimizer state from 'retina2.pth'.
with_checkpoint = True

In [2]:
def get_model(num_classes):
    """Build a torchvision RetinaNet (ResNet-50 FPN v2) detector.

    The backbone is initialised from torchvision's default ResNet-50 weights
    and `trainable_backbone_layers=5` is requested.

    Args:
        num_classes: number of classes passed to the detector constructor.

    Returns:
        The constructed detection model.
    """
    return tv.models.detection.retinanet_resnet50_fpn_v2(
        weights_backbone=tv.models.ResNet50_Weights.DEFAULT,
        trainable_backbone_layers=5,
        num_classes=num_classes,
    )

In [3]:
# Starting epoch used when no checkpoint is loaded.
checkpoint_epoch = 1
# Detector built with num_classes=2.
model = get_model(num_classes=2)
if with_checkpoint:
    # Resume: tensors are mapped onto the active device while loading.
    checkpoint = torch.load('retina2.pth', map_location=device)
    checkpoint_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint["model_state_dict"])
model = model.to(device)

In [4]:
def print_model_size(model):
    """Print the combined size of a model's parameters and buffers in MB.

    Args:
        model: object exposing `parameters()` and `buffers()` iterables of
            tensors (e.g. a `torch.nn.Module`).
    """
    def _num_bytes(tensors):
        # element count times bytes-per-element, summed over all tensors
        return sum(t.nelement() * t.element_size() for t in tensors)

    total_bytes = _num_bytes(model.parameters()) + _num_bytes(model.buffers())
    print('model size: {:.3f}MB'.format(total_bytes / 1024**2))

In [5]:
print_model_size(model)

model size: 138.877MB


In [6]:
def get_train_transforms():
    """Return the Albumentations pipeline used for training samples.

    The pipeline applies a random horizontal flip, three light blur variants,
    grayscale conversion and brightness/contrast/colour/gamma jitter (each
    with its own probability) and finishes with `ToTensorV2`. Bounding boxes
    are declared in 'pascal_voc' format with their classes in the 'labels'
    field.
    """
    augmentations = [
        A.HorizontalFlip(p=0.5),
        A.Blur(blur_limit=3, p=0.1),
        A.MotionBlur(blur_limit=3, p=0.1),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.ToGray(p=0.3),
        A.RandomBrightnessContrast(p=0.3),
        A.ColorJitter(p=0.3),
        A.RandomGamma(p=0.3),
        ToTensorV2(p=1.0),
    ]
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(augmentations, bbox_params=bbox_settings)
def get_test_transforms():
    """Return the evaluation pipeline: only `ToTensorV2`, no augmentation.

    Bounding boxes are declared in 'pascal_voc' format with their classes in
    the 'labels' field.
    """
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose([ToTensorV2(p=1)], bbox_params=bbox_settings)


In [7]:
class FacesDataset(torch.utils.data.Dataset):
    """Detection dataset pairing image files with per-image text label files.

    Every image is read with OpenCV, converted to RGB, scaled to float32 in
    [0, 1] and resized to a fixed `(width, height)`. Boxes are read from
    `<meta>/<image stem>.txt`: each non-blank line is split on single spaces,
    the first two fields are ignored and the next four are taken as
    `xmin ymin xmax ymax` in the pixel coordinates of the original image.
    They are rescaled to the resized image. Every box gets label 1.
    """

    def __init__(self, dir, dir_list, meta, size, transforms=None):
        """
        Args:
            dir: directory containing the image files.
            dir_list: image file names (relative to `dir`) forming the dataset.
            meta: directory containing one `<image stem>.txt` label file per image.
            size: `(width, height)` every image is resized to.
            transforms: optional callable invoked as
                `transforms(image=..., bboxes=..., labels=...)` that returns a
                mapping with the keys 'image', 'bboxes' and 'labels'.
        """
        super().__init__()
        self.transforms = transforms
        self.dir = dir
        self.width, self.height = size
        self.dir_list = dir_list
        self.meta = meta

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.dir_list)

    @staticmethod
    def _as_boxes(raw):
        """Convert `raw` to a float32 tensor, using shape (0, 4) when it is empty."""
        boxes = torch.as_tensor(raw, dtype=torch.float32)
        if boxes.numel() == 0:
            return boxes.new_zeros((0, 4))
        return boxes

    @staticmethod
    def _drop_all_if_nan(boxes, labels):
        """Discard the whole annotation (boxes AND labels) if any coordinate is NaN."""
        if torch.isnan(boxes).any():
            return boxes.new_zeros((0, 4)), labels.new_zeros((0,))
        return boxes, labels

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            `(image, target)` where `target` is a dict with
            'boxes' (float32, shape (N, 4)) and 'labels' (int64, shape (N,)).
            Without `transforms` the image is the resized float32 array as
            produced by OpenCV; with `transforms` it is whatever the
            transform returns under 'image'.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_file = self.dir_list[index]
        img_path = os.path.join(self.dir, img_file)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'could not read image: {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_height, img_width = img.shape[:2]
        img = img.astype(np.float32)/255.
        img = cv2.resize(img, (self.width, self.height), interpolation=cv2.INTER_AREA)
        t_img = img

        rows = []
        label_path = os.path.join(self.meta, os.path.splitext(img_file)[0]+'.txt')
        with open(label_path, 'r') as f:
            for meta_line in f:
                if not meta_line.strip():
                    continue  # tolerate blank / trailing empty lines
                face = meta_line.split(" ")[2:]
                # Rescale from original-image pixels to resized-image pixels.
                rows.append([
                    float(face[0]) / img_width * self.width,
                    float(face[1]) / img_height * self.height,
                    float(face[2]) / img_width * self.width,
                    float(face[3]) / img_height * self.height,
                ])

        boxes = self._as_boxes(rows)
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)
        boxes, labels = self._drop_all_if_nan(boxes, labels)
        # Keep every coordinate inside the resized image.
        boxes[:, 0::2] = boxes[:, 0::2].clamp(min=0, max=self.width)
        boxes[:, 1::2] = boxes[:, 1::2].clamp(min=0, max=self.height)

        if self.transforms:
            sample = self.transforms(image=img, bboxes=boxes, labels=labels)
            t_img = sample['image']
            boxes = self._as_boxes(sample['bboxes'])
            # int64 is the label dtype torchvision detection models expect.
            labels = torch.as_tensor(sample['labels'], dtype=torch.int64)
            boxes, labels = self._drop_all_if_nan(boxes, labels)

        target = {'boxes': boxes, 'labels': labels}
        return t_img, target
        

In [8]:
def create_datasets(train_dir,val_dir, meta, size, train_transforms=None, test_transforms=None):
    """Create the training and validation `FacesDataset` objects.

    Only files whose extension is exactly '.jpg' or '.png' are picked up from
    each image directory; both datasets share the same label directory.

    Args:
        train_dir: directory with the training images.
        val_dir: directory with the validation images.
        meta: directory with the label files, passed to both datasets.
        size: `(width, height)` passed to both datasets.
        train_transforms: transforms for the training dataset.
        test_transforms: transforms for the validation dataset.

    Returns:
        `(train_dataset, val_dataset)`.
    """
    image_exts = ['.jpg', '.png']

    def _image_files(folder):
        return [name for name in os.listdir(folder)
                if os.path.splitext(name)[1] in image_exts]

    train_files = _image_files(train_dir)
    val_files = _image_files(val_dir)
    train_set = FacesDataset(train_dir, train_files, meta, size, train_transforms)
    val_set = FacesDataset(val_dir, val_files, meta, size, test_transforms)
    return train_set, val_set
    
    
    

In [9]:
# Build both splits; every image is resized to 380x380.
train_data, test_data = create_datasets(
    train_dir='faces2/images/train',
    val_dir='faces2/images/val',
    meta='faces2/labels2',
    size=(380, 380),
    train_transforms=get_train_transforms(),
    test_transforms=get_test_transforms(),
)
# Report the number of samples in each split, one per line.
print(len(train_data), len(test_data), sep='\n')


13386
3347


  self._set_keys()


In [10]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A sequence of `(image, target)` pairs becomes
    `((image, ...), (target, ...))`; no stacking is performed.
    """
    return (*zip(*batch),)

In [11]:
batch_size = 2
# Settings shared by both loaders; only shuffling differs between train and validation.
_loader_options = dict(
    batch_size=batch_size,
    num_workers=1,
    drop_last=False,
    collate_fn=collate_fn,
)
train_data_loader = torch.utils.data.DataLoader(train_data, shuffle=True, **_loader_options)
test_data_loader = torch.utils.data.DataLoader(test_data, shuffle=False, **_loader_options)

In [12]:
# AdamW with its default hyperparameters over every model parameter.
optimizer = torch.optim.AdamW(model.parameters())
if with_checkpoint:
    # The key is spelled 'optimazer_state_dict' in the checkpoint file; it must
    # match the key used by the cell that writes 'retina2.pth'.
    optimizer.load_state_dict(checkpoint['optimazer_state_dict'])
# mode='max' because the scheduler is stepped with validation mAP (higher is better).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3)

In [13]:
class Average:
    """Running arithmetic mean of the values passed to `send`."""

    def __init__(self):
        # Start from the same state `reset` produces.
        self.reset()

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations so far; 0 when nothing has been sent."""
        if self.iterations != 0:
            return 1. * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget all observations."""
        self.current_total = 0.
        self.iterations = 0.

In [14]:
def train_one_epoch(data_loader, model):
    """Run one optimisation pass over `data_loader`.

    Uses the notebook-level `optimizer`, `device` and `train_loss_hist`
    objects: every batch loss is sent to `train_loss_hist`, and `optimizer`
    is stepped once per batch.

    Args:
        data_loader: iterable of `(images, targets)` batches that supports `len()`.
        model: detection model that returns a dict of losses when called with
            `(images, targets)` in training mode.

    Returns:
        The summed loss of the last processed batch as a Python float, or NaN
        when `data_loader` yields no batches.
    """
    model.train()
    # Defined up front so an empty loader returns NaN instead of raising
    # UnboundLocalError at the final `return`.
    loss_value = float('nan')
    prog_bar = tqdm(data_loader, total=len(data_loader))
    for images, targets in prog_bar:
        optimizer.zero_grad()
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        images = [image.to(device) for image in images]
        loss_dict = model(images, targets)
        # Total loss is the sum of every loss term the model reports.
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        train_loss_hist.send(loss_value)
        losses.backward()
        optimizer.step()
        prog_bar.set_description(f'loss: {loss_value:.4f}')
    return loss_value
        

In [15]:
def validate(valid_data_loader, model):
    """Evaluate `model` on `valid_data_loader` and compute mean average precision.

    The model is switched to eval mode and run without gradient tracking.
    Predictions and ground truth for every image are moved to the CPU and fed
    to a single `MeanAveragePrecision` metric. Uses the notebook-level `device`.

    Args:
        valid_data_loader: iterable of `(images, targets)` batches that supports `len()`.
        model: detection model returning, per image, a dict with the keys
            'boxes', 'scores' and 'labels'.

    Returns:
        The summary returned by `MeanAveragePrecision.compute()`.
    """
    model.eval()
    preds = []
    targets = []
    for batch_images, batch_targets in tqdm(valid_data_loader, total=len(valid_data_loader)):
        batch_targets = [{k: v.to(device) for k, v in t.items()} for t in batch_targets]
        batch_images = [image.to(device) for image in batch_images]
        with torch.no_grad():
            outputs = model(batch_images, batch_targets)
        for idx in range(len(batch_images)):
            truth = batch_targets[idx]
            output = outputs[idx]
            true_dict = {name: truth[name].detach().cpu() for name in ("boxes", "labels")}
            pred_dict = {name: output[name].detach().cpu() for name in ('labels', 'scores', 'boxes')}
            preds.append(pred_dict)
            targets.append(true_dict)
    metric = MeanAveragePrecision()
    metric.update(preds, targets)
    return metric.compute()
            

In [16]:
class SaveBestModel:
    """Checkpoint the model whenever validation mAP beats the best seen so far.

    The checkpoint is written to 'best_retina2_model.pth' and holds the
    one-based epoch number and the model's state dict.
    """

    def __init__(self, best_valid_map=float(0)):
        self.best_valid_map = best_valid_map

    def __call__(self, model, current_valid_map, epoch):
        """Save `model` if `current_valid_map` is a strict improvement.

        Args:
            model: model whose `state_dict()` is saved.
            current_valid_map: validation mAP of the epoch that just finished.
            epoch: zero-based epoch index; stored and printed as `epoch + 1`.
        """
        improved = current_valid_map > self.best_valid_map
        if not improved:
            return
        self.best_valid_map = current_valid_map
        print(f"\nBEST VALIDATION mAP: {self.best_valid_map}")
        print(f"\nSAVING BEST MODEL FOR EPOCH: {epoch+1}\n")
        payload = {
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
        }
        torch.save(payload, "best_retina2_model.pth")

In [17]:
def save_loss_plot(train_loss_list, x_label='iterations', y_label='train loss', save_name='train_retina2_loss'):
    """Plot the training-loss curve and write it to `<save_name>.png`.

    Figure number 1 is reused and cleared on every call.

    Args:
        train_loss_list: sequence of loss values to plot.
        x_label: label of the x axis.
        y_label: label of the y axis.
        save_name: output file name without the '.png' extension.
    """
    fig = plt.figure(figsize=(10, 7), num=1, clear=True)
    loss_axes = fig.add_subplot()
    loss_axes.plot(train_loss_list, color='tab:blue')
    loss_axes.set_xlabel(x_label)
    loss_axes.set_ylabel(y_label)
    output_path = f"{save_name}.png"
    fig.savefig(output_path)
    print('SAVING PLOTS COMPLETE...')

def save_mAP(map_05, map):
    """Plot both mAP curves against epochs and write 'retina2_map.png'.

    Figure number 1 is reused and cleared on every call.

    Args:
        map_05: sequence of mAP@0.5 values, one per epoch.
        map: sequence of mAP@0.5:0.95 values, one per epoch.
    """
    figure = plt.figure(figsize=(10, 7), num=1, clear=True)
    ax = figure.add_subplot()
    curves = (
        (map_05, 'tab:orange', 'mAP@0.5'),
        (map, 'tab:red', 'mAP@0.5:0.95'),
    )
    for values, colour, legend_name in curves:
        ax.plot(values, color=colour, linestyle='-', label=legend_name)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('mAP')
    ax.legend()
    figure.savefig("retina2_map.png")

In [None]:
# Training driver: train, validate, record metrics, checkpoint the best model,
# refresh the plots and step the LR scheduler once per epoch.
epoch_num = 100
train_loss_hist = Average()
train_loss_list = []
map_list = []
map_50_list = []
save_best_model = SaveBestModel()
# `checkpoint_epoch` is one-based, so the zero-based loop index starts one below it.
for epoch in range(checkpoint_epoch-1, epoch_num+checkpoint_epoch-1):
    print(f'Epoch: {epoch+1}')
    train_loss_hist.reset()
    # The return value is only the loss of the final mini-batch; the epoch mean
    # is accumulated in `train_loss_hist`.
    train_loss = train_one_epoch(train_data_loader, model)
    metric_summary = validate(test_data_loader, model)
    # Convert the metrics to plain floats once, for printing, plotting and scheduling.
    epoch_map = float(metric_summary['map'])
    epoch_map_50 = float(metric_summary['map_50'])
    print(f'Epoch {epoch+1} train loss: {train_loss_hist.value:.3f}')
    print(f'Epoch {epoch+1} mAP@0.50:0.95: {epoch_map}')
    print(f'Epoch {epoch+1} mAP@0.50: {epoch_map_50}')
    # Plot the same epoch-mean loss that is printed above, not the noisy
    # last-batch loss.
    train_loss_list.append(train_loss_hist.value)
    map_50_list.append(epoch_map_50)
    map_list.append(epoch_map)
    save_best_model(model, epoch_map, epoch)
    save_loss_plot(train_loss_list, x_label='epochs')
    save_mAP(map_50_list, map_list)
    scheduler.step(epoch_map)
    print(f'Lr: {scheduler.get_last_lr()}')

Epoch: 23


loss: 0.1780: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:40<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:56<00:00,  5.64it/s]


Epoch 23 train loss: 0.289
Epoch 23 mAP@0.50:0.95: 0.4941801130771637
Epoch 23 mAP@0.50: 0.8004164695739746

BEST VALIDATION mAP: 0.4941801130771637

SAVING BEST MODEL FOR EPOCH: 23

SAVING PLOTS COMPLETE...
Lr: [0.001]
Epoch: 24


loss: 0.1501: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:43<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:56<00:00,  5.64it/s]


Epoch 24 train loss: 0.286
Epoch 24 mAP@0.50:0.95: 0.49880310893058777
Epoch 24 mAP@0.50: 0.8110334277153015

BEST VALIDATION mAP: 0.49880310893058777

SAVING BEST MODEL FOR EPOCH: 24

SAVING PLOTS COMPLETE...
Lr: [0.001]
Epoch: 25


loss: 0.1654: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:40<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:54<00:00,  5.68it/s]


Epoch 25 train loss: 0.290
Epoch 25 mAP@0.50:0.95: 0.48541197180747986
Epoch 25 mAP@0.50: 0.7993677854537964
SAVING PLOTS COMPLETE...
Lr: [0.001]
Epoch: 26


loss: 0.1344: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:44<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:54<00:00,  5.68it/s]


Epoch 26 train loss: 0.285
Epoch 26 mAP@0.50:0.95: 0.48563352227211
Epoch 26 mAP@0.50: 0.7919703722000122
SAVING PLOTS COMPLETE...
Lr: [0.001]
Epoch: 27


loss: 0.0850: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:42<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:57<00:00,  5.63it/s]


Epoch 27 train loss: 0.283
Epoch 27 mAP@0.50:0.95: 0.4874163866043091
Epoch 27 mAP@0.50: 0.7960348725318909
SAVING PLOTS COMPLETE...
Lr: [0.001]
Epoch: 28


loss: 0.1147: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:47<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:50<00:00,  5.77it/s]


Epoch 28 train loss: 0.283
Epoch 28 mAP@0.50:0.95: 0.48864275217056274
Epoch 28 mAP@0.50: 0.7941462397575378
SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 29


loss: 0.0922: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:50<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:55<00:00,  5.66it/s]


Epoch 29 train loss: 0.227
Epoch 29 mAP@0.50:0.95: 0.529974102973938
Epoch 29 mAP@0.50: 0.8333792090415955

BEST VALIDATION mAP: 0.529974102973938

SAVING BEST MODEL FOR EPOCH: 29

SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 30


loss: 0.1741: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:41<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:47<00:00,  5.82it/s]


Epoch 30 train loss: 0.209
Epoch 30 mAP@0.50:0.95: 0.5292400121688843
Epoch 30 mAP@0.50: 0.8312427997589111
SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 31


loss: 0.1678: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:37<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:47<00:00,  5.82it/s]


Epoch 31 train loss: 0.200
Epoch 31 mAP@0.50:0.95: 0.5305012464523315
Epoch 31 mAP@0.50: 0.8327696919441223

BEST VALIDATION mAP: 0.5305012464523315

SAVING BEST MODEL FOR EPOCH: 31

SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 32


loss: 0.1039: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:40<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:50<00:00,  5.77it/s]


Epoch 32 train loss: 0.193
Epoch 32 mAP@0.50:0.95: 0.5289576053619385
Epoch 32 mAP@0.50: 0.8331999778747559
SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 33


loss: 0.3876: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:34<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:47<00:00,  5.82it/s]


Epoch 33 train loss: 0.187
Epoch 33 mAP@0.50:0.95: 0.5293073058128357
Epoch 33 mAP@0.50: 0.8338761329650879
SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 34


loss: 0.1685: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:42<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:50<00:00,  5.76it/s]


Epoch 34 train loss: 0.182
Epoch 34 mAP@0.50:0.95: 0.5301554799079895
Epoch 34 mAP@0.50: 0.8349409699440002
SAVING PLOTS COMPLETE...
Lr: [0.0001]
Epoch: 35


loss: 0.1525: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:41<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:50<00:00,  5.76it/s]


Epoch 35 train loss: 0.178
Epoch 35 mAP@0.50:0.95: 0.5294625759124756
Epoch 35 mAP@0.50: 0.8336709141731262
SAVING PLOTS COMPLETE...
Lr: [1e-05]
Epoch: 36


loss: 0.4116: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:50<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:49<00:00,  5.78it/s]


Epoch 36 train loss: 0.170
Epoch 36 mAP@0.50:0.95: 0.5298667550086975
Epoch 36 mAP@0.50: 0.8351181745529175
SAVING PLOTS COMPLETE...
Lr: [1e-05]
Epoch: 37


loss: 0.0870: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:51<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:51<00:00,  5.75it/s]


Epoch 37 train loss: 0.168
Epoch 37 mAP@0.50:0.95: 0.5307399034500122
Epoch 37 mAP@0.50: 0.8344210386276245

BEST VALIDATION mAP: 0.5307399034500122

SAVING BEST MODEL FOR EPOCH: 37

SAVING PLOTS COMPLETE...
Lr: [1e-05]
Epoch: 38


loss: 0.2216: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:52<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:49<00:00,  5.78it/s]


Epoch 38 train loss: 0.167
Epoch 38 mAP@0.50:0.95: 0.5297397375106812
Epoch 38 mAP@0.50: 0.8343560099601746
SAVING PLOTS COMPLETE...
Lr: [1e-05]
Epoch: 39


loss: 0.1907: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:43<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:48<00:00,  5.81it/s]


Epoch 39 train loss: 0.166
Epoch 39 mAP@0.50:0.95: 0.5282989740371704
Epoch 39 mAP@0.50: 0.8306371569633484
SAVING PLOTS COMPLETE...
Lr: [1e-05]
Epoch: 40


loss: 0.0855: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:48<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:50<00:00,  5.76it/s]


Epoch 40 train loss: 0.166
Epoch 40 mAP@0.50:0.95: 0.5297796130180359
Epoch 40 mAP@0.50: 0.8335580229759216
SAVING PLOTS COMPLETE...
Lr: [1e-05]
Epoch: 41


loss: 0.5369: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:43<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:47<00:00,  5.83it/s]


Epoch 41 train loss: 0.164
Epoch 41 mAP@0.50:0.95: 0.5290052890777588
Epoch 41 mAP@0.50: 0.8345298171043396
SAVING PLOTS COMPLETE...
Lr: [1.0000000000000002e-06]
Epoch: 42


loss: 0.0555: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:45<00:00,  2.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:48<00:00,  5.79it/s]


Epoch 42 train loss: 0.164
Epoch 42 mAP@0.50:0.95: 0.5291451215744019
Epoch 42 mAP@0.50: 0.8321715593338013
SAVING PLOTS COMPLETE...
Lr: [1.0000000000000002e-06]
Epoch: 43


loss: 0.0547: 100%|████████████████████████████████████████████████████████████████| 6693/6693 [52:43<00:00,  2.12it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1674/1674 [04:48<00:00,  5.80it/s]


Epoch 43 train loss: 0.164
Epoch 43 mAP@0.50:0.95: 0.5288277268409729
Epoch 43 mAP@0.50: 0.8325561881065369
SAVING PLOTS COMPLETE...
Lr: [1.0000000000000002e-06]
Epoch: 44


loss: 0.1425:  73%|██████████████████████████████████████████████▉                 | 4907/6693 [38:38<14:04,  2.11it/s]

In [None]:
# Persist what is needed to resume training: the loop's epoch index, the model
# weights and the optimizer state. The 'optimazer_state_dict' spelling is the key
# the checkpoint-loading cells read back, so it must stay in sync with them.
# NOTE(review): `epoch` is the zero-based loop variable here, whereas SaveBestModel
# stores `epoch+1` and the training loop restarts at `checkpoint['epoch'] - 1`;
# confirm which epoch a resumed run is meant to start from.
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimazer_state_dict': optimizer.state_dict()},'retina2.pth')