In [1]:
import os
import sys
import time
import argparse
import numpy as np
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
import pandas as pd
from torch.utils.data import DataLoader
from models.efficientdet import EfficientDet
from models.losses import FocalLoss
from datasets import Spine_dataset, get_augumentation, detection_collate
from utils import EFFICIENTDET
from tqdm import tqdm_notebook as tqdm

In [3]:
# --- Checkpoint / architecture -------------------------------------------
# Checkpoint path to resume from; None skips the checkpoint-loading cells.
resume = None
# Key into the EFFICIENTDET configuration table and name used in saved files.
network = 'efficientdet-d0'
num_classes = 1

# --- Data locations --------------------------------------------------------
image_root = 'boostnet_labeldata/data/'
csv_root = 'boostnet_labeldata/labels/'
save_folder = 'weights/'

# --- Loader / hardware -----------------------------------------------------
batch_size = 2
num_worker = 4
# GPU indices handed to prepare_device().
device = [0]

# --- Optimisation ----------------------------------------------------------
num_epochs = 100
learning_rate = 1e-4
# Optimizer step is taken every `grad_accumulation_steps` batches.
grad_accumulation_steps = 1
# NOTE(review): momentum, weight_decay and gamma are not referenced by any
# cell visible in this notebook (the optimizer is built with `lr` only).
momentum = 0.9
weight_decay = 5e-4
gamma = 0.1

In [4]:
# Create the checkpoint directory (and any missing parents). `exist_ok=True`
# makes the cell safe to re-run and removes the check-then-create race.
os.makedirs(save_folder, exist_ok=True)

In [5]:
def prepare_device(device):
    """Pick the primary torch device and the GPU ids that can actually be used.

    Args:
        device: list of GPU indices requested by the configuration, e.g. ``[0]``.

    Returns:
        tuple: ``(torch.device, list)`` where the device is ``cuda:<first id>``
        when at least one GPU is usable and ``cpu`` otherwise, and the list is
        the requested ids truncated to the number of GPUs that are available.
        The list is empty when training falls back to the CPU.
    """
    n_gpu_use = len(device)
    n_gpu = torch.cuda.device_count()
    if n_gpu_use > 0 and n_gpu == 0:
        print("Warning: There\'s no GPU available on this machine, training will be performed on CPU.")
        n_gpu_use = 0
    if n_gpu_use > n_gpu:
        print("Warning: The number of GPU\'s configured to use is {}, but only {} are available on this machine.".format(
            n_gpu_use, n_gpu))
        n_gpu_use = n_gpu
    # Only hand back as many ids as are usable; returning the full request
    # would let the caller build DataParallel over GPUs that do not exist.
    list_ids = list(device[:n_gpu_use])
    device = torch.device('cuda:{}'.format(
        device[0]) if n_gpu_use > 0 else 'cpu')

    return device, list_ids

In [6]:
def get_state_dict(model):
    """Return the model's state dict without the ``DataParallel`` wrapper.

    Args:
        model: a ``torch.nn.Module``, optionally wrapped in
            ``torch.nn.DataParallel`` (or a subclass of it).

    Returns:
        The state dict of the wrapped module when ``model`` is a
        ``DataParallel`` instance, otherwise ``model.state_dict()``.
    """
    # isinstance (rather than an exact type comparison) also unwraps
    # DataParallel subclasses, so saved keys never carry a "module." prefix.
    if isinstance(model, torch.nn.DataParallel):
        return model.module.state_dict()
    return model.state_dict()

In [7]:
# Checkpoint contents; stays an empty list when `resume` is None.
checkpoint = []
if resume is not None:
    resume_path = str(resume)
    print("Loading checkpoint: {} ...".format(resume_path))
    # The map_location callable returns each storage unchanged, so tensors
    # are loaded on the CPU regardless of where they were saved from.
    checkpoint = torch.load(
        resume, map_location=lambda storage, loc: storage)
    # Checkpoints written by this notebook's training cell store the class
    # count under 'num_class'; 'num_classes' is kept as a fallback spelling.
    if 'num_class' in checkpoint:
        num_classes = checkpoint['num_class']
    else:
        num_classes = checkpoint['num_classes']
    network = checkpoint['network']

In [8]:
# Training-split label files. landmarks.csv and filenames.csv have no header row.
corner_df_train = pd.read_csv(os.path.join(csv_root, 'training', 'landmarks.csv'), header=None)
filename_df_train = pd.read_csv(os.path.join(csv_root, 'training', 'filenames.csv'), header=None)
boxes_df_train = pd.read_csv(os.path.join(csv_root, 'training', 'train.csv'))
# Every box is given class id 0. Item assignment is used because attribute
# assignment (`df.label = 0`) does not create the column when it is missing.
boxes_df_train['label'] = 0

In [9]:
# Test-split label files. landmarks.csv and filenames.csv have no header row.
corner_df_test = pd.read_csv(os.path.join(csv_root, 'test', 'landmarks.csv'), header=None)
filename_df_test = pd.read_csv(os.path.join(csv_root, 'test', 'filenames.csv'), header=None)
boxes_df_test = pd.read_csv(os.path.join(csv_root, 'test', 'test.csv'))
# Every box is given class id 0. Item assignment is used because attribute
# assignment (`df.label = 0`) does not create the column when it is missing.
boxes_df_test['label'] = 0
# NOTE(review): the recorded run of this notebook failed while iterating the
# test loader: albumentations rejected a box whose normalized x_max was
# 1.0035. At least one test box presumably extends past its image and needs
# clipping or filtering - confirm the column layout of test.csv before fixing.

In [10]:
# Training split: images from `image_root` with the training boxes, corner
# landmarks and filenames, passed through the 'train' augmentation pipeline.
train_dataset = Spine_dataset.SPINEDetection(
    image_root,
    boxes_df_train,
    corner_df_train,
    filename_df_train,
    transform=get_augumentation('train'),
)

In [11]:
# Test split: same constructor as the training set, but with the test label
# frames, the 'test' augmentation pipeline and image_set='test'.
test_dataset = Spine_dataset.SPINEDetection(
    image_root,
    boxes_df_test,
    corner_df_test,
    filename_df_test,
    transform=get_augumentation('test'),
    image_set='test',
)

In [12]:
# Shuffled training batches; `detection_collate` assembles each batch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_worker,
    pin_memory=True,
    collate_fn=detection_collate,
)

In [13]:
# Test batches in dataset order (no shuffling); same collate function and
# worker settings as the training loader.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_worker,
    pin_memory=True,
    collate_fn=detection_collate,
)

In [14]:
# Sanity check: print the tensor shapes of the first training batch, then stop.
for idx, first_batch in enumerate(train_dataloader):
    images, annotations, corners = first_batch
    print(idx, images.shape, annotations.shape, corners.shape)
    break

0 torch.Size([2, 3, 1408, 768]) torch.Size([2, 17, 5]) torch.Size([2, 17, 8])


In [15]:
# Look the per-network hyper-parameters up once instead of three times.
_net_cfg = EFFICIENTDET[network]
model = EfficientDet(
    num_classes=num_classes,
    network=network,
    W_bifpn=_net_cfg['W_bifpn'],
    D_bifpn=_net_cfg['D_bifpn'],
    D_class=_net_cfg['D_class'],
)

Loaded pretrained weights for efficientnet-b0


In [16]:
# Restore weights from the loaded checkpoint before the model is moved to
# the target device or wrapped for multi-GPU use.
if resume is not None:
    model.load_state_dict(checkpoint['state_dict'])

# NOTE: `device` is rebound here from the configured list of GPU ids to a
# torch.device, so this cell is not safe to run twice on the same kernel.
device, device_ids = prepare_device(device)
model = model.to(device)
if len(device_ids) > 1:
    # More than one usable id: replicate the model across those GPUs.
    model = torch.nn.DataParallel(model, device_ids=device_ids)

criterion = FocalLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# The scheduler is stepped with a loss value by the training cell.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, patience=3, verbose=True)

In [17]:
def _checkpoint_state():
    """Build the dictionary that is serialized for every checkpoint."""
    return {
        'arch': type(model).__name__,
        'num_class': num_classes,
        # Same value under the alternative spelling, so a loader that looks
        # up either 'num_class' or 'num_classes' finds it.
        'num_classes': num_classes,
        'network': network,
        'state_dict': get_state_dict(model)
    }


def _run_epoch(dataloader, train):
    """Run one pass over ``dataloader`` and print/return the mean losses.

    Args:
        dataloader: yields ``(images, annotations_bboxes, annotations_corners)``.
        train: when True, gradients are computed and the optimizer is stepped
            every ``grad_accumulation_steps`` batches; when False the pass is
            evaluation only (no gradients, no parameter updates).

    Returns:
        dict with keys ``time``, ``loss``, ``corner_loss``, ``bbox_loss`` and
        ``cls_loss``. The loss entries are means over the batches whose total
        loss was non-zero.
    """
    start = time.time()
    total_loss = []
    corner_losses = []
    bbox_losses = []
    cls_losses = []
    if train:
        optimizer.zero_grad()
    tk0 = tqdm(dataloader, total=len(dataloader))
    # NOTE(review): the model is deliberately left in train mode during
    # evaluation because it is not visible from this notebook whether its
    # eval-mode forward returns the same 4-tuple - confirm, then switch to
    # model.eval() here if it does.
    with torch.set_grad_enabled(train):
        for idx, (images, annotations_bboxes, annotations_corners) in enumerate(tk0):
            images = images.to(device)
            annotations_bboxes = annotations_bboxes.to(device)
            annotations_corners = annotations_corners.to(device)
            classification, regression, corners, anchors = model(images)
            classification_loss, regression_loss, corner_loss = criterion(
                classification, regression, corners, anchors, annotations_bboxes, annotations_corners)
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            corner_loss = corner_loss.mean()
            loss = classification_loss + regression_loss + corner_loss
            if bool(loss == 0):
                # Zero-loss batches are skipped and excluded from the averages.
                print('loss equal zero(0)')
                continue
            if train:
                loss.backward()
                if (idx+1) % grad_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    optimizer.zero_grad()
            total_loss.append(loss.item())
            corner_losses.append(corner_loss.item())
            bbox_losses.append(regression_loss.item())
            cls_losses.append(classification_loss.item())
            tk0.set_postfix(loss=(np.mean(total_loss)))
    result = {
        'time': time.time() - start,
        'loss': np.mean(total_loss),
        'corner_loss': np.mean(corner_losses),
        'bbox_loss': np.mean(bbox_losses),
        'cls_loss': np.mean(cls_losses)
    }
    for key, value in result.items():
        print('    {:15s}: {}'.format(str(key), value))
    return result


model.train()
for epoch in range(num_epochs):
    print("{} epoch: \t start training....".format(epoch))

    _run_epoch(train_dataloader, train=True)

    # Evaluation pass on the held-out split: no backward pass and no
    # optimizer step, so the test data never updates the weights.
    test_result = _run_epoch(test_dataloader, train=False)
    # ReduceLROnPlateau is driven by the held-out loss.
    scheduler.step(test_result['loss'])

    torch.save(
        _checkpoint_state(),
        os.path.join(save_folder, 'checkpoint_{}_{}.pth'.format(network, epoch)))

# Final weights; also written when num_epochs is 0.
torch.save(_checkpoint_state(),
           os.path.join(save_folder, 'Final_{}.pth'.format(network)))

0 epoch: 	 start training....


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  del sys.path[0]


HBox(children=(FloatProgress(value=0.0, max=241.0), HTML(value='')))


    time           : 127.09938168525696
    loss           : 61446.87183123703
    corner_loss    : 61440.595016289546
    bbox_loss      : 1.797781619046239
    cls_loss       : 4.479049701413673


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=64.0), HTML(value='')))

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/naamii-server2/EfficientDet.Pytorch/datasets/Spine_dataset.py", line 41, in __getitem__
    augmentation = self.transform(**annotation)
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/core/composition.py", line 174, in __call__
    p.preprocess(data)
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/core/utils.py", line 63, in preprocess
    data[data_name] = self.check_and_convert(data[data_name], rows, cols, direction="to")
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/core/utils.py", line 71, in check_and_convert
    return self.convert_to_albumentations(data, rows, cols)
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/augmentations/bbox_utils.py", line 51, in convert_to_albumentations
    return convert_bboxes_to_albumentations(data, self.params.format, rows, cols, check_validity=True)
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/augmentations/bbox_utils.py", line 303, in convert_bboxes_to_albumentations
    return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/augmentations/bbox_utils.py", line 303, in <listcomp>
    return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/augmentations/bbox_utils.py", line 251, in convert_bbox_to_albumentations
    check_bbox(bbox)
  File "/home/naamii-server2/anaconda3/envs/deeplearning-pytorch/lib/python3.7/site-packages/albumentations/augmentations/bbox_utils.py", line 330, in check_bbox
    "to be in the range [0.0, 1.0], got {value}.".format(bbox=bbox, name=name, value=value)
ValueError: Expected x_max for bbox (0.6532399299474606, 0.4307944307944308, 1.0035026269702276, 0.49221949221949224, 0) to be in the range [0.0, 1.0], got 1.0035026269702276.
