In [1]:
# https://github.com/MhLiao/DB
# https://github.com/zonasw/DBNet
# https://github.com/WenmuZhou/DBNet.pytorch
# https://github.com/xuannianz/DifferentiableBinarization
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU') 
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    device_name = tf.test.gpu_device_name()
    print('Found GPU at:', device_name)
    !nvcc -V
else: print('Using CPU')

Found GPU at: /device:GPU:0
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Nov_30_19:15:10_Pacific_Standard_Time_2020
Cuda compilation tools, release 11.2, V11.2.67
Build cuda_11.2.r11.2/compiler.29373293_0


# Hyperparameters

In [2]:
# Settings passed to both DataGenerator instances (training and validation).
IGNORE_TEXTS = ['###']  # also passed to TedEvalMetric as ignore_texts; presumably transcriptions marking regions to skip — confirm in loader.py
BATCH_SIZE = 10  # 1000 training images / 10 matches the 100 steps per epoch shown in the training log
IMAGE_SIZE = 640  # NOTE(review): presumably the side length of the generated training images — confirm in loader.py
THRESH_MIN = 0.3  # NOTE(review): presumably the lower bound of the threshold-map values — confirm in loader.py
THRESH_MAX = 0.7  # NOTE(review): presumably the upper bound of the threshold-map values — confirm in loader.py
SHRINK_RATIO = 0.4  # NOTE(review): presumably the shrink ratio applied to text polygons — confirm in loader.py

In [3]:
# Post-processing and evaluation settings.
MIN_BOX_SCORE = 0.7  # passed as min_box_score to both PostProcessor and TedEvalMetric
MAX_CANDIDATES = 500  # passed as max_candidates to PostProcessor
IMAGE_SHORT_SIDE = 736  # passed as image_short_side to TedEvalMetric
AREA_PRECISION_CONSTRAINT = 0.4  # passed as area_precision_constraint to TedEvalMetric
AREA_RECALL_CONSTRAINT = 0.4  # passed as area_recall_constraint to TedEvalMetric

In [4]:
# Files handed to AnnotationsImporter, one per dataset split.
TRAIN_PATHS_MAP = './datasets/train.txt'
VALIDATE_PATHS_MAP = './datasets/validate.txt'
LEARNING_RATE = 7e-4  # initial Adam learning rate; ReduceLROnPlateau may lower it during training
EPOCHS = 100  # upper bound on training epochs; EarlyStopping may end training sooner

# Load the data

In [5]:
from loader import AnnotationsImporter, DataGenerator
# Import the annotations of both splits, training first and validation second.
train_annotations, validate_annotations = (
    AnnotationsImporter(paths_map)
    for paths_map in (TRAIN_PATHS_MAP, VALIDATE_PATHS_MAP)
)

[GET] Loading from ./datasets/train.txt: 1000 images
[GET] Loading from ./datasets/validate.txt: 500 images


In [6]:
# Report dataset sizes as "train + validate = total", first for images and then
# for bounding boxes; the two report lines are joined by the newline separator.
print(
    f'Number of images found: {train_annotations.images_count}'
    f' + {validate_annotations.images_count}'
    f' = {train_annotations.images_count + validate_annotations.images_count}',
    f'Number of bounding boxes in all images: {train_annotations.all_boxes_count}'
    f' + {validate_annotations.all_boxes_count}'
    f' = {train_annotations.all_boxes_count + validate_annotations.all_boxes_count}',
    sep='\n'
)

Number of images found: 1000 + 500 = 1500
Number of bounding boxes in all images: 11886 + 5230 = 17116


In [7]:
# Build one DataGenerator per split with identical settings; only the
# annotations differ. The training generator is created first.
train_generator, validate_generator = (
    DataGenerator(
        split.annotations,
        BATCH_SIZE, IMAGE_SIZE, IGNORE_TEXTS,
        THRESH_MIN, THRESH_MAX, SHRINK_RATIO, seed=None
    )
    for split in (train_annotations, validate_annotations)
)

# Build and compile the model

In [8]:
from model import DBNet
from processor import PostProcessor
# Post-processor configured with the box-score threshold and the candidate cap
# defined in the hyperparameter cells.
post_processor = PostProcessor(min_box_score=MIN_BOX_SCORE, max_candidates=MAX_CANDIDATES)
# Project DBNet wrapper built on the 'ResNet18' backbone.
# NOTE(review): freeze_bn=False presumably keeps the BatchNormalization layers trainable,
# and k=50 is presumably the steepness factor of the differentiable binarization — confirm in model.py.
dbnet = DBNet(post_processor, backbone='ResNet18', freeze_bn=False, k=50)
# Print the layer table of the wrapped model using 120-character-wide rows.
dbnet.model.summary(line_length=120)

Model: "DBNet"
________________________________________________________________________________________________________________________
 Layer (type)                          Output Shape               Param #       Connected to                            
 image (InputLayer)                    [(None, None, None, 3)]    0             []                                      
                                                                                                                        
 conv1 (Conv2D)                        (None, None, None, 64)     9408          ['image[0][0]']                         
                                                                                                                        
 bn_conv1 (BatchNormalization)         (None, None, None, 64)     256           ['conv1[0][0]']                         
                                                                                                                        
 conv1_relu (Acti

                                                                                                                        
 bn3a_branch2b (BatchNormalization)    (None, None, None, 128)    512           ['res3a_branch2b[0][0]']                
                                                                                                                        
 bn3a_branch1 (BatchNormalization)     (None, None, None, 128)    512           ['res3a_branch1[0][0]']                 
                                                                                                                        
 res3a (Add)                           (None, None, None, 128)    0             ['bn3a_branch2b[0][0]',                 
                                                                                 'bn3a_branch1[0][0]']                  
                                                                                                                        
 res3a_relu (Activation)        

                                                                                                                        
 res4b1_relu (Activation)              (None, None, None, 256)    0             ['res4b1[0][0]']                        
                                                                                                                        
 padding5a_branch2a (ZeroPadding2D)    (None, None, None, 256)    0             ['res4b1_relu[0][0]']                   
                                                                                                                        
 res5a_branch2a (Conv2D)               (None, None, None, 512)    1179648       ['padding5a_branch2a[0][0]']            
                                                                                                                        
 bn5a_branch2a (BatchNormalization)    (None, None, None, 512)    2048          ['res5a_branch2a[0][0]']                
                                

 P2 (ConvBnRelu)                       (None, None, None, 64)     147712        ['tf.__operators__.add_2[0][0]']        
                                                                                                                        
 P3 (Sequential)                       (None, None, None, 64)     147456        ['tf.__operators__.add_1[0][0]']        
                                                                                                                        
 P4 (Sequential)                       (None, None, None, 64)     147456        ['tf.__operators__.add[0][0]']          
                                                                                                                        
 P5 (Sequential)                       (None, None, None, 64)     147456        ['in5[0][0]']                           
                                                                                                                        
 fuse (Concatenate)             

In [9]:
from tensorflow.keras.optimizers import Adam
from losses import DBLoss
# Compile the model with an Adam optimizer (amsgrad option switched on) that
# starts at LEARNING_RATE, and with the project's DBLoss as the training loss.
dbnet.compile(
    optimizer = Adam(learning_rate=LEARNING_RATE, amsgrad=True),
    loss = DBLoss(alpha=1.0, beta=10.0, negative_ratio=3.0),
)

# Training

In [10]:
from metrics import TedEvalMetric
from tqdm.notebook import tqdm

# Evaluation callback that scores predictions against the validation annotations.
# The cell output shows it reading the evaluation images on construction, and the
# training log shows precision / recall / fmeasure being reported after each epoch.
tedeval_callback = TedEvalMetric(
    true_annotations = validate_annotations.annotations,  # ground truth comes from the validation split
    ignore_texts = IGNORE_TEXTS,  # same list that is given to the data generators
    min_box_score = MIN_BOX_SCORE,  # same threshold that is given to the PostProcessor
    image_short_side = IMAGE_SHORT_SIDE,
    area_precision_constraint = AREA_PRECISION_CONSTRAINT,
    area_recall_constraint = AREA_RECALL_CONSTRAINT,
    progressbar = tqdm,  # notebook progress bar used for the reading / predicting / scoring stages
    level = 'epoch'  # NOTE(review): presumably selects evaluation once per epoch — confirm in metrics.py
)

Reading evaluation images:   0%|          | 0/500 [00:00<?, ?image/s]

In [11]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Abort training after 5 epochs without improvement and roll the model back
# to the best weights seen so far.
early_stopping_callback = EarlyStopping(
    patience = 5,
    restore_best_weights = True,
    verbose = 1
)

# Halve the learning rate (new_lr = lr * 0.5) whenever val_loss has not improved
# for 2 epochs, never going below 1e-6.
reduce_lr_callback = ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.5,
    patience = 2,
    min_lr = 1e-6,
    verbose = 1
)

In [None]:
%%time
# Train the model and keep only the `.history` attribute of the object returned by fit.
# NOTE(review): dbnet.fit is the project's wrapper; presumably it forwards these
# arguments to Keras Model.fit — confirm in model.py.
history = dbnet.fit(
    train_generator,
    validation_data = validate_generator,
    validation_steps = len(validate_generator),  # step counts are taken from the generators' lengths
    steps_per_epoch = len(train_generator),
    epochs = EPOCHS,  # upper bound; early_stopping_callback may end training earlier
    callbacks = [tedeval_callback, reduce_lr_callback, early_stopping_callback],
    verbose = 1
).history

Epoch 1/100

Predicting bounding boxes:   0%|          | 0/500 [00:00<?, ?image/s]

Calculating TedEval metric:   0%|          | 0/500 [00:00<?, ?image/s]

Evaluation metrics for epoch 1 - precision: 0.0000 - recall: 0.0000 - fmeasure: 0.0000
Epoch 2/100

Predicting bounding boxes:   0%|          | 0/500 [00:00<?, ?image/s]

Calculating TedEval metric:   0%|          | 0/500 [00:00<?, ?image/s]

Evaluation metrics for epoch 2 - precision: 0.0000 - recall: 0.0000 - fmeasure: 0.0000
Epoch 3/100
  8/100 [=>............................] - ETA: 34s - loss: 2.4350