In [1]:
# https://github.com/MhLiao/DB
# https://github.com/zonasw/DBNet
# https://github.com/WenmuZhou/DBNet.pytorch
# https://github.com/xuannianz/DifferentiableBinarization
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU') 
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    device_name = tf.test.gpu_device_name()
    print('Found GPU at:', device_name)
    !nvcc -V
else: print('Using CPU')

Found GPU at: /device:GPU:0
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Nov_30_19:15:10_Pacific_Standard_Time_2020
Cuda compilation tools, release 11.2, V11.2.67
Build cuda_11.2.r11.2/compiler.29373293_0


# Hyperparameters

In [2]:
# Settings passed positionally to both `DataGenerator` instances (training and validation).
# IGNORE_TEXTS is additionally passed to the TedEval callback as `ignore_texts`.
# NOTE(review): '###' is presumably the transcription that marks regions to skip -- confirm in `loader`.
IGNORE_TEXTS = ['###']
BATCH_SIZE = 4
# NOTE(review): presumably the side length of the generated training images -- confirm in `loader`.
IMAGE_SIZE = 640
# NOTE(review): THRESH_MIN / THRESH_MAX / SHRINK_RATIO look like label-map generation
# parameters (threshold-map value range and polygon shrink ratio) -- confirm in `loader`.
THRESH_MIN = 0.3
THRESH_MAX = 0.7
SHRINK_RATIO = 0.4

In [3]:
# Post-processing and evaluation settings.
# MIN_BOX_SCORE is passed to both `PostProcessor` and the TedEval callback,
# MAX_CANDIDATES only to `PostProcessor`, and the remaining three only to the TedEval callback.
MIN_BOX_SCORE = 0.6
MAX_CANDIDATES = 500
# NOTE(review): presumably the short side images are resized to at evaluation time -- confirm in `metrics`.
IMAGE_SHORT_SIDE = 736
AREA_PRECISION_CONSTRAINT = 0.4
AREA_RECALL_CONSTRAINT = 0.4

In [4]:
# Index files read by `AnnotationsImporter`; the paths are relative to the working directory.
TRAIN_PATHS_MAP = './datasets/train.txt'
VALIDATE_PATHS_MAP = './datasets/validate.txt'
# Starting learning rate given to Adam; the ReduceLROnPlateau callback may lower it during training.
LEARNING_RATE = 7e-3
# Upper bound on the number of epochs; the EarlyStopping callback may end training sooner.
EPOCHS = 50

# Load the data

In [5]:
from loader import AnnotationsImporter, DataGenerator
# Import the annotations of both splits, training first and validation second.
train_annotations, validate_annotations = [
    AnnotationsImporter(paths_map)
    for paths_map in (TRAIN_PATHS_MAP, VALIDATE_PATHS_MAP)
]

[GET] Loading from ./datasets/train.txt: 1000 images
[GET] Loading from ./datasets/validate.txt: 500 images


In [6]:
# Report per-split and combined counts of images and bounding boxes.
n_train_images = train_annotations.images_count
n_validate_images = validate_annotations.images_count
n_train_boxes = train_annotations.all_boxes_count
n_validate_boxes = validate_annotations.all_boxes_count

summary_lines = [
    f'Number of images found: {n_train_images} + {n_validate_images} = '
    f'{n_train_images + n_validate_images}',
    f'Number of bounding boxes in all images: {n_train_boxes} + {n_validate_boxes} = '
    f'{n_train_boxes + n_validate_boxes}',
]
print('\n'.join(summary_lines))

Number of images found: 1000 + 500 = 1500
Number of bounding boxes in all images: 11886 + 5230 = 17116


In [7]:
# Both generators share every setting except the annotations they read and the seed.
generator_settings = (
    BATCH_SIZE, IMAGE_SIZE, IGNORE_TEXTS,
    THRESH_MIN, THRESH_MAX, SHRINK_RATIO,
)

# Training data uses a fixed seed; the validation generator is left unseeded.
train_generator = DataGenerator(
    train_annotations.annotations, *generator_settings, seed=2022
)
validate_generator = DataGenerator(
    validate_annotations.annotations, *generator_settings, seed=None
)

# Build and compile the model

In [None]:
from model import DBNet
from processor import PostProcessor
# Configure the post-processor with the score threshold and candidate cap,
# then hand it to the DBNet wrapper together with the backbone choice.
post_processor = PostProcessor(
    min_box_score=MIN_BOX_SCORE,
    max_candidates=MAX_CANDIDATES,
)
dbnet = DBNet(
    post_processor,
    backbone='ResNet18',
    k=50,  # NOTE(review): presumably the binarization amplification factor -- confirm in `model`
)

In [None]:
from tensorflow.keras.optimizers import Adam
from losses import DBLoss
# Attach the Adam optimizer and the DB loss, then print the layer summary
# (wider lines so long layer names are not truncated).
optimizer = Adam(LEARNING_RATE)
dbnet.compile(optimizer=optimizer, loss=DBLoss())
dbnet.model.summary(line_length=120)

# Training

In [None]:
from metrics import TedEvalMetric
from tqdm.notebook import tqdm

# Evaluation callback built from the validation annotations and the
# evaluation hyperparameters; `tqdm` is supplied as its progress-bar factory.
tedeval_callback = TedEvalMetric(
    true_annotations=validate_annotations.annotations,
    ignore_texts=IGNORE_TEXTS,
    min_box_score=MIN_BOX_SCORE,
    image_short_side=IMAGE_SHORT_SIDE,
    area_precision_constraint=AREA_PRECISION_CONSTRAINT,
    area_recall_constraint=AREA_RECALL_CONSTRAINT,
    progressbar=tqdm,
    level='train',
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Learning-rate schedule: when `val_loss` has stagnated for 2 epochs,
# multiply the learning rate by 0.5, but never go below 1e-6.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

# Early stopping: give up after 5 epochs without improvement and restore
# the best weights seen so far (monitored quantity left at the Keras default).
early_stopping_callback = EarlyStopping(
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
%%time
# Train for at most EPOCHS epochs, running one full pass over each generator per epoch.
# Only the `.history` attribute of the object returned by `fit` is kept.
training_callbacks = [tedeval_callback, reduce_lr_callback, early_stopping_callback]
history = dbnet.fit(
    train_generator,
    validation_data=validate_generator,
    validation_steps=len(validate_generator),
    steps_per_epoch=len(train_generator),
    epochs=EPOCHS,
    callbacks=training_callbacks,
    verbose=1,
).history