# SVHN Competition

## Preparations

In [1]:
#!/usr/bin/env python3
# TEAM MEMBERS:
# Antonio Krizmanic - 2b193238-8e3c-11ec-986f-f39926f24a9c
# Janek Putz - e31a3cae-8e6c-11ec-986f-f39926f24a9c
import argparse
import datetime
import logging
import os
import re
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default
logger = logging.getLogger('SVHN')

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

import bboxes_utils
import efficient_net
from svhn_dataset import SVHN

In [38]:
parser = argparse.ArgumentParser()
parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
parser.add_argument("--epochs", default=1, type=int, help="Number of epochs.")
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")

parser.add_argument("--logging_level", default="info", type=str, help="Logging level")
parser.add_argument("--fine_tuning", default=False, type=bool, help="Optionally fine tune the efficient net core.")
parser.add_argument("--image_size", default=224, type=int, help="Width and height to resize image to uniform size.")
parser.add_argument("--iou_threshold", default=0.5, type=float, help="Threshold to assign anchors to gold bboxes.")

parser.add_argument("--batch_norm", default=True, type=bool, help="Batch normalization of conv. layers.")
parser.add_argument("--l2", default=0.00, type=float, help="L2 regularization.")
parser.add_argument("--decay", default="None", type=str, help="Learning decay rate type")
parser.add_argument("--learning_rate", default=0.001, type=float, help="Initial learning rate.")
parser.add_argument("--learning_rate_final", default=0.0001, type=float, help="Final learning rate.")

# todo: try batch=1 without resizing


args = parser.parse_args([] if "__file__" not in globals() else None)

# Create logdir name
args.logdir = os.path.join("logs", "{}-{}-{}".format(
    os.path.basename(globals().get("__file__", "notebook")),
    datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
    ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
))

In [6]:
# Fix random seeds and threads
np.random.seed(args.seed)
tf.random.set_seed(args.seed)
tf.config.threading.set_inter_op_parallelism_threads(args.threads)
tf.config.threading.set_intra_op_parallelism_threads(args.threads)

## Load Dataset

In [76]:
# Load the data
svhn = SVHN()

# create anchors
# TODO: create different ratios and sizes
anchors = np.array([[-1, -1, -1, -1]])
for T in range(0, 85, 14):
    for L in range(0, 99, 7):
        anchors = np.append(anchors, [[T, L, T + 28, L + 14]], axis=0)
anchors = np.delete(anchors, 0, 0)

def create_dataset(dataset: tf.data.Dataset, training: bool) -> tf.data.Dataset:

    def prepare_data(example):
        example["classes"] = tf.cast(example["classes"], dtype=tf.int32)
        # resizing
        #print(type(example["image"]))
        #print(tf.shape(example["image"])[0])
        example["bboxes"] = example["bboxes"] * (tf.cast(tf.shape(example["image"])[0], tf.float32) / args.image_size)
        resized_image = tf.image.resize(example["image"], [args.image_size, args.image_size])
        # 
        anchor_classes, anchor_bboxes = tf.numpy_function(
            bboxes_utils.bboxes_training, # name
            [anchors, example["classes"], example["bboxes"], args.iou_threshold], # param values
            (tf.int32, tf.float32) # return types
        )
        
        output = {
            "classes": example["classes"],
            "bboxes": tf.ensure_shape(anchor_bboxes, [len(anchors), 4])
        }
        return resized_image, output

    dataset = dataset.map(prepare_data)
    if training:
        dataset = dataset.shuffle(buffer_size=10000, seed=args.seed)
    dataset = dataset.batch(args.batch_size)

    return dataset

In [77]:
train = create_dataset(svhn.train, True)
dev = create_dataset(svhn.dev, False)
test = create_dataset(svhn.test, False)

## Create Model

### Load EfficientNet

In [44]:
# change dynamic_input_shape in case of batching with size 1 and different sizes
efficientnet_b0 = efficient_net.pretrained_efficientnet_b0(include_top=False, dynamic_input_shape=False)
efficientnet_b0.trainable = args.fine_tuning
for o in efficientnet_b0.outputs:
    print(o)

KerasTensor(type_spec=TensorSpec(shape=(None, 1280), dtype=tf.float32, name=None), name='avg_pool/Mean:0', description="created by layer 'avg_pool'")
KerasTensor(type_spec=TensorSpec(shape=(None, 7, 7, 1280), dtype=tf.float32, name=None), name='top_activation/IdentityN:0', description="created by layer 'top_activation'")
KerasTensor(type_spec=TensorSpec(shape=(None, 14, 14, 112), dtype=tf.float32, name=None), name='block5c_add/add:0', description="created by layer 'block5c_add'")
KerasTensor(type_spec=TensorSpec(shape=(None, 28, 28, 40), dtype=tf.float32, name=None), name='block3b_add/add:0', description="created by layer 'block3b_add'")
KerasTensor(type_spec=TensorSpec(shape=(None, 56, 56, 24), dtype=tf.float32, name=None), name='block2b_add/add:0', description="created by layer 'block2b_add'")
KerasTensor(type_spec=TensorSpec(shape=(None, 112, 112, 16), dtype=tf.float32, name=None), name='block1a_project_bn/FusedBatchNormV3:0', description="created by layer 'block1a_project_bn'")


### Parameterization

In [42]:
if args.l2:
    regularizer = tf.keras.regularizers.L2(args.l2)
else:
    regularizer = None

In [32]:
def bn_relu(input):
    if args.batch_norm:
        return tf.keras.layers.ReLU()(tf.keras.layers.BatchNormalization()(input))
    else:
        return tf.keras.layers.ReLU()(input)

In [43]:
if not args.decay or args.decay in ["None", "none"]:
    learning_rate = args.learning_rate
else:
    decay_steps = (len(train) / args.batch_size) * args.epochs
    if args.decay == 'linear':
        learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(decay_steps=decay_steps,
                                                                      initial_learning_rate=args.learning_rate,
                                                                      end_learning_rate=args.learning_rate_final,
                                                                      power=1.0)
    elif args.decay == 'exponential':
        decay_rate = args.learning_rate_final / args.learning_rate
        learning_rate = tf.optimizers.schedules.ExponentialDecay(decay_steps=decay_steps,
                                                                 decay_rate=decay_rate,
                                                                 initial_learning_rate=args.learning_rate)
    elif args.decay == 'cosine':
        learning_rate = tf.keras.optimizers.schedules.CosineDecay(decay_steps=decay_steps,
                                                                  initial_learning_rate=args.learning_rate)
    else:
        raise NotImplementedError("Use only 'linear', 'exponential' or 'cosine' as LR scheduler")

### Compose Model

In [79]:
inputs = tf.keras.Input(shape=(args.image_size, args.image_size, 3))

pyramid_output = efficientnet_b0(inputs)[2]

# classification head (TODO: try more layers)
classes_conv1 = bn_relu(tf.keras.layers.Conv2D(256, 3, 1, "same", kernel_regularizer=regularizer)(pyramid_output))
classes_conv2 = bn_relu(tf.keras.layers.Conv2D(256, 3, 1, "same", kernel_regularizer=regularizer)(classes_conv1))
classes_output = tf.keras.layers.Dense(SVHN.LABELS, activation=tf.nn.softmax, kernel_regularizer=regularizer)(classes_conv2)

# bbox regression head (TODO: try more layers)
bbox_conv1 = bn_relu(tf.keras.layers.Conv2D(256, 3, 1, "same", kernel_regularizer=regularizer)(pyramid_output))
bbox_conv2 = bn_relu(tf.keras.layers.Conv2D(256, 3, 1, "same", kernel_regularizer=regularizer)(bbox_conv1))
bbox_output = tf.keras.layers.Dense(4, activation=tf.nn.relu)(bbox_conv2)

outputs = {
    "classes": classes_output,
    "bboxes": bbox_output
}

model = tf.keras.models.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss={  # keys fit to output dict
        "classes": tf.keras.losses.SparseCategoricalCrossentropy(),
        "bboxes": tfa.losses.SigmoidFocalCrossEntropy(alpha=0.25, gamma=2)
    },
    metrics={
        "classes": [tf.keras.metrics.SparseCategoricalAccuracy("accuracy")],
        "bboxes": []  # TODO
    }
)

model.summary()

Model: "model_18"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_24 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 efficientnet-b0 (Functional)   [(None, 1280),       4049564     ['input_24[0][0]']               
                                 (None, 7, 7, 1280)                                               
                                , (None, 14, 14, 11                                               
                                2),                                                               
                                 (None, 28, 28, 40)                                        

### Train

In [80]:
best_checkpoint_path = os.path.join(args.logdir, "cags_classification.ckpt")
model.fit(
    train, batch_size=args.batch_size, epochs=args.epochs, validation_data=dev,
    callbacks=[tf.keras.callbacks.TensorBoard(args.logdir, histogram_freq=1, update_freq=100, profile_batch=0),
               tf.keras.callbacks.ModelCheckpoint(filepath=best_checkpoint_path, save_weights_only=False,
                                                  monitor='val_accuracy', mode='max', save_best_only=True)],
)

ValueError: in user code:

    File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\tensorflow_addons\utils\keras_utils.py", line 61, in call  *
        return self.fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\tensorflow_addons\losses\focal_loss.py", line 121, in sigmoid_focal_crossentropy  *
        ce = K.binary_crossentropy(y_true, y_pred, from_logits=from_logits)
    File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\keras\backend.py", line 5262, in binary_crossentropy
        bce = target * tf.math.log(output + epsilon())

    ValueError: Dimensions must be equal, but are 105 and 14 for '{{node mul}} = Mul[T=DT_FLOAT](y_true, Log)' with input shapes: [?,105,4], [?,14,14,4].


In [51]:
test_test_set = test.take(1)
prediction = model.predict(test_test_set)
print(prediction)

InvalidArgumentError: Graph execution error:

ValueError: attempt to get argmax of an empty sequence
Traceback (most recent call last):

  File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\tensorflow\python\ops\script_ops.py", line 271, in __call__
    ret = func(*args)

  File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "C:\Users\janek\Development\Git\Prag\deep-learning-lecture\06_object_detection\bboxes_utils.py", line 165, in bboxes_training
    max_iou_idx = np.argmax(iou_comparisons)

  File "<__array_function__ internals>", line 5, in argmax

  File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\numpy\core\fromnumeric.py", line 1195, in argmax
    return _wrapfunc(a, 'argmax', axis=axis, out=out)

  File "C:\Users\janek\anaconda3\envs\dl-lecture\lib\site-packages\numpy\core\fromnumeric.py", line 57, in _wrapfunc
    return bound(*args, **kwds)

ValueError: attempt to get argmax of an empty sequence


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_predict_function_62565]

## Prediction

In [None]:
# TODO: use tf.image.non_max_suppression here? I think so w.r.t slide 14

In [None]:
# Generate test set annotations, but in `args.logdir` to allow parallel execution.
os.makedirs(args.logdir, exist_ok=True)
with open(os.path.join(args.logdir, "svhn_competition.txt"), "w", encoding="utf-8") as predictions_file:
    # TODO: Predict the digits and their bounding boxes on the test set.
    # Assume that for a single test image we get
    # - `predicted_classes`: a 1D array with the predicted digits,
    # - `predicted_bboxes`: a [len(predicted_classes), 4] array with bboxes;
    for predicted_classes, predicted_bboxes in ...:
        output = []
        for label, bbox in zip(predicted_classes, predicted_bboxes):
            output += [label] + list(bbox)
        print(*output, file=predictions_file)