**lib**

In [20]:
#!pip install tensorflow==2.9.1

In [25]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [21]:
def parse_tf_example(tf_example):
    """Deserialize one serialized example into a dict of raw features.

    Args:
        tf_example: Scalar string tensor holding one serialized example.

    Returns:
        Dict with the keys ``image`` (scalar string tensor with the encoded
        image bytes) and ``dimension``, ``orientation``, ``confidence``
        (variable-length float32 features; read their ``.values`` to get the
        dense numbers).
    """
    # Schema of a single record: one encoded image plus three float label lists.
    feature_spec = {
        'image': tf.io.FixedLenFeature((), tf.string),
        'dimension': tf.io.VarLenFeature(tf.float32),
        'orientation': tf.io.VarLenFeature(tf.float32),
        'confidence': tf.io.VarLenFeature(tf.float32),
    }
    parsed = tf.io.parse_single_example(tf_example, feature_spec)
    # Re-emit exactly the declared features, in declaration order.
    return {name: parsed[name] for name in feature_spec}

# Data prep

In [27]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.applications import VGG16
import keras.backend as K
from keras.utils import plot_model

# Path of the TFRecord file passed to generate_dataset.
# NOTE(review): this looks like a placeholder -- point it at the real file.
train_file = "path_to_train_file/train.tfrecords"
# Number of orientation bins: drives the orientation label reshape in
# preprocess and the widths of the orientation/confidence heads in build_model.
number_bin = 2
# Number of examples per batch for both the training and validation datasets.
batch_size = 8
# Buffer size intended for shuffling the dataset in generate_dataset.
shuffle_buffer_size = 1000

def preprocess(image_dataset):
    """Convert one parsed example into an ``(image, labels)`` training pair.

    Args:
        image_dataset: Dict as returned by ``parse_tf_example``: encoded image
            bytes under ``"image"`` and sparse float features under
            ``"dimension"``, ``"orientation"`` and ``"confidence"``.

    Returns:
        Tuple ``(image, (dimension, orientation, confidence))`` where ``image``
        is a float32 ``(height, width, 3)`` tensor scaled to ``[0, 1]`` and
        ``orientation`` has shape ``(number_bin, 2)``. ``dimension`` and
        ``confidence`` are the flat value vectors stored in the record.
    """
    # By default decode_image may return a 4-D tensor (animated GIF), so inside
    # Dataset.map the result has no static rank and downstream layers cannot
    # infer shapes. expand_animations=False guarantees a 3-D tensor and
    # channels=3 forces RGB, which is what the VGG16 backbone expects.
    image = tf.io.decode_image(image_dataset["image"], channels=3,
                               expand_animations=False)
    image = tf.ensure_shape(image, (None, None, 3))
    # uint8 pixel values -> float32 in [0, 1].
    image = tf.divide(image, 255)
    # NOTE(review): no resizing happens here; this assumes the stored images
    # already match the model's input size -- confirm against the TFRecord writer.

    # parse dimension
    dimension = image_dataset["dimension"].values
    # parse and reshape orientation: one 2-vector per bin
    orientation = image_dataset["orientation"].values
    orientation = tf.reshape(orientation, (number_bin, 2))
    # parse confidence
    confidence = image_dataset["confidence"].values
    return image, (dimension, orientation, confidence)

def generate_dataset(file_path, dataset_size=11195, train_fraction=0.8):
    """Build batched training and validation datasets from a TFRecord file.

    The records are split in file order: the first
    ``int(dataset_size * train_fraction)`` go to training, the rest to
    validation. Only the training split is shuffled, and the shuffle happens
    after the split, so reshuffling between epochs can never move validation
    examples into the training set.

    Args:
        file_path: Path (or list of paths) of the TFRecord file(s) to read.
        dataset_size: Total number of records in the file(s). Defaults to the
            size previously hard-coded in this function.
        train_fraction: Fraction of ``dataset_size`` used for training; must
            lie strictly between 0 and 1.

    Returns:
        Tuple ``(train_dataset, val_dataset)`` of batched, prefetched datasets
        yielding ``(image, (dimension, orientation, confidence))``.

    Raises:
        ValueError: If ``train_fraction`` is not strictly between 0 and 1.
    """
    if not 0.0 < train_fraction < 1.0:
        raise ValueError("train_fraction must be strictly between 0 and 1, "
                         "got {!r}".format(train_fraction))

    def _decode_and_batch(records):
        # Parse -> preprocess -> batch -> prefetch, shared by both splits.
        decoded = records.map(parse_tf_example, num_parallel_calls=tf.data.AUTOTUNE)
        decoded = decoded.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        return decoded.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    records = tf.data.TFRecordDataset(file_path)

    # Split on the raw serialized records so the validation pipeline does not
    # have to decode the training images just to skip over them.
    train_size = int(dataset_size * train_fraction)
    train_records = records.take(train_size)
    val_records = records.skip(train_size)

    # Shuffle the still-serialized training records: cheaper on memory than
    # buffering decoded images, and it gives SGD a fresh order every epoch.
    train_records = train_records.shuffle(shuffle_buffer_size, seed=12)

    return _decode_and_batch(train_records), _decode_and_batch(val_records)

# Build the batched training / validation datasets from the configured TFRecord file.
train_dataset, val_dataset = generate_dataset(train_file)

# Build Model

In [32]:
def orientation_loss(y_true, y_pred):
    """Negative dot-product loss between true and predicted bin orientations.

    Both tensors are indexed as ``(batch, bins, 2)``: one 2-vector per
    orientation bin (presumably the cos/sin of the residual angle -- confirm
    against the label writer). For each sample the negated dot products are
    summed over the bins and divided by the number of "anchor" bins, i.e. bins
    whose ground-truth vector has squared norm above 0.5.

    Args:
        y_true: Ground-truth orientation tensor, ``(batch, bins, 2)``.
        y_pred: Predicted orientation tensor, same shape as ``y_true``.

    Returns:
        Scalar tensor: the mean over the batch of the per-sample loss.
    """
    # Count, per sample, the bins that carry a ground-truth orientation.
    bin_sq_norm = tf.reduce_sum(tf.square(y_true), axis=2)
    is_anchor = tf.greater(bin_sq_norm, tf.constant(0.5))
    anchors = tf.reduce_sum(tf.cast(is_anchor, tf.float32), 1)

    # Negated dot product between truth and prediction, summed over bins.
    loss = -(y_true[:,:,0]*y_pred[:,:,0] + y_true[:,:,1]*y_pred[:,:,1])
    loss = tf.reduce_sum(loss, axis=1)
    # A sample with no anchor bin would divide by zero and poison the whole
    # batch with NaN/inf; divide_no_nan makes such a sample contribute 0.
    loss = tf.math.divide_no_nan(loss, anchors)

    return tf.reduce_mean(loss)

def build_model():
    """Build the three-headed VGG16 regression model.

    An ImageNet-pretrained VGG16 backbone (without its classifier, fixed
    ``224x224x3`` input) is flattened after ``block5_pool`` and feeds three
    independent heads:

    * ``dimension``   -- 3 values, LeakyReLU output.
    * ``orientation`` -- ``(number_bin, 2)``, each bin's 2-vector L2-normalized.
    * ``confidence``  -- ``number_bin`` values, softmax over the bins.

    Returns:
        A ``tf.keras`` model whose outputs are
        ``[dimension, orientation, confidence]``; the output layers carry
        those names so losses can be keyed by them.
    """
    base_model = VGG16(include_top=False, weights="imagenet", input_shape=(224, 224, 3))
    #for layer in base_model.layers:
    #    layer.trainable=False
    x = base_model.get_layer('block5_pool').output
    x = tf.keras.layers.Flatten()(x)
    # dimension head
    dimension = tf.keras.layers.Dense(512)(x)
    dimension = tf.keras.layers.LeakyReLU(alpha=0.1)(dimension)
    dimension = tf.keras.layers.Dropout(0.5)(dimension)
    dimension = tf.keras.layers.Dense(3)(dimension)
    dimension = tf.keras.layers.LeakyReLU(alpha=0.1, name='dimension')(dimension)

    # orientation head
    orientation = tf.keras.layers.Dense(256)(x)
    orientation = tf.keras.layers.LeakyReLU(alpha=0.1)(orientation)
    orientation = tf.keras.layers.Dropout(0.5)(orientation)
    orientation = tf.keras.layers.Dense(number_bin*2)(orientation)
    orientation = tf.keras.layers.LeakyReLU(alpha=0.1)(orientation)
    orientation = tf.keras.layers.Reshape((number_bin,-1))(orientation)
    # Normalize each bin's 2-vector on its own. K.l2_normalize defaults to
    # axis=None, which would normalize over the entire tensor (batch and bins
    # included) and make every prediction depend on the rest of the batch.
    orientation = tf.keras.layers.Lambda(K.l2_normalize, arguments={'axis': -1},
                                         name='orientation')(orientation)

    # confidence head
    confidence = tf.keras.layers.Dense(256)(x)
    confidence = tf.keras.layers.LeakyReLU(alpha=0.1)(confidence)
    confidence = tf.keras.layers.Dropout(0.5)(confidence)
    confidence = tf.keras.layers.Dense(number_bin, activation='softmax', name='confidence')(confidence)
    # model
    model = tf.keras.models.Model(inputs=base_model.inputs, outputs=[dimension, orientation, confidence])
    return model

# Train

In [33]:
model = build_model()

# Per-head objectives and their weights, keyed by output-layer name.
losses = {
    'dimension': 'mean_squared_error',
    'orientation': orientation_loss,
    'confidence': 'binary_crossentropy',
}
loss_weights = {'dimension': 1., 'orientation': 1., 'confidence': 1.}
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
    loss=losses,
    loss_weights=loss_weights,
)

# Callbacks: keep only the best weights seen so far, and log for TensorBoard.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/weights.hdf5',
    save_best_only=True,
    save_weights_only=True,
)
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir='/content/logs/',
    histogram_freq=0,
    write_graph=True,
    write_images=False,
)

# Train for 10 epochs, validating on the held-out split after each one.
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    callbacks=[checkpoint, tensorboard],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f1119776b90>