In [1]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import (
    Conv1D,
    BatchNormalization,
    MaxPool1D,
    Dropout,
    ReLU,
    Conv2DTranspose,
    Input,
    Add,
    Softmax,
    Layer
)
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras import Model
import h5py
import numpy as np

In [2]:
class Conv1DTranspose(Layer):
    """1-D transposed convolution built on top of ``Conv2DTranspose``.

    The input gets a dummy axis inserted at position 2, is passed through a
    ``Conv2DTranspose`` whose kernel size and stride are 1 along that axis,
    and the dummy axis is removed again.

    Args:
        filters: Number of output channels.
        kernel_size: Kernel length along axis 1 of the input.
        stride: Upsampling stride along axis 1 of the input.
        **kwargs: Forwarded unchanged to ``Conv2DTranspose`` (for example
            ``padding`` or ``use_bias``). Every keyword goes to the wrapped
            layer, so a ``name`` given here names the inner layer, not this
            wrapper.
    """

    def __init__(self, filters, kernel_size, stride, **kwargs):
        # Initialise the Keras base class before assigning any attribute so
        # that its attribute tracking is fully set up.
        super(Conv1DTranspose, self).__init__()
        self.output_channels = filters
        self.kernel_size = (kernel_size, 1)
        self.strides = (stride, 1)
        self.kwargs = dict(kwargs)
        # The wrapped layer is a regular attribute, so Keras tracks it and
        # reports its weights as weights of this layer; there is no need to
        # copy them into the private `_trainable_weights` list.
        self.layer = Conv2DTranspose(
            filters=self.output_channels,
            kernel_size=self.kernel_size,
            strides=self.strides,
            **self.kwargs
        )

    def build(self, input_shape):
        """Create the wrapped layer's weights for a rank-3 ``input_shape``."""
        # Guard so that a repeated build() call does not create the
        # variables of the wrapped layer a second time.
        if not self.layer.built:
            self.layer.build((input_shape[0], input_shape[1], 1, input_shape[2]))
        super(Conv1DTranspose, self).build(input_shape)

    def call(self, x):
        """Apply the transposed convolution along axis 1 of ``x``."""
        data = K.expand_dims(x, axis=2)
        data = self.layer(data)
        data = K.squeeze(data, axis=2)
        return data

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        The base-class config is intentionally not merged in: ``__init__``
        forwards every extra keyword to the wrapped layer, so feeding the
        base ``name``/``dtype`` entries back through ``from_config`` would
        apply them to the inner layer instead of this wrapper.
        NOTE(review): values in ``kwargs`` are returned as given; confirm
        they are serialisable if non-string initialisers etc. are passed.
        """
        config = {
            "filters": self.output_channels,
            "kernel_size": self.kernel_size[0],
            "stride": self.strides[0],
        }
        config.update(dict(self.kwargs))
        return config


In [None]:
# Experimental, inheritance-based alternative to the Conv1DTranspose wrapper
# defined in the executed cell above. Running this cell rebinds the name
# Conv1DTranspose, so only run it if you want to try this variant.
#
# Root cause of the "extra dimension is added twice" failure: __call__ below
# already hands a 4-D tensor to the base class, and Keras then calls build()
# with that 4-D shape. build() must therefore insert the dummy axis only when
# it is given a rank-3 shape.
# NOTE(review): this cell has no execution count; verify on the TensorFlow
# version in use before relying on it.
class Conv1DTranspose(Conv2DTranspose):
    """1-D transposed convolution implemented by subclassing ``Conv2DTranspose``.

    Args:
        filters: Number of output channels.
        kernel_size: Kernel length along axis 1 of the input.
        stride: Upsampling stride along axis 1 of the input.
        **kwargs: Forwarded to ``Conv2DTranspose``.
    """

    def __init__(self, filters, kernel_size, stride, **kwargs):
        super(Conv1DTranspose, self).__init__(
            filters=filters,
            kernel_size=(kernel_size, 1),
            strides=(stride, 1),
            **kwargs
        )
        # Assigned after the base initialiser so Keras' attribute tracking
        # is already in place.
        self.output_channels = filters

    def build(self, input_shape):
        """Build the 2-D kernel from either a rank-3 or a rank-4 shape."""
        if len(input_shape) == 3:
            # Called with the shape of the original 1-D input: add the
            # dummy axis that __call__ inserts into the tensor itself.
            input_shape = (input_shape[0], input_shape[1], 1, input_shape[2])
        # A rank-4 shape already contains the dummy axis; pass it through.
        super(Conv1DTranspose, self).build(input_shape)

    def __call__(self, x):
        """Insert the dummy axis, run the 2-D layer, and drop the axis again."""
        data = K.expand_dims(x, axis=2)
        data = super(Conv1DTranspose, self).__call__(data)
        data = K.squeeze(data, axis=2)
        return data


# FCSN Model

In [3]:
# Tensorflow uses the Channels-last format by default, so each input sample
# is laid out as (steps, channels).
input_size = (320, 1024)
n_classes  = 2


def _conv_bn_relu(tensor, filters, kernel_size=3):
    """Apply Conv1D ("SAME" padding) -> BatchNormalization -> ReLU."""
    tensor = Conv1D(filters=filters, kernel_size=kernel_size, padding="SAME")(tensor)
    tensor = BatchNormalization()(tensor)
    return ReLU()(tensor)


def _encoder_block(tensor, filters, n_convs):
    """Stack `n_convs` conv/BN/ReLU units, then halve the length by max-pooling."""
    for _unit in range(n_convs):
        tensor = _conv_bn_relu(tensor, filters)
    return MaxPool1D(pool_size=2, strides=2, padding="SAME")(tensor)


inputs = Input(input_size)

# Blocks 1-4: each block ends with a stride-2 pooling.
x = _encoder_block(inputs, filters=1024, n_convs=2)   # Block 1
x = _encoder_block(x, filters=1024, n_convs=2)        # Block 2
x = _encoder_block(x, filters=1024, n_convs=3)        # Block 3
x = _encoder_block(x, filters=2048, n_convs=3)        # Block 4

# Kept for the skip connection that is added to the upsampled scores below.
pool4 = x

x = _encoder_block(x, filters=2048, n_convs=3)        # Block 5

# Block 6: kernel-size-1 convolution followed by dropout
x = _conv_bn_relu(x, filters=4096, kernel_size=1)
x = Dropout(0.5)(x)

# Block 7: same layout as block 6
x = _conv_bn_relu(x, filters=4096, kernel_size=1)
x = Dropout(0.5)(x)

# Block 8: per-step class scores
x = _conv_bn_relu(x, filters=n_classes, kernel_size=1)

# Upsample the scores by 2 so they can be added to the pool4 scores.
upscore = Conv1DTranspose(
    filters=n_classes, kernel_size=4, padding="SAME", stride=2, use_bias=False
)(x)

# Project pool4 down to n_classes channels before the addition.
score_pool = Conv1D(filters=n_classes, kernel_size=1, padding="SAME")(pool4)
score_pool = BatchNormalization()(score_pool)

x = Add()([upscore, score_pool])
# Final upsampling by 16 (undoes the four poolings that precede pool4).
x = Conv1DTranspose(
    filters=n_classes, kernel_size=16, padding="SAME", stride=16, use_bias=False
)(x)

outputs = Softmax()(x)

model = Model(inputs=inputs, outputs=outputs, name="FCSN")

In [None]:
# Sanity check: shape of the tensor produced by the last Conv1DTranspose
# (the input to Softmax). Its length axis is expected to match input_size[0].
print(x.shape)

In [None]:
# Print the layer-by-layer summary of the FCSN model built above.
model.summary()

In [4]:
# The model ends in a Softmax layer and the labels are one-hot encoded, so
# plain categorical cross-entropy is used; Adam runs with its defaults.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=["accuracy"],
)

# Data Pipeline

In [5]:
filename = "fcsn_tvsum.h5"
mode = "r"

train_size = 35   # number of records used for training; the rest are held out
batch_size = 5
split_seed = 0    # fixes which records end up in the train / test split

# Read every record of the HDF5 file into memory.
# NOTE(review): assumes all records share the same 'feature' and 'label'
# shapes so they can be stacked into one dataset — confirm against the file.
with h5py.File(filename, mode) as f:
    data_list = []
    label_list = []
    for video_str in f.keys():
        record = f[video_str]
        data_list.append(record['feature'][()])
        label = np.int32(record['label'][()])
        # One-hot encode the integer labels into 2 classes.
        label_list.append(tf.one_hot(label, depth=2, dtype=tf.float32))

# Shuffle ONCE, before the dataset is built. A `Dataset.shuffle` placed ahead
# of take()/skip() reshuffles on every iteration by default, which would draw
# a different train/test split on each pass and leak held-out records into
# training.
order = np.random.RandomState(split_seed).permutation(len(data_list))
data_list = [data_list[i] for i in order]
label_list = [label_list[i] for i in order]

ds = tf.data.Dataset.from_tensor_slices((data_list, label_list))

train_ds = ds.take(train_size)
test_ds = ds.skip(train_size)

# Shuffle individual examples before batching so that the composition of the
# batches changes from epoch to epoch (shuffling after batch() would only
# reorder fixed batches).
train_ds = (
    train_ds
    .shuffle(train_size, reshuffle_each_iteration=True)
    .batch(batch_size)
    .repeat()
)
test_ds = test_ds.batch(batch_size).repeat(1)


# Callback functions for logging purposes

In [10]:
class AccuracyHistory(Callback):
    """Keras callback that records the validation accuracy of every epoch.

    After (or during) training, ``acc`` holds one entry per finished epoch:
    the value logged under ``'val_accuracy'``, or ``None`` if that key was
    not present in the logs.
    """

    def __init__(self):
        super(AccuracyHistory, self).__init__()
        # Defined here as well so the attribute exists before training starts.
        self.acc = []

    def on_train_begin(self, logs=None):
        """Reset the history at the start of each ``fit`` call."""
        self.acc = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's ``'val_accuracy'`` to ``acc``.

        Note: despite its name, ``batch`` receives the epoch index that Keras
        passes to ``on_epoch_end``; the name is kept for compatibility.
        """
        logs = logs or {}
        self.acc.append(logs.get('val_accuracy'))

# Training the model on the tf.data datasets

In [11]:
# Checkpoints are written directly into this directory.
# NOTE(review): "./" is the working directory itself; consider a dedicated
# sub-directory so saved-model files do not mix with the notebook's files.
ckpt_path = "./"

history = AccuracyHistory()
# Keep only the checkpoint with the best validation accuracy seen so far.
cp_callback = ModelCheckpoint(
    ckpt_path,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# train_ds repeats indefinitely, so steps_per_epoch defines the epoch length.
model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=10,
    steps_per_epoch=10,
    callbacks=[history, cp_callback],
)



Train for 10 steps, validate for 3 steps
Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.28313, saving model to ./
INFO:tensorflow:Assets written to: ./assets
Epoch 2/10


KeyboardInterrupt: 