In [1]:
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from keras import backend as K
from keras.engine.topology import Layer
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras.callbacks import ModelCheckpoint
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.callbacks import Callback, ReduceLROnPlateau

import tensorflow as tf
import keras.backend.tensorflow_backend as tfb

from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import numpy as np



Using TensorFlow backend.


In [2]:
def weighted_binary_crossentropy(target, output):
    """
    Binary cross-entropy in which positive targets are up-weighted.

    ``output`` is clipped to ``[epsilon, 1 - epsilon]``, converted back to
    logits via ``log(p / (1 - p))`` and passed to
    ``tf.nn.weighted_cross_entropy_with_logits`` with the module-level
    ``POS_WEIGHT`` as the multiplier applied to positive targets.

    Modelled on the following functions:
    * keras.losses.binary_crossentropy
    * keras.backend.tensorflow_backend.binary_crossentropy
    * tf.nn.weighted_cross_entropy_with_logits

    :param target: tensor holding the ground-truth labels.
    :param output: tensor holding the predictions (clipped and treated as
        probabilities before being turned into logits).
    :return: the weighted loss averaged over the last axis.
    """
    # Clip so that the logit transform below never sees exactly 0 or 1.
    eps = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
    clipped = tf.clip_by_value(output, eps, 1 - eps)

    # Inverse sigmoid: probabilities -> logits.
    logits = tf.log(clipped / (1 - clipped))

    per_element_loss = tf.nn.weighted_cross_entropy_with_logits(
        targets=target,
        logits=logits,
        pos_weight=POS_WEIGHT,
    )
    return tf.reduce_mean(per_element_loss, axis=-1)


class FPRMetrics(Callback):
    """
    Keras callback that reports micro-averaged F1 / precision / recall and
    ROC AUC on a fixed validation set after every epoch.

    F1, precision and recall are computed on the predictions rounded to
    0/1. ROC AUC is computed on the raw (un-rounded) model outputs, because
    it is a ranking metric and rounding the scores collapses the ranking to
    a single operating point.

    The metrics of the epoch with the highest F1 seen so far are kept in
    ``best_metrics`` (keys ``"f1"``, ``"p"``, ``"r"``, ``"auc"``).
    """

    def __init__(self, v_x, v_y):
        """
        :param v_x: validation inputs, passed as-is to ``model.predict``.
        :param v_y: validation targets, compared against the predictions.
        """
        # Let the base class initialise its own state.
        super(FPRMetrics, self).__init__()
        self.validation_x = v_x
        self.validation_y = v_y
        self.best_metrics = {"f1": 0, "p": 0, "r": 0, "auc": 0}

    def on_train_begin(self, logs=None):
        """Reset the per-epoch metric histories at the start of training."""
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate on the validation set, record and print the metrics."""
        val_scores = np.asarray(self.model.predict(self.validation_x))
        val_predict = val_scores.round()
        val_targ = self.validation_y

        _val_f1 = f1_score(val_targ, val_predict, average='micro')
        _val_recall = recall_score(val_targ, val_predict, average='micro')
        _val_precision = precision_score(val_targ, val_predict, average='micro')
        # AUC needs the continuous scores, not the thresholded labels.
        _val_roc_auc = roc_auc_score(val_targ, val_scores, average='micro')

        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)

        # "Best" is defined by F1 only; the other values are those of that epoch.
        if _val_f1 > self.best_metrics['f1']:
            self.best_metrics = {"f1": _val_f1, "p": _val_precision, "r": _val_recall, "auc": _val_roc_auc}

        print("\n current - val_f1: %f - val_precision: %f - val_recall %f, roc auc - %f" % (_val_f1, _val_precision, _val_recall, _val_roc_auc))
        print("\n best - val_f1: %f - val_precision: %f - val_recall %f, roc auc - %f" % (self.best_metrics['f1'], self.best_metrics['p'], self.best_metrics['r'], self.best_metrics['auc']))


class RecallPrecisionAtK(Callback):
    """
    Keras callback that reports recall@k and precision@k on a fixed
    validation set after every epoch.

    For every validation sample the ``k`` highest-scoring classes are
    selected. Among those, a class counts as "recommended" when its score
    rounds to 1 and as "relevant" when its target rounds to 1.

    * recall@k    = (recommended and relevant) / all relevant targets
    * precision@k = (recommended and relevant) / all recommended in the top k

    Either metric is reported as 0.0 when its denominator is zero.
    """

    def __init__(self, v_x, v_y, k):
        """
        :param v_x: validation inputs, passed as-is to ``model.predict``.
        :param v_y: validation targets; indexed as a 2-D (samples, classes)
            array.
        :param k: number of top-scoring classes considered per sample.
        """
        # Let the base class initialise its own state.
        super(RecallPrecisionAtK, self).__init__()
        self.validation_x = v_x
        self.validation_y = v_y
        self.k = k

    def on_train_begin(self, logs=None):
        """Reset the per-epoch metric histories at the start of training."""
        self.val_precision_at_k = []
        self.val_recall_at_k = []

    def on_epoch_end(self, epoch, logs=None):
        """Compute, record and print recall@k / precision@k for this epoch."""
        val_predict = np.asarray(self.model.predict(self.validation_x))
        val_targ = np.asarray(self.validation_y)

        # Never ask for more classes than exist.
        k = min(self.k, val_predict.shape[-1])

        # Column indices of the k largest scores in every row. The slice is
        # taken along the class axis; slicing the first axis would select
        # samples instead of classes.
        top_k = np.argpartition(-val_predict, k - 1, axis=-1)[:, :k]
        rows = np.arange(val_predict.shape[0])[:, np.newaxis]

        recommended_at_k = np.round(val_predict[rows, top_k])
        relevant_at_k = np.round(val_targ[rows, top_k])

        recommended_and_relevant = np.sum(recommended_at_k * relevant_at_k)

        total_relevant = np.sum(np.round(val_targ))
        total_recommended = np.sum(recommended_at_k)

        # Guard against empty denominators instead of emitting nan/inf.
        recall_at_k = recommended_and_relevant / total_relevant if total_relevant else 0.0
        precision_at_k = recommended_and_relevant / total_recommended if total_recommended else 0.0

        self.val_precision_at_k.append(precision_at_k)
        self.val_recall_at_k.append(recall_at_k)
        print("\n - recall%d: %f - precision%d: %f\n" % (self.k, recall_at_k, self.k, precision_at_k))


class Baseline(Layer):
    """
    Parameter-free aggregation layer: averages its input over axis 1.

    Input is indexed as a rank-3 tensor; the output keeps dimensions 0 and 2
    of the input shape.
    """

    def __init__(self, **kwargs):
        super(Baseline, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights to create; only mark the layer as built.
        super(Baseline, self).build(input_shape)

    def call(self, x):
        """Return the mean of ``x`` taken along axis 1."""
        return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        """Axis 1 is reduced away, leaving (input_shape[0], input_shape[2])."""
        batch_dim = input_shape[0]
        feature_dim = input_shape[2]
        return (batch_dim, feature_dim)


class ContextGating(Layer):
    """
    Element-wise gating of a (batch, n) input: ``tanh(x . W + b) * x``.

    ``W`` is an (n, n) L2-regularised matrix and ``b`` an (n,) bias; the
    output has the same shape as the input.
    """

    def __init__(self, **kwargs):
        super(ContextGating, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the square gate matrix and its bias from the feature size."""
        feature_dim = input_shape[1]
        self.W = self.add_weight(
            name='W',
            shape=(feature_dim, feature_dim),
            initializer='glorot_normal',
            regularizer=l2(),
            trainable=True,
        )
        self.b = self.add_weight(
            name='b',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        super(ContextGating, self).build(input_shape)

    def call(self, x):
        """Compute the gate from ``x`` and apply it to ``x`` element-wise."""
        # (b, n) x (n, n) + (n,) -> (b, n)
        pre_activation = K.dot(x, self.W) + self.b
        gate = K.tanh(pre_activation)  # (b, n)
        return gate * x  # (b, n) * (b, n)

    def compute_output_shape(self, input_shape):
        """Output shape equals the (batch, n) input shape."""
        return (input_shape[0], input_shape[1])

class SqueezeAttentionShort(Layer):
    """
    Single-step attention pooling whose scores are computed in a reduced
    ("squeezed") feature space.

    For an input ``x`` of shape (b, k, n):

    1. project every item to ``squeeze_dimension`` features with ``sb``,
    2. score every item with the query vector ``q0`` and softmax over k,
    3. return the attention-weighted sum of the *original* items, (b, n).
    """

    def __init__(self, squeeze_dimension, **kwargs):
        """
        :param squeeze_dimension: size of the reduced feature space used to
            compute the attention scores.
        """
        super(SqueezeAttentionShort, self).__init__(**kwargs)
        self.squeeze_dimension = squeeze_dimension

    def build(self, input_shape):
        """Create the query vector ``q0`` and the squeeze projection ``sb``."""
        self.q0 = self.add_weight(name='q0',
                                  shape=(self.squeeze_dimension, 1),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        self.sb = self.add_weight(name='squeeze_block',
                                  shape=(input_shape[2], self.squeeze_dimension),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        super(SqueezeAttentionShort, self).build(input_shape)

    def call(self, x):
        """Attention-pool ``x`` (b, k, n) into a (b, n) tensor."""
        sq = K.dot(x, self.sb)  # (b, k, n) x (n, s) = (b, k, s)
        r0 = K.dot(sq, self.q0)  # (b, k, s) x (s, 1) = (b, k, 1)
        r0 = K.squeeze(r0, -1)  # (b, k)
        r0 = K.softmax(r0)  # (b, k)
        r0 = K.batch_dot(r0, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

        return r0  # (b, n)

    def compute_output_shape(self, input_shape):
        """The item axis is pooled away: (batch, n)."""
        return (input_shape[0], input_shape[2])

    def get_config(self):
        """Include the constructor argument so the layer can be re-created
        from a saved model configuration."""
        config = super(SqueezeAttentionShort, self).get_config()
        config['squeeze_dimension'] = self.squeeze_dimension
        return config

class SqueezeAttentionShortSqueeze(Layer):
    """
    Single-step squeezed attention pooling that also returns its result in
    the squeezed feature space.

    For an input ``x`` of shape (b, k, n):

    1. project every item to ``squeeze_dimension`` features with ``sb``,
    2. score every item with the query vector ``q0`` and softmax over k,
    3. take the attention-weighted sum of the original items, (b, n),
    4. project that sum with the same ``sb`` and return it, (b, s).
    """

    def __init__(self, squeeze_dimension, **kwargs):
        """
        :param squeeze_dimension: size of the reduced feature space; also
            the size of the layer output.
        """
        super(SqueezeAttentionShortSqueeze, self).__init__(**kwargs)
        self.squeeze_dimension = squeeze_dimension

    def build(self, input_shape):
        """Create the query vector ``q0`` and the squeeze projection ``sb``."""
        self.q0 = self.add_weight(name='q0',
                                  shape=(self.squeeze_dimension, 1),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        self.sb = self.add_weight(name='squeeze_block',
                                  shape=(input_shape[2], self.squeeze_dimension),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        super(SqueezeAttentionShortSqueeze, self).build(input_shape)

    def call(self, x):
        """Attention-pool ``x`` (b, k, n) and squeeze the result to (b, s)."""
        sq = K.dot(x, self.sb)  # (b, k, n) x (n, s) = (b, k, s)
        r0 = K.dot(sq, self.q0)  # (b, k, s) x (s, 1) = (b, k, 1)
        r0 = K.squeeze(r0, -1)  # (b, k)
        r0 = K.softmax(r0)  # (b, k)
        r0 = K.batch_dot(r0, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

        return K.dot(r0, self.sb)  # (b, n) x (n, s) = (b, s)

    def compute_output_shape(self, input_shape):
        """Output lives in the squeezed space: (batch, squeeze_dimension)."""
        return (input_shape[0], self.squeeze_dimension)

    def get_config(self):
        """Include the constructor argument so the layer can be re-created
        from a saved model configuration."""
        config = super(SqueezeAttentionShortSqueeze, self).get_config()
        config['squeeze_dimension'] = self.squeeze_dimension
        return config


class SqueezeExpandAttention(Layer):
    """
    Two-step attention pooling whose query refinement happens in a reduced
    ("squeezed") feature space.

    For an input ``x`` of shape (b, k, n):

    1. first pass: score the squeezed items with ``q0``, softmax over k and
       pool the original items to (b, n); squeeze that to (b, s) with ``sb``,
    2. build a second query ``tanh(r0 . W + b)`` in the squeezed space and
       expand it back to n features with ``eb``,
    3. second pass: score the original items with that query, softmax over
       k and return the weighted sum of the original items, (b, n).
    """

    def __init__(self, squeeze_dimension, **kwargs):
        """
        :param squeeze_dimension: size of the reduced feature space used for
            scoring and for the query transformation.
        """
        super(SqueezeExpandAttention, self).__init__(**kwargs)
        self.squeeze_dimension = squeeze_dimension

    def build(self, input_shape):
        """Create ``q0``, ``W``, ``b`` and the squeeze / expand projections."""
        self.q0 = self.add_weight(name='q0',
                                  shape=(self.squeeze_dimension, 1),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        self.W = self.add_weight(name='W',
                                 shape=(self.squeeze_dimension, self.squeeze_dimension),
                                 initializer='glorot_normal',
                                 trainable=True,
                                 regularizer=l2())
        self.b = self.add_weight(name='b',
                                 shape=(self.squeeze_dimension,),
                                 initializer='zeros',
                                 trainable=True)
        self.sb = self.add_weight(name='squeeze_block',
                                  shape=(input_shape[2], self.squeeze_dimension),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        self.eb = self.add_weight(name='expand_block',
                                  shape=(self.squeeze_dimension, input_shape[2]),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        super(SqueezeExpandAttention, self).build(input_shape)

    def call(self, x):
        """Attention-pool ``x`` (b, k, n) into a (b, n) tensor in two passes."""
        # First attention pass, scored in the squeezed space.
        sq = K.dot(x, self.sb)  # (b, k, n) x (n, s) = (b, k, s)
        r0 = K.dot(sq, self.q0)  # (b, k, s) x (s, 1) = (b, k, 1)
        r0 = K.squeeze(r0, -1)  # (b, k)
        r0 = K.softmax(r0)  # (b, k)
        r0 = K.batch_dot(r0, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)
        r0 = K.dot(r0, self.sb)  # (b, n) x (n, s) = (b, s)

        # Second query: transform in the squeezed space, then expand.
        q1 = K.dot(r0, self.W)  # (b, s) x (s, s) = (b, s)
        q1 = q1 + self.b  # (b, s) + (s,) = (b, s)
        q1 = K.tanh(q1)  # (b, s)
        q1 = K.dot(q1, self.eb)  # (b, s) x (s, n) = (b, n)

        # Second attention pass over the original items.
        r1 = K.batch_dot(q1, x, axes=(1, 2))  # (b, n) x (b, k, n) = (b, k)
        r1 = K.softmax(r1)  # (b, k)
        r1 = K.batch_dot(r1, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

        return r1  # (b, n)

    def compute_output_shape(self, input_shape):
        """The item axis is pooled away: (batch, n)."""
        return (input_shape[0], input_shape[2])

    def get_config(self):
        """Include the constructor argument so the layer can be re-created
        from a saved model configuration."""
        config = super(SqueezeExpandAttention, self).get_config()
        config['squeeze_dimension'] = self.squeeze_dimension
        return config


class SqueezeExpandSqueezeAttention(Layer):
    """
    Two-step squeezed attention pooling that returns its result in the
    squeezed feature space.

    For an input ``x`` of shape (b, k, n):

    1. first pass: score the squeezed items with ``q0``, softmax over k and
       pool the original items to (b, n); squeeze that to (b, s) with ``sb``,
    2. build a second query ``tanh(r0 . W + b)`` in the squeezed space and
       expand it back to n features with ``eb``,
    3. second pass: score the original items with that query, softmax over
       k and pool the original items to (b, n),
    4. project the pooled vector with ``sb`` and return it, (b, s).
    """

    def __init__(self, squeeze_dimension, **kwargs):
        """
        :param squeeze_dimension: size of the reduced feature space; also
            the size of the layer output.
        """
        super(SqueezeExpandSqueezeAttention, self).__init__(**kwargs)
        self.squeeze_dimension = squeeze_dimension

    def build(self, input_shape):
        """Create ``q0``, ``W``, ``b`` and the squeeze / expand projections."""
        self.q0 = self.add_weight(name='q0',
                                  shape=(self.squeeze_dimension, 1),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        self.W = self.add_weight(name='W',
                                 shape=(self.squeeze_dimension, self.squeeze_dimension),
                                 initializer='glorot_normal',
                                 trainable=True,
                                 regularizer=l2())
        self.b = self.add_weight(name='b',
                                 shape=(self.squeeze_dimension,),
                                 initializer='zeros',
                                 trainable=True)
        self.sb = self.add_weight(name='squeeze_block',
                                  shape=(input_shape[2], self.squeeze_dimension),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        self.eb = self.add_weight(name='expand_block',
                                  shape=(self.squeeze_dimension, input_shape[2]),
                                  initializer='glorot_normal',
                                  trainable=True,
                                  regularizer=l2())
        super(SqueezeExpandSqueezeAttention, self).build(input_shape)

    def call(self, x):
        """Attention-pool ``x`` (b, k, n) in two passes; return (b, s)."""
        # First attention pass, scored in the squeezed space.
        sq = K.dot(x, self.sb)  # (b, k, n) x (n, s) = (b, k, s)
        r0 = K.dot(sq, self.q0)  # (b, k, s) x (s, 1) = (b, k, 1)
        r0 = K.squeeze(r0, -1)  # (b, k)
        r0 = K.softmax(r0)  # (b, k)
        r0 = K.batch_dot(r0, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)
        r0 = K.dot(r0, self.sb)  # (b, n) x (n, s) = (b, s)

        # Second query: transform in the squeezed space, then expand.
        q1 = K.dot(r0, self.W)  # (b, s) x (s, s) = (b, s)
        q1 = q1 + self.b  # (b, s) + (s,) = (b, s)
        q1 = K.tanh(q1)  # (b, s)
        q1 = K.dot(q1, self.eb)  # (b, s) x (s, n) = (b, n)

        # Second attention pass over the original items.
        r1 = K.batch_dot(q1, x, axes=(1, 2))  # (b, n) x (b, k, n) = (b, k)
        r1 = K.softmax(r1)  # (b, k)
        r1 = K.batch_dot(r1, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

        return K.dot(r1, self.sb)  # (b, n) x (n, s) = (b, s)

    def compute_output_shape(self, input_shape):
        """Output lives in the squeezed space: (batch, squeeze_dimension)."""
        return (input_shape[0], self.squeeze_dimension)

    def get_config(self):
        """Include the constructor argument so the layer can be re-created
        from a saved model configuration."""
        config = super(SqueezeExpandSqueezeAttention, self).get_config()
        config['squeeze_dimension'] = self.squeeze_dimension
        return config


class AttentionBlock(Layer):
    """
    Two-step attention pooling over a (b, k, n) input.

    A first query ``q0`` pools the k items into one vector; that vector is
    passed through ``tanh(r0 . W + b)`` to form a second query, which pools
    the items again. The result of the second pooling, (b, n), is returned.
    """

    def __init__(self, **kwargs):
        super(AttentionBlock, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create ``q0`` (n, 1), ``W`` (n, n) and ``b`` (n,)."""
        feature_dim = input_shape[2]
        self.q0 = self.add_weight(
            name='q0',
            shape=(feature_dim, 1),
            initializer='glorot_normal',
            trainable=True,
            regularizer=l2(),
        )
        self.W = self.add_weight(
            name='W',
            shape=(feature_dim, feature_dim),
            initializer='glorot_normal',
            trainable=True,
            regularizer=l2(),
        )
        self.b = self.add_weight(
            name='b',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )

        super(AttentionBlock, self).build(input_shape)

    def call(self, x):
        """Attention-pool ``x`` (b, k, n) into a (b, n) tensor in two passes."""
        # First pass: score every item with q0 and pool.
        first_scores = K.dot(x, self.q0)  # (b, k, n) x (n, 1) = (b, k, 1)
        first_scores = K.squeeze(first_scores, -1)  # (b, k)
        first_weights = K.softmax(first_scores)  # (b, k)
        first_pooled = K.batch_dot(first_weights, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

        # Second query derived from the first pooled vector.
        second_query = K.dot(first_pooled, self.W)  # (b, n) x (n, n) = (b, n)
        second_query = second_query + self.b  # (b, n) + (n,) = (b, n)
        second_query = K.tanh(second_query)  # (b, n)

        # Second pass: score every item with the new query and pool again.
        second_scores = K.batch_dot(second_query, x, axes=(1, 2))  # (b, n) x (b, k, n) = (b, k)
        second_weights = K.softmax(second_scores)  # (b, k)
        return K.batch_dot(second_weights, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

    def compute_output_shape(self, input_shape):
        """The item axis is pooled away: (batch, n)."""
        return (input_shape[0], input_shape[2])

class AttentionBlockShort(Layer):
    """
    Single-step attention pooling over a (b, k, n) input.

    Every item is scored against the learned query ``q0``; the scores are
    soft-maxed over k and used to form a weighted sum of the items, (b, n).
    """

    def __init__(self, **kwargs):
        super(AttentionBlockShort, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the query vector ``q0`` of shape (n, 1)."""
        feature_dim = input_shape[2]
        self.q0 = self.add_weight(
            name='q0',
            shape=(feature_dim, 1),
            initializer='glorot_normal',
            trainable=True,
            regularizer=l2(),
        )

        super(AttentionBlockShort, self).build(input_shape)

    def call(self, x):
        """Attention-pool ``x`` (b, k, n) into a (b, n) tensor."""
        scores = K.dot(x, self.q0)  # (b, k, n) x (n, 1) = (b, k, 1)
        scores = K.squeeze(scores, -1)  # (b, k)
        weights = K.softmax(scores)  # (b, k)
        return K.batch_dot(weights, x, axes=(1, 1))  # (b, k) x (b, k, n) = (b, n)

    def compute_output_shape(self, input_shape):
        """The item axis is pooled away: (batch, n)."""
        return (input_shape[0], input_shape[2])

In [3]:
def get_inputs():
    """
    Load the pre-split train / test arrays from ``.npy`` files in the
    current working directory.

    :return: tuple ``(x_train, x_test, y_train, y_test)`` read from
        ``ag_train_x.npy``, ``test_agg_x.npy``, ``ag_train_y.npy`` and
        ``test_agg_y.npy`` respectively.
    """
    x_train = np.load("ag_train_x.npy")
    x_test = np.load('test_agg_x.npy')
    y_train = np.load("ag_train_y.npy")
    y_test = np.load('test_agg_y.npy')
    return x_train, x_test, y_train, y_test


def train(model, x_train, y_train, x_test, y_test, model_save_path):
    """
    Fit ``model`` on the training data, validating on the test data.

    Uses the module-level ``BATCH_SIZE`` and ``EPOCHS`` and three callbacks:
    a checkpoint that keeps the full model with the best ``val_loss`` at
    ``model_save_path``, the ``FPRMetrics`` reporter, and a learning-rate
    reduction on ``val_loss`` plateaus.

    :param model: compiled Keras model to train.
    :param x_train: training inputs.
    :param y_train: training targets.
    :param x_test: validation inputs.
    :param y_test: validation targets.
    :param model_save_path: file path handed to ``ModelCheckpoint``.
    """
    validation_metrics = FPRMetrics(x_test, y_test)
    # RecallPrecisionAtK(x_test, y_test, 10) is available but not enabled here.

    best_model_checkpoint = ModelCheckpoint(
        model_save_path,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        mode='auto',
        period=1,
    )

    lr_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        patience=4,
        min_lr=0.000001,
        min_delta=0.01,
    )

    model.fit(
        x=x_train,
        y=y_train,
        batch_size=BATCH_SIZE,
        validation_data=(x_test, y_test),
        shuffle=True,
        epochs=EPOCHS,
        callbacks=[best_model_checkpoint, validation_metrics, lr_on_plateau],
    )

# Number of feature vectors per sample (second dimension of the model input).
input_length = 8


def _build_classifier(aggregation_layer, use_context_gating=True, use_fc=True):
    """
    Assemble one classifier: input -> aggregation -> [gating] -> [FC] -> sigmoid.

    Shared implementation behind every ``build_*`` function below. Layers
    are created in the same order as in the hand-written variants, so the
    auto-generated layer names (``dense_1``, ``dense_2``, ...) are unchanged.
    Reads ``FEATURE_VECTOR_DIM``, ``FC_LAYER_SIZE`` and ``NUM_OF_CLASSES``
    from module scope at call time.

    :param aggregation_layer: layer instance that reduces the
        (batch, samples, features) input to a 2-D tensor.
    :param use_context_gating: insert a ``ContextGating`` layer after the
        aggregation.
    :param use_fc: insert a ReLU ``Dense`` layer of ``FC_LAYER_SIZE`` units
        before the output layer.
    :return: an uncompiled ``Model``.
    """
    inputs = Input(shape=(input_length, FEATURE_VECTOR_DIM))  # (batch, samples, features)
    x = aggregation_layer(inputs)  # (batch, features) or (batch, squeeze_dim)
    if use_context_gating:
        x = ContextGating()(x)  # same shape as its input
    if use_fc:
        x = Dense(units=FC_LAYER_SIZE, activation='relu')(x)  # (batch, units)
    predictions = Dense(NUM_OF_CLASSES, activation='sigmoid')(x)  # (batch, classes)
    return Model(inputs=inputs, outputs=predictions)


def build_model_with_AB():
    """AttentionBlock -> FC -> sigmoid output (no context gating)."""
    return _build_classifier(AttentionBlock(), use_context_gating=False, use_fc=True)


def build_model_with_AB_CG():
    """AttentionBlock -> ContextGating -> FC -> sigmoid output."""
    return _build_classifier(AttentionBlock(), use_context_gating=True, use_fc=True)


def build_model_with_ABS():
    """AttentionBlockShort -> ContextGating -> sigmoid output (no FC)."""
    return _build_classifier(AttentionBlockShort(), use_context_gating=True, use_fc=False)


def build_model_with_ABS_FC():
    """AttentionBlockShort -> ContextGating -> FC -> sigmoid output."""
    return _build_classifier(AttentionBlockShort(), use_context_gating=True, use_fc=True)


def build_model_with_SEA():
    """SqueezeExpandAttention -> ContextGating -> sigmoid output (no FC)."""
    return _build_classifier(
        SqueezeExpandAttention(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=True, use_fc=False)


def build_model_with_SEA_FC():
    """SqueezeExpandAttention -> ContextGating -> FC -> sigmoid output."""
    return _build_classifier(
        SqueezeExpandAttention(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=True, use_fc=True)


def build_model_with_SEAS_FC():
    """SqueezeExpandSqueezeAttention -> ContextGating -> FC -> sigmoid output."""
    return _build_classifier(
        SqueezeExpandSqueezeAttention(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=True, use_fc=True)


def build_model_with_SEA_FC_NOCG():
    """SqueezeExpandAttention -> FC -> sigmoid output (no context gating)."""
    return _build_classifier(
        SqueezeExpandAttention(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=False, use_fc=True)


def build_model_with_SAS():
    """SqueezeAttentionShort -> ContextGating -> sigmoid output (no FC)."""
    return _build_classifier(
        SqueezeAttentionShort(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=True, use_fc=False)


def build_model_with_SASS():
    """SqueezeAttentionShortSqueeze -> ContextGating -> sigmoid output (no FC)."""
    return _build_classifier(
        SqueezeAttentionShortSqueeze(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=True, use_fc=False)


def build_model_with_SASS_FC():
    """SqueezeAttentionShort -> ContextGating -> FC -> sigmoid output."""
    # NOTE(review): despite the "SASS" name this uses SqueezeAttentionShort,
    # not SqueezeAttentionShortSqueeze. Kept as-is to preserve the existing
    # architecture -- confirm which layer was intended.
    return _build_classifier(
        SqueezeAttentionShort(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=True, use_fc=True)


def build_model_with_SASS_FC_NOCG():
    """SqueezeAttentionShort -> FC -> sigmoid output (no context gating)."""
    # NOTE(review): despite the "SASS" name this uses SqueezeAttentionShort,
    # not SqueezeAttentionShortSqueeze. Kept as-is to preserve the existing
    # architecture -- confirm which layer was intended.
    return _build_classifier(
        SqueezeAttentionShort(squeeze_dimension=SQUEEZE_DIMENSION),
        use_context_gating=False, use_fc=True)


def build_baseline_model():
    """Baseline (mean over samples) -> FC -> sigmoid output."""
    return _build_classifier(Baseline(), use_context_gating=False, use_fc=True)

In [4]:
# Notebook-wide configuration, read by the loss, the model builders and train().

# Length of each feature vector (last dimension of the model input).
FEATURE_VECTOR_DIM = 1024
POS_WEIGHT = 8  # multiplier for positive targets in weighted_binary_crossentropy; needs to be tuned
# Units in the optional fully connected ReLU layer.
FC_LAYER_SIZE = 256
# Units in the sigmoid output layer.
NUM_OF_CLASSES = 75
# squeeze_dimension passed to the Squeeze* attention layers.
SQUEEZE_DIMENSION = 256
# batch_size and epochs handed to model.fit() in train().
BATCH_SIZE = 64
EPOCHS = 100
# get_inputs() returns the arrays in exactly this order: train x, test x, train y, test y.
x_train, x_test, y_train, y_test = get_inputs()

In [10]:
# Output .pb file name -> zero-argument function that builds the model.
# Iteration follows insertion order, i.e. the order listed here.
models = {
    'Baseline.pb': build_baseline_model,
    'AB_CG.pb': build_model_with_AB_CG,
    'ABS.pb': build_model_with_ABS,
    'SEA.pb': build_model_with_SEA,
    'SAS.pb': build_model_with_SAS,
    'SASS.pb': build_model_with_SASS,
    'ABS_FC.pb': build_model_with_ABS_FC,
    'SEA_FC.pb': build_model_with_SEA_FC,
    'SEAS_FC.pb': build_model_with_SEAS_FC,
    'SASS_FC_NOCG.pb': build_model_with_SASS_FC_NOCG,
    'SEA_FC_NOCG.pb': build_model_with_SEA_FC_NOCG,
    'SASS_FC.pb': build_model_with_SASS_FC,
}

# Build every model in a fresh session and export it as a frozen binary
# GraphDef under res/small/.
# NOTE: freeze_session() is defined in a separate cell that must have been
# run before this one.
# NOTE(review): as written, no weights are loaded or trained before
# freezing, so the exported graphs carry freshly initialised weights.
# Compilation and training are intentionally disabled (kept for reference).
for save_path, model_fn in models.items():
    # Start from an empty graph so layer names restart at *_1 for each model.
    K.clear_session()
    model = model_fn()
    #model.compile(optimizer=Adam(lr=0.0001),
                  #loss=weighted_binary_crossentropy,
                  #metrics=['categorical_accuracy'])
    print('============', save_path, '================')
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    model.summary()
    frozen_graph = freeze_session(K.get_session(), output_names=[out.op.name for out in model.outputs])
    tf.train.write_graph(frozen_graph, "res/small", save_path, as_text=False)
    #train(model, x_train, y_train, x_test, y_test, save_path)

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 8, 1024)           0         
_________________________________________________________________
squeeze_attention_short_1 (S (None, 1024)              262400    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               262400    
_________________________________________________________________
dense_2 (Dense)              (None, 75)                19275     
Total params: 544,075
Trainable params: 544,075
Non-trainable params: 0
_________________________________________________________________
None
INFO:tensorflow:Froze 6 variables.
INFO:tensorflow:Converted 6 variables to const ops.
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (I

INFO:tensorflow:Converted 11 variables to const ops.
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 8, 1024)           0         
_________________________________________________________________
squeeze_expand_attention_1 ( (None, 1024)              590336    
_________________________________________________________________
context_gating_1 (ContextGat (None, 1024)              1049600   
_________________________________________________________________
dense_1 (Dense)              (None, 75)                76875     
Total params: 1,716,811
Trainable params: 1,716,811
Non-trainable params: 0
_________________________________________________________________
None
INFO:tensorflow:Froze 9 variables.
INFO:tensorflow:Converted 9 variables to const ops.
Model: "model_1"
_________________________________________________________________
Layer (type)       

In [7]:
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a computation graph.

    Creates a new graph definition in which variable nodes are replaced by
    constants taking their current value in the session. The names of all
    global variables are added to the requested outputs before conversion,
    so every variable node is treated as an output of the frozen graph.

    The caller's ``output_names`` list is not modified.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    graph = session.graph
    with graph.as_default():
        # Enumerate the variables once; used for both name lists below.
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))

        # Build a new list instead of extending the argument in place.
        all_output_names = list(output_names or []) + variable_names

        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, input_graph_def, all_output_names, freeze_var_names)

        return frozen_graph

In [14]:
# Convert the frozen baseline graph to a TFLite model.
# Input tensor: input_1 with shape (1, 8, 1024); output tensor: dense_2/Sigmoid.
# NOTE(review): the output file was previously spelled "baeline.tflite";
# update any consumer that still expects the misspelled name.
!tflite_convert \
    --output_file=res/TFLITE/baseline.tflite \
    --graph_def_file=res/TFLITE/baseline.pb \
    --input_shapes=1,8,1024 \
    --input_arrays=input_1 \
    --output_arrays=dense_2/Sigmoid
    

INFO:tensorflow:Froze 21 variables.
INFO:tensorflow:Converted 21 variables to const ops.
2019-04-24 17:26:20.494452: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2019-04-24 17:26:20.607407: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: 
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6325
pciBusID: 0000:0a:00.0
totalMemory: 10.92GiB freeMemory: 362.50MiB
2019-04-24 17:26:20.607448: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-04-24 17:26:20.851307: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-04-24 17:26:20.851361: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 
2019-04-24 17:26:20.851367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0:   N 
2019-04-24 17:26:20.851533: I t