In [1]:
import os
import sys
import pytest
import inspect
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf

from copy import copy
from itertools import chain
from numpy.testing import assert_allclose
from tensorflow.python.layers import utils
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.backend import batch_dot
from tensorflow.python.estimator.canned.head import _Head
from collections import namedtuple, OrderedDict, defaultdict
from tensorflow.python.keras.models import Model, load_model, save_model
from tensorflow.python.estimator.canned.optimizers import get_optimizer_instance
from tensorflow.python.keras.layers import Input, Masking, Flatten, Layer, Embedding
from tensorflow.python.keras.initializers import RandomNormal,Zeros, glorot_normal,glorot_uniform, TruncatedNormal

In [2]:
class PredictionLayer(Layer):
    """Final output layer of the model.

    Optionally adds a single trainable global bias to the incoming logit,
    applies a sigmoid when ``task == "binary"`` and reshapes the result to
    ``(-1, 1)``.

    Arguments
      - **task**: one of ``"binary"``, ``"multiclass"`` or ``"regression"``.
        Only ``"binary"`` changes the computation in ``call``; the other two
        values pass validation but leave the logit untransformed.
      - **use_bias**: bool. Whether to add the global bias term.
    """

    def __init__(self, task='binary', use_bias=True, **kwargs):
        allowed_tasks = ("binary", "multiclass", "regression")
        if task not in allowed_tasks:
            raise ValueError("task must be binary,multiclass or regression")
        self.task = task
        self.use_bias = use_bias
        super(PredictionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the scalar bias weight (only when ``use_bias`` is set)."""
        if self.use_bias:
            self.global_bias = self.add_weight(
                name="global_bias", shape=(1,), initializer=Zeros())

        super(PredictionLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return the prediction tensor reshaped to ``(-1, 1)``."""
        logit = inputs
        if self.use_bias:
            logit = tf.nn.bias_add(logit, self.global_bias, data_format='NHWC')

        prediction = tf.sigmoid(logit) if self.task == "binary" else logit
        return tf.reshape(prediction, (-1, 1))

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self, ):
        """Return the base layer config extended with ``task`` and ``use_bias``."""
        merged = dict(super(PredictionLayer, self).get_config())
        merged.update({'task': self.task, 'use_bias': self.use_bias})
        return merged

In [3]:
def activation_layer(activation):
    """Create an activation layer from a name or from a ``Layer`` subclass.

    :param activation: ``"dice"``/``"Dice"``, any other string (forwarded to
        ``tf.keras.layers.Activation``), or a ``Layer`` subclass, which is
        instantiated without arguments.
    :return: the instantiated activation layer.
    :raises ValueError: if ``activation`` is neither a string nor a ``Layer`` subclass.
    """
    if isinstance(activation, (str, unicode)):
        if activation in ("dice", "Dice"):
            # NOTE(review): `Dice` is not defined in the visible part of this
            # notebook; this branch needs it to be defined/imported elsewhere.
            return Dice()
        return tf.keras.layers.Activation(activation)

    # issubclass() raises TypeError when its first argument is not a class
    # (e.g. a number or an already-built layer instance), so check that first
    # and let such inputs reach the descriptive ValueError below.
    if inspect.isclass(activation) and issubclass(activation, Layer):
        return activation()

    # The one-element tuple keeps %-formatting working when `activation` is itself a tuple.
    raise ValueError(
        "Invalid activation,found %s.You should use a str or a Activation Layer Class." % (activation,))

In [4]:
class DNN(Layer):
    """The Multi Layer Perceptron
      Input shape
        - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``.
      Output shape
        - nD tensor with shape: ``(batch_size, ..., hidden_units[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_units[-1])``. With an empty ``hidden_units`` the input is returned unchanged.
      Arguments
        - **hidden_units**:list of positive integer, the layer number and units in each layer.
        - **activation**: Activation function to use.
        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix.
        - **dropout_rate**: float in [0,1). Fraction of the units to dropout.
        - **use_bn**: bool. Whether use BatchNormalization before activation or not.
        - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``.
        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None,
                 seed=1024, **kwargs):
        self.hidden_units = hidden_units
        self.activation = activation
        self.l2_reg = l2_reg
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        self.output_activation = output_activation
        self.seed = seed

        super(DNN, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one kernel, bias, dropout and activation (and optionally BN) per hidden layer."""
        num_layers = len(self.hidden_units)
        # layer_dims[i] -> layer_dims[i + 1] is the shape of the i-th kernel.
        layer_dims = [int(input_shape[-1])] + list(self.hidden_units)

        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(layer_dims[i], layer_dims[i + 1]),
                                        initializer=glorot_normal(seed=self.seed),
                                        regularizer=l2(self.l2_reg),
                                        trainable=True) for i in range(num_layers)]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.hidden_units[i],),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(num_layers)]
        if self.use_bn:
            self.bn_layers = [tf.keras.layers.BatchNormalization() for _ in range(num_layers)]

        # Each dropout layer gets its own seed so the masks differ between layers.
        self.dropout_layers = [tf.keras.layers.Dropout(self.dropout_rate, seed=self.seed + i) for i in
                               range(num_layers)]

        self.activation_layers = [activation_layer(self.activation) for _ in range(num_layers)]

        # With an empty `hidden_units` there is no last layer to override;
        # indexing [-1] on the empty list would raise IndexError.
        if self.output_activation and self.activation_layers:
            self.activation_layers[-1] = activation_layer(self.output_activation)

        super(DNN, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, training=None, **kwargs):
        """Apply, per layer: affine transform -> (batch norm) -> activation -> dropout."""
        deep_input = inputs

        for i in range(len(self.hidden_units)):
            fc = tf.nn.bias_add(tf.tensordot(
                deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i])

            if self.use_bn:
                fc = self.bn_layers[i](fc, training=training)

            fc = self.activation_layers[i](fc)

            fc = self.dropout_layers[i](fc, training=training)
            deep_input = fc

        return deep_input

    def compute_output_shape(self, input_shape):
        """Replace the last input dimension with ``hidden_units[-1]`` (identity when empty)."""
        if len(self.hidden_units) > 0:
            shape = input_shape[:-1] + (self.hidden_units[-1],)
        else:
            shape = input_shape

        return tuple(shape)

    def get_config(self, ):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = {'activation': self.activation, 'hidden_units': self.hidden_units,
                  'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate,
                  'output_activation': self.output_activation, 'seed': self.seed}
        base_config = super(DNN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


In [5]:
def combined_dnn_input(sparse_embedding_list, dense_value_list):
    """Flatten the sparse embeddings and dense values and join them into one DNN input.

    Each non-empty list is concatenated with ``concat_func`` and flattened; when
    both are present the two flattened tensors are concatenated again.

    :raises NotImplementedError: if both lists are empty.
    """
    has_sparse = len(sparse_embedding_list) > 0
    has_dense = len(dense_value_list) > 0
    if not (has_sparse or has_dense):
        raise NotImplementedError("dnn_feature_columns can not be empty list")

    flattened_parts = []
    if has_sparse:
        flattened_parts.append(Flatten()(concat_func(sparse_embedding_list)))
    if has_dense:
        flattened_parts.append(Flatten()(concat_func(dense_value_list)))

    # A single part is returned as-is; only two parts need a final concatenation.
    if len(flattened_parts) == 1:
        return flattened_parts[0]
    return concat_func(flattened_parts)

In [6]:
class BiInteractionPooling(Layer):
    """Bi-Interaction pooling layer from Neural FM.

    Compresses the pairwise element-wise products of the field embeddings into
    a single vector, computed as ``0.5 * ((sum_i v_i)^2 - sum_i v_i^2)`` along
    the field axis.

      Input shape
        - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``.
      Output shape
        - 3D tensor with shape: ``(batch_size,1,embedding_size)``.
      References
        - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027)
    """

    def __init__(self, **kwargs):
        super(BiInteractionPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate that the input is rank 3; the layer has no weights."""
        rank = len(input_shape)
        if rank != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (rank))

        super(BiInteractionPooling, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return the pooled second-order interaction term."""
        rank = K.ndim(inputs)
        if rank != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (rank))

        embeddings = inputs
        field_sum = reduce_sum(embeddings, axis=1, keep_dims=True)
        square_of_sum = tf.square(field_sum)
        sum_of_square = reduce_sum(embeddings * embeddings, axis=1, keep_dims=True)

        return 0.5 * (square_of_sum - sum_of_square)

    def compute_output_shape(self, input_shape):
        return (None, 1, input_shape[-1])

In [7]:
def concat_func(inputs, axis=-1, mask=False):
    """Concatenate ``inputs`` along ``axis``; a single input is returned without concatenation.

    Unless ``mask`` is true, every input is first passed through one shared
    ``NoMask`` layer.
    """
    tensors = inputs
    if not mask:
        strip_mask = NoMask()
        tensors = [strip_mask(tensor) for tensor in inputs]

    if len(tensors) == 1:
        return tensors[0]
    return tf.keras.layers.Concatenate(axis=axis)(tensors)

def add_func(inputs):
    """Combine ``inputs`` by passing them through a fresh ``Add`` layer."""
    merge_layer = Add()
    return merge_layer(inputs)

def reduce_sum(input_tensor,
               axis=None,
               keep_dims=False,
               name=None,
               reduction_indices=None):
    """Version-tolerant wrapper around ``tf.reduce_sum``.

    The call is first attempted with the legacy keyword names ``keep_dims``
    and ``reduction_indices``. If ``tf.reduce_sum`` rejects them with a
    ``TypeError``, it is retried with ``keepdims``.

    :param input_tensor: tensor to reduce.
    :param axis: dimension(s) to reduce; ``None`` reduces all dimensions.
    :param keep_dims: whether to retain the reduced dimensions with length 1.
    :param name: optional operation name.
    :param reduction_indices: legacy alias of ``axis``.
    :return: the reduced tensor.
    """
    try:
        return tf.reduce_sum(input_tensor,
                             axis=axis,
                             keep_dims=keep_dims,
                             name=name,
                             reduction_indices=reduction_indices)
    except TypeError:
        # The retry signature has no `reduction_indices`; honour it as the
        # alias of `axis` instead of silently reducing over every dimension.
        if axis is None:
            axis = reduction_indices
        return tf.reduce_sum(input_tensor,
                             axis=axis,
                             keepdims=keep_dims,
                             name=name)

In [8]:
def create_embedding_matrix(feature_columns, l2_reg, seed, prefix="", seq_mask_zero=True):
    """Build the embedding layers for the sparse and variable-length sparse columns.

    Splits ``feature_columns`` by type and delegates to ``create_embedding_dict``
    with ``prefix + 'sparse'`` as the layer-name prefix.

    :return: whatever ``create_embedding_dict`` returns for the selected columns.
    """
    columns = feature_columns if feature_columns else []
    sparse_columns = [fc for fc in columns if isinstance(fc, SparseFeat)]
    varlen_columns = [fc for fc in columns if isinstance(fc, VarLenSparseFeat)]

    return create_embedding_dict(sparse_columns, varlen_columns, seed, l2_reg,
                                 prefix=prefix + 'sparse', seq_mask_zero=seq_mask_zero)


In [9]:
def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg,
                          prefix='sparse_', seq_mask_zero=True):
    """Create one ``Embedding`` layer per feature column, keyed by ``embedding_name``.

    Sparse columns are named ``prefix + '_emb_' + embedding_name``; variable-length
    columns are named ``prefix + '_seq_emb_' + name`` and receive
    ``mask_zero=seq_mask_zero``. A variable-length column whose ``embedding_name``
    matches an earlier entry replaces it. ``seed`` is accepted but not used here.

    :return: dict mapping ``embedding_name`` to its ``Embedding`` layer.
    """
    embedding_layers = {}

    for column in sparse_feature_columns:
        layer = Embedding(column.vocabulary_size, column.embedding_dim,
                          embeddings_initializer=column.embeddings_initializer,
                          embeddings_regularizer=l2(l2_reg),
                          name=prefix + '_emb_' + column.embedding_name)
        layer.trainable = column.trainable
        embedding_layers[column.embedding_name] = layer

    has_varlen = bool(varlen_sparse_feature_columns) and len(varlen_sparse_feature_columns) > 0
    if not has_varlen:
        return embedding_layers

    for column in varlen_sparse_feature_columns:
        layer = Embedding(column.vocabulary_size, column.embedding_dim,
                          embeddings_initializer=column.embeddings_initializer,
                          embeddings_regularizer=l2(l2_reg),
                          name=prefix + '_seq_emb_' + column.name,
                          mask_zero=seq_mask_zero)
        layer.trainable = column.trainable
        embedding_layers[column.embedding_name] = layer

    return embedding_layers

In [10]:
def input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix='', seq_mask_zero=True,
                               support_dense=True, support_group=False):
    """Turn the model inputs into embedded sparse features plus raw dense values.

    :param features: mapping from feature name to its input tensor.
    :param feature_columns: feature column descriptions to process.
    :param support_dense: when false, the presence of any dense value raises ``ValueError``.
    :param support_group: when true, return the embeddings grouped by ``group_name``;
        otherwise return them as one flat list.
    :return: ``(embeddings, dense_value_list)``.
    """
    columns = feature_columns if feature_columns else []
    sparse_columns = [fc for fc in columns if isinstance(fc, SparseFeat)]
    varlen_columns = [fc for fc in columns if isinstance(fc, VarLenSparseFeat)]

    embedding_layers = create_embedding_matrix(feature_columns, l2_reg, seed, prefix=prefix,
                                               seq_mask_zero=seq_mask_zero)
    sparse_by_group = embedding_lookup(embedding_layers, features, sparse_columns)

    dense_value_list = get_dense_input(features, feature_columns)
    if len(dense_value_list) > 0 and not support_dense:
        raise ValueError("DenseFeat is not supported in dnn_feature_columns")

    sequence_embeddings = varlen_embedding_lookup(embedding_layers, features, varlen_columns)
    pooled_by_group = get_varlen_pooling_list(sequence_embeddings, features, varlen_columns)

    embeddings = mergeDict(sparse_by_group, pooled_by_group)
    if not support_group:
        # Flatten the per-group lists into a single list of embedding tensors.
        embeddings = list(chain.from_iterable(embeddings.values()))
    return embeddings, dense_value_list

In [11]:
def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(),
                     mask_feat_list=(), to_list=False):
    """Apply each column's embedding layer to its input and group the results.

    :param sparse_embedding_dict: mapping ``embedding_name -> embedding layer``.
    :param sparse_input_dict: mapping ``feature name -> input``.
    :param sparse_feature_columns: columns to look up.
    :param return_feat_list: feature names to keep; empty means keep all.
    :param mask_feat_list: feature names hashed with ``mask_zero=True`` (hashed columns only).
    :param to_list: return one flat list instead of the per-group mapping.
    :return: ``defaultdict`` mapping ``group_name`` to a list of embeddings, or a flat list.
    """
    embeddings_by_group = defaultdict(list)
    keep_all = len(return_feat_list) == 0

    for column in sparse_feature_columns:
        name = column.name
        layer_key = column.embedding_name
        if not (keep_all or name in return_feat_list):
            continue

        indices = sparse_input_dict[name]
        if column.use_hash:
            # NOTE(review): `Hash` is not defined in the visible part of this notebook.
            indices = Hash(column.vocabulary_size, mask_zero=(name in mask_feat_list))(indices)

        embeddings_by_group[column.group_name].append(sparse_embedding_dict[layer_key](indices))

    if not to_list:
        return embeddings_by_group
    return list(chain.from_iterable(embeddings_by_group.values()))


In [12]:
class VarLenSparseFeat(namedtuple('VarLenSparseFeat',
                                  'sparsefeat maxlen combiner length_name weight_name weight_norm')):
    """Description of a variable-length (sequence) sparse feature.

    Wraps a sparse feature description (``sparsefeat``) and adds the sequence
    settings. The read-only properties below simply forward to the wrapped
    ``sparsefeat``; instances hash by feature name.
    """
    __slots__ = ()

    def __new__(cls, sparsefeat, maxlen, combiner="mean", length_name=None, weight_name=None, weight_norm=True):
        return super(VarLenSparseFeat, cls).__new__(
            cls, sparsefeat, maxlen, combiner, length_name, weight_name, weight_norm)

    @property
    def name(self):
        """Name of the wrapped sparse feature."""
        return self.sparsefeat.name

    @property
    def vocabulary_size(self):
        """Vocabulary size of the wrapped sparse feature."""
        return self.sparsefeat.vocabulary_size

    @property
    def embedding_dim(self):
        """Embedding dimension of the wrapped sparse feature."""
        return self.sparsefeat.embedding_dim

    @property
    def use_hash(self):
        """``use_hash`` flag of the wrapped sparse feature."""
        return self.sparsefeat.use_hash

    @property
    def dtype(self):
        """Input dtype of the wrapped sparse feature."""
        return self.sparsefeat.dtype

    @property
    def embeddings_initializer(self):
        """Embedding initializer of the wrapped sparse feature."""
        return self.sparsefeat.embeddings_initializer

    @property
    def embedding_name(self):
        """Embedding name of the wrapped sparse feature."""
        return self.sparsefeat.embedding_name

    @property
    def group_name(self):
        """Group name of the wrapped sparse feature."""
        return self.sparsefeat.group_name

    @property
    def trainable(self):
        """``trainable`` flag of the wrapped sparse feature."""
        return self.sparsefeat.trainable

    def __hash__(self):
        return hash(self.name)

In [13]:
class DenseFeat(namedtuple('DenseFeat', 'name dimension dtype')):
    """Description of a dense (numeric) feature: its name, width and dtype.

    Instances hash by ``name``.
    """
    __slots__ = ()

    def __new__(cls, name, dimension=1, dtype="float32"):
        return super(DenseFeat, cls).__new__(cls, name, dimension, dtype)

    def __hash__(self):
        return hash(self.name)


In [14]:
DEFAULT_GROUP_NAME = "default_group"


class SparseFeat(namedtuple('SparseFeat',
                            'name vocabulary_size embedding_dim use_hash dtype '
                            'embeddings_initializer embedding_name group_name trainable')):
    """Description of a single-valued sparse (categorical) feature.

    Defaults resolved at construction time:
      - ``embedding_dim="auto"`` becomes ``6 * int(vocabulary_size ** 0.25)``.
      - ``embeddings_initializer=None`` becomes ``RandomNormal(mean=0.0, stddev=0.0001, seed=2020)``.
      - ``embedding_name=None`` becomes ``name``.

    Instances hash by ``name``.
    """
    __slots__ = ()

    def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, dtype="int32", embeddings_initializer=None,
                embedding_name=None,
                group_name=DEFAULT_GROUP_NAME, trainable=True):
        resolved_dim = embedding_dim
        if resolved_dim == "auto":
            resolved_dim = 6 * int(vocabulary_size ** 0.25)

        initializer = embeddings_initializer
        if initializer is None:
            initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020)

        resolved_embedding_name = name if embedding_name is None else embedding_name

        return super(SparseFeat, cls).__new__(
            cls, name, vocabulary_size, resolved_dim, use_hash, dtype,
            initializer, resolved_embedding_name, group_name, trainable)

    def __hash__(self):
        return hash(self.name)


In [15]:
def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear',
                     l2_reg=0):
    """Build the linear (first-order) logit for the given feature columns.

    Works on a copy of ``feature_columns`` in which every sparse and
    variable-length sparse column is switched to a 1-dimensional, zero-initialised
    embedding. For each of the ``units`` outputs a ``Linear`` layer is applied,
    its mode chosen by which inputs exist (0: sparse only, 1: dense only, 2: both).

    :return: the concatenated per-unit logits, or ``add_func([])`` when there are
        neither sparse embeddings nor dense inputs.
    """
    linear_feature_columns = copy(feature_columns)
    for idx, column in enumerate(linear_feature_columns):
        if isinstance(column, SparseFeat):
            linear_feature_columns[idx] = column._replace(embedding_dim=1,
                                                          embeddings_initializer=Zeros())
        elif isinstance(column, VarLenSparseFeat):
            scalar_sparsefeat = column.sparsefeat._replace(embedding_dim=1,
                                                           embeddings_initializer=Zeros())
            linear_feature_columns[idx] = column._replace(sparsefeat=scalar_sparsefeat)

    # One independent set of embeddings per output unit (distinct name prefix each).
    linear_emb_list = []
    for unit in range(units):
        unit_embeddings, _ = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed,
                                                        prefix=prefix + str(unit))
        linear_emb_list.append(unit_embeddings)
    _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix)

    linear_logit_list = []
    for unit in range(units):
        unit_embeddings = linear_emb_list[unit]
        has_sparse = len(unit_embeddings) > 0
        has_dense = len(dense_input_list) > 0

        if has_sparse and has_dense:
            mode = 2
            layer_input = [concat_func(unit_embeddings), concat_func(dense_input_list)]
        elif has_sparse:
            mode = 0
            layer_input = concat_func(unit_embeddings)
        elif has_dense:
            mode = 1
            layer_input = concat_func(dense_input_list)
        else:
            return add_func([])

        linear_logit_list.append(Linear(l2_reg, mode=mode, use_bias=use_bias, seed=seed)(layer_input))

    return concat_func(linear_logit_list)


In [16]:
def build_input_features(feature_columns, prefix=''):
    """Create one Keras ``Input`` per feature column, keyed by feature name.

    - ``SparseFeat``: input of shape ``(1,)``.
    - ``DenseFeat``: input of shape ``(dimension,)``.
    - ``VarLenSparseFeat``: input of shape ``(maxlen,)``, plus an optional
      float32 weight input of shape ``(maxlen, 1)`` and an optional int32
      length input of shape ``(1,)`` when ``weight_name`` / ``length_name`` are set.

    A column whose name was already seen overwrites the earlier entry and keeps
    its position.

    :param feature_columns: iterable of feature column descriptions.
    :param prefix: string prepended to every input layer name.
    :return: ``OrderedDict`` mapping feature name to its ``Input`` tensor.
    :raises TypeError: if a column is not one of the three supported types.
    """
    input_features = OrderedDict()
    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            input_features[fc.name] = Input(
                shape=(1,), name=prefix + fc.name, dtype=fc.dtype)
        elif isinstance(fc, DenseFeat):
            input_features[fc.name] = Input(
                shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype)
        elif isinstance(fc, VarLenSparseFeat):
            input_features[fc.name] = Input(shape=(fc.maxlen,), name=prefix + fc.name,
                                            dtype=fc.dtype)
            if fc.weight_name is not None:
                input_features[fc.weight_name] = Input(shape=(fc.maxlen, 1), name=prefix + fc.weight_name,
                                                       dtype="float32")
            if fc.length_name is not None:
                input_features[fc.length_name] = Input(shape=(1,), name=prefix + fc.length_name, dtype='int32')
        else:
            # Build one formatted message; passing the type as a second exception
            # argument would make the error render as a tuple.
            raise TypeError("Invalid feature column type,got %s" % (type(fc),))

    return input_features

In [17]:
def NFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
        dnn_dropout=0, dnn_activation='relu', task='binary'):
    """Instantiates the Neural Factorization Machine architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param bi_dropout: float. When non-zero, the dropout rate applied to the output of the BiInteractionPooling layer.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in deep net
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    # Materialise both arguments as lists so any iterable (tuple, generator, ...)
    # is accepted, as documented: the `+` below and the in-place column
    # rewriting in get_linear_logit both need real lists.
    linear_feature_columns = list(linear_feature_columns)
    dnn_feature_columns = list(dnn_feature_columns)

    # Columns with the same name share one input, because inputs are keyed by name.
    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    # Stack the field embeddings along axis 1 before Bi-Interaction pooling.
    fm_input = concat_func(sparse_embedding_list, axis=1)
    bi_out = BiInteractionPooling()(fm_input)
    if bi_dropout:
        # Seeded like the DNN dropout layers so a fixed `seed` gives reproducible masks.
        bi_out = tf.keras.layers.Dropout(bi_dropout, seed=seed)(bi_out, training=None)
    dnn_input = combined_dnn_input([bi_out], dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(
        1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output)

    final_logit = add_func([linear_logit, dnn_logit])

    output = PredictionLayer(task)(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model

In [18]:
def get_feature_names(feature_columns):
    """Return the names of all model inputs created for ``feature_columns``, in creation order."""
    return [name for name in build_input_features(feature_columns).keys()]

In [19]:
def get_dense_input(features, feature_columns):
    """Return the inputs of all ``DenseFeat`` columns, in column order.

    An empty or ``None`` ``feature_columns`` yields an empty list.
    """
    if not feature_columns:
        return []
    dense_columns = [fc for fc in feature_columns if isinstance(fc, DenseFeat)]
    return [features[fc.name] for fc in dense_columns]

In [20]:
def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns):
    """Apply each variable-length column's embedding layer to its sequence input.

    :param embedding_dict: mapping ``embedding_name -> embedding layer``.
    :param sequence_input_dict: mapping ``feature name -> sequence input``.
    :return: dict mapping feature name to its embedded sequence.
    """
    embedded_sequences = {}
    for column in varlen_sparse_feature_columns:
        name = column.name
        layer = embedding_dict  # resolved per column below, after the optional hashing step
        indices = sequence_input_dict[name] if not column.use_hash else None
        if column.use_hash:
            # NOTE(review): `Hash` is not defined in the visible part of this notebook.
            indices = Hash(column.vocabulary_size, mask_zero=True)(sequence_input_dict[name])
        embedded_sequences[name] = layer[column.embedding_name](indices)
    return embedded_sequences


In [21]:
def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False):
    """Pool every embedded sequence into one vector, grouped by ``group_name``.

    For each column the embedded sequence is optionally re-weighted (when
    ``weight_name`` is set) and then pooled with the column's ``combiner``.
    Columns with a ``length_name`` pass the explicit length input to both layers
    and disable mask support; the others rely on masking.

    NOTE(review): ``WeightedSequenceLayer`` and ``SequencePoolingLayer`` are not
    defined in the visible part of this notebook; they are only needed when
    ``varlen_sparse_feature_columns`` is non-empty.

    :param embedding_dict: mapping ``feature name -> embedded sequence``.
    :param features: mapping ``input name -> input`` (length / weight inputs).
    :param varlen_sparse_feature_columns: the variable-length columns to pool.
    :param to_list: return one flat list instead of the per-group mapping.
    :return: ``defaultdict`` mapping ``group_name`` to a list of pooled vectors,
        or a flat ``list`` of them when ``to_list`` is true.
    """
    pooling_vec_list = defaultdict(list)
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        combiner = fc.combiner
        feature_length_name = fc.length_name
        if feature_length_name is not None:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm)(
                    [embedding_dict[feature_name], features[feature_length_name], features[fc.weight_name]])
            else:
                seq_input = embedding_dict[feature_name]
            vec = SequencePoolingLayer(combiner, supports_masking=False)(
                [seq_input, features[feature_length_name]])
        else:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)(
                    [embedding_dict[feature_name], features[fc.weight_name]])
            else:
                seq_input = embedding_dict[feature_name]
            vec = SequencePoolingLayer(combiner, supports_masking=True)(
                seq_input)
        pooling_vec_list[fc.group_name].append(vec)
    if to_list:
        # Materialise the result: a bare chain iterator can be consumed only once
        # and has no len(), unlike what embedding_lookup(to_list=True) returns.
        return list(chain.from_iterable(pooling_vec_list.values()))
    return pooling_vec_list

In [22]:
def mergeDict(a, b):
    """Merge two mappings of lists into a new ``defaultdict(list)``.

    Values for a key present in both are concatenated, ``a``'s items first.
    The inputs themselves are not modified.
    """
    merged = defaultdict(list)
    for source in (a, b):
        for key, values in source.items():
            merged[key].extend(values)
    return merged

In [23]:
class NoMask(tf.keras.layers.Layer):
    """Identity layer that returns its input unchanged and reports no output mask."""

    def __init__(self, **kwargs):
        super(NoMask, self).__init__(**kwargs)

    def build(self, input_shape):
        """No weights to create; only defers to the base implementation."""
        super(NoMask, self).build(input_shape)

    def call(self, x, mask=None, **kwargs):
        """Return ``x`` as-is; any incoming ``mask`` is ignored."""
        return x

    def compute_mask(self, inputs, mask):
        """Always ``None``, so no mask is propagated past this layer."""
        return None


In [24]:
class Linear(tf.keras.layers.Layer):
    """Linear (first-order) term over sparse embeddings and/or dense values.

    Arguments
      - **l2_reg**: L2 regularizer strength applied to the dense kernel.
      - **mode**: ``0`` sparse input only (summed over the last axis, dims kept),
        ``1`` dense input only (projected through a ``(input_dim, 1)`` kernel),
        ``2`` a ``[sparse, dense]`` pair (sparse summed over the last axis
        without keeping dims, plus the dense projection).
      - **use_bias**: bool. Whether to add a scalar bias.
      - **seed**: seed for the kernel initializer.
    """

    def __init__(self, l2_reg=0.0, mode=0, use_bias=False, seed=1024, **kwargs):
        if mode not in (0, 1, 2):
            raise ValueError("mode must be 0,1 or 2")
        self.l2_reg = l2_reg
        self.mode = mode
        self.use_bias = use_bias
        self.seed = seed
        super(Linear, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the optional bias, then the dense kernel for modes 1 and 2."""
        if self.use_bias:
            self.bias = self.add_weight(name='linear_bias',
                                        shape=(1,),
                                        initializer=tf.keras.initializers.Zeros(),
                                        trainable=True)

        if self.mode in (1, 2):
            # In mode 2 the input is a [sparse, dense] pair; the kernel follows the dense part.
            dense_shape = input_shape if self.mode == 1 else input_shape[1]
            self.kernel = self.add_weight(
                'linear_kernel',
                shape=[int(dense_shape[-1]), 1],
                initializer=tf.keras.initializers.glorot_normal(self.seed),
                regularizer=tf.keras.regularizers.l2(self.l2_reg),
                trainable=True)

        super(Linear, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Compute the linear logit for the configured mode."""
        if self.mode == 0:
            linear_logit = reduce_sum(inputs, axis=-1, keep_dims=True)
        elif self.mode == 1:
            linear_logit = tf.tensordot(inputs, self.kernel, axes=(-1, 0))
        else:
            sparse_input, dense_input = inputs
            dense_logit = tf.tensordot(dense_input, self.kernel, axes=(-1, 0))
            linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=False) + dense_logit

        if self.use_bias:
            linear_logit = linear_logit + self.bias
        return linear_logit

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def compute_mask(self, inputs, mask):
        return None

    def get_config(self, ):
        """Return the base layer config extended with this layer's constructor arguments."""
        merged = dict(super(Linear, self).get_config())
        merged.update({'mode': self.mode, 'l2_reg': self.l2_reg, 'use_bias': self.use_bias, 'seed': self.seed})
        return merged

In [25]:
# Python 2/3 compatibility shim: where the built-in name `unicode` does not
# exist, alias it to `str`. activation_layer() uses this name in its
# isinstance() check, so this cell must have run before that function is called.
try:
    unicode
except NameError:
    unicode = str

In [26]:
class Add(tf.keras.layers.Layer):
    """Element-wise sum of a list of tensors that tolerates degenerate inputs.

    A non-list input or a one-element list is passed through unchanged, an empty
    list yields the constant ``[[0.0]]``, and anything longer is summed with
    ``tf.keras.layers.add``.
    """

    def __init__(self, **kwargs):
        super(Add, self).__init__(**kwargs)

    def build(self, input_shape):
        """No weights to create; only defers to the base implementation."""
        super(Add, self).build(input_shape)

    def call(self, inputs, **kwargs):
        if not isinstance(inputs, list):
            return inputs

        count = len(inputs)
        if count == 0:
            return tf.constant([[0.0]])
        if count == 1:
            return inputs[0]
        return tf.keras.layers.add(inputs)

In [27]:
def rmse(y_true, y_pred):
    """Root mean squared error between predictions and targets, built from Keras backend ops."""
    backend = tf.keras.backend
    squared_error = backend.square(y_pred - y_true)
    return backend.sqrt(backend.mean(squared_error))

In [28]:
# --- Configuration -----------------------------------------------------------
# Location of the MovieLens sample file. Set the NFM_DATA_PATH environment
# variable to run this notebook on another machine; the fallback is the path
# this notebook was originally run with.
DATA_PATH = os.environ.get("NFM_DATA_PATH", r"D:\article\NFM\movielens_sample.txt")
SPLIT_SEED = 2020        # random_state of the train/test split
TEST_FRACTION = 0.2      # share of rows held out as the test split
VALIDATION_FRACTION = 0.2  # share of the training rows Keras holds out for validation
EMBEDDING_DIM = 4
BATCH_SIZE = 256
EPOCHS = 700

data = pd.read_csv(DATA_PATH)
sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip"]
target = ['rating']

# 1. Label-encode every sparse feature (this example uses no dense features).
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# 2. Describe each sparse field; its vocabulary size is the number of distinct values.
fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(), embedding_dim=EMBEDDING_DIM)
                          for feat in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3. Generate the input data for the model.
train, test = train_test_split(data, test_size=TEST_FRACTION, random_state=SPLIT_SEED)
train_model_input = {name: train[name].values for name in feature_names}
test_model_input = {name: test[name].values for name in feature_names}

# 4. Define and train the model.
# NOTE: the test split is prepared above but this cell does not predict on or evaluate it.
model = NFM(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile(optimizer="adagrad", loss=rmse, metrics=[rmse], )

model.fit(train_model_input, train[target].values,
          batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=2, validation_split=VALIDATION_FRACTION, )

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Epoch 1/700


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


1/1 - 0s - loss: 3.6625 - rmse: 3.6625 - val_loss: 3.9633 - val_rmse: 3.9633
Epoch 2/700
1/1 - 0s - loss: 3.6467 - rmse: 3.6467 - val_loss: 3.9509 - val_rmse: 3.9509
Epoch 3/700
1/1 - 0s - loss: 3.6325 - rmse: 3.6325 - val_loss: 3.9410 - val_rmse: 3.9410
Epoch 4/700
1/1 - 0s - loss: 3.6211 - rmse: 3.6211 - val_loss: 3.9323 - val_rmse: 3.9323
Epoch 5/700
1/1 - 0s - loss: 3.6111 - rmse: 3.6111 - val_loss: 3.9242 - val_rmse: 3.9242
Epoch 6/700
1/1 - 0s - loss: 3.6019 - rmse: 3.6019 - val_loss: 3.9167 - val_rmse: 3.9167
Epoch 7/700
1/1 - 0s - loss: 3.5932 - rmse: 3.5932 - val_loss: 3.9094 - val_rmse: 3.9094
Epoch 8/700
1/1 - 0s - loss: 3.5850 - rmse: 3.5850 - val_loss: 3.9024 - val_rmse: 3.9024
Epoch 9/700
1/1 - 0s - loss: 3.5770 - rmse: 3.5770 - val_loss: 3.8957 - val_rmse: 3.8957
Epoch 10/700
1/1 - 0s - loss: 3.5694 - rmse: 3.5694 - val_loss: 3.8891 - val_rmse: 3.8891
Epoch 11/700
1/1 - 0s - loss: 3.5620 - rmse: 3.5620 - val_loss: 3.8827 - val_rmse: 3.8827
Epoch 12/700
1/1 - 0s - loss: 3

Epoch 93/700
1/1 - 0s - loss: 3.0221 - rmse: 3.0221 - val_loss: 3.4041 - val_rmse: 3.4041
Epoch 94/700
1/1 - 0s - loss: 3.0147 - rmse: 3.0147 - val_loss: 3.3977 - val_rmse: 3.3977
Epoch 95/700
1/1 - 0s - loss: 3.0072 - rmse: 3.0072 - val_loss: 3.3912 - val_rmse: 3.3912
Epoch 96/700
1/1 - 0s - loss: 2.9996 - rmse: 2.9996 - val_loss: 3.3847 - val_rmse: 3.3847
Epoch 97/700
1/1 - 0s - loss: 2.9921 - rmse: 2.9921 - val_loss: 3.3783 - val_rmse: 3.3782
Epoch 98/700
1/1 - 0s - loss: 2.9845 - rmse: 2.9845 - val_loss: 3.3717 - val_rmse: 3.3717
Epoch 99/700
1/1 - 0s - loss: 2.9769 - rmse: 2.9769 - val_loss: 3.3652 - val_rmse: 3.3652
Epoch 100/700
1/1 - 0s - loss: 2.9693 - rmse: 2.9693 - val_loss: 3.3587 - val_rmse: 3.3587
Epoch 101/700
1/1 - 0s - loss: 2.9616 - rmse: 2.9616 - val_loss: 3.3521 - val_rmse: 3.3521
Epoch 102/700
1/1 - 0s - loss: 2.9540 - rmse: 2.9539 - val_loss: 3.3456 - val_rmse: 3.3456
Epoch 103/700
1/1 - 0s - loss: 2.9463 - rmse: 2.9463 - val_loss: 3.3390 - val_rmse: 3.3390
Epoch 

Epoch 184/700
1/1 - 0s - loss: 2.2347 - rmse: 2.2346 - val_loss: 2.7538 - val_rmse: 2.7538
Epoch 185/700
1/1 - 0s - loss: 2.2251 - rmse: 2.2250 - val_loss: 2.7462 - val_rmse: 2.7462
Epoch 186/700
1/1 - 0s - loss: 2.2155 - rmse: 2.2155 - val_loss: 2.7386 - val_rmse: 2.7385
Epoch 187/700
1/1 - 0s - loss: 2.2059 - rmse: 2.2059 - val_loss: 2.7309 - val_rmse: 2.7309
Epoch 188/700
1/1 - 0s - loss: 2.1963 - rmse: 2.1963 - val_loss: 2.7233 - val_rmse: 2.7233
Epoch 189/700
1/1 - 0s - loss: 2.1867 - rmse: 2.1867 - val_loss: 2.7157 - val_rmse: 2.7156
Epoch 190/700
1/1 - 0s - loss: 2.1771 - rmse: 2.1770 - val_loss: 2.7080 - val_rmse: 2.7080
Epoch 191/700
1/1 - 0s - loss: 2.1674 - rmse: 2.1674 - val_loss: 2.7004 - val_rmse: 2.7004
Epoch 192/700
1/1 - 0s - loss: 2.1578 - rmse: 2.1577 - val_loss: 2.6928 - val_rmse: 2.6927
Epoch 193/700
1/1 - 0s - loss: 2.1481 - rmse: 2.1481 - val_loss: 2.6851 - val_rmse: 2.6851
Epoch 194/700
1/1 - 0s - loss: 2.1385 - rmse: 2.1384 - val_loss: 2.6774 - val_rmse: 2.6774

1/1 - 0s - loss: 1.3772 - rmse: 1.3771 - val_loss: 2.0759 - val_rmse: 2.0758
Epoch 275/700
1/1 - 0s - loss: 1.3686 - rmse: 1.3685 - val_loss: 2.0690 - val_rmse: 2.0690
Epoch 276/700
1/1 - 0s - loss: 1.3601 - rmse: 1.3600 - val_loss: 2.0622 - val_rmse: 2.0621
Epoch 277/700
1/1 - 0s - loss: 1.3516 - rmse: 1.3515 - val_loss: 2.0554 - val_rmse: 2.0554
Epoch 278/700
1/1 - 0s - loss: 1.3431 - rmse: 1.3431 - val_loss: 2.0486 - val_rmse: 2.0485
Epoch 279/700
1/1 - 0s - loss: 1.3347 - rmse: 1.3347 - val_loss: 2.0419 - val_rmse: 2.0418
Epoch 280/700
1/1 - 0s - loss: 1.3264 - rmse: 1.3263 - val_loss: 2.0352 - val_rmse: 2.0352
Epoch 281/700
1/1 - 0s - loss: 1.3181 - rmse: 1.3180 - val_loss: 2.0288 - val_rmse: 2.0287
Epoch 282/700
1/1 - 0s - loss: 1.3099 - rmse: 1.3098 - val_loss: 2.0221 - val_rmse: 2.0221
Epoch 283/700
1/1 - 0s - loss: 1.3016 - rmse: 1.3015 - val_loss: 2.0155 - val_rmse: 2.0154
Epoch 284/700
1/1 - 0s - loss: 1.2934 - rmse: 1.2934 - val_loss: 2.0089 - val_rmse: 2.0089
Epoch 285/700

Epoch 365/700
1/1 - 0s - loss: 0.8488 - rmse: 0.8487 - val_loss: 1.6438 - val_rmse: 1.6437
Epoch 366/700
1/1 - 0s - loss: 0.8456 - rmse: 0.8456 - val_loss: 1.6421 - val_rmse: 1.6421
Epoch 367/700
1/1 - 0s - loss: 0.8425 - rmse: 0.8425 - val_loss: 1.6397 - val_rmse: 1.6396
Epoch 368/700
1/1 - 0s - loss: 0.8395 - rmse: 0.8394 - val_loss: 1.6381 - val_rmse: 1.6380
Epoch 369/700
1/1 - 0s - loss: 0.8364 - rmse: 0.8364 - val_loss: 1.6358 - val_rmse: 1.6358
Epoch 370/700
1/1 - 0s - loss: 0.8334 - rmse: 0.8334 - val_loss: 1.6342 - val_rmse: 1.6341
Epoch 371/700
1/1 - 0s - loss: 0.8304 - rmse: 0.8303 - val_loss: 1.6320 - val_rmse: 1.6320
Epoch 372/700
1/1 - 0s - loss: 0.8275 - rmse: 0.8274 - val_loss: 1.6303 - val_rmse: 1.6303
Epoch 373/700
1/1 - 0s - loss: 0.8246 - rmse: 0.8246 - val_loss: 1.6283 - val_rmse: 1.6282
Epoch 374/700
1/1 - 0s - loss: 0.8218 - rmse: 0.8217 - val_loss: 1.6268 - val_rmse: 1.6267
Epoch 375/700
1/1 - 0s - loss: 0.8189 - rmse: 0.8188 - val_loss: 1.6249 - val_rmse: 1.6249

1/1 - 0s - loss: 0.6190 - rmse: 0.6189 - val_loss: 1.6225 - val_rmse: 1.6224
Epoch 456/700
1/1 - 0s - loss: 0.6165 - rmse: 0.6164 - val_loss: 1.6233 - val_rmse: 1.6233
Epoch 457/700
1/1 - 0s - loss: 0.6140 - rmse: 0.6140 - val_loss: 1.6242 - val_rmse: 1.6241
Epoch 458/700
1/1 - 0s - loss: 0.6116 - rmse: 0.6115 - val_loss: 1.6251 - val_rmse: 1.6251
Epoch 459/700
1/1 - 0s - loss: 0.6091 - rmse: 0.6090 - val_loss: 1.6261 - val_rmse: 1.6260
Epoch 460/700
1/1 - 0s - loss: 0.6066 - rmse: 0.6066 - val_loss: 1.6270 - val_rmse: 1.6269
Epoch 461/700
1/1 - 0s - loss: 0.6042 - rmse: 0.6041 - val_loss: 1.6281 - val_rmse: 1.6280
Epoch 462/700
1/1 - 0s - loss: 0.6017 - rmse: 0.6016 - val_loss: 1.6290 - val_rmse: 1.6289
Epoch 463/700
1/1 - 0s - loss: 0.5992 - rmse: 0.5991 - val_loss: 1.6300 - val_rmse: 1.6300
Epoch 464/700
1/1 - 0s - loss: 0.5967 - rmse: 0.5967 - val_loss: 1.6310 - val_rmse: 1.6309
Epoch 465/700
1/1 - 0s - loss: 0.5943 - rmse: 0.5942 - val_loss: 1.6322 - val_rmse: 1.6321
Epoch 466/700

Epoch 546/700
1/1 - 0s - loss: 0.3846 - rmse: 0.3845 - val_loss: 1.7478 - val_rmse: 1.7477
Epoch 547/700
1/1 - 0s - loss: 0.3820 - rmse: 0.3819 - val_loss: 1.7497 - val_rmse: 1.7496
Epoch 548/700
1/1 - 0s - loss: 0.3793 - rmse: 0.3793 - val_loss: 1.7509 - val_rmse: 1.7508
Epoch 549/700
1/1 - 0s - loss: 0.3767 - rmse: 0.3766 - val_loss: 1.7527 - val_rmse: 1.7527
Epoch 550/700
1/1 - 0s - loss: 0.3741 - rmse: 0.3740 - val_loss: 1.7541 - val_rmse: 1.7540
Epoch 551/700
1/1 - 0s - loss: 0.3714 - rmse: 0.3714 - val_loss: 1.7559 - val_rmse: 1.7558
Epoch 552/700
1/1 - 0s - loss: 0.3688 - rmse: 0.3687 - val_loss: 1.7572 - val_rmse: 1.7571
Epoch 553/700
1/1 - 0s - loss: 0.3662 - rmse: 0.3661 - val_loss: 1.7591 - val_rmse: 1.7590
Epoch 554/700
1/1 - 0s - loss: 0.3636 - rmse: 0.3635 - val_loss: 1.7603 - val_rmse: 1.7603
Epoch 555/700
1/1 - 0s - loss: 0.3609 - rmse: 0.3609 - val_loss: 1.7622 - val_rmse: 1.7621
Epoch 556/700
1/1 - 0s - loss: 0.3583 - rmse: 0.3582 - val_loss: 1.7636 - val_rmse: 1.7635

Epoch 637/700
1/1 - 0s - loss: 0.1542 - rmse: 0.1541 - val_loss: 1.8948 - val_rmse: 1.8947
Epoch 638/700
1/1 - 0s - loss: 0.1520 - rmse: 0.1519 - val_loss: 1.8963 - val_rmse: 1.8962
Epoch 639/700
1/1 - 0s - loss: 0.1498 - rmse: 0.1497 - val_loss: 1.8978 - val_rmse: 1.8977
Epoch 640/700
1/1 - 0s - loss: 0.1477 - rmse: 0.1476 - val_loss: 1.8992 - val_rmse: 1.8992
Epoch 641/700
1/1 - 0s - loss: 0.1455 - rmse: 0.1455 - val_loss: 1.9009 - val_rmse: 1.9008
Epoch 642/700
1/1 - 0s - loss: 0.1434 - rmse: 0.1434 - val_loss: 1.9021 - val_rmse: 1.9021
Epoch 643/700
1/1 - 0s - loss: 0.1413 - rmse: 0.1413 - val_loss: 1.9038 - val_rmse: 1.9037
Epoch 644/700
1/1 - 0s - loss: 0.1393 - rmse: 0.1392 - val_loss: 1.9052 - val_rmse: 1.9051
Epoch 645/700
1/1 - 0s - loss: 0.1372 - rmse: 0.1371 - val_loss: 1.9067 - val_rmse: 1.9067
Epoch 646/700
1/1 - 0s - loss: 0.1351 - rmse: 0.1351 - val_loss: 1.9081 - val_rmse: 1.9080
Epoch 647/700
1/1 - 0s - loss: 0.1331 - rmse: 0.1330 - val_loss: 1.9096 - val_rmse: 1.9096

<tensorflow.python.keras.callbacks.History at 0x24b40f0e3c8>

In [29]:
# Score the held-out split. RMSE is computed as sqrt(MSE) instead of through
# mean_squared_error(..., squared=False): the `squared` keyword is missing from
# older scikit-learn releases and was deprecated and then removed in newer ones.
# For this single-output target both forms give the same number.
pred_ans = model.predict(test_model_input, batch_size=256)
test_rmse = float(np.sqrt(mean_squared_error(test[target].values, pred_ans)))
print("test RMSE", round(test_rmse, 4))

test RMSE 1.9341


In [30]:
# Dump the raw array returned by model.predict for the test inputs.
print(pred_ans)

[[2.922055 ]
 [1.8336217]
 [3.271982 ]
 [1.9621656]
 [2.2523553]
 [1.9041772]
 [1.8354471]
 [2.2885141]
 [2.0046692]
 [1.96528  ]
 [2.8707707]
 [2.0193365]
 [2.0747538]
 [2.0968213]
 [2.0429196]
 [2.0178013]
 [2.0215576]
 [2.1037204]
 [2.0603945]
 [2.0603945]
 [2.04973  ]
 [1.8672667]
 [2.0215576]
 [1.9922998]
 [2.1997585]
 [1.8897955]
 [2.3505645]
 [2.340204 ]
 [2.7153497]
 [2.2201698]
 [2.1015005]
 [2.2190554]
 [2.2930489]
 [2.5155478]
 [2.0693169]
 [2.1168897]
 [2.5244236]
 [2.35313  ]
 [3.343246 ]
 [2.3119211]]
