# FM

Eileen Zhang 2020/8/20

![FM](../data/fm.png)

**注: 
每个feature 都有自己的 len. 比如, 性别: 男,女,保密, len 为 3. 年龄: 儿童,青年,中年,老年, len 为4
为了避免做多个embeding,当把它们用一个embeding表示时,通常需要使用np.cumsum 给原本的数据加上offsets
**

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
from tensorflow.keras.layers import Layer,Dense,Embedding

In [3]:
class FactorizationMachine(Layer):
    """
    implementation of Factorization Machine.

    Reference:
        S Rendle, Factorization Machines, 2010.
    """

    def __init__(self, reduce_sum=True):
        super().__init__()
        self.reduce_sum = reduce_sum
        
    def build(self, input_shape):    
        super().build(input_shape)
        
    def call(self, x):
        square_of_sum = tf.reduce_sum(x, 1) ** 2
        sum_of_square = tf.reduce_sum(x ** 2, 1)
        ix = square_of_sum - sum_of_square
        if self.reduce_sum:
            ix = tf.reduce_sum(ix, 1, keepdims=True)
        return 0.5 * ix

In [9]:
class FeaturesEmbedding(Layer):

    def __init__(self, field_dims, embed_dim):
        super().__init__()
        self.embedding = Embedding(np.sum(np.sum(field_dims)), embed_dim)
        self.offsets = tf.constant(np.expand_dims(np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.float32),0))
    
    def build(self, input_shape):    
        super().build(input_shape)
        
    def call(self, x):
        """
        :param x: [11,2,3] Long tensor of size ``(batch_size, num_fields)``
        """
        x = x + self.offsets
        return self.embedding(x)

In [69]:
class FactorizationMachineModel(keras.models.Model):
    """
    implementation of Factorization Machine.

    Reference:
        S Rendle, Factorization Machines, 2010.
    """

    def __init__(self, field_dims, embed_dim, units = 20):
        super().__init__()
        self.units = units
        self.embed_dim = embed_dim
        self.embedding = FeaturesEmbedding(field_dims, embed_dim)
        self.fm = FactorizationMachine(reduce_sum=True)
        self.dense = Dense(units)
        self.fc = Dense(1)
        
    def call(self, x):
        """
        :param x: Long tensor of size ``(batch_size, num_fields)``
        """
        self.bias = tf.Variable(np.random.normal(size = (x.shape[0],self.embed_dim)),dtype = tf.float32, name = 'bias', trainable=True)
        x = self.embedding(x)
        x = tf.reduce_sum(x, 1) + self.fm(x) + self.bias
        x = self.dense(x)
        x = self.fc(x)
        return x

In [70]:
model = FactorizationMachineModel([10,20,10,10],3)

In [17]:
test = tf.constant([[1.,2.,3.,4.],[5.,6.,7.,8.]])

In [71]:
model(test)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.46446854],
       [-0.5425227 ]], dtype=float32)>

In [72]:
model.summary()

Model: "factorization_machine_model_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
features_embedding_15 (Featu multiple                  150       
_________________________________________________________________
factorization_machine_15 (Fa multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  80        
_________________________________________________________________
dense_1 (Dense)              multiple                  21        
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________
