## FM with Tensorflow

[Factorization Machines with Tensorflow](http://nowave.it/factorization-machines-with-tensorflow.html) 这篇 Blog 写的真的很赞。


In [40]:
import numpy as np
# Example dummy data from Rendle 2010 
# http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf
# Stolen from https://github.com/coreylynch/pyFM
# Categorical variables (Users, Movies, Last Rated) have been one-hot-encoded 
x_data = np.matrix([
#    Users  |     Movies     |    Movie Ratings   | Time | Last Movies Rated
#   A  B  C | TI  NH  SW  ST | TI   NH   SW   ST  |      | TI  NH  SW  ST
    [1, 0, 0,  1,  0,  0,  0,   0.3, 0.3, 0.3, 0,     13,   0,  0,  0,  0 ],
    [1, 0, 0,  0,  1,  0,  0,   0.3, 0.3, 0.3, 0,     14,   1,  0,  0,  0 ],
    [1, 0, 0,  0,  0,  1,  0,   0.3, 0.3, 0.3, 0,     16,   0,  1,  0,  0 ],
    [0, 1, 0,  0,  0,  1,  0,   0,   0,   0.5, 0.5,   5,    0,  0,  0,  0 ],
    [0, 1, 0,  0,  0,  0,  1,   0,   0,   0.5, 0.5,   8,    0,  0,  1,  0 ],
    [0, 0, 1,  1,  0,  0,  0,   0.5, 0,   0.5, 0,     9,    0,  0,  0,  0 ],
    [0, 0, 1,  0,  0,  1,  0,   0.5, 0,   0.5, 0,     12,   1,  0,  0,  0 ]
])
# ratings
y_data = np.array([5, 3, 1, 4, 5, 1, 5])

# Let's add an axis to make tensoflow happy.
y_data.shape += (1, )

In [41]:
# import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
print(tf.__version__)

2.0.0


## Placeholders

Placeholders 的作用是用来作为占位符，一般用来作为数据的占位符。然后在开启一个 Session 运行时，再将数据填充上。

In [42]:
# number of latent factors
k = 5

n, p = x_data.shape

# design matrix
X = tf.placeholder('float', shape=[n, p])

# target vector
y = tf.placeholder('float', shape=[n, 1])

# bias and weights
w0 = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.zeros([p]))

# interaction factors, random initialiazed
V = tf.Variable(tf.random_normal([k, p], stddev=0.01))

# estimate of y, initialize to 0.
y_hat = tf.Variable(tf.zeros([n, 1]))

### linear_terms

In [43]:
linear_terms = tf.add(
                    w0, 
                    tf.reduce_sum(tf.multiply(W, X), 1, keepdims=True)
                )

### interaction terms

In [44]:
interactions = tf.multiply(0.5, 
                    tf.reduce_sum(
                        tf.subtract(
                            tf.pow(tf.matmul(X, tf.transpose(V)), 2), 
                            tf.matmul(tf.pow(X, 2), tf.transpose(tf.pow(V, 2)))
                        ),
                        1, 
                        keepdims=True
                    )
                )

### y_hat

In [45]:
y_hat = tf.add(linear_terms, interactions)

In [46]:
### L2 regularized
lambda_w = tf.constant(0.001, name='lambda_w')
lambda_v = tf.constant(0.001, name='lambda_v')

l2_norm = tf.reduce_sum(
            tf.add(
                tf.multiply(lambda_w, tf.pow(W, 2)),
                tf.multiply(lambda_v, tf.pow(V, 2))
            )
        )

error = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))
loss = tf.add(error, l2_norm)

### train with optimizer


In [47]:
eta = tf.constant(0.1)
optimizer = tf.train.AdagradOptimizer(eta).minimize(loss)

In [49]:
N_EPOCHS = 1000

# Launch the graph
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(N_EPOCHS):
        indices = np.arange(n)
        np.random.shuffle(indices)
        x_train, y_train = x_data[indices], y_data[indices]
        feed_dict = {X: x_train, y: y_train}
        sess.run(optimizer, feed_dict=feed_dict)
    
    feed_dict = {X: x_data, y: y_data}
    print('MSE: ', sess.run(error, feed_dict=feed_dict))
    print('Loss (regularized error):', sess.run(loss, feed_dict=feed_dict))
    print('Predictions:', sess.run(y_hat, feed_dict=feed_dict))
    print('Learnt weights:', sess.run(W, feed_dict=feed_dict))
    print('Learnt facors:', sess.run(V, feed_dict=feed_dict))

MSE:  5.4072837e-07
Loss (regularized error): 0.003516308
Predictions: [[4.999163 ]
 [3.0002472]
 [1.0005132]
 [3.9988155]
 [4.9998255]
 [1.0010649]
 [4.999561 ]]
Learnt weights: [ 0.12459119  0.19839069 -0.0933651  -0.00271082 -0.06172945  0.20701723
  0.0896178   0.00170326  0.07182097  0.08769058  0.15037209  0.10752896
  0.17412657 -0.19192496  0.0896178   0.        ]
Learnt facors: [[-1.12008616e-01 -2.87603408e-01  1.11922644e-01 -1.44394144e-01
   2.32248694e-01 -2.18376473e-01 -6.92940727e-02 -4.15536994e-03
  -6.79169819e-02 -8.92268568e-02 -2.53333658e-01 -2.24281833e-01
  -2.52054662e-01  3.92731220e-01 -9.44441929e-02 -4.12356574e-03]
 [-1.05130754e-01 -2.67685205e-01  1.20438792e-01 -1.05224602e-01
   2.45371193e-01 -2.09761217e-01 -5.18973842e-02 -7.07101449e-03
  -3.56599502e-02 -5.34529313e-02 -2.22231746e-01 -2.17157274e-01
  -2.50922233e-01  3.89144957e-01 -5.07472269e-02 -1.87486049e-03]
 [ 1.45810679e-01 -6.13388326e-03 -2.56886601e-01  9.30850953e-02
  -4.28107023e

### References

1. [Factorization Machines with Tensorflow](http://nowave.it/factorization-machines-with-tensorflow.html)