## FM with Tensorflow 1.0

[Factorization Machines with Tensorflow](http://nowave.it/factorization-machines-with-tensorflow.html) 这篇 Blog 写的真的很赞。


In [2]:
import numpy as np
# Example dummy data from Rendle 2010 
# http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf
# Stolen from https://github.com/coreylynch/pyFM
# Categorical variables (Users, Movies, Last Rated) have been one-hot-encoded 
x_data = np.matrix([
#    Users  |     Movies     |    Movie Ratings   | Time | Last Movies Rated
#   A  B  C | TI  NH  SW  ST | TI   NH   SW   ST  |      | TI  NH  SW  ST
    [1, 0, 0,  1,  0,  0,  0,   0.3, 0.3, 0.3, 0,     13,   0,  0,  0,  0 ],
    [1, 0, 0,  0,  1,  0,  0,   0.3, 0.3, 0.3, 0,     14,   1,  0,  0,  0 ],
    [1, 0, 0,  0,  0,  1,  0,   0.3, 0.3, 0.3, 0,     16,   0,  1,  0,  0 ],
    [0, 1, 0,  0,  0,  1,  0,   0,   0,   0.5, 0.5,   5,    0,  0,  0,  0 ],
    [0, 1, 0,  0,  0,  0,  1,   0,   0,   0.5, 0.5,   8,    0,  0,  1,  0 ],
    [0, 0, 1,  1,  0,  0,  0,   0.5, 0,   0.5, 0,     9,    0,  0,  0,  0 ],
    [0, 0, 1,  0,  0,  1,  0,   0.5, 0,   0.5, 0,     12,   1,  0,  0,  0 ]
])
# ratings
y_data = np.array([5, 3, 1, 4, 5, 1, 5])

# Let's add an axis to make tensoflow happy.
y_data.shape += (1, )

In [3]:
# import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
print(tf.__version__)

2.0.0


## Placeholders

Placeholders 的作用是用来作为占位符，一般用来作为数据的占位符。然后在开启一个 Session 运行时，再将数据填充上。

In [5]:
# number of latent factors
k = 5

n, p = x_data.shape

# design matrix
X = tf.placeholder('float', shape=[n, p])

# target vector
y = tf.placeholder('float', shape=[n, 1])

# bias and weights
w0 = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.zeros([p]))

# interaction factors, random initialiazed
V = tf.Variable(tf.random_normal([k, p], stddev=0.01))

# estimate of y, initialize to 0.
y_hat = tf.Variable(tf.zeros([n, 1]))

### linear_terms

In [6]:
linear_terms = tf.add(
                    w0, 
                    tf.reduce_sum(tf.multiply(W, X), 1, keepdims=True)
                )

### interaction terms

In [7]:
interactions = tf.multiply(0.5, 
                    tf.reduce_sum(
                        tf.subtract(
                            tf.pow(tf.matmul(X, tf.transpose(V)), 2), 
                            tf.matmul(tf.pow(X, 2), tf.transpose(tf.pow(V, 2)))
                        ),
                        1, 
                        keepdims=True
                    )
                )

### y_hat

In [8]:
y_hat = tf.add(linear_terms, interactions)

In [9]:
### L2 regularized
lambda_w = tf.constant(0.001, name='lambda_w')
lambda_v = tf.constant(0.001, name='lambda_v')

l2_norm = tf.reduce_sum(
            tf.add(
                tf.multiply(lambda_w, tf.pow(W, 2)),
                tf.multiply(lambda_v, tf.pow(V, 2))
            )
        )

error = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))
loss = tf.add(error, l2_norm)

### train with optimizer


In [11]:
eta = tf.constant(0.1)
optimizer = tf.train.AdagradOptimizer(eta).minimize(loss)

In [12]:
N_EPOCHS = 1000

# Launch the graph
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(N_EPOCHS):
        indices = np.arange(n)
        np.random.shuffle(indices)
        x_train, y_train = x_data[indices], y_data[indices]
        feed_dict = {X: x_train, y: y_train}
        sess.run(optimizer, feed_dict=feed_dict)
    
    feed_dict = {X: x_data, y: y_data}
    print('MSE: ', sess.run(error, feed_dict=feed_dict))
    print('Loss (regularized error):', sess.run(loss, feed_dict=feed_dict))
    print('Predictions:', sess.run(y_hat, feed_dict=feed_dict))
    print('Learnt weights:', sess.run(W, feed_dict=feed_dict))
    print('Learnt factors:', sess.run(V, feed_dict=feed_dict))

MSE:  2.6334617e-06
Loss (regularized error): 0.0032062565
Predictions: [[5.0008225]
 [3.0021558]
 [1.0024109]
 [3.9997218]
 [5.001162 ]
 [1.001924 ]
 [5.0014725]]
Learnt weights: [ 0.13208246  0.18306005 -0.10841523 -0.01566492 -0.05459929  0.18787012
  0.08919552 -0.00697165  0.07593074  0.07488453  0.13808195  0.10128716
  0.14856166 -0.1532194   0.08919552  0.        ]
Learnt factors: [[-7.36950859e-02 -2.33698264e-01  9.86326411e-02 -1.04981445e-01
   2.38596946e-01 -1.55026436e-01 -7.00005814e-02  1.34370672e-02
  -2.69906539e-02 -7.78662041e-02 -2.05926731e-01 -2.14775398e-01
  -1.74245656e-01  3.39775354e-01 -7.75893107e-02 -3.79515626e-03]
 [-9.29042026e-02 -2.32147098e-01  7.66933113e-02 -1.07213557e-01
   2.00149998e-01 -1.49390191e-01 -7.83170089e-02  6.60254946e-03
  -3.57870311e-02 -8.26184526e-02 -2.05183178e-01 -2.08080307e-01
  -1.98463157e-01  3.34118664e-01 -7.95174316e-02  5.80727914e-03]
 [ 1.98745012e-01  1.40414238e-01 -2.55441338e-01  2.13614345e-01
  -3.2680255

### References

1. [Factorization Machines with Tensorflow](http://nowave.it/factorization-machines-with-tensorflow.html)