## FM-with-Tensorflow-v2.0

In [304]:
import numpy as np
import tensorflow as tf

In [305]:
print(tf.__version__)

2.0.0


In [306]:
# Example dummy data from Rendle 2010 
# http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf
# Stolen from https://github.com/coreylynch/pyFM
# Categorical variables (Users, Movies, Last Rated) have been one-hot-encoded 
x_data = np.matrix([
#    Users  |     Movies     |    Movie Ratings   | Time | Last Movies Rated
#   A  B  C | TI  NH  SW  ST | TI   NH   SW   ST  |      | TI  NH  SW  ST
    [1, 0, 0,  1,  0,  0,  0,   0.3, 0.3, 0.3, 0,     13,   0,  0,  0,  0 ],
    [1, 0, 0,  0,  1,  0,  0,   0.3, 0.3, 0.3, 0,     14,   1,  0,  0,  0 ],
    [1, 0, 0,  0,  0,  1,  0,   0.3, 0.3, 0.3, 0,     16,   0,  1,  0,  0 ],
    [0, 1, 0,  0,  0,  1,  0,   0,   0,   0.5, 0.5,   5,    0,  0,  0,  0 ],
    [0, 1, 0,  0,  0,  0,  1,   0,   0,   0.5, 0.5,   8,    0,  0,  1,  0 ],
    [0, 0, 1,  1,  0,  0,  0,   0.5, 0,   0.5, 0,     9,    0,  0,  0,  0 ],
    [0, 0, 1,  0,  0,  1,  0,   0.5, 0,   0.5, 0,     12,   1,  0,  0,  0 ]
])
# ratings
y_data = np.array([5, 3, 1, 4, 5, 1, 5], dtype=np.float)

# Let's add an axis to make tensoflow happy.
y_data.shape += (1, )

In [307]:
n_epochs = 10
n_features = 16
k_degree = 2
batch_size = 1000
learning_rate = 0.01

In [308]:
dataset = tf.data.Dataset.from_tensor_slices((x_data, y_data)).repeat().shuffle(2000).batch(batch_size)

In [309]:
# 所以说，repeat 不用传具体的数，take 会帮我们解决问题。
dataset10 = dataset.take(1)
print(dataset.element_spec[0].shape)
for x,y in dataset10:
    print(x.shape, y.shape)

(None, 16)
(1000, 16) (1000, 1)


In [310]:
w0 = tf.Variable(np.zeros(1), name="bias", dtype=tf.float64)
W = tf.Variable(np.random.randn(n_features), name="weights", dtype=tf.float64)
V = tf.Variable(np.random.randn(n_features, k_degree), name="factors", dtype=tf.float64)

In [311]:
print(w0)

<tf.Variable 'bias:0' shape=(1,) dtype=float64, numpy=array([0.])>


In [312]:
print(W)

<tf.Variable 'weights:0' shape=(16,) dtype=float64, numpy=
array([-0.4172011 , -0.73362866, -0.10311634,  1.2632677 ,  0.65341653,
        0.31545171, -0.11232642, -1.99357185, -0.41388427, -1.15188704,
        0.95121691, -1.01368156, -1.12185865, -0.74307947,  0.11064095,
        0.67660343])>


In [313]:
print(V)

<tf.Variable 'factors:0' shape=(16, 2) dtype=float64, numpy=
array([[ 0.18687186,  0.9558971 ],
       [ 0.23523107,  1.15699869],
       [ 1.17717205,  2.29807029],
       [ 1.23333281, -0.19038046],
       [ 0.10306188,  0.11041068],
       [ 0.03360757,  0.66864629],
       [-0.95631733, -1.02660211],
       [ 0.81056354,  0.28159882],
       [ 1.92104722, -0.71260133],
       [-0.4882758 , -0.03113105],
       [-0.39190392,  0.16720936],
       [-0.58209245, -0.95043103],
       [ 1.51312858,  0.31423097],
       [ 0.45299798, -0.7138732 ],
       [ 0.15699325,  0.95455789],
       [ 0.05039035,  1.17311192]])>


In [314]:
 optimizer = tf.keras.optimizers.Adagrad()

In [315]:
def run_optimization(X, y):
    # Wrap computation inside a GradientTape for automatic differentiation
    with tf.GradientTape() as g:
        pred = tf.add(w0, tf.reduce_sum(tf.multiply(X, W), 1, keepdims=True))
        loss = tf.reduce_mean(tf.square(tf.subtract(y, pred)))
    
    # compute gradients
    gradients = g.gradient(loss, [w0, W])
    
    # Update w0, W following gradients
    optimizer.apply_gradients(zip(gradients, [w0, W]))
    return loss

In [316]:
train_data = dataset.take(10)

for i, data in enumerate(train_data):
    X, y = data
    loss = run_optimization(X, y)
#     if i % 1000 == 0:
    print("Epoch i: %d, loss: %.3f " % (i, loss.numpy()))

Epoch i: 0, loss: 253.696 
Epoch i: 1, loss: 268.306 
Epoch i: 2, loss: 261.798 
Epoch i: 3, loss: 263.841 
Epoch i: 4, loss: 262.959 
Epoch i: 5, loss: 260.340 
Epoch i: 6, loss: 265.139 
Epoch i: 7, loss: 266.931 
Epoch i: 8, loss: 263.871 
Epoch i: 9, loss: 267.577 


In [317]:
print(w0)
print(W)

<tf.Variable 'bias:0' shape=(1,) dtype=float64, numpy=array([0.00504385])>
<tf.Variable 'weights:0' shape=(16,) dtype=float64, numpy=
array([-0.41213096, -0.72867341, -0.09808359,  1.26826951,  0.65853045,
        0.32048822, -0.10731099, -1.98850929, -0.40881832, -1.14685238,
        0.95616597, -1.00862184, -1.11671271, -0.73809859,  0.11565637,
        0.67660343])>


In [318]:
y_pred = tf.add(w0, tf.reduce_sum(tf.multiply(x_data, W), 1, keepdims=True))
loss = tf.reduce_mean(tf.square(tf.subtract(y_data, y_pred)))

In [319]:
print(y_pred)
print(loss)

tf.Tensor(
[[-13.31415547]
 [-16.04922908]
 [-18.02590087]
 [ -5.54159372]
 [ -8.87960207]
 [ -9.47004759]
 [-14.56040711]], shape=(7, 1), dtype=float64)
tf.Tensor(262.311872875665, shape=(), dtype=float64)


LR, epochs = 100000, 最终 loss = 4.679 这真是坑呀。

In [322]:
class model(object):
    def __init__(self):
        self.name = "base"
        
class lr_model(model):
    def __init__(self):
        self.name = 'lr'
        
class poly_model(model):
    def __init__(self):
        self.name = 'order-2-poly-model'
    
class fm_model(model):
    
    def __init__(self, 
                 dataset, 
                 epochs, 
                 degree = 2,
                 optimizer = None, 
                 verbosity=1):
        self.name = 'fm'
        if optimizer:
            self.optimizer = optimizer
        else:
            self.optimizer = tf.optimizers.Adagrad(learning_rate=0.1)
            
        self.dataset = dataset
        self.epochs  = epochs
        self.degree = degree
        self.verbosity = verbosity
        
        # Xn is the features.
        for X, y in dataset.take(1):
            self.Xm, self.Xn = X.shape
            self.ym, self.yn = y.shape
        
        self.n_features = self.Xn
        
        self.w0 = tf.Variable(np.zeros(1), 
                              dtype=tf.float64, name="bias")
        self.W = tf.Variable(np.random.randn(self.n_features, 1), 
                             dtype=tf.float64, name="weights")
        self.V = tf.Variable(np.random.randn(self.n_features, self.degree), 
                             dtype=tf.float64, name="factors")
        
        if self.verbosity:
            print(self.w0)
            print(self.W)
            print(self.V)
    
    def predict(self, X):
        linear_terms = tf.add(self.w0, tf.reduce_sum(tf.matmul(X, self.W), axis=1, keepdims=True))
        interactions = tf.multiply(0.5,
                                   tf.reduce_sum(
                                       tf.subtract(
                                            tf.square(tf.matmul(X, self.V)),
                                            tf.matmul(tf.square(X), tf.square(self.V))
                                        ),
                                       axis=1,
                                       keepdims=True,
                                    )
                                  )
        pred = linear_terms + interactions
        return pred
    
    def loss(self, y_pred, y):
        loss = tf.reduce_mean(tf.square(tf.subtract(y_pred, y)))
        return loss
        
    
    def train(self):   
        dataset = self.dataset.take(self.epochs)
        for i, data in enumerate(dataset):
            with tf.GradientTape() as g:
                y_pred = self.predict(X)
                cost = self.loss(y_pred, y)
                
            gradients = g.gradient(cost, [self.w0, self.W, self.V])
            self.optimizer.apply_gradients(zip(gradients, [self.w0, self.W, self.V]))
            if self.verbosity:
                print("Epoch %d, loss: %0.3f" % (i + 1, loss.numpy()))
        self.cost = cost
    
    

In [325]:
m = fm_model(dataset, epochs=1000, degree=2, verbosity=0)
m.train()
print(m.cost)

tf.Tensor(7.601638336981552e-18, shape=(), dtype=float64)


In [334]:
y_pred = m.predict(x_data)
for y, y_hat in zip(y_data, y_pred):
    print(y[0], y_hat.numpy()[0])

5.0 5.000000003279677
3.0 2.9999999987619557
1.0 0.9999999988176684
4.0 3.999999999040022
5.0 5.000000000502729
1.0 0.9999999947654405
5.0 5.0000000026476075


上面绝对过拟合了。