In [223]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
os.environ["CUDA_VISIBLE_DEVICES"]='0'

## 数据预处理

In [224]:
cols = ['user', 'item', 'rating', 'timestamp'] # user、item是特征，rating是标签
train = pd.read_csv('data/ua.base', delimiter='\t', names=cols)
test = pd.read_csv('data/ua.test', delimiter='\t', names=cols) # 测试集中的每个用户在训练集中都出现过
print("共有{}个训练样本，{}个测试样本".format(len(train),len(test))) # 测试集中的item并不是在训练集中都出现过，[1582, 1653]只在test中出现

# user id重新映射
user_unique = train['user'].unique().tolist()
user_map = dict(zip(user_unique, range(0, len(user_unique))))
num_user = len(user_map)
train['user'] = train['user'].apply(lambda x: user_map[x])
# item id重新映射
item_unique = train['item'].unique().tolist()
item_map = dict(zip(item_unique, range(0,len(item_unique))))
num_item = len(item_map)
train['item'] = train['item'].apply(lambda x: item_map[x])
num_feature = num_user + num_item # 共有2623个特征
print("共有{}个特征，用户特征{}个，Item特征{}个".format(num_feature,num_user,num_item))

# 构建训练集
train_dataset = np.zeros((len(train),num_feature),dtype = np.float32)
train_y = np.array(train["rating"])
for index, row in train.iterrows():
    user_id = row["user"]
    train_dataset[index][user_id] = 1
    item_id = row["item"]
    train_dataset[index][num_user+item_id] = 1
print("训练集维度{}".format(train_dataset.shape))
print(train_dataset)
# 构建测试集
test_dataset = np.zeros((len(test),num_feature),dtype = np.float32)
test_y = np.array(test["rating"])
for index, row in test.iterrows():
    user_id = row["user"]
    test_dataset[index][user_id] = 1
    item_id = row["item"]
    if item_id not in item_map.keys():
        continue
    else:
        test_dataset[index][num_user+item_id] = 1
print("测试集维度{}".format(test_dataset.shape))
print(test_dataset)

共有90570个训练样本，9430个测试样本
共有2623个特征，用户特征943个，Item特征1680个
训练集维度(90570, 2623)
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
测试集维度(9430, 2623)
[[0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


## FM模型

In [225]:
class FM_Layer(tf.keras.layers.Layer):
    def __init__(self, k, w_reg = 1e-4, v_reg = 1e-4):
        # 此处进行与输入无关的初始化工作
        super(FM_Layer, self).__init__()
        self.k = k # 隐藏层维度
        self.w_reg = w_reg
        self.v_reg = v_reg
        
    def build(self, input_shape):
        # 此处可以获取输入数据的维度，进行剩余依赖输入数据的初始化工作
        self.w0 = self.add_weight(name="w0",shape=(1,1),initializer=tf.zeros_initializer(),trainable=True)
        self.w = self.add_weight(name="w",shape=(input_shape[-1],1),initializer=tf.random_normal_initializer(),
                               regularizer = tf.keras.regularizers.l2(self.w_reg),trainable=True)
        self.v = self.add_weight(name="v",shape=(input_shape[-1],self.k),initializer=tf.random_normal_initializer(),
                              regularizer = tf.keras.regularizers.l2(self.v_reg),trainable=True)
        
    def call(self,inputs):
        # 此处定义层的传播
        # 一阶特征
        first_order = self.w0 + tf.matmul(inputs,self.w)
        # 二阶交互特征
        second_order = 0.5 * tf.reduce_sum(
            tf.pow(tf.matmul(inputs, self.v), 2) -
            tf.matmul(tf.pow(inputs, 2), tf.pow(self.v, 2)), axis=1, keepdims=True)
        outputs = first_order + second_order
        return outputs

In [226]:
class FM(tf.keras.Model):
    def __init__(self,k,w_reg=1e-4,v_reg=1e-4):
        super(FM, self).__init__()
        self.k = k
        self.w_reg = w_reg
        self.v_reg = v_reg
        self.fm_layer = FM_Layer(self.k,self.w_reg,self.v_reg)
        
    def call(self, inputs):
        x = self.fm_layer(inputs)
        # return tf.nn.sigmoid(x) # 分类问题则需要加上softmax
        return x # 回归问题
    
model = FM(128)

In [228]:
batch_size = 512
epoches = 500
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_object = tf.keras.losses.MeanSquaredError()

In [229]:
for epoch in range(epoches):
    epoch_loss = 0
    for batch_index in range(int(len(train_dataset)/batch_size)+1):
        if batch_index == int(len(train_dataset)/batch_size):
            inputs = train_dataset[batch_index*batch_size:]
            y = train_y[batch_index*batch_size:]
        else:
            inputs = train_dataset[batch_index*batch_size:batch_index*batch_size + batch_size]
            y = train_y[batch_index*batch_size:batch_index*batch_size + batch_size]
        with tf.GradientTape() as tape:
            pred = model(inputs)
            loss = loss_object(y_true = y, y_pred = pred)
            epoch_loss += loss
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
    print("epoch {},loss {}".format(epoch+1, epoch_loss/(batch_index+1)))
        

epoch 1,loss 12.670720100402832
epoch 2,loss 10.629537582397461
epoch 3,loss 8.341581344604492
epoch 4,loss 5.389056205749512
epoch 5,loss 2.751526117324829
epoch 6,loss 1.6540265083312988
epoch 7,loss 1.3641780614852905
epoch 8,loss 1.2708055973052979
epoch 9,loss 1.2342758178710938
epoch 10,loss 1.2174787521362305
epoch 11,loss 1.2087048292160034
epoch 12,loss 1.2036818265914917
epoch 13,loss 1.2006181478500366
epoch 14,loss 1.1988255977630615
epoch 15,loss 1.1979199647903442
epoch 16,loss 1.1965081691741943
epoch 17,loss 1.195610761642456
epoch 18,loss 1.1951640844345093
epoch 19,loss 1.1944854259490967
epoch 20,loss 1.1941988468170166
epoch 21,loss 1.196498155593872
epoch 22,loss 1.2017444372177124
epoch 23,loss 1.206620693206787
epoch 24,loss 1.218779444694519
epoch 25,loss 1.243066430091858
epoch 26,loss 1.259053111076355
epoch 27,loss 1.2509996891021729
epoch 28,loss 1.2328853607177734
epoch 29,loss 1.2221964597702026
epoch 30,loss 1.2129032611846924
epoch 31,loss 1.216398477554

epoch 247,loss 1.1878756284713745
epoch 248,loss 1.1882349252700806
epoch 249,loss 1.18843674659729
epoch 250,loss 1.1889839172363281
epoch 251,loss 1.1885124444961548
epoch 252,loss 1.188680648803711
epoch 253,loss 1.1885939836502075
epoch 254,loss 1.1884286403656006
epoch 255,loss 1.1888844966888428
epoch 256,loss 1.1887179613113403
epoch 257,loss 1.1885172128677368
epoch 258,loss 1.1887664794921875
epoch 259,loss 1.1888457536697388
epoch 260,loss 1.188308596611023
epoch 261,loss 1.1878288984298706
epoch 262,loss 1.1878314018249512
epoch 263,loss 1.1877621412277222
epoch 264,loss 1.187513828277588
epoch 265,loss 1.187414526939392
epoch 266,loss 1.187551498413086
epoch 267,loss 1.187551736831665
epoch 268,loss 1.1874895095825195
epoch 269,loss 1.187576413154602
epoch 270,loss 1.1876474618911743
epoch 271,loss 1.187872290611267
epoch 272,loss 1.1879280805587769
epoch 273,loss 1.1879584789276123
epoch 274,loss 1.1880658864974976
epoch 275,loss 1.1880477666854858
epoch 276,loss 1.1881150

epoch 490,loss 1.1866456270217896
epoch 491,loss 1.1866132020950317
epoch 492,loss 1.186593770980835
epoch 493,loss 1.1867070198059082
epoch 494,loss 1.1869703531265259
epoch 495,loss 1.187140941619873
epoch 496,loss 1.1869667768478394
epoch 497,loss 1.1869398355484009
epoch 498,loss 1.187023401260376
epoch 499,loss 1.1866871118545532
epoch 500,loss 1.1865719556808472


In [230]:
for batch_index in range(int(len(test_dataset)/batch_size)+1):
    total_loss = 0
    if batch_index == int(len(test_dataset)/batch_size):
        inputs = test_dataset[batch_index*batch_size:]
        y = test_y[batch_index*batch_size:]
    else:
        inputs = test_dataset[batch_index*batch_size:batch_index*batch_size + batch_size]
        y = test_y[batch_index*batch_size:batch_index*batch_size + batch_size]
    with tf.GradientTape() as tape:
        pred = model(inputs)
        loss = loss_object(y_true = y, y_pred = pred)
        total_loss += loss
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
print("测试集上的损失{}".format(total_loss))

测试集上的损失1.2158063650131226
