In [1]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import copy
import time
import tensorflow_addons as tfa
from utils import *
random.seed(42)

# Load the book metadata table.
# Column names for the 15 comma-separated fields of each record.
BOOK_INFO_COLUMNS = ['itemid','genres','subtype','url','title','author','publisher','publish_time','aver_score','score_num','star_5','star_4','star_3','star_2','star_1']

# Every non-blank line of the file is one record. Reading all lines (instead
# of assuming exactly 5250 of them) avoids an IndexError on a shorter file and
# unsplit rows on a longer one. Only the line terminator is stripped, so a
# final line without a trailing newline keeps its last character.
with open("book_info_all.txt", 'r', encoding='utf-8') as infile:
    text = [line.rstrip("\n").split(",") for line in infile if line.strip()]

# Passing the column names here raises a clear error if a record does not
# split into the expected number of fields (e.g. a comma inside a title).
book_info = pd.DataFrame(text, columns=BOOK_INFO_COLUMNS)

# Model-related definitions
class Sampling(tf.keras.layers.Layer):
    """Draw a latent vector z from the pair (z_mean, z_log_var).

    The layer computes ``z_mean + exp(0.5 * z_log_var) * epsilon`` where
    ``epsilon`` is standard-normal noise with one value per (row, column)
    of ``z_mean``.
    """

    def call(self, inputs):
        """Return one noisy sample for each row of ``inputs[0]``.

        Args:
            inputs: a 2-tuple ``(z_mean, z_log_var)``; the first two
                dimensions of ``z_mean`` define the shape of the noise.
        """
        mean, log_var = inputs
        dims = tf.shape(mean)
        noise = tf.keras.backend.random_normal(shape=(dims[0], dims[1]), stddev=1.)
        # exp(0.5 * log-variance) is the standard deviation.
        sigma = tf.exp(0.5 * log_var)
        return mean + sigma * noise


class Model(tf.keras.Model):
    """Variational auto-encoder built from dense layers.

    The encoder and decoder are stacks of Dense -> swish -> LayerNormalization
    steps in which each step also adds the outputs of the previous steps
    before the activation. The decoder output is the element-wise product of
    two sigmoid heads (see ``decode``).

    NOTE(review): attribute names and creation order are left untouched on
    purpose; saved weights are loaded into this class elsewhere in the
    notebook and presumably depend on this layout -- confirm before
    renaming or removing any layer.
    """

    def __init__(self, num_words, latent=1024, hidden=1024, items_sampling=1.):
        """Create all layers.

        Args:
            num_words: size of the item vocabulary; scaled by
                ``items_sampling`` to obtain the width of both output heads.
            latent: number of units of the mean and log-variance layers.
            hidden: number of units of every hidden Dense layer.
            items_sampling: factor applied to ``num_words`` (1. keeps all).
        """
        super().__init__()
        # Width of the two decoder output heads.
        self.sampled_items = int(num_words * items_sampling)
        # ************* ENCODER ***********************
        self.encoder1 = tf.keras.layers.Dense(hidden)
        self.ln1 = tf.keras.layers.LayerNormalization()
        self.encoder2 = tf.keras.layers.Dense(hidden)
        self.ln2 = tf.keras.layers.LayerNormalization()
        self.encoder3 = tf.keras.layers.Dense(hidden)
        self.ln3 = tf.keras.layers.LayerNormalization()
        self.encoder4 = tf.keras.layers.Dense(hidden)
        self.ln4 = tf.keras.layers.LayerNormalization()
        self.encoder5 = tf.keras.layers.Dense(hidden)
        self.ln5 = tf.keras.layers.LayerNormalization()
        # NOTE(review): encoder6/ln6 and encoder7/ln7 are created here but
        # encode() only calls encoder1-5 / ln1-5.
        self.encoder6 = tf.keras.layers.Dense(hidden)
        self.ln6 = tf.keras.layers.LayerNormalization()
        self.encoder7 = tf.keras.layers.Dense(hidden)
        self.ln7 = tf.keras.layers.LayerNormalization()

        # ************* SAMPLING **********************
        self.dense_mean = tf.keras.layers.Dense(latent,
                                                name="Mean")
        self.dense_log_var = tf.keras.layers.Dense(latent,
                                                    name="log_var")

        self.sampling = Sampling(name='Sampler')

        # ************* DECODER ***********************
        self.decoder1 = tf.keras.layers.Dense(hidden)
        self.dln1 = tf.keras.layers.LayerNormalization()
        self.decoder2 = tf.keras.layers.Dense(hidden)
        self.dln2 = tf.keras.layers.LayerNormalization()
        self.decoder3 = tf.keras.layers.Dense(hidden)
        self.dln3 = tf.keras.layers.LayerNormalization()
        # NOTE(review): decoder4/dln4 and decoder5/dln5 are created here but
        # decode() only calls decoder1-3 / dln1-3.
        self.decoder4 = tf.keras.layers.Dense(hidden)
        self.dln4 = tf.keras.layers.LayerNormalization()
        self.decoder5 = tf.keras.layers.Dense(hidden)
        self.dln5 = tf.keras.layers.LayerNormalization()

        # Output head fed by the last hidden decoder activation.
        self.decoder_resnet = tf.keras.layers.Dense(self.sampled_items,
                                                    activation='sigmoid',
                                                    name="DecoderR")
        # Output head fed directly by the latent vector.
        self.decoder_latent = tf.keras.layers.Dense(self.sampled_items,
                                                    activation='sigmoid',
                                                    name="DecoderL")

        
    def call(self, x, training=None):
        """Encode ``x`` and decode it back to item scores.

        When ``training`` is truthy the sampled latent ``z`` is decoded and a
        KL-divergence term is registered as both a loss and a metric named
        ``kl_div``. Otherwise the latent mean is decoded, so the output does
        not depend on the random sample.
        """

        # Plain alias; the input is used as-is.
        sampled_x = x

        z_mean, z_log_var, z = self.encode(sampled_x)
        if training:
            d = self.decode(z)
            # Add KL divergence regularization loss.
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            # Averaged over every element (no axis given), then scaled by -0.5.
            kl_loss = tf.reduce_mean(kl_loss)
            kl_loss *= -0.5
            self.add_loss(kl_loss)
            self.add_metric(kl_loss, name="kl_div")
        else:
            d = self.decode(z_mean)

        return d

    def decode(self, x):
        """Map a latent vector ``x`` to the output item scores.

        Returns the element-wise product of the two sigmoid heads:
        ``decoder_resnet`` applied to the third hidden activation and
        ``decoder_latent`` applied to ``x`` itself.
        """
        e0 = x
        # Each step adds the earlier activations before swish + layer norm.
        e1 = self.dln1(tf.keras.activations.swish(self.decoder1(e0)))
        e2 = self.dln2(tf.keras.activations.swish(self.decoder2(e1) + e1))
        e3 = self.dln3(tf.keras.activations.swish(self.decoder3(e2) + e1 + e2))

        dr = self.decoder_resnet(e3)
        dl = self.decoder_latent(x)

        return dr * dl

    def encode(self, x):
        """Map an input ``x`` to ``(z_mean, z_log_var, z)``.

        ``z`` is always drawn through the sampling layer, regardless of
        whether the caller ends up using it.
        """
        e0 = x
        # Each step adds all earlier activations before swish + layer norm.
        e1 = self.ln1(tf.keras.activations.swish(self.encoder1(e0)))
        e2 = self.ln2(tf.keras.activations.swish(self.encoder2(e1) + e1))
        e3 = self.ln3(tf.keras.activations.swish(self.encoder3(e2) + e1 + e2))
        e4 = self.ln4(tf.keras.activations.swish(self.encoder4(e3) + e1 + e2 + e3))
        e5 = self.ln5(tf.keras.activations.swish(self.encoder5(e4) + e1 + e2 + e3 + e4))

        z_mean = self.dense_mean(e5)
        z_log_var = self.dense_log_var(e5)
        z = self.sampling((z_mean, z_log_var))

        return z_mean, z_log_var, z

# Recommender system
class RecSystem:
    """Book recommender that wraps two models: a feed-forward NN and a VAE.

    Args:
        nn_path: path of the saved NN model, passed to
            ``tf.keras.models.load_model``.
        vae_weights: path of the VAE weights, passed to ``load_weights``.
        num_items: number of books in the catalogue. The NN scores the item
            indices ``0 .. num_items - 1``; the VAE is built with
            ``num_items + 1`` outputs.
        latent: latent size used to build the VAE.
        hidden: hidden size used to build the VAE.
    """

    def __init__(self, nn_path='model_nn.h5', vae_weights="VAE_RD_2_3_512_1024__",
                 num_items=5250, latent=512, hidden=1024):
        self.num_items = num_items
        self.model_nn = tf.keras.models.load_model(nn_path)
        self.model_vae = Model(num_items + 1, latent, hidden)
        self.model_vae.load_weights(vae_weights)

    def recommend(self, user, topk=1, flag=0):
        """Return the ``topk`` best book ids for ``user``.

        Args:
            user: for ``flag == 0`` a flat sequence of preference features;
                otherwise a 2-D array with one row per user and one column
                per book id, as expected by the VAE.
            topk: number of books to return; values <= 0 give an empty
                result and values above the catalogue size are clamped.
            flag: ``0`` selects the NN, any other value selects the VAE.

        Returns:
            ``flag == 0``: a list of 1-based book ids, best first, without
            duplicates.
            otherwise: an integer array of shape ``(n_users, topk)`` holding
            the best-scoring column indices of the VAE output, best first.
        """
        if flag == 0:
            return self._recommend_nn(user, topk)
        return self._recommend_vae(user, topk)

    def _recommend_nn(self, user, topk):
        """Score every catalogue item for one user with the NN and rank them."""
        n_items = self.num_items
        prefs = np.asarray(user).reshape(1, -1)
        # One candidate row per item: the user's features followed by the item index.
        candidates = np.column_stack([np.repeat(prefs, n_items, axis=0),
                                      np.arange(n_items)])
        # assumes the NN emits one score per row -- only the first output
        # column is used for ranking.
        scores = np.asarray(self.model_nn.predict(candidates)).reshape(n_items, -1)[:, 0]
        # A single stable descending sort replaces the repeated max()/index()
        # scan. The old scan masked chosen items with 0, which produced
        # duplicate ids whenever the remaining scores were <= 0. Ties keep
        # the lowest index first, as before.
        order = np.argsort(-scores, kind="stable")[:max(topk, 0)]
        return [int(idx) + 1 for idx in order]  # ids are 1-based

    def _recommend_vae(self, user, topk):
        """Rank the VAE output columns for every row of ``user``."""
        pred = np.array(self.model_vae.predict(user), dtype=float)
        # Book ids in this notebook start at 1, so column 0 of the
        # (num_items + 1)-wide vector is not a book; push it to the end so it
        # is never recommended.
        pred[:, 0] = -np.inf
        return np.argsort(-pred, axis=1, kind="stable")[:, :max(topk, 0)]

# Build the recommender once; the constructor loads both models from disk.
rec_system = RecSystem()

有两种推荐方式:

1. 用户刚注册不久时，根据注册时填写的偏好用 NN 进行推荐（NN 受冷启动影响较小）。

2. 当用户看过（并评分）一定数量的书籍后（比如 15 本），改用 VAE 进行推荐。

In [2]:
## NN recommendation

# Reference only (kept commented out): the id lists used for genres and,
# per genre, for sub-genres.
# type_list = [0, 1, 2, 3, 4, 5]
# sub_type_list = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
#                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
#                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25],
#                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
#                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
#                  [0, 1, 2, 3, 4, 5, 6, 7]]

# The user's preferences have to be serialised into one flat list.
# Each group is one favourite genre followed by 9 favourite sub-genres;
# pad with 0 when there are fewer than 9 and drop anything beyond 9.
# This user likes
#   Literature: fiction, literature, foreign literature, classics, Chinese literature
#   Popular: picture books, suspense, Keigo Higashino, science fiction
literature_pref = [0] + [0, 1, 2, 3, 4] + [0] * 4
popular_pref = [1] + [2, 3, 4, 5] + [0] * 5
user = literature_pref + popular_pref

# topk is the number of books to recommend; flag=0 selects the NN and
# expects `user` in the format built above.
ll = rec_system.recommend(user, topk=1, flag=0)

# Returned ids start at 1 while book_info rows start at 0, hence the -1.
for book_id in ll:
    print(book_info.loc[book_id - 1])

itemid                                               191
genres                                                文学
subtype                                               经典
url             https://book.douban.com/subject/6311230/
title                                            秦汉魏晋史探微
author                                               田余庆
publisher                                           中华书局
publish_time                                      2011-6
aver_score                                           9.4
score_num                                           2680
star_5                                             77.9%
star_4                                             19.1%
star_3                                              2.7%
star_2                                              0.2%
star_1                                              0.1%
Name: 190, dtype: object


In [3]:
## VAE recommendation
# Ids (starting at 1) of the books this user rated above 4.
like_list = [1, 5, 48]

# One row per user (a single user here); column k is set to 1 when the
# user likes book id k.
user = np.zeros((1, 5251))
user[0, like_list] = 1

lll = rec_system.recommend(user, topk=1, flag=1)
# lll has one row per user; show the books recommended for the first one.
for book_id in lll[0]:
    print(book_info.loc[book_id - 1])

itemid                                              1740
genres                                                流行
subtype                                             日本漫画
url             https://book.douban.com/subject/3122341/
title                                            福星小子（5）
author                                             高橋留美子
publisher                                             尖端
publish_time                                    1995-3-3
aver_score                                           9.2
score_num                                            108
star_5                                             65.7%
star_4                                             24.1%
star_3                                             10.2%
star_2                                              0.0%
star_1                                              0.0%
Name: 1739, dtype: object
