In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')

In [2]:
##重新使用ml-1m
##导入数据，并对数据进行简单处理
def load_data():
    ##读取user
    users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
    users = pd.read_table('./data/ml-1m/users.dat', sep='::', header=None, names=users_title, engine='python')
    users = users.drop(['Zip-code'], axis=1)
    users_orig = users.values
    
    ##one_hot
    gender_map = {'F': 0, 'M': 1}
    users['Gender'] = users['Gender'].map(gender_map)
    
    ##年龄离散化
    bins = [i for i in range(0, 61, 10)]
    users['Age'] = pd.cut(users['Age'], bins=bins, labels=False)
    
    ##读取movie数据集
    movies_title = ["MovieID", 'Title', 'Genres']
    movies = pd.read_table('./data/ml-1m/movies.dat', sep='::', header=None, names=movies_title, engine='python')
    movies_orig = movies.values
    
    movies['Title'] = movies['Title'].str.extract(r'(.*)\s*\(\d+\)', expand=False)
    #movies['year'] = movies['Title'].str.extract(r'(\(\d+\))', expand=False)
    
    #电影类型转数字字典
    genres_set = set()
    for val in movies['Genres'].str.split('|'):
        genres_set.update(val)

    genres_set.add('<PAD>')
    genres2int = {val:ii for ii, val in enumerate(genres_set)}

    #将电影类型转成等长数字列表，长度是18
    genres_map = {val:[genres2int[row] for row in val.split('|')] for ii,val in enumerate(set(movies['Genres']))}

    for key in genres_map:
        for cnt in range(max(genres2int.values()) - len(genres_map[key])):
            genres_map[key].insert(len(genres_map[key]) + cnt,genres2int['<PAD>'])
    
    movies['Genres'] = movies['Genres'].map(genres_map)
    
    title_set = set()
    for val in movies['Title'].str.split():
        title_set.update(val)
    
    title_set.add('<PAD>')
    title2int = {val:ii for ii, val in enumerate(title_set)}
    
    ##将电影title专程等长数字列表，长度为15
    title_count = 15
    title_map = {val:[title2int[row] for row in val.split()] for ii, val in enumerate(set(movies['Title']))}
    
    for key in title_map:
        for cnt in range(title_count - len(title_map[key])):
            title_map[key].insert(len(title_map[key]) + cnt, title2int['<PAD>'])
    
    movies['Title'] = movies['Title'].map(title_map)
    
    #读取评分数据集
    ratings_title = ['UserID','MovieID', 'ratings', 'timestamps']
    ratings = pd.read_table('./data/ml-1m/ratings.dat', sep='::', header=None, names=ratings_title, engine = 'python')
    ratings = ratings.filter(regex='UserID|MovieID|ratings')

    #合并三个表
    data = pd.merge(pd.merge(ratings, users), movies)
    
    #将数据分成X和y两张表
    target_fields = ['ratings']
    features_pd, targets_pd = data.drop(target_fields, axis=1), data[target_fields]
    
    features = features_pd.values
    targets_values = targets_pd.values
    
    '''
    title_count: title字段长度
    title_set:title文本的集合
        genres2int:电影类型转数字的字典
        features：输入X
        targets_values：是学习目标
        ratings：评分数据集的pandas对象
        users：用户数据集的pandas对象
        movies：电影数据的pandas对象
        data：三个数据集合并表
        movies_orig：原始数据
        users_orig：原属数据(用户)
    '''
    
    return title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig

In [3]:
def variable_init():
    """
    user:id,gender,age,job
    movie:id, genres, title
    rating: 目标输出
    """
    uid = tf.placeholder(tf.int32, [None, 1], name='uid')
    user_gender = tf.placeholder(tf.int32, [None, 1], name='user_gender')
    user_age = tf.placeholder(tf.int32, [None, 1], name='user_age')   ###这里是经过分桶过后的离散数据，数值型数据可以不同embedding层
    user_job = tf.placeholder(tf.int32, [None, 1], name='user_job')
    
    movie_id = tf.placeholder(tf.int32, [None, 1], name='movie_id')
    movie_genres = tf.placeholder(tf.int32, [None, 18], name='movie_genres')
    movie_title = tf.placeholder(tf.int32, [None, 15], name='movie_title')
    
    rating = tf.placeholder(tf.int32, [None,1], name='rating')
    
    learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    
    return uid, user_age, user_gender, user_job, movie_id, movie_genres, movie_title, rating, learning_rate

In [4]:
title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = load_data()


In [123]:
###各个维度定义
embed_dim = 32
uid_dim = data['UserID'].unique().max() + 1
user_gender_dim = data['Gender'].unique().max() + 1
user_age_dim = data['Age'].unique().max() + 1
user_job_dim = data['JobID'].unique().max() + 1

movie_id_dim = data['MovieID'].unique().max() + 1
movie_title_dim = len(title_set)
movie_genres_dim = max(genres2int.values()) + 1

sentences_size = 15

#文本卷积滑动窗口
window_sizes = {2, 3, 4, 5}

#文本卷积核数量
filter_num = 8

movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}

In [None]:
##超参数
num_epochs = 5
batch_size =128

dropout_keep = 0.5

lr = 0.01

##显示每n个批次的统计信息
show_every_n_batches = 60

In [106]:
###开始网络搭建
"""
第一部分:各个特征的embedding层
"""
def user_feature(**kwargs):
    with tf.name_scope('user_embedding'):
        uid = kwargs.get('uid', '')
        uid_embed_matrix = tf.Variable(tf.random_normal([uid_dim, embed_dim], stddev=1.0, mean=0.0), name='uid_embed_matrix')
        uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid, name='uid_embed_layer')

        user_gender = kwargs.get('user_gender', '')
        gender_embed_matrix = tf.Variable(tf.random_normal([user_gender_dim, embed_dim], stddev=1.0, mean=0.0), name='gender_embed_matrix')
        gender_embed_layer = tf.nn.embedding_lookup(gender_embed_matrix, user_gender, name='gender_embed_layer')

        user_age = kwargs.get('user_age', '')
        age_embed_matrix = tf.Variable(tf.random_normal([user_age_dim, embed_dim], stddev=1.0, mean=0.0), name='age_embed_matrix')
        age_embed_layer = tf.nn.embedding_lookup(age_embed_matrix, user_age, name='age_embed_layer')

        user_job = kwargs.get('user_job', '')
        job_embed_matrix = tf.Variable(tf.random_normal([user_job_dim, embed_dim], stddev=1.0, mean=0.0), name='job_embed_matrix')
        job_embed_layer = tf.nn.embedding_lookup(job_embed_matrix, user_job, name='job_embed_layer')
        
        return uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer

def movie_feature(**kwargs):
    with tf.name_scope('movie_embedding'):
        movie_id = kwargs.get('movie_id')
        movie_id_embed_matrix = tf.Variable(tf.random_normal([movie_id_dim, embed_dim], stddev=1.0, mean=0.0), name='movie_id_embed_matrix')
        movie_id_embed_layer = tf.nn.embedding_lookup(movie_id_embed_matrix, movie_id, name='movie_id_embed_layer')
        
        movie_genres = kwargs.get('movie_genres')
        movie_genres_embed_matrix = tf.Variable(tf.random_normal([movie_genres_dim, embed_dim], stddev=1.0, mean=0.0), name='movie_genres_embed_matrix')
        movie_genres_embed_layer = tf.nn.embedding_lookup(movie_genres_embed_matrix, movie_genres, name='movie_genres_embed_layer')
        movie_genres_embed_layer = tf.reduce_sum(movie_genres_embed_layer, axis=1, keep_dims=True)
        
        return movie_id_embed_layer, movie_genres_embed_layer
    

In [96]:
##对于电影名称这类的文本数据，这里采用文本卷积提取特征
def get_movie_cnn_layer(movie_title):
    ##电影名称的embedding
    with tf.name_scope('title_embedding'):
        movie_title_embed_matrix = tf.Variable(tf.random_normal([movie_title_dim, embed_dim], stddev=1.0), name='movie_title_embed_matrix')
        ##shape(128, 32)
        movie_title_embed_layer = tf.nn.embedding_lookup(movie_title_embed_matrix, movie_title, name='movie_title_embed_layer')
        ##暂时不加这一个维度，不懂为什么要加这一个维度,   文本特征，缺少了像图像数据的channel(通道)维度
        movie_title_embed_layer_expand = tf.expand_dims(movie_title_embed_layer, -1)
    
    pool_layer_lst = []
    for windows in window_sizes:
        with tf.name_scope('movie_txt_conv_maxpool_{}'.format(windows)):
            filter_weights = tf.Variable(tf.truncated_normal([windows, embed_dim, 1, filter_num], stddev=0.1), name='filter_weights')
            filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name='filter_bias')
            conv_layer = tf.nn.conv2d(movie_title_embed_layer_expand, filter_weights, [1,1,1,1], padding='VALID', name='conv_layer')
            relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias), name='relu_layer')

            ##池化层，针对不同的窗口尺寸的卷积核进行计算
            maxpool_layer = tf.nn.max_pool(relu_layer, [1, sentences_size - windows + 1, 1, 1], [1, 1, 1, 1], padding='VALID', name='maxpool_layer')
            pool_layer_lst.append(maxpool_layer)
    pool_layer = tf.concat(pool_layer_lst, 3, name='pool_layer')
    max_num = len(window_sizes) * filter_num
    pool_layer_flat = tf.reshape(pool_layer, [-1, 1, max_num], name='pool_layer_flat')
    
    return pool_layer_flat

In [97]:
def movie_combine(movie_id_embed_layer, movie_genres_embed_layer, pool_layer_flat):
    with tf.name_scope('movie_fc'):
        ##第一层
        movie_id_fc_layer = tf.layers.dense(movie_id_embed_layer, embed_dim, name='movie_id_fc_layer', activation=tf.nn.relu)
        movie_genres_fc_layer = tf.layers.dense(movie_genres_embed_layer, embed_dim, name='movie_genres_fc_layer', activation=tf.nn.relu)
        
        ##第二层---特征组合？
        movie_combine_layer = tf.concat([movie_id_fc_layer, movie_genres_fc_layer, pool_layer_flat], axis=-1) ###axis在这里表示按照哪个维度进行拼接
        movie_combine_layer = tf.layers.dense(movie_combine_layer, 200, activation=tf.nn.relu)
        
        movie_combine_layer_flat = tf.reshape(movie_combine_layer, [-1, 200])  ##相当于转置
    
    return movie_combine_layer, movie_combine_layer_flat

In [98]:
##将user的embedding向量一起全连接生成user的特征
def user_combine(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer):
    with tf.name_scope('user_fc'):
        ##第一层
        uid_fc_layer = tf.layers.dense(uid_embed_layer, embed_dim, name='uid_fc_layer', activation=tf.nn.relu)
        gender_fc_layer = tf.layers.dense(gender_embed_layer, embed_dim, name='gender_fc_layer', activation=tf.nn.relu)
        age_fc_layer = tf.layers.dense(age_embed_layer, embed_dim, name='age_fc_layer', activation=tf.nn.relu)
        job_fc_layer = tf.layers.dense(job_embed_layer, embed_dim, name='job_fc_layer', activation=tf.nn.relu)
        
        ##第二层
        user_combine_layer = tf.concat([uid_fc_layer, gender_fc_layer, age_fc_layer, job_fc_layer], 2)
        user_combine_layer = tf.layers.dense(user_combine_layer, 200, activation=tf.tanh, name='user_combine_layer')
        
        user_combine_layer_flat = tf.reshape(user_combine_layer, [-1, 200])
    return user_combine_layer, user_combine_layer_flat

In [124]:
###构建计算图
tf.reset_default_graph()
train_graph = tf.Graph()
with train_graph.as_default():
    #获取输入
    uid, user_age, user_gender, user_job, movie_id, movie_genres, movie_title, target, learning_rate = variable_init()
    
    #获取user特征的embedding向量
    uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer = user_feature(uid=uid, user_age=user_age, user_gender=user_gender, user_job= user_job)
    
    movie_id_embed_layer, movie_genres_embed_layer = movie_feature(movie_id=movie_id, movie_genres=movie_genres)
    
    ##获取movie title的特征，embedding向量
    pool_layer_flat = get_movie_cnn_layer(movie_title)
    
    ##获取用户特征---拼接层
    user_combine_layer, user_combine_layer_flat = user_combine(uid_embed_layer=uid_embed_layer, gender_embed_layer=gender_embed_layer, age_embed_layer=age_embed_layer, job_embed_layer=job_embed_layer)
    
    ##获取电影特征拼接层
    movie_combine_layer, movie_combine_layer_flat = movie_combine(movie_id_embed_layer=movie_id_embed_layer, movie_genres_embed_layer=movie_genres_embed_layer, pool_layer_flat=pool_layer_flat)
    
    with tf.name_scope('inference'):
        ##将用户特征和电影特征作为输入， 全连接层，输出一个值
        inference = tf.reduce_sum(user_combine_layer_flat * movie_combine_layer_flat, axis=1)
        ##与rating的维度不对应
        inference = tf.expand_dims(inference, axis=-1)
    
    cost = tf.losses.mean_squared_error(target, inference)
    loss = tf.reduce_mean(cost)
    
    ##优化器
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(gradients, global_step=global_step)

In [73]:
inference

<tf.Tensor 'inference/ExpandDims:0' shape=(?, 1) dtype=float32>

In [128]:
def get_batch(X, y, batch_size):
    for start in range(0, len(X), batch_size):
        end = min(start + batch_size, len(X))
        yield X[start:end], y[start: end]

In [132]:
###前馈神经网络搭建结束
#训练
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import numpy as np
import datetime

kf = StratifiedKFold(n_splits=5, shuffle=True)
losses = {'train': [], 'test': []}
with tf.Session(graph=train_graph) as sess:
#     for train_idx, test_idx in kf.split(features, targets_values):
#         train_X, train_y = features[train_idx], targets_values[train_idx]
        
#         test_X, test_y = features[test_idx], targets_values[test_idx]
        
#         train_batch = get_batch(train_X, train_y, batch_size)
#         test_batch = get_batch(test_X, test_y, batch_size)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    for epoch_i in range(num_epochs):
        ###将数据集分成训练集和测试集，随机种子不固定
        train_X, test_X, train_y, test_y = train_test_split(features, targets_values, test_size=0.2, random_state=0)
        
        train_batches = get_batch(train_X, train_y, batch_size)
        test_batches = get_batch(test_X, test_y, batch_size)
   
        for batch_i in range(len(train_X) // batch_size):
            x, y = next(train_batches)
            
            genres = np.zeros([batch_size, 18])
            titles = np.zeros([batch_size, sentences_size])
            
            for i in range(batch_size):
                genres[i] = x.take(6, 1)[i]
                titles[i] = x.take(5, 1)[i]
            
            feed_dict = {
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_genres: genres,
                movie_title: titles,
                target: np.reshape(y, [batch_size, 1]),
                learning_rate: lr
            }
            
            step, train_loss, _ = sess.run([global_step, loss, train_op],feed_dict)
            losses['train'].append(train_loss)
            if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                time_str = datetime.datetime.now().isoformat()
                print('{:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(time_str, batch_i, (len(train_X) // batch_size), train_loss))
        for batch_i in range(len(test_X) // batch_size):
            x, y = next(test_batches)
            
            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(6, 1)[i]
            
            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(5,1)[i]
            
            feed_dict = {
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_genres: genres,
                movie_title: titles,
                target: np.reshape(y, [batch_size, 1]),
                learning_rate: lr
            }
            
            step, train_loss, _ = sess.run([global_step, loss, train_op], feed_dict=feed_dict)
            
            losses['test'].append(train_loss)            
            if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                time_str = datetime.datetime.now().isoformat()
                print('{}:Epoch {:>3} Batch {:>4}/{} test_loss = {:.3f}'.format(time_str, epoch_i, batch_i, (len(train_X) // batch_size), train_loss))
    saver.save(sess, './DNN_Rec')
    print('it`s over!')

2020-06-30T19:33:36.929154 Batch    0/6251 train_loss = 905.436
2020-06-30T19:33:37.136268 Batch   20/6251 train_loss = 15.003
2020-06-30T19:33:37.363791 Batch   40/6251 train_loss = 1.739
2020-06-30T19:33:37.585963 Batch   60/6251 train_loss = 1.778
2020-06-30T19:33:37.802719 Batch   80/6251 train_loss = 1.787
2020-06-30T19:33:38.029167 Batch  100/6251 train_loss = 1.544
2020-06-30T19:33:38.244463 Batch  120/6251 train_loss = 1.075
2020-06-30T19:33:38.473562 Batch  140/6251 train_loss = 1.513
2020-06-30T19:33:38.686011 Batch  160/6251 train_loss = 1.533
2020-06-30T19:33:38.916806 Batch  180/6251 train_loss = 1.274
2020-06-30T19:33:39.129098 Batch  200/6251 train_loss = 1.311
2020-06-30T19:33:39.343748 Batch  220/6251 train_loss = 1.195
2020-06-30T19:33:39.569535 Batch  240/6251 train_loss = 1.463
2020-06-30T19:33:39.787758 Batch  260/6251 train_loss = 1.261
2020-06-30T19:33:40.006312 Batch  280/6251 train_loss = 1.244
2020-06-30T19:33:40.215934 Batch  300/6251 train_loss = 1.207
2020-

2020-06-30T19:34:06.673292 Batch 2680/6251 train_loss = 0.869
2020-06-30T19:34:06.876965 Batch 2700/6251 train_loss = 0.954
2020-06-30T19:34:07.097252 Batch 2720/6251 train_loss = 0.857
2020-06-30T19:34:07.309306 Batch 2740/6251 train_loss = 1.028
2020-06-30T19:34:07.520880 Batch 2760/6251 train_loss = 0.949
2020-06-30T19:34:07.733196 Batch 2780/6251 train_loss = 0.840
2020-06-30T19:34:07.948455 Batch 2800/6251 train_loss = 1.134
2020-06-30T19:34:08.164824 Batch 2820/6251 train_loss = 0.911
2020-06-30T19:34:08.376345 Batch 2840/6251 train_loss = 0.915
2020-06-30T19:34:08.589678 Batch 2860/6251 train_loss = 1.205
2020-06-30T19:34:08.804490 Batch 2880/6251 train_loss = 0.896
2020-06-30T19:34:09.012155 Batch 2900/6251 train_loss = 0.937
2020-06-30T19:34:09.230045 Batch 2920/6251 train_loss = 0.962
2020-06-30T19:34:09.444323 Batch 2940/6251 train_loss = 0.916
2020-06-30T19:34:09.658575 Batch 2960/6251 train_loss = 0.969
2020-06-30T19:34:09.891143 Batch 2980/6251 train_loss = 1.019
2020-06-

2020-06-30T19:34:35.263681 Batch 5340/6251 train_loss = 0.860
2020-06-30T19:34:35.465012 Batch 5360/6251 train_loss = 0.940
2020-06-30T19:34:35.674798 Batch 5380/6251 train_loss = 0.978
2020-06-30T19:34:35.878876 Batch 5400/6251 train_loss = 1.036
2020-06-30T19:34:36.090586 Batch 5420/6251 train_loss = 0.950
2020-06-30T19:34:36.303890 Batch 5440/6251 train_loss = 0.805
2020-06-30T19:34:36.514980 Batch 5460/6251 train_loss = 1.010
2020-06-30T19:34:36.723176 Batch 5480/6251 train_loss = 0.708
2020-06-30T19:34:36.926104 Batch 5500/6251 train_loss = 1.351
2020-06-30T19:34:37.130392 Batch 5520/6251 train_loss = 0.900
2020-06-30T19:34:37.341160 Batch 5540/6251 train_loss = 0.845
2020-06-30T19:34:37.542702 Batch 5560/6251 train_loss = 0.878
2020-06-30T19:34:37.742256 Batch 5580/6251 train_loss = 0.838
2020-06-30T19:34:37.935787 Batch 5600/6251 train_loss = 1.118
2020-06-30T19:34:38.133788 Batch 5620/6251 train_loss = 0.858
2020-06-30T19:34:38.330726 Batch 5640/6251 train_loss = 1.173
2020-06-

2020-06-30T19:34:59.838395:Epoch   0 Batch 1540/6251 test_loss = 0.882
2020-06-30T19:35:00.043516:Epoch   0 Batch 1560/6251 test_loss = 0.905
2020-06-30T19:35:01.171033 Batch    9/6251 train_loss = 0.858
2020-06-30T19:35:01.387108 Batch   29/6251 train_loss = 0.880
2020-06-30T19:35:01.598531 Batch   49/6251 train_loss = 0.806
2020-06-30T19:35:01.816342 Batch   69/6251 train_loss = 0.919
2020-06-30T19:35:02.031095 Batch   89/6251 train_loss = 0.705
2020-06-30T19:35:02.245161 Batch  109/6251 train_loss = 0.705
2020-06-30T19:35:02.460995 Batch  129/6251 train_loss = 0.945
2020-06-30T19:35:02.676032 Batch  149/6251 train_loss = 0.952
2020-06-30T19:35:02.888811 Batch  169/6251 train_loss = 0.918
2020-06-30T19:35:03.101844 Batch  189/6251 train_loss = 0.838
2020-06-30T19:35:03.319752 Batch  209/6251 train_loss = 0.865
2020-06-30T19:35:03.540752 Batch  229/6251 train_loss = 0.970
2020-06-30T19:35:03.755539 Batch  249/6251 train_loss = 0.758
2020-06-30T19:35:03.962925 Batch  269/6251 train_los

2020-06-30T19:35:29.305198 Batch 2609/6251 train_loss = 0.725
2020-06-30T19:35:29.527916 Batch 2629/6251 train_loss = 0.883
2020-06-30T19:35:29.740724 Batch 2649/6251 train_loss = 0.871
2020-06-30T19:35:29.954921 Batch 2669/6251 train_loss = 0.665
2020-06-30T19:35:30.171346 Batch 2689/6251 train_loss = 0.839
2020-06-30T19:35:30.384687 Batch 2709/6251 train_loss = 0.765
2020-06-30T19:35:30.588904 Batch 2729/6251 train_loss = 0.792
2020-06-30T19:35:30.803465 Batch 2749/6251 train_loss = 0.775
2020-06-30T19:35:31.020021 Batch 2769/6251 train_loss = 1.068
2020-06-30T19:35:31.236744 Batch 2789/6251 train_loss = 0.747
2020-06-30T19:35:31.448621 Batch 2809/6251 train_loss = 0.937
2020-06-30T19:35:31.663175 Batch 2829/6251 train_loss = 0.809
2020-06-30T19:35:31.881271 Batch 2849/6251 train_loss = 0.876
2020-06-30T19:35:32.095102 Batch 2869/6251 train_loss = 0.690
2020-06-30T19:35:32.311484 Batch 2889/6251 train_loss = 0.792
2020-06-30T19:35:32.525169 Batch 2909/6251 train_loss = 0.937
2020-06-

2020-06-30T19:35:58.343519 Batch 5269/6251 train_loss = 0.829
2020-06-30T19:35:58.547421 Batch 5289/6251 train_loss = 0.906
2020-06-30T19:35:58.758228 Batch 5309/6251 train_loss = 0.856
2020-06-30T19:35:58.964420 Batch 5329/6251 train_loss = 0.760
2020-06-30T19:35:59.170317 Batch 5349/6251 train_loss = 0.883
2020-06-30T19:35:59.376492 Batch 5369/6251 train_loss = 1.097
2020-06-30T19:35:59.586791 Batch 5389/6251 train_loss = 0.881
2020-06-30T19:35:59.794283 Batch 5409/6251 train_loss = 0.681
2020-06-30T19:36:00.000932 Batch 5429/6251 train_loss = 1.155
2020-06-30T19:36:00.211959 Batch 5449/6251 train_loss = 0.937
2020-06-30T19:36:00.419800 Batch 5469/6251 train_loss = 0.690
2020-06-30T19:36:00.632241 Batch 5489/6251 train_loss = 0.955
2020-06-30T19:36:00.845405 Batch 5509/6251 train_loss = 0.826
2020-06-30T19:36:01.065224 Batch 5529/6251 train_loss = 0.928
2020-06-30T19:36:01.302196 Batch 5549/6251 train_loss = 0.697
2020-06-30T19:36:01.706388 Batch 5569/6251 train_loss = 0.945
2020-06-

2020-06-30T19:36:25.338625:Epoch   1 Batch 1469/6251 test_loss = 0.779
2020-06-30T19:36:25.542301:Epoch   1 Batch 1489/6251 test_loss = 0.850
2020-06-30T19:36:25.759678:Epoch   1 Batch 1509/6251 test_loss = 1.253
2020-06-30T19:36:25.985227:Epoch   1 Batch 1529/6251 test_loss = 1.081
2020-06-30T19:36:26.206079:Epoch   1 Batch 1549/6251 test_loss = 0.775
2020-06-30T19:36:27.716714 Batch   18/6251 train_loss = 0.741
2020-06-30T19:36:27.937310 Batch   38/6251 train_loss = 0.923
2020-06-30T19:36:28.160703 Batch   58/6251 train_loss = 0.832
2020-06-30T19:36:28.382159 Batch   78/6251 train_loss = 0.839
2020-06-30T19:36:28.607833 Batch   98/6251 train_loss = 0.927
2020-06-30T19:36:28.829796 Batch  118/6251 train_loss = 0.730
2020-06-30T19:36:29.074057 Batch  138/6251 train_loss = 0.951
2020-06-30T19:36:29.310664 Batch  158/6251 train_loss = 0.782
2020-06-30T19:36:29.535636 Batch  178/6251 train_loss = 0.960
2020-06-30T19:36:29.751256 Batch  198/6251 train_loss = 1.187
2020-06-30T19:36:29.97067

2020-06-30T19:36:56.250268 Batch 2558/6251 train_loss = 1.149
2020-06-30T19:36:56.446985 Batch 2578/6251 train_loss = 1.009
2020-06-30T19:36:56.649641 Batch 2598/6251 train_loss = 0.770
2020-06-30T19:36:56.848393 Batch 2618/6251 train_loss = 0.881
2020-06-30T19:36:57.052058 Batch 2638/6251 train_loss = 0.866
2020-06-30T19:36:57.254183 Batch 2658/6251 train_loss = 0.677
2020-06-30T19:36:57.455474 Batch 2678/6251 train_loss = 0.728
2020-06-30T19:36:57.659716 Batch 2698/6251 train_loss = 0.954
2020-06-30T19:36:57.868991 Batch 2718/6251 train_loss = 0.918
2020-06-30T19:36:58.068454 Batch 2738/6251 train_loss = 0.802
2020-06-30T19:36:58.277993 Batch 2758/6251 train_loss = 0.922
2020-06-30T19:36:58.473920 Batch 2778/6251 train_loss = 0.777
2020-06-30T19:36:58.687526 Batch 2798/6251 train_loss = 0.676
2020-06-30T19:36:58.896500 Batch 2818/6251 train_loss = 0.964
2020-06-30T19:36:59.102888 Batch 2838/6251 train_loss = 0.788
2020-06-30T19:36:59.447673 Batch 2858/6251 train_loss = 0.938
2020-06-

2020-06-30T19:37:26.409422 Batch 5218/6251 train_loss = 0.921
2020-06-30T19:37:26.666380 Batch 5238/6251 train_loss = 0.887
2020-06-30T19:37:26.982262 Batch 5258/6251 train_loss = 0.794
2020-06-30T19:37:27.317709 Batch 5278/6251 train_loss = 0.739
2020-06-30T19:37:27.590267 Batch 5298/6251 train_loss = 0.968
2020-06-30T19:37:27.841709 Batch 5318/6251 train_loss = 0.664
2020-06-30T19:37:28.080470 Batch 5338/6251 train_loss = 0.732
2020-06-30T19:37:28.313199 Batch 5358/6251 train_loss = 0.733
2020-06-30T19:37:28.546895 Batch 5378/6251 train_loss = 0.813
2020-06-30T19:37:28.777138 Batch 5398/6251 train_loss = 0.704
2020-06-30T19:37:28.994083 Batch 5418/6251 train_loss = 0.762
2020-06-30T19:37:29.215961 Batch 5438/6251 train_loss = 0.905
2020-06-30T19:37:29.454113 Batch 5458/6251 train_loss = 1.080
2020-06-30T19:37:29.676806 Batch 5478/6251 train_loss = 0.914
2020-06-30T19:37:29.894620 Batch 5498/6251 train_loss = 1.004
2020-06-30T19:37:30.106172 Batch 5518/6251 train_loss = 0.979
2020-06-

2020-06-30T19:37:54.278383:Epoch   2 Batch 1418/6251 test_loss = 0.850
2020-06-30T19:37:54.518663:Epoch   2 Batch 1438/6251 test_loss = 0.764
2020-06-30T19:37:54.745813:Epoch   2 Batch 1458/6251 test_loss = 0.699
2020-06-30T19:37:54.973226:Epoch   2 Batch 1478/6251 test_loss = 0.950
2020-06-30T19:37:55.188791:Epoch   2 Batch 1498/6251 test_loss = 0.989
2020-06-30T19:37:55.416075:Epoch   2 Batch 1518/6251 test_loss = 0.731
2020-06-30T19:37:55.643421:Epoch   2 Batch 1538/6251 test_loss = 0.608
2020-06-30T19:37:55.865609:Epoch   2 Batch 1558/6251 test_loss = 0.745
2020-06-30T19:37:57.117436 Batch    7/6251 train_loss = 0.855
2020-06-30T19:37:57.349544 Batch   27/6251 train_loss = 0.843
2020-06-30T19:37:57.573377 Batch   47/6251 train_loss = 0.809
2020-06-30T19:37:57.793992 Batch   67/6251 train_loss = 1.001
2020-06-30T19:37:58.008642 Batch   87/6251 train_loss = 0.899
2020-06-30T19:37:58.230160 Batch  107/6251 train_loss = 0.741
2020-06-30T19:37:58.454472 Batch  127/6251 train_loss = 0.59

2020-06-30T19:38:25.277812 Batch 2467/6251 train_loss = 0.919
2020-06-30T19:38:25.508501 Batch 2487/6251 train_loss = 1.015
2020-06-30T19:38:25.823344 Batch 2507/6251 train_loss = 1.055
2020-06-30T19:38:26.057611 Batch 2527/6251 train_loss = 0.823
2020-06-30T19:38:26.285459 Batch 2547/6251 train_loss = 0.683
2020-06-30T19:38:26.520724 Batch 2567/6251 train_loss = 0.674
2020-06-30T19:38:26.831652 Batch 2587/6251 train_loss = 0.737
2020-06-30T19:38:27.150907 Batch 2607/6251 train_loss = 0.646
2020-06-30T19:38:27.448435 Batch 2627/6251 train_loss = 0.740
2020-06-30T19:38:27.802787 Batch 2647/6251 train_loss = 0.668
2020-06-30T19:38:28.140505 Batch 2667/6251 train_loss = 0.756
2020-06-30T19:38:28.453480 Batch 2687/6251 train_loss = 0.831
2020-06-30T19:38:28.754583 Batch 2707/6251 train_loss = 0.723
2020-06-30T19:38:29.008223 Batch 2727/6251 train_loss = 1.090
2020-06-30T19:38:29.249105 Batch 2747/6251 train_loss = 0.796
2020-06-30T19:38:29.504125 Batch 2767/6251 train_loss = 0.933
2020-06-

2020-06-30T19:38:56.700197 Batch 5127/6251 train_loss = 0.822
2020-06-30T19:38:56.931376 Batch 5147/6251 train_loss = 0.736
2020-06-30T19:38:57.151523 Batch 5167/6251 train_loss = 0.903
2020-06-30T19:38:57.375735 Batch 5187/6251 train_loss = 0.905
2020-06-30T19:38:57.600829 Batch 5207/6251 train_loss = 0.943
2020-06-30T19:38:57.820825 Batch 5227/6251 train_loss = 1.021
2020-06-30T19:38:58.040157 Batch 5247/6251 train_loss = 0.713
2020-06-30T19:38:58.260681 Batch 5267/6251 train_loss = 0.593
2020-06-30T19:38:58.489886 Batch 5287/6251 train_loss = 0.856
2020-06-30T19:38:58.729952 Batch 5307/6251 train_loss = 0.772
2020-06-30T19:38:58.957759 Batch 5327/6251 train_loss = 0.982
2020-06-30T19:38:59.178465 Batch 5347/6251 train_loss = 0.768
2020-06-30T19:38:59.417233 Batch 5367/6251 train_loss = 0.784
2020-06-30T19:38:59.639932 Batch 5387/6251 train_loss = 0.721
2020-06-30T19:38:59.854958 Batch 5407/6251 train_loss = 0.733
2020-06-30T19:39:00.077902 Batch 5427/6251 train_loss = 0.825
2020-06-

2020-06-30T19:39:23.275275:Epoch   3 Batch 1327/6251 test_loss = 1.006
2020-06-30T19:39:23.483722:Epoch   3 Batch 1347/6251 test_loss = 0.937
2020-06-30T19:39:23.697050:Epoch   3 Batch 1367/6251 test_loss = 0.874
2020-06-30T19:39:23.902885:Epoch   3 Batch 1387/6251 test_loss = 1.008
2020-06-30T19:39:24.117358:Epoch   3 Batch 1407/6251 test_loss = 0.722
2020-06-30T19:39:24.324807:Epoch   3 Batch 1427/6251 test_loss = 0.767
2020-06-30T19:39:24.537390:Epoch   3 Batch 1447/6251 test_loss = 0.809
2020-06-30T19:39:24.752700:Epoch   3 Batch 1467/6251 test_loss = 0.621
2020-06-30T19:39:24.966938:Epoch   3 Batch 1487/6251 test_loss = 0.886
2020-06-30T19:39:25.178122:Epoch   3 Batch 1507/6251 test_loss = 0.794
2020-06-30T19:39:25.392088:Epoch   3 Batch 1527/6251 test_loss = 0.781
2020-06-30T19:39:25.594720:Epoch   3 Batch 1547/6251 test_loss = 0.969
2020-06-30T19:39:27.107921 Batch   16/6251 train_loss = 0.827
2020-06-30T19:39:27.341998 Batch   36/6251 train_loss = 0.746
2020-06-30T19:39:27.6067

2020-06-30T19:39:56.762395 Batch 2396/6251 train_loss = 0.838
2020-06-30T19:39:56.967845 Batch 2416/6251 train_loss = 0.707
2020-06-30T19:39:57.176768 Batch 2436/6251 train_loss = 0.809
2020-06-30T19:39:57.391218 Batch 2456/6251 train_loss = 0.903
2020-06-30T19:39:57.600738 Batch 2476/6251 train_loss = 1.092
2020-06-30T19:39:57.804201 Batch 2496/6251 train_loss = 0.768
2020-06-30T19:39:58.014632 Batch 2516/6251 train_loss = 0.870
2020-06-30T19:39:58.231362 Batch 2536/6251 train_loss = 0.941
2020-06-30T19:39:58.448789 Batch 2556/6251 train_loss = 0.933
2020-06-30T19:39:58.657605 Batch 2576/6251 train_loss = 0.941
2020-06-30T19:39:58.864577 Batch 2596/6251 train_loss = 0.581
2020-06-30T19:39:59.077417 Batch 2616/6251 train_loss = 0.966
2020-06-30T19:39:59.294150 Batch 2636/6251 train_loss = 0.694
2020-06-30T19:39:59.514249 Batch 2656/6251 train_loss = 0.968
2020-06-30T19:39:59.729643 Batch 2676/6251 train_loss = 0.648
2020-06-30T19:39:59.942234 Batch 2696/6251 train_loss = 0.750
2020-06-

2020-06-30T19:40:26.784913 Batch 5056/6251 train_loss = 0.738
2020-06-30T19:40:26.995834 Batch 5076/6251 train_loss = 0.873
2020-06-30T19:40:27.197194 Batch 5096/6251 train_loss = 1.034
2020-06-30T19:40:27.396936 Batch 5116/6251 train_loss = 0.972
2020-06-30T19:40:27.607515 Batch 5136/6251 train_loss = 0.866
2020-06-30T19:40:27.814726 Batch 5156/6251 train_loss = 0.978
2020-06-30T19:40:28.029633 Batch 5176/6251 train_loss = 0.893
2020-06-30T19:40:28.235258 Batch 5196/6251 train_loss = 0.888
2020-06-30T19:40:28.443087 Batch 5216/6251 train_loss = 0.777
2020-06-30T19:40:28.647466 Batch 5236/6251 train_loss = 0.877
2020-06-30T19:40:28.861737 Batch 5256/6251 train_loss = 0.824
2020-06-30T19:40:29.068774 Batch 5276/6251 train_loss = 0.896
2020-06-30T19:40:29.278773 Batch 5296/6251 train_loss = 0.808
2020-06-30T19:40:29.491987 Batch 5316/6251 train_loss = 0.761
2020-06-30T19:40:29.700197 Batch 5336/6251 train_loss = 0.790
2020-06-30T19:40:29.915587 Batch 5356/6251 train_loss = 0.742
2020-06-

2020-06-30T19:40:53.978605:Epoch   4 Batch 1276/6251 test_loss = 0.737
2020-06-30T19:40:54.186081:Epoch   4 Batch 1296/6251 test_loss = 0.652
2020-06-30T19:40:54.397585:Epoch   4 Batch 1316/6251 test_loss = 0.979
2020-06-30T19:40:54.626574:Epoch   4 Batch 1336/6251 test_loss = 0.861
2020-06-30T19:40:54.876628:Epoch   4 Batch 1356/6251 test_loss = 0.871
2020-06-30T19:40:55.102079:Epoch   4 Batch 1376/6251 test_loss = 0.874
2020-06-30T19:40:55.373832:Epoch   4 Batch 1396/6251 test_loss = 0.680
2020-06-30T19:40:55.647945:Epoch   4 Batch 1416/6251 test_loss = 0.758
2020-06-30T19:40:55.897261:Epoch   4 Batch 1436/6251 test_loss = 0.883
2020-06-30T19:40:56.128426:Epoch   4 Batch 1456/6251 test_loss = 0.844
2020-06-30T19:40:56.351542:Epoch   4 Batch 1476/6251 test_loss = 0.637
2020-06-30T19:40:56.577305:Epoch   4 Batch 1496/6251 test_loss = 0.780
2020-06-30T19:40:56.808322:Epoch   4 Batch 1516/6251 test_loss = 0.969
2020-06-30T19:40:57.029862:Epoch   4 Batch 1536/6251 test_loss = 0.688
2020-0

In [134]:
##获取已经计算的张量
"""
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_genres: genres,
                movie_title: titles,
                target: np.reshape(y, [batch_size, 1]),
                learning_rate: lr
"""
def get_tensor(loaded_graph):
    ##特征张量
    uid = loaded_graph.get_tensor_by_name('uid:0')
    user_gender = loaded_graph.get_tensor_by_name('user_gender:0')
    user_age = loaded_graph.get_tensor_by_name('user_age:0')
    user_job = loaded_graph.get_tensor_by_name('user_job:0')
    movie_id = loaded_graph.get_tensor_by_name('movie_id:0')
    movie_genres = loaded_graph.get_tensor_by_name('movie_genres:0')
    movie_title = loaded_graph.get_tensor_by_name('movie_title:0')
    target = loaded_graph.get_tensor_by_name('rating:0')
    learning_rate = loaded_graph.get_tensor_by_name('learning_rate:0')
    
    ##user矩阵*movie矩阵
    inference = loaded_graph.get_tensor_by_name('inference/ExpandDims:0')
    
    ##user和movie组合特征向量
    movie_combine_layer_flat = loaded_graph.get_tensor_by_name('movie_fc/Reshape:0')
    user_combine_layer_flat = loaded_graph.get_tensor_by_name('user_fc/Reshape:0')
    
    return uid, user_gender, user_age, user_job, movie_id, movie_genres, movie_title, target, learning_rate, inference, movie_combine_layer_flat, user_combine_layer_flat

In [223]:
def predict_rating(user_id_val, movie_id_val):
    ###!!!!!!session内的变量名，不可与session外变量名一致
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        ##导入之前训练好的模型
        #导入神经网络的计算图结构
        loader = tf.train.import_meta_graph('./DNN_Rec' + '.meta')  ##加载已经持久化的计算图
        loader.restore(sess, './DNN_Rec') ##恢复模型之前的参数以及计算

        uid_, user_gender_, user_age_, user_job_, movie_id_, movie_genres_, movie_title_, target_, learning_rate_, inference_, _, _ = get_tensor(loaded_graph)
        
        genres = np.zeros([1, 18])
        
        genres[0] = data[data['MovieID'] == movie_id_val]['Genres'].values[0]
        
        titles = np.zeros([1, 15])
        
        titles[0] = data[data['MovieID'] == movie_id_val]['Title'].values[0]
        
        gender = np.reshape(data[data['UserID'] == user_id_val]['Gender'].values[0], [1, 1])
        
        age = np.reshape(data[data['UserID'] == user_id_val]['Age'].values[0], [1, 1])
        
        job = np.reshape(data[data['UserID'] == user_id_val]['JobID'].values[0], [1, 1])
        
        movie_id = np.reshape([movie_id_val], [1, 1])
        
        feed_dict = {
            uid_: np.reshape([user_id_val], [1, 1]),
            user_gender_: gender,
            user_age_: age,
            user_job_: job,
            movie_id_: movie_id,
            movie_title_: titles,
            movie_genres_: genres
        }
        
        predict = sess.run([inference_], feed_dict=feed_dict)
        
        return predict
        

In [246]:
###计算user特征矩阵和movie特征矩阵
loaded_graph = tf.Graph()
movie_matrics = []
user_matrics = []
with tf.Session(graph=loaded_graph) as sess:
    ##导入之前训练好的模型
    #导入神经网络的计算图结构
    loader = tf.train.import_meta_graph('./DNN_Rec' + '.meta')  ##加载已经持久化的计算图
    loader.restore(sess, './DNN_Rec') ##恢复模型之前的参数以及计算

    uid_, user_gender_, user_age_, user_job_, movie_id_, movie_genres_, movie_title_, target_, learning_rate_, inference_, movie_combine_layer_flat_, user_combine_layer_flat_ = get_tensor(loaded_graph)


    for movieid in movies['MovieID'].values:
        genres = np.zeros([1, 18])
        titles = np.zeros([1, 15])
        genres[0] = movies.values[movieid2idx[movieid]][2]   ###这里的特征ID处理有问题，作者的处理方式感觉不太科学，暂时先用吧
        titles[0] = movies.values[movieid2idx[movieid]][1]
        movieid = np.reshape([movieid], [1, 1])
        feed_dict = {
            movie_id_: movieid,
            movie_title_: titles,
            movie_genres_: genres
        }
        movie_combine_layer_flat_val = sess.run([movie_combine_layer_flat_], feed_dict=feed_dict)
        movie_matrics.append(movie_combine_layer_flat_val)
    
    for userid in users['UserID'].values:
        gender = np.reshape(data[data['UserID'] == userid]['Gender'].values[0], [1, 1])
        
        age = np.reshape(data[data['UserID'] == userid]['Age'].values[0], [1, 1])
        
        job = np.reshape(data[data['UserID'] == userid]['JobID'].values[0], [1, 1])
                
        feed_dict = {
            uid_: np.reshape([userid], [1, 1]),
            user_gender_: gender,
            user_age_: age,
            user_job_: job,
        }
        user_combine_layer_flat_val = sess.run([user_combine_layer_flat_], feed_dict=feed_dict)
        user_matrics.append(user_combine_layer_flat_val)
    
    movie_matrics = np.array(movie_matrics).reshape([-1, 200])
    user_matrics = np.array(user_matrics).reshape([-1, 200])
    

INFO:tensorflow:Restoring parameters from ./DNN_Rec


In [225]:
print(predict_rating(234, 1401))

INFO:tensorflow:Restoring parameters from ./DNN_Rec
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
[array([[3.6275811]], dtype=float32)]


In [326]:
##通过观看的电影来进行推荐(将接收的movieID获取到该电影的特征向量，通过和电影矩阵进行计算相似度，返回相似度最高的n部电影)
def recommend_by_movie(movie_id_val, top_k=20):
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        loader = tf.train.import_meta_graph('./DNN_Rec' + '.meta')
        loader.restore(sess, './DNN_Rec')
        
        ##相似度计算(余弦相似度计算: a*b / sqrt(sum(a ** 2)) + sqrt(sum(b ** 2))
        
        ##分母计算
        norm_movie_matrics = tf.sqrt(tf.reduce_sum(tf.square(movie_matrics), 1, keep_dims=True))
        ##计算a / sqrt(sum(a ** 2))
#         normalizerd_matrics = movie_matrics / norm_movie_matrics
        
        ##相似度计算，用以推荐
        probs_embeddings = (movie_matrics[movieid2idx[movie_id_val]]).reshape([1, 200])
        norm_probs_embeddings = tf.sqrt(tf.reduce_sum(tf.square(probs_embeddings), 1, keep_dims=True))
        normalizerd_matrics = movie_matrics / (norm_movie_matrics * norm_probs_embeddings)
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(normalizerd_matrics))
        
        sims = (probs_similarity.eval())
        rec = np.squeeze(sims)
        res = np.argsort(rec)[-1:-(top_k + 1):-1]
        print(res)
        result = []
        print("您看的电影是：{}".format(movies_orig[movieid2idx[movie_id_val]]))
        print("以下是给您的推荐：")
        
        for rec_val in res:
#             print(rec_val)
            print(movies_orig[rec_val])
            result.append(movies_orig[rec_val])
        return result


In [327]:
result = recommend_by_movie(356)

INFO:tensorflow:Restoring parameters from ./DNN_Rec
[1717  712 2552 1347  645  648 2497 1348 2148  684 2095 2087 2673 1994
 1961  753  779  810  845 1463]
您看的电影是：[356 'Forrest Gump (1994)' 'Comedy|Romance|War']
以下是给您的推荐：
[1773 'Tokyo Fist (1995)' 'Action|Drama']
[721 'Halfmoon (Paul Bowles - Halbmond) (1995)' 'Drama']
[2621 'Xiu Xiu: The Sent-Down Girl (Tian yu) (1998)' 'Drama|Romance']
[1368 'Forbidden Christ, The (Cristo proibito, Il) (1950)' 'Drama']
[651 'Superweib, Das (1996)' 'Comedy']
[654 'Und keiner weint mir nach (1996)' 'Drama|Romance']
[2566 "Doug's 1st Movie (1999)" "Animation|Children's"]
[1369 "I Can't Sleep (J'ai pas sommeil) (1994)" 'Drama|Thriller']
[2217 'Elstree Calling (1930)' 'Comedy|Musical']
[693 'Under the Domin Tree (Etz Hadomim Tafus) (1994)' 'Drama']
[2164 'Surf Nazis Must Die (1987)' 'Drama']
[2156 'Best Man, The (Il Testimone dello sposo) (1997)' 'Comedy|Drama']
[2742 'M�nage (Tenue de soir�e) (1986)' 'Comedy|Drama']
[2063 'Seventh Heaven (Le Septi�me ciel

In [428]:
import random
def recommend_by_user(user_id_val, top_k=20):
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        loader = tf.train.import_meta_graph('./DNN_Rec' + '.meta')
        loader.restore(sess, './DNN_Rec')
        
        ##相似度计算(余弦相似度计算: a*b / sqrt(sum(a ** 2)) + sqrt(sum(b ** 2))
        
        ##分母计算
        norm_user_matrics = tf.sqrt(tf.reduce_sum(tf.square(user_matrics), 1, keep_dims=True))
        ##计算a / sqrt(sum(a ** 2))
#         normalizerd_matrics = movie_matrics / norm_movie_matrics
        
        ##相似度计算，用以推荐
        probs_embeddings = (user_matrics[user_id_val -1]).reshape([1, 200])
        norm_probs_embeddings = tf.sqrt(tf.reduce_sum(tf.square(probs_embeddings), 1, keep_dims=True))
        normalizerd_matrics = user_matrics / (norm_user_matrics * norm_probs_embeddings)
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(normalizerd_matrics))
        
        sims = (probs_similarity.eval())
        rec = np.squeeze(sims)
        res = np.argsort(rec)[-1:-(top_k + 1):-1]
#         print(res)
        movie_names = []
#         print("您看的电影是：{}".format(movies_orig[movieid2idx[movie_id_val]]))
        print("以下是给您的推荐：")
        results = set()
        
        ##计算共现矩阵
        movie_id_dict = {
            user_id_val: data[data['UserID'] == user_id_val]['MovieID'].values
        }
        for res_user_id in res:
            idx = (data[data['UserID'] == res_user_id]).index
            movie_id_list = []
            for idx_i in idx:
                if data.loc[idx_i]['ratings'] > 3:
                    movie_id_list.append(data.loc[idx_i]['MovieID'])
            movie_id_dict[idx_i] = movie_id_list
        while len(results) != 20:
            for _userid, _movieid in movie_id_dict.items():
                if _userid != user_id_val:
                    c = random.choice(_movieid)
                    if c not in movie_id_dict[user_id_val]:
                        results.add(c)
                        break
#             break
        for rec_val in results:
#             print(rec_val)
            print(movies_orig[rec_val])
            movie_names.append(movies_orig[rec_val])
        return movie_names

In [429]:
###计算与该用户最相似的多个用户，并将这多个用户喜欢的电影推荐给该用户
recommend_by_user(10)

INFO:tensorflow:Restoring parameters from ./DNN_Rec
以下是给您的推荐：
[2761 'Iron Giant, The (1999)' "Animation|Children's"]
[788 'Nutty Professor, The (1996)' 'Comedy|Fantasy|Romance|Sci-Fi']
[17 'Sense and Sensibility (1995)' 'Drama|Romance']
[26 'Othello (1995)' 'Drama']
[810 'Kazaam (1996)' "Children's|Comedy|Fantasy"]
[296 'Pulp Fiction (1994)' 'Crime|Drama']
[3692 "Class of Nuke 'Em High (1986)" 'Comedy|Horror']
[303 'Quick and the Dead, The (1995)' 'Action|Adventure|Western']
[51 'Guardian Angel (1994)' 'Action|Drama|Thriller']
[1498 'Inventing the Abbotts (1997)' 'Drama|Romance']
[3585 'Great Locomotive Chase, The (1956)' 'Adventure|War']
[1105 'Children of the Corn IV: The Gathering (1996)' 'Horror']
[3723 'Hamlet (1990)' 'Drama']
[461 'Go Fish (1994)' 'Drama|Romance']
[1519 'Broken English (1996)' 'Drama']
[1379 'Young Guns II (1990)' 'Action|Comedy|Western']
[3367 "Devil's Brigade, The (1968)" 'War']
[1007 'Apple Dumpling Gang, The (1975)' "Children's|Comedy|Western"]
[1846 'Nil By 

[array([2761, 'Iron Giant, The (1999)', "Animation|Children's"],
       dtype=object),
 array([788, 'Nutty Professor, The (1996)',
        'Comedy|Fantasy|Romance|Sci-Fi'], dtype=object),
 array([17, 'Sense and Sensibility (1995)', 'Drama|Romance'], dtype=object),
 array([26, 'Othello (1995)', 'Drama'], dtype=object),
 array([810, 'Kazaam (1996)', "Children's|Comedy|Fantasy"], dtype=object),
 array([296, 'Pulp Fiction (1994)', 'Crime|Drama'], dtype=object),
 array([3692, "Class of Nuke 'Em High (1986)", 'Comedy|Horror'],
       dtype=object),
 array([303, 'Quick and the Dead, The (1995)', 'Action|Adventure|Western'],
       dtype=object),
 array([51, 'Guardian Angel (1994)', 'Action|Drama|Thriller'], dtype=object),
 array([1498, 'Inventing the Abbotts (1997)', 'Drama|Romance'],
       dtype=object),
 array([3585, 'Great Locomotive Chase, The (1956)', 'Adventure|War'],
       dtype=object),
 array([1105, 'Children of the Corn IV: The Gathering (1996)', 'Horror'],
       dtype=object),
 

In [405]:
idx = (data[data['UserID'] == 4]).index
# print(list(idx))
movie_id_list = []
for idx_i in idx:
    if data.loc[idx_i]['ratings'] > 3:
        movie_id_list.append(data.loc[idx_i]['MovieID'])
# idx

In [417]:
data[data['UserID'] == 10]

Unnamed: 0,UserID,MovieID,ratings,Gender,Age,JobID,Title,Genres
2252,10,914,5,0,3,1,"[4817, 5158, 3012, 2407, 2407, 2407, 2407, 240...","[12, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1..."
2890,10,3408,4,0,3,1,"[2561, 2583, 2407, 2407, 2407, 2407, 2407, 240...","[11, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1..."
4205,10,2355,4,0,3,1,"[2543, 4295, 1252, 2407, 2407, 2407, 2407, 240...","[14, 0, 3, 18, 18, 18, 18, 18, 18, 18, 18, 18,..."
5906,10,1197,5,0,3,1,"[4536, 1618, 4455, 2407, 2407, 2407, 2407, 240...","[16, 13, 3, 17, 18, 18, 18, 18, 18, 18, 18, 18..."
8223,10,1287,3,0,3,1,"[1678, 2407, 2407, 2407, 2407, 2407, 2407, 240...","[16, 13, 11, 18, 18, 18, 18, 18, 18, 18, 18, 1..."
...,...,...,...,...,...,...,...,...
553727,10,2043,5,0,3,1,"[512, 4668, 4089, 1907, 639, 1140, 2407, 2407,...","[13, 0, 6, 18, 18, 18, 18, 18, 18, 18, 18, 18,..."
553885,10,2045,3,0,3,1,"[106, 881, 3258, 1252, 2407, 2407, 2407, 2407,...","[13, 0, 11, 17, 18, 18, 18, 18, 18, 18, 18, 18..."
553940,10,2046,4,0,3,1,"[5193, 350, 1907, 3740, 2407, 2407, 2407, 2407...","[13, 0, 1, 18, 18, 18, 18, 18, 18, 18, 18, 18,..."
554476,10,2047,4,0,3,1,"[5028, 4455, 2407, 2407, 2407, 2407, 2407, 240...","[0, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18..."
