# 推荐书籍
该特简单推荐系统的思路:
输入用户ID:userID
对该用户计算 P = a * b
p:该用户对书籍book可能感兴趣的程度
a:该用户打过x标签的次数
b:书籍book被打过x标签的次数
note:一本书可以被一个用户打多种标签

## 准备数据

In [18]:
from numpy import *
import pandas as pd
import tensorflow as tf

In [19]:
tags_df = pd.read_table(r'D:\dataSet\delicious\tags.txt')
tags_df.head()

Unnamed: 0,id,value
0,1,collection_development
1,2,library
2,3,collection
3,4,development
4,5,lesson_plan


In [20]:
records = pd.read_table(r'D:\dataSet\delicious\user_taggedbookmarks.txt')
records.drop(records.columns[[3,4,5,6,7,8]], 1, inplace=True)
records.head()  #一本书可能被打多个标签

Unnamed: 0,userID,bookmarkID,tagID
0,8,1,1
1,8,2,1
2,8,7,1
3,8,7,6
4,8,7,7


## 构建模型
对于每个标签,找到被打这个标签次数最多的n本书
找到用户最常用的k个标签,推荐对应标签下最热门的书

In [21]:
def structDict(tag_book = {}, user_tag = {}):
    for index, row in records.iterrows():
        tagID = row['tagID'] ; bookmarkID = row['bookmarkID']
        tag_book.setdefault(tagID, {})
        tag_book[tagID].setdefault(bookmarkID, 0)
        tag_book[tagID][bookmarkID] += 1

        userID = row['userID']
        user_tag.setdefault(userID, {})
        user_tag[userID].setdefault(tagID, 0)
        user_tag[userID][tagID] += 1

    return tag_book, user_tag

In [23]:
#p函数时间复杂度太高爆内存,所以采取了recommend函数:给定userID后再计算字典
def p(user_tag_dict, tag_book_dict):    #计算用户对书籍的兴趣p
    user_book_interest = {}
    for user in user_tag_dict.keys():   #遍历用户
        user_book_interest.setdefault(user, {})
        for tag in user_tag_dict[user].keys():   #遍历用户的标签
            for book in tag_book_dict[tag].keys():   #遍历被该标签标记的书
                user_book_interest[user].setdefault(book, 0)
                user_book_interest[user][book] += user_tag_dict[user][tag] \
                * tag_book_dict[tag][book]        
    
    user_book_interest_sorted = {}
    for key in user_book_interest.keys():
        user_book_interest_sorted[key] = sorted(user_book_interest[key].items(), \
                                               key = lambda x:x[1], reverse = True)
    
    return user_book_interest_sorted

def recommend(userID, user_tag_dict, tag_book_dict, recom_num = 3):
    recommend_dict = {}
    targeted_book = records.loc[records['userID'] == userID]['bookmarkID']
    for tag in user_tag_dict[userID].keys():
        for book in tag_book_dict[tag].keys():
            recommend_dict.setdefault(book, 0)
            recommend_dict[book] += user_tag_dict[userID][tag] * \
                                     tag_book_dict[tag][book]
    
    #排序选取recom_num本书
    recommend_dict_sorted = sorted(recommend_dict.items(), key = lambda x:x[1], reverse = True)
    
    #排除该用户已经看过的书:打过标签代表看过
    recommend_dict_sorted = list(filter(lambda x: x not in targeted_book, recommend_dict_sorted))  
    
    if len(recommend_dict_sorted) < recom_num: #长度不够,能推荐多少就推荐多少本书
        return recommend_dict_sorted
    else:
        return recommend_dict_sorted[:recom_num]

def main():
    userID = input('Please input the ID of user: ')
    tag_book, user_tag = structDict()
    if int(userID) not in user_tag.keys():
        print('该用户没有过行为,梁康的简单推荐系统无法为该用户推荐')
        return -1
    
    recom_num = input('Please input the amount of books recommender: ')
    result = recommend(int(userID), user_tag, tag_book, int(recom_num))
    print(result)
    return 0

if __name__ == '__main__':
    main()

Please input the ID of user: 8
Please input the amount of books recommender: 3
[(1526, 360), (1416, 284), (1, 252)]
