In [1]:
import pandas as pd
from py2neo import Graph
import numpy as np
import itertools

In [2]:
#py2neo连接图数据库
lib_graph = Graph(
    "http://localhost:7474",
    username="neo4j",
    password="neo4j"
)

# 策略1-同作者其他书籍

In [28]:
def get_Rec_Author(ISBN):
    book = ISBN
    df1 = lib_graph.run("match (a:BOOK)<-[r1:write]-(b:Author)-[r2:write]->(c:BOOK) where a.ISBN = {param} and c <> a return c.ISBN, c.TITLE, c.TOTAL_CIRC", param = book).to_data_frame()
    if df1.empty:    # 如果改书作者没有其他的书
        return df1
    df1 = df1.drop_duplicates()
    book_df1 = df1.rename(columns={'c.ISBN':'ISBN', 'c.TITLE':'TITLE', 'c.TOTAL_CIRC':'TOTAL_CIRC'}, index=None)
    book_df1 = book_df1.sort_values(by='TOTAL_CIRC', ascending=False) # 降序排序
    book_df1.index = range(len(book_df1))
    return book_df1

In [30]:
if __name__ == '__main__':
    ISBN = '978-7-5399-3244-6'
    res = get_Rec_Author(ISBN)
    print(res[:10])

            ISBN           TITLE  TOTAL_CIRC
0  7-204-04442-8           永在的温情        1255
1  7-5387-1220-8           苏曼殊作品         706
2  7-5387-0272-5            鲜血梅花         604
3  7-80114-210-1     林语堂散文经典全编.1         587
4  7-80590-208-9            武则天传         328
5  7-5613-2821-4            品味人生         305
6  7-5613-2364-6            女性人生         300
7  7-5600-1421-6     生活的艺术:[英文版]         288
8  7-5404-0297-0  人生的盛宴:林语堂人生随笔集         222
9  7-5613-2537-1             红牡丹         201


# 策略2-目标读者到目标书籍最短路径读者的阅读书目

In [31]:
# 找到所有最短路上的人，给出推荐
def getRec_path_people(name1, book1):
    name = name1
    book = book1
    df1 = lib_graph.run("match (a:Reader), (b:BOOK), p = allShortestPaths((a)-[*]-(b)), (c:Reader)  where a.NAME={param1} and b.ISBN={param2} and (c in nodes(p)) and c <> a return distinct c.CERT_ID, c.NAME, c.TOTAL_LEND_QTY", param1=name, param2=book).to_data_frame()
    df1_list = np.array(df1['c.CERT_ID']).tolist()
    rec_list = pd.DataFrame(columns=['b.ISBN', 'b.TITLE', 'r.LEND_DATE'])
    for item in df1_list:
        data1 = lib_graph.run("match (a:Reader)-[r]->(b:BOOK) where a.CERT_ID={param3} return b.ISBN, b.TITLE, r.LEND_DATE order by r.LEND_DATE desc", param3=item).to_data_frame()
        if data1.empty == False:
            result = rec_list.append(data1)
            rec_list = result
    df2 = rec_list.drop_duplicates()
    person_df2 = df2.rename(columns={'b.ISBN': 'ISBN', 'b.TITLE': 'TITLE', 'r.LEND_DATE': 'LEND_DATE'})
    person_df2 = person_df2.sort_values(by='LEND_DATE', ascending=False) # 降序排序
    person_df2.index = range(len(person_df2))
    return person_df2

In [32]:
if __name__ == '__main__':
    name1 = '谢泽宇'
    book1 = '978-7-302-42328-7'
    res = getRec_path_people(name1, book1)
    print(res)

                                ISBN  \
0                  978-7-5123-9396-7   
1                  978-7-121-30010-3   
2                  978-7-111-55294-9   
3                  978-7-03-045711-0   
4                  978-7-115-40309-4   
5                  978-7-5432-2605-0   
6                  978-7-111-52354-3   
7                  978-7-115-43220-9   
8                  978-7-115-38428-7   
9   978-7-111-19989-2, 7-89482-079-2   
10                 978-7-302-42328-7   
11                 978-7-115-42057-2   
12                 978-7-111-53845-5   
13                 978-7-111-44514-2   
14                 978-7-03-027494-6   
15                     7-302-12505-8   
16                 978-7-115-40609-5   
17                 978-7-5399-6300-6   
18                     7-302-12505-8   
19                 978-7-302-27595-4   
20                 978-7-115-40709-2   
21                 978-7-115-40309-4   
22                 978-7-111-49468-3   
23                 978-7-302-16709-9   


# 策略3-目标书籍的前十名读者的其他共同阅读书目

In [16]:
def getRec_book(book1):
    book = book1
    df_peo = lib_graph.run("match (a:BOOK)<-[r1]-(b:Reader) where a.ISBN = {param1} return distinct b.CERT_ID, b.NAME, b.TOTAL_LEND_QTY order by b.TOTAL_LEND_QTY desc limit 10", param1 = book).to_data_frame()
    people_list = np.array(df_peo['b.CERT_ID']).tolist()
    people_list2 = list(itertools.combinations(people_list, 2)) # 排列组合
    rec_list = pd.DataFrame(columns=['b1.ISBN', 'b1.TITLE', 'b1.TOTAL_CIRC'])
    for item in people_list2:
        param1 = item[0]
        param2 = item[1]
        data1 = lib_graph.run(
            "match p = (a:Reader)-[r1]->(b1:BOOK)<-[r2]-(b:Reader) where b.CERT_ID={people1} and a.CERT_ID={people2} return b1.ISBN, b1.TITLE, b1.TOTAL_CIRC",
            people1=param1, people2=param2).to_data_frame()
        if data1.empty == False:
            result = rec_list.append(data1)
            rec_list = result
    df2 = rec_list.drop_duplicates()
    person_df2 = df2.rename(columns={'b1.ISBN': 'ISBN', 'b1.TITLE': 'TITLE', 'b1.TOTAL_CIRC': 'TOTAL_CIRC'})
    person_df2 = person_df2.sort_values(by='TOTAL_CIRC', ascending=False) # 降序排序
    person_df2.index = range(len(person_df2))
    return person_df2
    # 感觉需要根据类别做筛选

In [18]:
if __name__ == '__main__':
    book1 = '978-7-302-42328-7'
    res = getRec_book(book1)
    print(res)

                 ISBN         TITLE TOTAL_CIRC
0       7-04-006697-1          振动力学        348
1       7-302-02368-9     数据结构:C语言版        261
2       7-5053-9115-1       有限元方法编程        231
3       7-111-07585-4        神经网络设计        216
4   978-7-5641-0873-1    工程塑性力学-修订版         95
5   978-7-03-020082-2          密码传奇         77
6   978-7-111-32653-3    Python学习手册         51
7   978-7-302-27595-4        统计学习方法         39
8   978-7-115-40709-2  Python语言及其应用         22
9   978-7-302-42328-7          机器学习         18
10  978-7-302-43696-6    Django开发宝典         11
