In [1]:
import os
from getpass import getpass

import pandas as pd
from neo4j import GraphDatabase

In [2]:
# Load the cleaned review table and preview its first five rows.
result = pd.read_csv("cleaned_result.csv")
result.head()

Unnamed: 0,index,id,name,brand,categories,manufacturer,rating,text,username,age
0,0,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,This product so far has not disappointed. My c...,Adapter,46
1,1,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,great for beginner or experienced person. Boug...,truman,40
2,2,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,Inexpensive tablet for him to use and learn on...,DaveZ,26
3,3,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,4.0,I've had my Fire HD 8 two weeks now and I love...,Shacks,29
4,4,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,I bought this for my grand daughter when she c...,explore42,59


In [3]:
class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs Cypher queries.

    The driver is created in ``__init__``; if creation fails the error is
    printed and the driver stays ``None``. The object can also be used as a
    context manager (``with Neo4jConnection(...) as conn:``) so the driver is
    closed even when the body raises.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name used for authentication.
            pwd: Password used for authentication.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: report the failure; query() will refuse to run later.
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppress errors."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run a Cypher query and return all of its records.

        Args:
            query: Cypher text to execute.
            parameters: Optional mapping of query parameters.
            db: Optional database name; when ``None`` the driver's default
                session is used.

        Returns:
            A list with the records produced by the query, or ``None`` when
            the query failed (the error is printed instead of raised).

        Raises:
            AssertionError: If the driver could not be created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise the result before the session is closed.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [4]:
# Make the connection to the database.
# Connection settings are read from the environment so that credentials are
# never stored in the notebook; when NEO4J_PASSWORD is unset the password is
# asked for interactively instead.
conn = Neo4jConnection(uri = os.environ.get("NEO4J_URI", "neo4j://localhost:7687"),
                       user = os.environ.get("NEO4J_USER", "neo4j"),
                       pwd = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "))

In [5]:
def clear(conn):
    """Delete every node, together with its relationships, through ``conn``."""
    wipe_everything = 'match(n) detach delete n'
    conn.query(wipe_everything)

In [6]:
# Adds user nodes
def add_users():
    """Create one ``User`` node per row of ``user.csv``.

    Nodes are merged on ``username`` only; ``age`` is written when the node is
    first created. Merging on both properties would try to create a second
    node for a username that reappears with a different age, which conflicts
    with a uniqueness rule on ``username`` and fails the whole batch.

    Returns:
        Whatever ``conn.query`` returns for the batch.
    """
    user_df = pd.read_csv("user.csv")

    query = '''
    UNWIND $rows AS row
    MERGE (u:User {username: row.username})
    ON CREATE SET u.age = row.age
    '''
    return conn.query(query, parameters = {'rows': user_df.to_dict('records')})

In [7]:
# Adds product nodes to the Neo4j graph.
def add_products():
    """Create one ``Product`` node per row of ``product.csv``.

    Nodes are merged on ``id`` only; ``name``, ``brand`` and ``manufacturer``
    are written when the node is first created. Merging on every property
    would try to create a second node whenever an id reappears with any
    differing attribute, which conflicts with a uniqueness rule on ``id`` and
    fails the whole batch.

    Returns:
        Whatever ``conn.query`` returns for the batch.
    """
    product_df = pd.read_csv("product.csv")

    query = '''
    UNWIND $rows AS row
    MERGE (p:Product {id: row.id})
    ON CREATE SET p.name = row.name, p.brand = row.brand, p.manufacturer = row.manufacturer
    '''
    return conn.query(query, parameters = {'rows': product_df.to_dict('records')})

In [8]:
# Add relation between user and product: user reviews product.
def add_user_product_relation(dataframe):
    """Link users to the products they reviewed with ``Reviews`` relationships.

    Args:
        dataframe: Review table providing the ``index``, ``id``, ``username``,
            ``rating`` and ``text`` columns read by the query.

    Returns:
        Whatever ``conn.query`` returns for the batch.
    """
    # Only send the columns the query reads; the other columns of the review
    # table would just inflate the parameter payload.
    review_rows = dataframe[["index", "id", "username", "rating", "text"]]

    # Merge on the review id alone so that re-loading a review whose rating or
    # text changed does not add a second relationship for the same review.
    query = '''
    UNWIND $rows as row
    MATCH (u: User {username: row.username})
    MATCH (p:Product {id: row.id})
    MERGE (u)-[r:Reviews {id: row.index}]->(p)
    ON CREATE SET r.rating = row.rating, r.text = row.text
    '''
    return conn.query(query, parameters = {'rows': review_rows.to_dict('records')})

In [9]:
#Adds category nodes to the Neo4j graph.
def add_categories(dataframe=None):
    """Create one ``Category`` node per distinct category name.

    Each value of the ``categories`` column is a comma-separated list; the
    lists are split and de-duplicated, and all names are written with a single
    parameterized query. Passing the names as a parameter (instead of pasting
    them into the Cypher text) keeps names containing quotes or backslashes
    intact, and ``MERGE`` makes the load safe to repeat.

    Args:
        dataframe: Table with a ``categories`` column. Defaults to the
            notebook-level ``result`` frame.
    """
    if dataframe is None:
        dataframe = result

    # Rows without a category list have nothing to split.
    category_lists = dataframe["categories"].dropna()

    # get unique category list (sorted only to make the load order stable)
    unique_category = sorted({
        category
        for string in category_lists
        for category in string.split(",")
    })

    query = '''
    UNWIND $categories AS category
    MERGE (c:Category {category: category})
    '''
    conn.query(query, parameters = {'categories': unique_category})

In [10]:
def add_product_categories_relation(dataframe):
    """Link every product to each of its categories with an ``IN`` relationship.

    The review table repeats a product's category list on every review row, so
    the distinct (product id, category) pairs are extracted first and then
    written with one parameterized query instead of one query per row and
    category.

    Args:
        dataframe: Table with an ``id`` column (product id) and a
            ``categories`` column holding comma-separated category names.
    """
    # Columns are addressed by name; positional access on a row breaks as soon
    # as the column order of the table changes.
    products = (
        dataframe[["id", "categories"]]
        .dropna(subset=["categories"])
        .drop_duplicates()
    )
    pairs = (
        products.assign(category=products["categories"].str.split(","))
        .explode("category")[["id", "category"]]
        .drop_duplicates()
    )

    query = '''
    UNWIND $rows AS row
    MATCH (p:Product {id: row.id})
    MATCH (c:Category {category: row.category})
    MERGE (p)-[:IN]->(c)
    '''
    conn.query(query, parameters = {'rows': pairs.to_dict('records')})

In [11]:
# Construct the graph from scratch: wipe the database, declare the uniqueness
# constraints, then load nodes before the relationships that connect them.
clear(conn)
# Set uniqueness constraints on the key property of each node label so that
# the same user / product / category cannot be added twice.
# NOTE(review): the `ON ... ASSERT` constraint syntax is reported as deprecated
# in newer Neo4j releases in favour of `FOR ... REQUIRE` — confirm against the
# server version in use.
conn.query('CREATE CONSTRAINT User IF NOT EXISTS ON (u:User) ASSERT u.username IS UNIQUE')
conn.query('CREATE CONSTRAINT Product IF NOT EXISTS ON (p:Product) ASSERT p.id IS UNIQUE')
conn.query('CREATE CONSTRAINT Category IF NOT EXISTS ON (c:Category) ASSERT c.category IS UNIQUE')

add_products()
print('product finished')
add_categories()
print('category finished')
add_users()
print('user finished')
add_user_product_relation(result)
print('review finished')
add_product_categories_relation(result)
print('in_category relation finished')

product finished
category finished
user finished
review finished
in_category relation finished


In [12]:
# Sanity check: number of User nodes stored in the graph.
len(conn.query('''MATCH(u:User)
return u.username'''))

33237

In [13]:
# Rank users who reviewed more than one distinct product by that count and
# show the first (highest) record.
conn.query('''MATCH(u:User)-[r:Reviews]->(p:Product)
with u,count(distinct p) as cou
where cou>1
return u.username, cou
order by cou desc
limit 10''')[0]

<Record u.username='John' cou=26>

In [14]:
# Sanity check: number of (product, category) pairs connected by an IN relationship.
len(conn.query('match (p:Product) -[:IN]->(c:Category) return p,c'))

998

In [41]:
#build content-based filtering query
def cbf_query(username):
    """Build the Cypher text of the content-based recommendation query.

    For every product ``p`` the user reviewed and every product ``p2`` that
    shares at least one category with it and that the user has not reviewed,
    the query returns both category lists and the Jaccard similarity of the
    two category sets (shared categories divided by the size of their union),
    ordered by similarity, limited to 10 rows.

    Args:
        username: Name of the user to recommend for. It is embedded in the
            query as a double-quoted string literal, with backslashes and
            double quotes escaped so the name cannot terminate the literal
            early.

    Returns:
        The query text as a string.

    Raises:
        TypeError: If ``username`` is not a string.
    """
    if not isinstance(username, str):
        raise TypeError("username must be a str")
    # Escape backslashes first so the backslashes added for quotes are not
    # themselves doubled.
    escaped = username.replace('\\', '\\\\').replace('"', '\\"')
    username_literal = '"' + escaped + '"'
    query = '''MATCH(u:User{username:'''+ username_literal +'''})-[:Reviews]->(p:Product)-[:IN]->(c:Category)<-[:IN]-(p2:Product)
WHERE NOT EXISTS ((u)-[:Reviews]->(p2))
WITH p, p2, COUNT(distinct c) AS intersection
MATCH (p)-[:IN]->(pc:Category)
WITH p, p2, intersection, COLLECT(pc.category) AS s1
MATCH (p2)-[:IN]->(p2c:Category)
WITH p, p2, s1, intersection, COLLECT(p2c.category) AS s2
WITH p, p2, intersection, s1+[x IN s2 WHERE NOT x IN s1] AS unionSet, s1, s2
RETURN p.id as Userproduct, p2.id as Recommendate,
s1 as UserProductCategory, s2 as RecommendateProductCategory,
((1.0*intersection)/SIZE(unionSet)) AS jaccard ORDER BY jaccard DESC
limit 10'''
    return query

In [42]:
# Run the content-based recommendation query for the user "John".
conn.query(cbf_query('John'))

[<Record Userproduct=26 Recommendate=71 UserProductCategory=['iPad & Tablets', 'Electronics', 'Amazon Tablets', 'Electronics Features', 'Tablets & eBook Readers', 'Computers/Tablets & Networking', 'All Tablets', 'Computers & Tablets', 'Tablets', 'Fire Tablets'] RecommendateProductCategory=['Amazon Tablets', 'All Tablets', 'Tablets', 'Fire Tablets'] jaccard=1.0>,
 <Record Userproduct=28 Recommendate=71 UserProductCategory=['Amazon Tablets', 'Frys', 'Fire Tablets', 'Tablets & eBook Readers', 'Computers/Tablets & Networking', 'All Tablets', 'Tablets', 'Computers & Tablets'] RecommendateProductCategory=['Amazon Tablets', 'All Tablets', 'Tablets', 'Fire Tablets'] jaccard=1.0>,
 <Record Userproduct=26 Recommendate=79 UserProductCategory=['iPad & Tablets', 'Electronics', 'Amazon Tablets', 'Electronics Features', 'Tablets & eBook Readers', 'Computers/Tablets & Networking', 'All Tablets', 'Computers & Tablets', 'Tablets', 'Fire Tablets'] RecommendateProductCategory=['Computers & Tablets', 'Amaz

In [17]:
# Peek at a handful of nodes only; returning every node would load the whole
# graph into the kernel and flood the cell output.
conn.query('match (n) return n limit 5')

[<Record n=<Node id=168290 labels=frozenset({'Product'}) properties={'name': 'All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi, 16 GB - Includes Special Offers, Magenta', 'id': 1, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=168291 labels=frozenset({'Product'}) properties={'name': 'Kindle Oasis E-reader with Leather Charging Cover - Merlot, 6 High-Resolution Display (300 ppi), Wi-Fi - Includes Special Offers,,', 'id': 2, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=168292 labels=frozenset({'Product'}) properties={'name': 'Amazon Kindle Lighted Leather Cover,,,\r\nAmazon Kindle Lighted Leather Cover,,,', 'id': 3, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=168293 labels=frozenset({'Product'}) properties={'name': 'Kindle Keyboard,,,\r\nKindle Keyboard,,,', 'id': 4, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=168294 labels=frozenset({'Product'}) properties={'name': 'All-New Fire HD 8 Tablet, 8 HD Di

In [43]:
# Close the Neo4j driver held by the connection wrapper.
conn.close()