In [1]:
import pandas as pd
from neo4j import GraphDatabase

In [2]:
result = pd.read_csv("cleaned_result.csv")
result.head(5)

Unnamed: 0,index,id,name,brand,categories,manufacturer,rating,text,username,age
0,0,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,This product so far has not disappointed. My c...,Adapter,45
1,1,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,great for beginner or experienced person. Boug...,truman,44
2,2,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,Inexpensive tablet for him to use and learn on...,DaveZ,50
3,3,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,4.0,I've had my Fire HD 8 two weeks now and I love...,Shacks,47
4,4,1,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...",Amazon,5.0,I bought this for my grand daughter when she c...,explore42,44


In [3]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [4]:
# make connection to the database
conn = Neo4jConnection(uri = "neo4j://localhost:7687", 
                       user = "neo4j",              
                       pwd = "Hjm123456")

In [5]:
#clear database
def clear(conn):
    conn.query('match(n) detach delete n')

In [6]:
# Adds user nodes
def add_users():
    user_df = pd.read_csv("user.csv")
    
    query = '''
    UNWIND $rows AS row
    MERGE (u:User {username: row.username, age: row.age})
    '''
    return conn.query(query, parameters = {'rows': user_df.to_dict('records')})    

In [7]:
# Adds product nodes and the relation: user reviews product to the Neo4j graph.
def add_products():
    product_df = pd.read_csv("product.csv")
    
    query = '''
    UNWIND $rows AS row
    MERGE (p:Product {id: row.id, name: row.name, brand:row.brand, manufacturer:row.manufacturer})
    '''
    return conn.query(query, parameters = {'rows': product_df.to_dict('records')})

In [8]:
# Add relation between user and product: user reviews product.
def add_user_product_relation(dataframe):
    
    query = '''
    UNWIND $rows as row
    MATCH (u: User {username: row.username, age:row.age})
    MATCH (p:Product {id: row.id})
    MERGE (u)-[:Reviews {id:row.index, rating:row.rating, text:row.text}]->(p)
    '''
    return conn.query(query, parameters = {'rows': dataframe.to_dict('records')})

In [9]:
#Adds category nodes to the Neo4j graph.
def add_categories():
    
    buffer_category_lists = []
    unique_category = []
    
    category_lists = list(result["categories"])
        
    buffer_category_lists = [string.split(",") for string in category_lists]
    for i in buffer_category_lists:
        for j in i:
            unique_category.append(j)
    
    # get unique category list
    unique_category = list(set(unique_category))
    
    for category in unique_category:
        c = '"' + category +'"'
        query = ''' 
        create(c:Category {category: 
        ''' + c + '''})'''
        conn.query(query)

In [10]:
def add_product_categories_relation(dataframe):
    query = '''
    UNWIND $rows as row
    UNWIND row.categories AS category_name
    MATCH (p:Product {id: row.id})
    MATCH (c:Category {category: category_name})
    MERGE (p)-[:IN_CATEGORY]->(c)
   '''
    return conn.query(query, parameters = {'rows':dataframe.to_dict('records')})

In [11]:
#construct graph
clear(conn)
add_users()
add_products()
add_categories()
add_user_product_relation(result.head())
add_product_categories_relation(result.head())
# add_products(result.head(300))
# add_users(result.head(300))

Query failed: {code: Neo.ClientError.Schema.ConstraintValidationFailed} {message: Node(123816) already exists with label `User` and property `username` = 'johnbgood'}


[]

In [12]:
conn.query('match (n) return n')

[<Record n=<Node id=123818 labels=frozenset({'Product'}) properties={'name': 'All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi, 16 GB - Includes Special Offers, Magenta', 'id': 1, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=123819 labels=frozenset({'Product'}) properties={'name': 'Kindle Oasis E-reader with Leather Charging Cover - Merlot, 6 High-Resolution Display (300 ppi), Wi-Fi - Includes Special Offers,,', 'id': 2, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=123820 labels=frozenset({'Product'}) properties={'name': 'Amazon Kindle Lighted Leather Cover,,,\r\nAmazon Kindle Lighted Leather Cover,,,', 'id': 3, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=123821 labels=frozenset({'Product'}) properties={'name': 'Kindle Keyboard,,,\r\nKindle Keyboard,,,', 'id': 4, 'brand': 'Amazon', 'manufacturer': 'Amazon'}>>,
 <Record n=<Node id=123822 labels=frozenset({'Product'}) properties={'name': 'All-New Fire HD 8 Tablet, 8 HD Di

In [13]:
conn.close()