In [1]:
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
import pandas as pd
import json
from tqdm import tqdm

In [2]:
# Load variables from a local .env file (if one is found) into the process
# environment. Variables that are already set are left untouched, so this is
# a no-op when the credentials were exported in the shell.
load_dotenv()

# Neo4j connection settings, read from the environment.
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
# Aura instance name; it is not passed to Neo4jGraph in the next cell.
NEO4J_DATABASE = os.getenv('AURA_INSTANCENAME')

In [3]:
# Shared Neo4j connection; the functions defined below send their Cypher through `kg`.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [4]:
# Sanity check: count every node currently stored in the graph.
cypher = """
    MATCH (n)
    RETURN count(n)
"""
# Last expression of the cell, so the query result is shown as the cell output.
kg.query(cypher)

[{'count(n)': 0}]

In [5]:
def delete_graph():
    """Delete every node in the graph together with its relationships."""
    # DETACH DELETE removes a node's relationships along with the node itself.
    wipe_all_query = """
        MATCH (n)
        DETACH DELETE n
    """
    kg.query(wipe_all_query)
# Destructive: removes all existing nodes and relationships from the database.
delete_graph()

In [38]:
def addCompany(company_name):
    """Ensure a `Company` node with the given name exists.

    Args:
        company_name: Value stored in the node's `name` property.

    Returns:
        None.

    Raises:
        ValueError: If `company_name` is None (MERGE cannot match on a null
            property value).
    """
    if company_name is None:
        raise ValueError("company_name must not be None")

    # MERGE matches an existing node or creates a new one in a single
    # statement, so no separate count query (and second round trip) is needed.
    cypher = """
        MERGE (c:Company {name: $company_name})
        RETURN c
    """
    kg.query(cypher, {'company_name': company_name})

In [39]:
# # demo cell
# path = '../TempJSONs/Suppliers/Ningbo Tuopu Group Co., Ltd.(Formerly Ningbo Tuopu Brake System Co., Ltd.).json'
# with open(path, 'r') as f:
#     data = json.load(f)
# # print(data)
# supplier_dict = {'name':'Ningbo Tuopu Group Co., Ltd.(Formerly Ningbo Tuopu Brake System Co., Ltd.)'}
# metadata_fields = ['top500', 'Year Established', 'Number of Employees', 'Sales Turnover', 'Representative', 'URL', 'Address', 'Country']
# for key in data:
#     if key in metadata_fields:
#         supplier_dict[key.replace(' ', '_')] = data[key]
# print(supplier_dict)

# cypher = """CREATE (c:Company {"""
# for key in supplier_dict:
#     cypher += key + ": $" + key + ", "
# cypher = cypher[:-2]
# cypher += "}) RETURN c"

# print(cypher)
# # kg.query(cypher, supplier_dict)

In [40]:
def addSupplierCompany(supplier_file):
    """Create or update the `Supplier` node described by one supplier JSON file.

    Args:
        supplier_file: File name (not a path) inside `../TempJSONs/Suppliers/`.
            The supplier name is the file name with its last five characters
            removed, i.e. it is assumed to end in ".json".

    Returns:
        None.
    """
    metadata_fields = ['top500', 'Year Established', 'Number of Employees', 'Sales Turnover', 'Representative', 'URL', 'Address', 'Country']

    name = supplier_file[:-5]

    # read supplier file
    with open(f'../TempJSONs/Suppliers/{supplier_file}', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Collect the metadata fields present in the file. Spaces become
    # underscores so the property names need no backtick quoting in Cypher.
    props = {
        key.replace(' ', '_'): data[key]
        for key in metadata_fields
        if key in data
    }

    # Identify the node by name only and set the remaining properties
    # afterwards. Putting every property in the MERGE pattern fails on null
    # values and creates a second node whenever any value differs.
    cypher = """
        MERGE (c:Supplier {name: $name})
        SET c += $props
        RETURN c
    """
    kg.query(cypher, {'name': name, 'props': props})

In [41]:
def addSupplierCountry(supplier_file):
    """Ensure a `Country` node exists for the country named in a supplier file.

    Args:
        supplier_file: File name (not a path) inside `../TempJSONs/Suppliers/`.

    Returns:
        None. Nothing is written when the file has no `Country` value.
    """
    # read supplier file
    with open(f'../TempJSONs/Suppliers/{supplier_file}', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # .get() covers both a missing key and an explicit JSON null; MERGE
    # rejects null property values, so skip in either case.
    country_name = data.get('Country')
    if country_name is None:
        return

    cypher = """
        MERGE (c:Country {name: $country_name})
        RETURN c
    """
    kg.query(cypher, {'country_name': country_name})
    

In [42]:
def addSupplierCountryRelation(supplier_file):
    """Link a supplier to its country with a `LOCATED_IN` relationship.

    Both end nodes are merged as well, so the call works whether or not the
    `Supplier` and `Country` nodes already exist.

    Args:
        supplier_file: File name (not a path) inside `../TempJSONs/Suppliers/`.
            The supplier name is the file name with its last five characters
            removed, i.e. it is assumed to end in ".json".

    Returns:
        None. Nothing is written when the file has no `Country` value.
    """
    # read supplier file
    with open(f'../TempJSONs/Suppliers/{supplier_file}', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # .get() covers both a missing key and an explicit JSON null; MERGE
    # rejects null property values, so skip in either case.
    country_name = data.get('Country')
    if country_name is None:
        return
    supplier_name = supplier_file[:-5]

    cypher = """
        MERGE (s:Supplier {name:$supplier_name})
        MERGE (c:Country {name:$country_name})
        MERGE (s)-[r:LOCATED_IN]->(c)
    """
    kg.query(cypher, {'supplier_name': supplier_name, 'country_name': country_name})

In [43]:
def addSupplierPartsRelation(supplier_file):
    """Link a supplier to every part it sells with `HAS_PART` relationships.

    Part names are the keys of the file's `parts_sold` entry. The `Supplier`
    and `Part` nodes are merged too, so they are created when missing.

    Args:
        supplier_file: File name (not a path) inside `../TempJSONs/Suppliers/`.
            The supplier name is the file name with its last five characters
            removed, i.e. it is assumed to end in ".json".

    Returns:
        None. Nothing is written when `parts_sold` is missing, null or empty.
    """
    # read supplier file
    with open(f'../TempJSONs/Suppliers/{supplier_file}', 'r', encoding='utf-8') as f:
        data = json.load(f)

    parts_sold = data.get('parts_sold')
    if not parts_sold:
        return
    supplier_name = supplier_file[:-5]
    parts = list(parts_sold)

    # One round trip for all parts: UNWIND runs the three MERGE clauses once
    # per list element, which is what the per-part query did before.
    cypher = """
        UNWIND $parts AS part
        MERGE (s:Supplier {name:$supplier_name})
        MERGE (p:Part {name:part})
        MERGE (s)-[:HAS_PART]->(p)
    """
    kg.query(cypher, {'supplier_name': supplier_name, 'parts': parts})

In [44]:
def addSupplierBuyerRelation(supplier_file):
    """Link a supplier to each of its buyers with `SELLS_TO` relationships.

    Buyer names come from the keys of the file's `buyers` entry; only the
    text before the first ':' of each key is used. The `Supplier` and `Buyer`
    nodes are merged too, so they are created when missing.

    Args:
        supplier_file: File name (not a path) inside `../TempJSONs/Suppliers/`.
            The supplier name is the file name with its last five characters
            removed, i.e. it is assumed to end in ".json".

    Returns:
        None. Nothing is written when `buyers` is missing, null or empty.
    """
    # read supplier file
    with open(f'../TempJSONs/Suppliers/{supplier_file}', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Guard on the key that is actually read below. The previous check looked
    # at 'parts_sold', which raised KeyError for files without 'buyers' and
    # skipped the buyers of files without 'parts_sold'.
    buyers_raw = data.get('buyers')
    if not buyers_raw:
        return
    supplier_name = supplier_file[:-5]

    # Several keys can share the same prefix before ':'; dict.fromkeys drops
    # the repeats while keeping first-seen order.
    buyers = list(dict.fromkeys(key.split(':')[0] for key in buyers_raw))

    # One round trip for all buyers: UNWIND runs the three MERGE clauses once
    # per list element.
    cypher = """
        UNWIND $buyers AS buyer
        MERGE (s:Supplier {name:$supplier_name})
        MERGE (b:Buyer {name:buyer})
        MERGE (s)-[:SELLS_TO]->(b)
    """
    kg.query(cypher, {'supplier_name': supplier_name, 'buyers': buyers})

In [47]:
# Build the graph from the supplier JSON files.
#   nodes:         Supplier, Country, Part, Buyer
#   relationships: (Supplier)-[:LOCATED_IN]->(Country)
#                  (Supplier)-[:SELLS_TO]->(Buyer)
#                  (Supplier)-[:HAS_PART]->(Part)

# os.listdir returns every directory entry in arbitrary order. Keep only the
# .json files (the loaders strip a 5-character extension and parse the file as
# JSON) and sort them so the processed subset is the same on every run.
supplier_jsons_list = sorted(
    file_name
    for file_name in os.listdir('../TempJSONs/Suppliers')
    if file_name.endswith('.json')
)
# Not read by this cell; addSupplierCompany defines its own copy of this list.
metadata_fields = ['top500', 'Year Established', 'Number of Employees', 'Sales Turnover', 'Representative', 'URL', 'Address', 'Country']
# NOTE(review): only the first 10 files are processed here, while the saved
# output of this cell shows 340 iterations - confirm the intended limit.
for supplier_file in tqdm(supplier_jsons_list[:10]):
    addSupplierCompany(supplier_file)
    addSupplierCountry(supplier_file)
    addSupplierCountryRelation(supplier_file)
    addSupplierPartsRelation(supplier_file)
    addSupplierBuyerRelation(supplier_file)

100%|██████████| 340/340 [04:52<00:00,  1.16it/s]


In [14]:
def seeCompleteGraph():
    """Fetch every relationship in the graph together with its two end nodes.

    Returns:
        The result of `kg.query` for the statement below: one entry per
        relationship, with the start node `n`, the relationship `r` and the
        end node `m`. Nodes without any relationship are not included.
    """
    cypher = """
        MATCH (n)-[r]->(m)
        RETURN n, r, m
    """
    # Hand the rows back to the caller; previously the result was discarded,
    # so the function always returned None and the cell displayed nothing.
    return kg.query(cypher)
# Run the relationship query over the whole graph.
seeCompleteGraph()