In [1]:
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
import pandas as pd
import json
from tqdm import tqdm

In [2]:
# Populate the process environment from a local .env file, if one exists.
# Without this call the imported `load_dotenv` is never used and the
# os.getenv lookups below return None on a fresh kernel unless the variables
# were already exported in the shell. Variables that are already set are left
# untouched (load_dotenv does not override by default).
load_dotenv()

# Connection settings for the Neo4j instance, read from the environment so
# that no credentials are hard-coded in the notebook.
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
# NOTE(review): this reads the Aura *instance name*, and nothing visible in
# this notebook passes it to the graph connection — confirm whether it is needed.
NEO4J_DATABASE = os.getenv('AURA_INSTANCENAME') # don't need this I guess

In [6]:
# Graph handle that the cells below use to run Cypher via kg.query(...).
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [8]:
# Sanity check: count every node currently stored in the graph.
cypher = """
    MATCH (n)
    RETURN count(n)
"""
node_count_rows = kg.query(cypher)
# Bare last expression so the notebook displays the query result.
node_count_rows

[{'count(n)': 0}]

In [7]:
def delete_graph():
    """Remove every node from the graph, together with its relationships.

    Runs a single ``MATCH (n) DETACH DELETE n`` statement through the
    module-level ``kg`` handle. Returns nothing.
    """
    kg.query("""
        MATCH (n)
        DETACH DELETE n
    """)


# Wipe the graph as soon as this cell runs.
delete_graph()

In [None]:
def addCompany(company_name):
    """Ensure a ``Company`` node with the given name exists in the graph.

    If a ``Company`` node whose ``name`` equals ``company_name`` is already
    present, nothing is changed; otherwise one is created with only the
    ``name`` property set.

    Args:
        company_name: Value stored in (and matched against) the node's
            ``name`` property.

    Returns:
        None.
    """
    # MERGE performs the "match, else create" step in one statement, replacing
    # the earlier count query followed by a separate CREATE (two round trips).
    cypher = """
        MERGE (c:Company {name: $company_name})
        RETURN c
    """
    kg.query(cypher, {'company_name': company_name})

In [10]:
# demo cell
# Builds (and prints, but does not execute) the CREATE statement for a single
# supplier file, to show the shape of the query and its parameters.
path = '../TempJSONs/Suppliers/Ningbo Tuopu Group Co., Ltd.(Formerly Ningbo Tuopu Brake System Co., Ltd.).json'
with open(path, 'r') as f:
    data = json.load(f)
# print(data)

metadata_fields = ['top500', 'Year Established', 'Number of Employees', 'Sales Turnover', 'Representative', 'URL', 'Address', 'Country']

# Start from the company name, then copy over the whitelisted metadata fields.
# Spaces in field names become underscores so each key can be used both as a
# property name and as a `$parameter` name in the query text.
supplier_dict = {'name':'Ningbo Tuopu Group Co., Ltd.(Formerly Ningbo Tuopu Brake System Co., Ltd.)'}
supplier_dict.update(
    {field.replace(' ', '_'): data[field] for field in data if field in metadata_fields}
)
print(supplier_dict)

# One "key: $key" pair per property, comma-separated inside the node pattern.
property_pairs = ", ".join(field + ": $" + field for field in supplier_dict)
cypher = """CREATE (c:Company {""" + property_pairs + "}) RETURN c"

print(cypher)
# kg.query(cypher, supplier_dict)

{'name': 'Ningbo Tuopu Group Co., Ltd.(Formerly Ningbo Tuopu Brake System Co., Ltd.)', 'top500': True, 'Year_Established': 'Apr. 2004', 'Number_of_Employees': '7,612 employees (as of Dec. 31, 2022)', 'Sales_Turnover': 'CNY 159,92.82 million (FY ended Dec. 31, 2022)', 'Representative': 'Chairman: Jianshu WU', 'URL': 'http://www.tuopu.com', 'Address': 'No.268, Yuwangshan Road, Beilun District, Ningbo City, Zhejiang Province, China', 'Country': 'China'}
CREATE (c:Company {name: $name, top500: $top500, Year_Established: $Year_Established, Number_of_Employees: $Number_of_Employees, Sales_Turnover: $Sales_Turnover, Representative: $Representative, URL: $URL, Address: $Address, Country: $Country}) RETURN c


In [9]:
# Load supplier JSON files as Company nodes.
#
# The directory listing is restricted to *.json files (so stray entries such
# as checkpoints or OS metadata files do not crash json.load) and sorted so
# that "the first five" is the same set of files on every run.
supplier_jsons_list = sorted(
    file_name
    for file_name in os.listdir('../TempJSONs/Suppliers')
    if file_name.endswith('.json')
)
metadata_fields = ['top500', 'Year Established', 'Number of Employees', 'Sales Turnover', 'Representative', 'URL', 'Address', 'Country']

# MERGE on the company name keeps the cell idempotent: re-running it updates
# the existing node instead of creating a duplicate. The properties travel as
# a single map parameter, so no query text is assembled from the JSON keys.
cypher = """
    MERGE (c:Company {name: $name})
    SET c += $props
    RETURN c
"""

# Only the first 5 supplier files are processed in this cell.
for supplier in tqdm(supplier_jsons_list[:5]):
    # create dictionary of node properties; the company name is the file name
    # without its .json extension
    company_name = os.path.splitext(supplier)[0]
    supplier_dict = {'name': company_name}

    # read supplier file
    with open(f'../TempJSONs/Suppliers/{supplier}', 'r') as f:
        data = json.load(f)

    # add required fields in dictionary (spaces -> underscores in property names)
    for key in data:
        if key in metadata_fields:
            supplier_dict[key.replace(' ', '_')] = data[key]

    # execute cypher query
    kg.query(cypher, {'name': company_name, 'props': supplier_dict})

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:00<00:00,  5.08it/s]
