In [None]:
import sys
# Display the version string of the Python interpreter backing this kernel.
sys.version

In [None]:
! pip install -q python-dotenv
! pip install -q neo4j
! pip install -q langchain

In [None]:
from neo4j import GraphDatabase
import json

In [None]:
! pip install -q langchain-openai

In [None]:
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings

In [None]:
import dotenv
import os

# Load key/value pairs from a local .env file into the process environment.
dotenv.load_dotenv()

# Target variable name -> project-specific variable name it is copied from.
# NOTE(review): `USERNAME` is frequently pre-defined by the operating system
# (e.g. on Windows), and load_dotenv() does not override existing variables by
# default, so the value from .env may be ignored — consider a less generic key.
_ENV_ALIASES = {
    "NEO4J_URI": "URL",
    "NEO4J_USERNAME": "USERNAME",
    "NEO4J_PASSWORD": "PASSWORD",
    "OPENAI_API_KEY": "OPENAIKEY",
}

# Fail early with a readable message: assigning None into os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError.
_missing = [source for source in _ENV_ALIASES.values() if os.getenv(source) is None]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing)
        + ". Define them in the .env file or in the shell environment."
    )

for _target, _source in _ENV_ALIASES.items():
    os.environ[_target] = os.getenv(_source)

In [None]:
# Relative location of the processed AACSB sections export. Built with
# os.path.join so the separator matches the host OS (the previous raw-string
# literal only resolved on Windows).
path = os.path.join('..', 'data', 'aacsb', 'processed',
                    'SECTIONS_processed_2024-04-04_16-24-04.json')

# The context manager closes the file handle once parsing is done; the
# explicit UTF-8 encoding avoids platform-dependent default decoding.
with open(path, encoding='utf-8') as sections_fh:
    json_file = json.load(sections_fh)
print(type(json_file)) #list

In [None]:
# Print every field of every loaded record; for string-valued fields also
# print the character count of the value.
for record in json_file:
    for field_name, field_value in record.items():
        print(f'{field_name} :{field_value}')

        if type(field_value) is not str:
            continue
        print(f'{field_name}, val_length: {len(field_value)}')

In [None]:
# Cypher statement that upserts one `:Section` node.
# Expects a map parameter `$sectionsIn` with the keys Section_Num,
# Section_Title and Section_Description.
# MERGE matches on `sectionNum`; the SET clause is under ON CREATE, so the
# properties are written only when the node is newly created and an existing
# node is returned unchanged.
section_query = """
MERGE(s:Section {sectionNum: $sectionsIn.Section_Num})
    ON CREATE SET 
        s.title =  $sectionsIn.Section_Title,
        s.description =  $sectionsIn.Section_Description, 
        s.nodeType = 'SECTION', 
        s.nodeCat = 'AACSB',
        s.source = 'https://www.aacsb.edu/educators/accreditation/business-accreditation/aacsb-business-accreditation-standards',
        s.year = 2020
RETURN s
"""

In [None]:
#
# Open Neo4j driver
#

# driver = GraphDatabase.driver(connectionURL, auth =(username,password))
# print(driver.verify_connectivity())

In [None]:
#
# Close Connection to Neo4j
#

# driver.close()
# print(driver.verify_connectivity())

In [None]:
#
# Neo4j with LangChain wrapper
# [ERROR RESOLVED w/ APOC PLUGIN ENABLED]
#
# No arguments are passed, so the connection settings presumably come from the
# NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD environment variables exported
# earlier in this notebook — TODO confirm against the Neo4jGraph documentation.
# Per the note above, an earlier error went away once the APOC plugin was
# enabled on the Neo4j server.
#

graphDB = Neo4jGraph()

In [None]:
# First record of the loaded JSON list; used to try the section query on a
# single record before the bulk load.
section_1 = json_file[0]

In [None]:
# Show the container type of the sample record, then the Python type of each
# of its values.
print(type(section_1))

for field_name, field_value in section_1.items():
    print(field_name, type(field_value))


In [None]:
# Run the section upsert for the single sample record; the returned rows are
# the cell's displayed output.
graphDB.query(
    section_query,
    params=dict(sectionsIn=section_1),
)

In [None]:
#
# No duplicate :Section nodes
#
# The uniqueness constraint has to target the `Section` label that
# section_query writes; it previously named `Session`, so it never applied to
# the nodes created by this notebook.
# The constraint is given a new name because `IF NOT EXISTS` is a no-op when a
# constraint with the same name already exists, which would leave the old
# `Session` definition in place on databases where this cell was already run.
# The stale one can be removed with: DROP CONSTRAINT unique_node IF EXISTS
#

graphDB.query("""
CREATE CONSTRAINT unique_section_num IF NOT EXISTS
    FOR (s:Section) REQUIRE s.sectionNum IS UNIQUE
""")


In [None]:
# Run the section upsert once per loaded record and report how many records
# were processed.
node_count = 0  # stays 0 when there are no records
for node_count, section in enumerate(json_file, start=1):
    print(f"Creating `:Section ` node for section ID {section['Section_Num']}")
    query_params = {'sectionsIn': section}
    graphDB.query(section_query, params=query_params)
print(f"Created {node_count} nodes")

In [None]:
# Count every node in the database, regardless of label.
graphDB.query("""
MATCH (n)
RETURN COUNT(n)
""")

In [None]:
#
# STANDARDS NODES and child nodes
# @TODO Link by -- parentStandardNum
#

# Cypher statement that upserts one `:Standard` node.
# Expects a map parameter `$standardsIn` with the keys Standard_num, Section,
# Standard_title and Standard_formal.
# MERGE matches on `standardNum`; properties are written only when the node
# is created (ON CREATE).
# NOTE(review): in Cypher, assigning NULL to a property stores nothing, so the
# three `...NodeID = NULL` lines act as placeholders only — confirm intent.
standards_query = """
MERGE(s:Standard {standardNum: $standardsIn.Standard_num})
    ON CREATE SET 
        s.parentSectionNum = $standardsIn.Section,
        s.title =  $standardsIn.Standard_title,
        s.text =  $standardsIn.Standard_formal, 
        s.nodeType = 'STANDARD', 
        s.nodeCat = 'AACSB',
        s.source = 'https://www.aacsb.edu/educators/accreditation/business-accreditation/aacsb-business-accreditation-standards',
        s.year = 2020,
        s.definitionNodeID = NULL,
        s.basisForJudgementNodeID=NULL,
        s.supportingDocNodeID=NULL
RETURN s
"""

# Cypher statement that upserts one `:Definitions` node per standard.
# Matches on `parentStandardNum` (taken from `$standardsIn.Standard_num`) and,
# on creation only, stores the standard's `Definitions` text and title.
definitions_query = """
MERGE(d:Definitions {parentStandardNum: $standardsIn.Standard_num})
    ON CREATE SET 
        d.text = $standardsIn.Definitions,
        d.standardTitle =  $standardsIn.Standard_title,
        d.nodeType = 'DEFINITIONS', 
        d.nodeCat = 'AACSB'
RETURN d
"""

# Cypher statement that upserts one `:Basis` node per standard.
# Matches on `parentStandardNum` (taken from `$standardsIn.Standard_num`) and,
# on creation only, stores the standard's `Basis_for_judgement` text and title.
basis_query = """
MERGE(b:Basis {parentStandardNum: $standardsIn.Standard_num})
    ON CREATE SET 
        b.text = $standardsIn.Basis_for_judgement,
        b.standardTitle =  $standardsIn.Standard_title,
        b.nodeType = 'BASIS', 
        b.nodeCat = 'AACSB'
RETURN b
"""

# Cypher statement that upserts one `:Documentation` node per standard.
# Matches on `parentStandardNum` (taken from `$standardsIn.Standard_num`) and,
# on creation only, stores the standard's `Supporting_docs` text and title.
docs_query = """
MERGE(d:Documentation {parentStandardNum: $standardsIn.Standard_num})
    ON CREATE SET 
        d.text = $standardsIn.Supporting_docs,
        d.standardTitle =  $standardsIn.Standard_title,
        d.nodeType = 'DOCUMENTATION', 
        d.nodeCat = 'AACSB'
RETURN d
"""

In [None]:
#
# @TODO Chunking Function
# @TODO Modify loop for standards
#
# NOTE(review): `json_file` was loaded from the SECTIONS export earlier in this
# notebook. The standards records have to be loaded into it (or this loop
# pointed at the standards data) before the `Standard_num` key will be found —
# confirm the data source.

node_count = 0
for standard in json_file:
    # Key spelled exactly as standards_query reads it (`$standardsIn.Standard_num`).
    print(f"Creating `:Standard ` node for standard ID {standard['Standard_num']}")
    # Use the standards query: section_query expects `$sectionsIn` and fails
    # when it is given only `standardsIn`.
    graphDB.query(standards_query,
            params={
                'standardsIn': standard
            })
    node_count += 1
print(f"Created {node_count} nodes")
