In [5]:
from neo4j import GraphDatabase
import pandas as pd
from dotenv import load_dotenv, find_dotenv
import os

# Read Example CSV
- Take the example CSV, load it into a pandas DataFrame, and push its columns, primary key, labels, etc. into the graph database as a table node.

In [6]:
# Locate a .env file and load its variables into the process environment
# (the GRAPH_* settings read via os.getenv below are expected to come from it).
load_dotenv(find_dotenv())

True

In [4]:
# Module-level driver built from the GRAPH_* environment variables; the raw
# session cells further down open their sessions from it.
driver = GraphDatabase.driver(
    uri=os.getenv('GRAPH_URI'),
    auth=(os.getenv('GRAPH_USER'), os.getenv('GRAPH_PASS')),
)


In [6]:
# Load the example oil pipeline accidents CSV; the path is relative, so it
# depends on the directory the notebook kernel is running from.
oil_pipeline_accidents = pd.read_csv('../../archive/demo/oil_pipeline_accidents.csv')

In [18]:
# Show the column names; this is the list Graph.add_pandas_table_node stores
# as the node's `columns` property.
oil_pipeline_accidents.columns.tolist()

['Report Number',
 'Supplemental Number',
 'Accident Year',
 'Accident Date/Time',
 'Operator ID',
 'Operator Name',
 'Pipeline/Facility Name',
 'Pipeline Location',
 'Pipeline Type',
 'Liquid Type',
 'Liquid Subtype',
 'Liquid Name',
 'Accident City',
 'Accident County',
 'Accident State',
 'Accident Latitude',
 'Accident Longitude',
 'Cause Category',
 'Cause Subcategory',
 'Unintentional Release (Barrels)',
 'Intentional Release (Barrels)',
 'Liquid Recovery (Barrels)',
 'Net Loss (Barrels)',
 'Liquid Ignition',
 'Liquid Explosion',
 'Pipeline Shutdown',
 'Shutdown Date/Time',
 'Restart Date/Time',
 'Public Evacuations',
 'Operator Employee Injuries',
 'Operator Contractor Injuries',
 'Emergency Responder Injuries',
 'Other Injuries',
 'Public Injuries',
 'All Injuries',
 'Operator Employee Fatalities',
 'Operator Contractor Fatalities',
 'Emergency Responder Fatalities',
 'Other Fatalities',
 'Public Fatalities',
 'All Fatalities',
 'Property Damage Costs',
 'Lost Commodity Costs',

In [16]:
class Graph:
    """Small wrapper around a Neo4j driver for recording table metadata as a graph.

    Nodes describe tables (or any named thing) and relationships describe how
    they connect. Every write runs inside a managed write transaction.

    Property values (names, keys, columns, descriptions) are always sent as
    query parameters and never spliced into the Cypher text, so quotes or
    other special characters in them cannot break or alter the query. Labels
    and relationship types cannot be parameterized in Cypher, so they are
    validated as plain identifiers and emitted backtick-quoted.

    The object can be used as a context manager; leaving the ``with`` block
    closes the driver.
    """

    # CONNECTION
    def __init__(self, uri, user, password):
        """Open a driver for ``uri`` authenticating as ``user``/``password``."""
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None so exceptions raised inside the ``with`` body propagate.
        self.close()

    # INTERNAL HELPERS
    @staticmethod
    def _quote_identifier(value, kind):
        """Return ``value`` backtick-quoted, or raise ValueError if it is not a plain identifier.

        ``kind`` is only used in the error message ("label", "relationship type").
        """
        if not isinstance(value, str) or not value.isidentifier():
            raise ValueError(f"invalid {kind}: {value!r}")
        # An identifier cannot contain a backtick or backslash, so plain
        # wrapping is sufficient here.
        return f"`{value}`"

    @classmethod
    def _label_clause(cls, labels):
        """Turn ``"A:B"`` into the Cypher fragment ``:`A`:`B```.

        Raises ValueError when ``labels`` is not a string or any
        colon-separated part is not a plain identifier.
        """
        if not isinstance(labels, str):
            raise ValueError(f"invalid labels: {labels!r}")
        parts = [part.strip() for part in labels.split(":")]
        return "".join(":" + cls._quote_identifier(part, "label") for part in parts)

    @classmethod
    def _relationship_statement(cls, name_node_one, name_node_two, relation_name):
        """Build the (query, parameters) pair that links two nodes by name."""
        relation = cls._quote_identifier(relation_name, "relationship type")
        query = (
            "MATCH (a {name: $name_node_one}), (b {name: $name_node_two}) "
            f"CREATE (a)-[:{relation}]->(b)"
        )
        parameters = {"name_node_one": name_node_one, "name_node_two": name_node_two}
        return query, parameters

    def _write(self, *statements):
        """Run every ``(query, parameters)`` pair inside ONE write transaction."""

        def work(tx):
            for query, parameters in statements:
                # Consume inside the transaction; a Result must not be handed
                # out of the transaction function.
                tx.run(query, parameters).consume()

        with self._driver.session() as session:
            session.execute_write(work)

    # OPERATIONS
    def add_table_node(self, name, primary_key, columns, labels):
        """Create a node describing a table.

        Args:
            name: value of the node's ``name`` property.
            primary_key: key column name, or a list/tuple of column names for
                a composite key (stored as a list property).
            columns: iterable of column names, stored as a list property.
            labels: one or more labels separated by ``:`` (e.g. ``"TABLE:JOIN_TABLE"``).

        Raises:
            ValueError: if any label is not a plain identifier.
        """
        if isinstance(primary_key, (list, tuple)):
            primary_key = list(primary_key)
        query = (
            f"CREATE (n{self._label_clause(labels)} "
            "{name: $name, primaryKey: $primary_key, columns: $columns})"
        )
        parameters = {
            "name": name,
            "primary_key": primary_key,
            "columns": list(columns),
        }
        self._write((query, parameters))

    def add_basic_node(self, name, description, labels):
        """Create a node carrying only ``name`` and ``description`` properties.

        Raises:
            ValueError: if any label is not a plain identifier.
        """
        query = (
            f"CREATE (n{self._label_clause(labels)} "
            "{name: $name, description: $description})"
        )
        self._write((query, {"name": name, "description": description}))

    def add_pandas_table_node(self, pandas_dataframe, name, primary_key, labels):
        """Create a table node whose columns are taken from ``pandas_dataframe.columns``."""
        self.add_table_node(
            name=name,
            primary_key=primary_key,
            columns=pandas_dataframe.columns.tolist(),
            labels=labels,
        )

    def delete_all(self):
        """Delete every node and relationship in the database."""
        self._write(("MATCH (n) DETACH DELETE n", {}))

    def delete_node(self, name):
        """Delete every node whose ``name`` property equals ``name``.

        DETACH removes the node's relationships first. Node names SHOULD be
        unique; if they are not, all nodes sharing the name are deleted.
        """
        self._write(("MATCH (n {name: $name}) DETACH DELETE n", {"name": name}))

    def add_relationship(self, name_node_one, name_node_two, relation_name):
        """Create ``(one)-[:relation_name]->(two)`` between nodes matched by name.

        Uses CREATE, so calling it again adds another relationship. If either
        name matches no node, nothing is created.

        Raises:
            ValueError: if ``relation_name`` is not a plain identifier.
        """
        self._write(
            self._relationship_statement(name_node_one, name_node_two, relation_name)
        )

    def delete_relationship(self, name_node_one, name_node_two, relation_name):
        """Delete ``relation_name`` relationships going from node one to node two.

        Raises:
            ValueError: if ``relation_name`` is not a plain identifier.
        """
        relation = self._quote_identifier(relation_name, "relationship type")
        query = (
            f"MATCH (a {{name: $name_node_one}})-[r:{relation}]->(b {{name: $name_node_two}}) "
            "DELETE r"
        )
        parameters = {"name_node_one": name_node_one, "name_node_two": name_node_two}
        self._write((query, parameters))

    # Creates 4 relationships between 3 nodes to signify you need to join the join table to get information from both tables accurately
    def add_join_table_relationship(self, name_node_one, name_node_two, name_join_table):
        """Link two tables through a join table.

        Adds JOIN_TABLE_NEEDED in both directions between the two tables and
        a JOIN from each table to the join table. All four relationships are
        created in a single transaction so the wiring is never half-applied.
        """
        self._write(
            self._relationship_statement(name_node_one, name_node_two, "JOIN_TABLE_NEEDED"),
            self._relationship_statement(name_node_two, name_node_one, "JOIN_TABLE_NEEDED"),
            self._relationship_statement(name_node_one, name_join_table, "JOIN"),
            self._relationship_statement(name_node_two, name_join_table, "JOIN"),
        )


In [17]:
# Build the Graph helper from connection settings held in the environment.
g = Graph(
    uri=os.getenv('GRAPH_URI'),
    user=os.getenv('GRAPH_USER'),
    password=os.getenv('GRAPH_PASS'),
)

In [43]:
# Smoke test: create a throwaway table node carrying two labels.
g.add_table_node(
    name="BEN",
    primary_key="Test",
    columns=["ben", "noel"],
    labels="TABLE:NOEL",
)

In [21]:
# Register the loaded CSV as a table node; its columns come from the DataFrame.
g.add_pandas_table_node(
    pandas_dataframe=oil_pipeline_accidents,
    name="OilExample",
    primary_key="Report Number",
    labels="TABLE:OIL",
)

In [50]:
# NOTE(review): no cell shown in this notebook creates a node named "JSA";
# confirm it exists, otherwise the MATCH finds nothing and no relationship is made.
g.add_relationship(name_node_one="BEN", name_node_two="JSA", relation_name="LOVES")

In [53]:
# Undo the relationship created in the previous cell.
g.delete_relationship(name_node_one="BEN", name_node_two="JSA", relation_name="LOVES")

In [8]:
# Removes every node named "BEN" together with its relationships.
g.delete_node(name="BEN")

In [13]:
# WARNING: wipes every node and relationship in the connected database.
g.delete_all()

In [10]:
# Dry run: build a CREATE statement from sample values and print it so the
# generated Cypher text can be inspected before anything is sent to the database.
name = "testTable"
primary_key = "test_table_primary_key"
columns = ['test column 1', 'test column 2']
labels = "TEST:PLAYER:LABELS"
# `query` stays in the notebook namespace; the raw-session cell below runs it.
query = f"""
CREATE ({name}:{labels} {{name: "{name}", primaryKey: "{primary_key}", columns: {columns}}});
"""
print(query)


CREATE (testTable:TEST:PLAYER:LABELS {name: "testTable", primaryKey: "test_table_primary_key", columns: ['test column 1', 'test column 2']});



In [21]:
# def create_table_node(tx, name, primary_key, columns, labels):
#     query = f"""
#         CREATE ({name}:{labels} {{name: "{name}", primaryKey: "{primary_key}", columns: {columns}}});
#         """
#     return tx.run(
#         query
#     )
# session.execute_write(create_table_node, "testTable2", "testTable2Key", ["lame", "sucks"], "TABLE:JOIN_TABLE")

<neo4j._sync.work.result.Result at 0x18a789fae48>

In [20]:
# Open a raw session on the module-level driver for ad-hoc queries.
session = driver.session()


In [18]:
# Execute the statement held in `query` (built in the preview cell above).
session.run(query)

<neo4j._sync.work.result.Result at 0x18a789d5788>

In [19]:
# Release the raw session opened above.
session.close()

In [14]:
# Register the three tables of the Students <-> Classes example.
# Single-column keys are passed as plain strings; only the composite
# Enrollments key is passed as a list.
g.add_table_node(name = "Students",
                 primary_key = "Student ID",
                 columns = ["Student ID", "Last Name", "First Name"],
                 labels = "TABLE")
g.add_table_node(name = "Enrollments",
                 primary_key = ["Student ID", "Class ID"],
                 columns = ["Student ID", "Class ID", "Enrollment ID"],
                 labels = "TABLE:JOIN_TABLE")
g.add_table_node(name = "Classes",
                 primary_key = "Class ID",
                 columns = ["Class ID", "Title", "Description"],
                 labels = "TABLE")

In [18]:
# Wire Students and Classes together through the Enrollments join table
# (JOIN_TABLE_NEEDED both ways, plus a JOIN from each table to Enrollments).
g.add_join_table_relationship("Students", "Classes", "Enrollments")

In [11]:
# Manual version of the join wiring. These are three of the four relationships
# that add_join_table_relationship creates; add_relationship uses CREATE, so
# running both cells produces duplicate relationships.
g.add_relationship("Students", "Classes", "JOIN_TABLE_NEEDED")
g.add_relationship("Students", "Enrollments", "JOIN")
g.add_relationship("Classes", "Enrollments", "JOIN")
