In [None]:
import csv
import os

import neo4j
import pandas as pd

# Connection settings are taken from the environment when present so that
# credentials do not have to live in the notebook. The fallbacks are the
# values that were previously hard-coded, so an unconfigured environment
# behaves exactly as before.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "1234")

driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
# Fail here, in the setup cell, if the server cannot be reached.
driver.verify_connectivity()

In [None]:
def load_log(localFile):
    """Read a CSV file into a header list and a DataFrame of its data rows.

    The first record of the file is treated as the header; every following
    record becomes one row of the DataFrame. Values are kept exactly as the
    ``csv`` module yields them (strings), no type conversion is applied.

    Args:
        localFile: Path of the CSV file to read.

    Returns:
        A tuple ``(headerCSV, log)`` where ``headerCSV`` is the list of column
        names from the first record (empty list for an empty file) and ``log``
        is a ``pandas.DataFrame`` holding the remaining records under those
        column names.
    """
    # newline='' hands line-ending handling to the csv module; without it,
    # '\r\n' sequences inside quoted fields are rewritten to '\n' on read.
    with open(localFile, newline='') as f:
        reader = csv.reader(f)
        # First record is the header; an empty file yields an empty header.
        headerCSV = next(reader, [])
        datasetList = list(reader)
    log = pd.DataFrame(datasetList, columns=headerCSV)

    return headerCSV, log

def _cypher_name(name):
    """Return ``name`` usable as a Cypher property name, backtick-quoted if needed."""
    if name.isidentifier():
        return name
    # Inside backticks a literal backtick is written doubled.
    return '`' + name.replace('`', '``') + '`'


def _create_node_query(logHeader, filename, variable, label):
    """Build the batched LOAD CSV statement shared by the public query builders."""
    if not logHeader:
        # Without columns the subquery would contain no CREATE clause at all.
        raise ValueError('logHeader must contain at least one column name')

    assignments = []
    for col in logHeader:
        key = _cypher_name(col)
        value = f'line.{key}'
        if col == 'timestamp':
            # The timestamp column is stored as a temporal value, not a string.
            value = f'datetime({value})'
        assignments.append(f'{key}: {value}')
    properties = ', '.join(assignments)
    create_line = f' CREATE ({variable}:{label} {{ {properties} }})'

    # NOTE(review): the URL is emitted as file://<filename>; confirm that the
    # filenames passed in resolve correctly for the target Neo4j version.
    load_line = f'LOAD CSV WITH HEADERS FROM "file://{filename}" as line'

    # The line layout (8-space indent, trailing space after the LOAD clause)
    # reproduces the text this module generated before the refactoring.
    pad = ' ' * 8
    return '\n'.join([
        '',
        f'{pad}{load_line} ',
        f'{pad}CALL {{',
        f'{pad}WITH line',
        f'{pad}{create_line}',
        f'{pad}}} IN TRANSACTIONS',
        pad,
    ])


def create_event_query(logHeader, filename, LogID = ""):
    """Build a Cypher statement that creates one ``Event`` node per CSV row.

    Every name in ``logHeader`` becomes a property of the node, read from the
    CSV column of the same name. The ``timestamp`` column is wrapped in
    ``datetime(...)``; all other columns are stored as read. Column names that
    are not plain identifiers are backtick-quoted.

    Args:
        logHeader: Sequence of CSV column names, in file order.
        filename: File name placed after ``file://`` in the LOAD CSV clause.
        LogID: Unused; kept so existing calls keep working.

    Returns:
        The Cypher text: LOAD CSV followed by a ``CALL { ... } IN TRANSACTIONS``
        subquery containing the CREATE clause.

    Raises:
        ValueError: If ``logHeader`` is empty.
    """
    return _create_node_query(logHeader, filename, 'e', 'Event')


def create_application_query(logHeader, filename, LogID = ""):
    """Build a Cypher statement that creates one ``Application`` node per CSV row.

    Every name in ``logHeader`` becomes a property of the node, read from the
    CSV column of the same name. The ``timestamp`` column is wrapped in
    ``datetime(...)``; all other columns are stored as read. Column names that
    are not plain identifiers are backtick-quoted.

    Args:
        logHeader: Sequence of CSV column names, in file order.
        filename: File name placed after ``file://`` in the LOAD CSV clause.
        LogID: Unused; kept so existing calls keep working.

    Returns:
        The Cypher text: LOAD CSV followed by a ``CALL { ... } IN TRANSACTIONS``
        subquery containing the CREATE clause.

    Raises:
        ValueError: If ``logHeader`` is empty.
    """
    return _create_node_query(logHeader, filename, 'a', 'Application')

def clear_relations(driver):
    """Delete every relationship in the database, in batched transactions.

    Args:
        driver: Driver object passed on to ``run_query``.
    """
    # The pattern is directed so that each relationship is matched once. An
    # undirected pattern matches a relationship from both of its ends and
    # would feed duplicate rows to DELETE.
    delete_relation_query = """
        MATCH ()-[r]->()
        CALL {
        WITH r
        DELETE r
        } IN TRANSACTIONS
        """
    run_query(driver, delete_relation_query)

def clear_nodes(driver):
    """Delete every node in the database, in batched transactions.

    The statement uses plain ``DELETE``, not ``DETACH DELETE``.

    Args:
        driver: Driver object passed on to ``run_query``.
    """
    # A plain (non-f) string keeps the Cypher braces literal.
    cypher = """
        MATCH (n)
        CALL {
        WITH n
        DELETE n
        } IN TRANSACTIONS
        """
    run_query(driver, cypher)

def clear_db(driver):
    """Empty the database: relationships first, then nodes.

    Args:
        driver: Driver object handed to each clearing step.
    """
    # Relationships go first; the node step issues a plain DELETE.
    for wipe in (clear_relations, clear_nodes):
        wipe(driver)
    
def run_query(driver, query):
    """Run one Cypher statement in its own session and return its values.

    Args:
        driver: Object whose ``session()`` returns a context manager exposing
            ``run(query)`` (for example a neo4j driver).
        query: Cypher text to execute.

    Returns:
        Whatever ``result.value()`` returns for the executed statement, or
        ``None`` if ``session.run`` itself returned ``None``.
    """
    with driver.session() as session:
        result = session.run(query)
        # Identity check: "!= None" would defer to the result object's own
        # comparison methods.
        if result is None:
            return None
        # Read the values before the with-block closes the session.
        return result.value()


In [None]:
# optionally first clear the DB
# clear_db(driver)

In [None]:
%%time

# Read the prepared events CSV locally to obtain its column names; the
# DataFrame (csvlog) is not used further in this cell.
header, csvlog = load_log('./prepared_data/full_events.csv')
# Build the LOAD CSV statement that creates one Event node per row.
# NOTE(review): the query refers to the bare file name 'full_events.csv' --
# presumably the same file is also available to the Neo4j server; confirm.
q_events = create_event_query(header, 'full_events.csv', '')
# Execute the import; as the last expression, its return value is the cell output.
run_query(driver, q_events)

In [None]:
%%time

# Read the prepared traces CSV locally to obtain its column names; the
# DataFrame (csvlog) is not used further in this cell. Note that header and
# csvlog are rebound here, replacing the values from the events file.
header, csvlog = load_log('./prepared_data/full_traces.csv')
# Build the LOAD CSV statement that creates one Application node per row.
# NOTE(review): the query refers to the bare file name 'full_traces.csv' --
# presumably the same file is also available to the Neo4j server; confirm.
q_traces = create_application_query(header, 'full_traces.csv', '')
# Execute the import; as the last expression, its return value is the cell output.
run_query(driver, q_traces)