In [1]:
import os
from neo4j import GraphDatabase
from tqdm import tqdm

from config import NEO4J, DATA_DIR, NEO4J_IMPORT_DIR
from cypher_queries import QUERIES

In [2]:
class Neo4jConnection:
    """
    Thin wrapper around a Neo4j driver that runs each query in its own session.

    Instances can be used as a context manager; the driver is then closed on
    exit even if a query raised inside the ``with`` block.
    """

    def __init__(self, uri, user, password, database):
        """
        Create the underlying driver.

        Arguments
        ----------
        uri : str
            Connection URI handed to ``GraphDatabase.driver``.
        user : str
        password : str
            Combined with ``user`` into the ``auth`` tuple.
        database : str
            Forwarded to ``GraphDatabase.driver`` as the ``database`` setting.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password), database=database)

    def __enter__(self):
        """
        Enter a ``with`` block.

        Returns
        ----------
        self : Neo4jConnection
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Close the driver when the ``with`` block ends.

        Returns
        ----------
        False, so an exception raised inside the block is never suppressed.
        """
        self.close()
        return False

    def close(self):
        """
        Closes the driver.
        """
        self.driver.close()

    def query(self, query, **kwargs):
        """
        Runs a query in a freshly opened session.

        Arguments
        ----------
        query : query
            Passed unchanged as the first argument of ``session.run``.
        **kwargs
            Forwarded unchanged to ``session.run`` (the notebook uses this to
            supply the ``path`` value).

        Returns
        ----------
        response : list
            Everything yielded by ``session.run``, collected into a list
            while the session is still open.
        """
        with self.driver.session() as session:
            # Materialise inside the ``with`` block: the result is consumed
            # before the session is closed.
            return list(session.run(query, **kwargs))

In [3]:
# One entry per CSV category; "files" starts empty and is filled from the
# directory listing by the bulk-import cell.
files_list = [
    {"name": category, "path": f"{DATA_DIR}/data/csv/{category}", "files": []}
    for category in ("ecs", "tax", "pwy")
]

# Shared connection used by every import cell of this notebook.
conn = Neo4jConnection(NEO4J["URL"], NEO4J["USER"], NEO4J["PW"], NEO4J["DB"])

# Each job is [cypher query, value passed to the query as `path`, progress label].
master_job = [QUERIES["master_data"], "file:///master.csv", "Importing Master Data"]
queries_store = [master_job]

In [4]:
import numpy as np
import pandas as pd

In [5]:
# Load the master table and print its column / dtype summary.
master = pd.read_csv(filepath_or_buffer="../data/master.csv")
master.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7529 entries, 0 to 7528
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Subject_ID        7529 non-null   int64 
 1   Sample_body_site  7529 non-null   object
 2   Visit_number      7529 non-null   int64 
 3   Visit_ID          7529 non-null   object
 4   Type              7529 non-null   object
 5   urls              7529 non-null   object
 6   Age               7529 non-null   int64 
 7   Sex               7529 non-null   object
 8   Diagnosis         7529 non-null   object
 9   Race              7529 non-null   object
dtypes: int64(3), object(7)
memory usage: 588.3+ KB


In [7]:
# Import the master data on its own. `conn` is deliberately NOT closed here:
# the bulk-import cell further down reuses the same connection and closes it
# once all queries have run. Closing it at this point made a top-to-bottom run
# hand an already-closed driver to that cell.
master_query, master_path = queries_store[0][:2]
conn.query(master_query, path=master_path)

In [4]:
# Discover the CSV files of every category and queue one import job per file.
for item in files_list:
    # isdir (rather than exists) so a stray file at this path cannot reach
    # os.listdir and raise NotADirectoryError.
    if not os.path.isdir(item["path"]):
        print(
            f"Unable to locate folder {item['path']}. {item['name']}-data will not be imported."
        )
        continue

    # os.listdir returns entries in arbitrary order and includes everything in
    # the folder. Sort for a reproducible import order, and skip sub-directories
    # and hidden files (e.g. ".DS_Store") so they are not handed to the import
    # query.
    item["files"] = sorted(
        entry
        for entry in os.listdir(item["path"])
        if not entry.startswith(".")
        and os.path.isfile(os.path.join(item["path"], entry))
    )

    for file in item["files"]:
        job = [
            QUERIES[item["name"]],
            f"file:///{item['name']}/{file}",
            f"Importing {item['name']} data from {file}",
        ]
        # Re-running this cell must not queue the same import a second time.
        if job not in queries_store:
            queries_store.append(job)

print("Executing queries:")
try:
    with tqdm(total=len(queries_store)) as pbar:
        for query in queries_store:
            # query is [cypher query, path parameter, progress label].
            pbar.set_description(query[2])
            pbar.refresh()
            conn.query(query[0], path=query[1])

            pbar.update()
finally:
    # Release the driver even when one of the imports fails part-way through.
    conn.close()

NameError: name 'files_list' is not defined