In [1]:
# hide
# default_exp utils

# Sciflow utils

In [2]:
# export

import ast
import os
from pathlib import Path

import _ast
import nbformat
import pandas as pd
import pyodbc
from nbdev.export import find_default_export, get_config, read_nb
from nbqa.find_root import find_project_root

In [3]:
%load_ext autoreload
%autoreload 2

# Paths

In [4]:
# export


def lib_path(*lib_relative_path):
    """Build a path inside the project from root-relative segments.

    The project root is looked up with ``find_project_root``, seeded with the
    resolved current working directory, and the given segments are appended
    to it.

    Args:
        *lib_relative_path: Path segments relative to the project root.

    Returns:
        pathlib.Path: The project root joined with the given segments.
    """
    working_dir = str(Path(".").resolve())
    project_root = find_project_root(srcs=(working_dir,))
    return Path(project_root).joinpath(*lib_relative_path)

In [5]:
# Sanity checks for `lib_path`. The first expects the project root directory
# to be named `sciflow`. The second compares against a path resolved from the
# current working directory, so it only holds when the notebook is run from
# the project's `nbs` directory.
assert str(lib_path("nbs")).endswith("sciflow/nbs")
assert Path("test/test_clustering.ipynb").resolve() == lib_path(
    "nbs", "test", "test_clustering.ipynb"
)

# File

In [6]:
# export


def load_nb(nb_path):
    """Read a notebook and work out which library module it exports to.

    Args:
        nb_path: Path of the notebook, passed straight to ``read_nb``.

    Returns:
        tuple: ``(nb, module_path)`` where ``nb`` is whatever ``read_nb``
        returned and ``module_path`` is the path (``str``) of the ``.py`` file
        under the configured ``lib_path`` matching the notebook's default
        export.

    Raises:
        ValueError: If no default export could be found in the notebook.
    """
    nb = read_nb(nb_path)
    default_export = find_default_export(nb["cells"])
    if default_export is None:
        # Without this guard the failure surfaces as an opaque
        # "'NoneType' object has no attribute 'replace'".
        raise ValueError(
            f"No default export (`# default_exp`) found in notebook: {nb_path}"
        )
    # Dotted module names map onto nested directories below lib_path.
    module_name = default_export.replace(".", "/")
    module_path = os.path.join(get_config().path("lib_path"), f"{module_name}.py")
    return nb, module_path

In [7]:
# Load the test notebook together with the path of its exported module.
nb, module_path = load_nb("test/test_clustering.ipynb")

In [8]:
# The notebook must come back as an nbformat node and the exported module
# file must exist on disk.
assert isinstance(nb, nbformat.notebooknode.NotebookNode)
assert os.path.exists(module_path)

In [9]:
# export


def load_nb_module(nb_path):
    """Load a notebook together with the source of the module it exports to.

    Args:
        nb_path: Path of the notebook, passed to ``load_nb``.

    Returns:
        tuple: ``(nb, module_code)`` where ``nb`` is the notebook returned by
        ``load_nb`` and ``module_code`` is the text of the exported module
        file.
    """
    nb, module_path = load_nb(nb_path)
    with open(module_path, "r") as module_file:
        # Read the file verbatim. Joining `readlines()` output with "\n"
        # doubles every newline, which corrupts multi-line string literals
        # and breaks backslash line continuations.
        module_code = module_file.read()
    return nb, module_code

In [10]:
# Load the test notebook together with the source code of its exported module.
nb, module_code = load_nb_module("test/test_clustering.ipynb")

In [11]:
# The notebook must be an nbformat node and the module source must parse as
# a Python module.
assert isinstance(nb, nbformat.notebooknode.NotebookNode)
assert isinstance(ast.parse(module_code), _ast.Module)

# ODBC Connection

In [12]:
# export


def prepare_env(env_file_path: str = None):
    """Load ``KEY=VALUE`` pairs from an env file into ``os.environ``.

    Each non-blank, non-comment line is split on the first ``=``. An
    ``export `` prefix on the key is dropped and the value is stored as
    written (no quote removal or variable expansion is done).

    Args:
        env_file_path: Path of the env file. Defaults to ``~/.sciflow/env``.

    Raises:
        EnvironmentError: If the env file does not exist.
        ValueError: If a non-blank, non-comment line contains no ``=``.
    """
    if env_file_path is None:
        env_file_path = os.path.expanduser("~/.sciflow/env")
    # TODO create this for user
    if not os.path.exists(env_file_path):
        raise EnvironmentError(
            f"You need to create a Sciflow environment vars file at: {env_file_path}"
        )
    with open(env_file_path, "r") as env_file:
        for line_number, raw_line in enumerate(env_file, start=1):
            line = raw_line.strip()
            # Blank lines and comments are legal in an env file; skip them
            # rather than failing on the unpack below.
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            if not separator:
                # The line content is deliberately not echoed: env files
                # typically hold credentials.
                raise ValueError(
                    f"Invalid line {line_number} in {env_file_path}: expected KEY=VALUE"
                )
            os.environ[key.replace("export ", "").strip()] = value

In [13]:
# export


def odbc_connect(env_file_path: str = None):
    """Open a pyodbc connection configured from environment variables.

    If any required variable is missing from ``os.environ``, the env file is
    loaded with ``prepare_env`` before connecting.

    Args:
        env_file_path: Optional env file path forwarded to ``prepare_env``.

    Returns:
        The object returned by ``pyodbc.connect`` (opened with
        ``autocommit=True``).

    Raises:
        KeyError: If a required variable is still missing after the env file
            has been loaded.
    """
    # SSL_CERTS is used in the connection string below, so it must take part
    # in the "is the environment ready?" check as well.
    required_vars = (
        "ODBC_DRIVER",
        "ODBC_HOST",
        "ODBC_PORT",
        "ODBC_USER",
        "ODBC_PWD",
        "SSL_CERTS",
    )
    if not all(v in os.environ for v in required_vars):
        prepare_env(env_file_path)
    missing_vars = [v for v in required_vars if v not in os.environ]
    if missing_vars:
        raise KeyError(
            f"Missing ODBC environment variables: {', '.join(missing_vars)}"
        )
    connection = pyodbc.connect(
        """Driver={}; 
           ConnectionType=Direct;
           HOST={};
           PORT={};
           AuthenticationType=Plain;
           UID={};
           PWD={};
           SSL=1;
           TrustedCerts={}""".format(
            os.environ["ODBC_DRIVER"],
            os.environ["ODBC_HOST"],
            os.environ["ODBC_PORT"],
            os.environ["ODBC_USER"],
            os.environ["ODBC_PWD"],
            os.environ["SSL_CERTS"],
        ),
        autocommit=True,
    )
    return connection

In [14]:
# export


def query(conn, sql):
    """Run ``sql`` against ``conn`` and return the rows as a DataFrame.

    A cursor from ``conn`` is held open as a context manager for the duration
    of the read; the read itself is done by ``pd.read_sql`` on ``conn``.

    Args:
        conn: Connection object providing ``cursor()``.
        sql: SQL text to execute.

    Returns:
        pandas.DataFrame: The result of ``pd.read_sql(sql, conn)``.
    """
    with conn.cursor():
        return pd.read_sql(sql, conn)

In [15]:
# Clear any ODBC settings already present so the cells below exercise loading
# them from the env file. `pop` with a default removes each variable
# independently, so one missing variable does not leave the others set.
for env_var in (
    "ODBC_DRIVER",
    "ODBC_HOST",
    "ODBC_PORT",
    "ODBC_USER",
    "ODBC_PWD",
    "SSL_CERTS",
):
    os.environ.pop(env_var, None)

In [16]:
%%time

# Connect using the default env file; the required variables were cleared above.
conn = odbc_connect()

CPU times: user 21.8 ms, sys: 1.06 ms, total: 22.8 ms
Wall time: 556 ms


In [17]:
%%time
# The connection must be a pyodbc connection and a trivial query must
# round-trip through `query`.
assert isinstance(conn, pyodbc.Connection)
assert query(conn, "SELECT 1 AS test_col")["test_col"].iloc[0] == 1

CPU times: user 3.2 ms, sys: 0 ns, total: 3.2 ms
Wall time: 288 ms
