In [None]:
# default_exp packaging

# Simplified conda & pip packaging & deployment

> Package management can be challenging for Data Science workflows. Many enterprises will not be able to publish all their packages to public repositories like pypi or conda. Different stages of the Data Science lifecycle require different capabilities with respect to library management. Early stage research values flexibility and the ability to add the latest packages on the fly. Production models want reliability and known, fixed dependencies. This module aims to make it easier to meet these goals.

# 1. Determine minimal dependencies

We are explicit about the code that makes up the Data Science workflow using the sciflow steps/flows method. This helps extract the virtual environments which are needed to run the flows & allows downstream processes to have a reliable (pinned) version of dependencies that work for a workflow instance.

## 1.1 Dependency Calculation
* [pigar](https://github.com/damnever/pigar): the pigar library is used to calculate the dependencies of all modules in the `sciflow` lib.

# 2. Making It Easier

Additions to `Makefile`

```
local_release: art_pip art_conda
art_pip: deploy to private artifactory pypi repository
art_conda: deploy to private artifactory conda channel
```

The `sciflow_prepare` command is used to prepare the environment with any authentication details that are needed for package management.

In [None]:
# export

import os
import subprocess
import sys
from configparser import ConfigParser
from pathlib import Path
from typing import List
from urllib.parse import urlparse

import yaml
from fastcore.script import call_parse
from nbdev.export import Config

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# export


def reqs_file_to_sep_str(pip_reqs_path: Path) -> str:
    """Read a pip requirements file and return its content as a single string.

    Args:
        pip_reqs_path: Location of the requirements file to read.

    Returns:
        The result of passing the file's lines to `reqs_lines_to_sep_str`.
    """
    with open(pip_reqs_path, "r") as reqs_handle:
        requirement_lines = reqs_handle.readlines()
    return reqs_lines_to_sep_str(requirement_lines)

In [None]:
# export


def run_py_module(command, args, cwd, env=None):
    """Run ``python -m <command>`` with the current interpreter and wait for it.

    Args:
        command: Name of the module to execute with ``-m``.
        args: Iterable of arguments; each one is converted with ``str`` and
            stripped of surrounding whitespace before being passed on.
        cwd: Working directory for the child process.
        env: Optional environment mapping for the child process; ``None``
            inherits the current environment.

    Returns:
        The exit code of the process. This is always ``0`` because any other
        exit code raises.

    Raises:
        EnvironmentError: If the module exits with a non-zero code. The message
            is the captured stderr, falling back to the captured stdout and
            finally to a generic description of the failed command.
    """
    completed = subprocess.run(
        [sys.executable, "-m", command, *(str(arg).strip() for arg in args)],
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        cwd=cwd,
        env=env,
        universal_newlines=True,
    )

    if completed.returncode != 0:
        # A tool may report its failure on stdout only; fall back so the raised
        # error never carries an empty message.
        message = (
            completed.stderr
            or completed.stdout
            or "python -m {} exited with code {}".format(command, completed.returncode)
        )
        raise EnvironmentError(message)
    return completed.returncode

In [None]:
# export


def determine_dependencies(
    out_dir: Path = None, generated_pip_file_name: str = "requirements-generated.txt"
):
    """Generate a requirements file for the library with pigar and return its content.

    pigar is executed as a module (via `run_py_module`) with ``out_dir`` as the
    working directory, so the generated file is written relative to ``out_dir``.

    Args:
        out_dir: Directory in which pigar runs and where the generated file is
            read from. Defaults to the parent directory of the configured
            ``lib_path``.
        generated_pip_file_name: File name passed to pigar's ``-p`` option.

    Returns:
        The generated requirements as a single string, as produced by
        `reqs_file_to_sep_str`.

    Raises:
        EnvironmentError: Raised by `run_py_module` when the pigar process exits
            with a non-zero code (presumably including the case where pigar is
            not installed - TODO confirm).
    """
    # The previous `try: pass / except:` guard could never trigger, so it was
    # removed; failures are reported by run_py_module instead.
    lib_path = Config().path("lib_path")
    if out_dir is None:
        out_dir = lib_path.resolve().parent

    pigar_args = ["-p", generated_pip_file_name, "-P", lib_path]
    run_py_module("pigar", pigar_args, out_dir)

    return reqs_file_to_sep_str(os.path.join(out_dir, generated_pip_file_name))

In [None]:
# export


def reqs_lines_to_sep_str(req_lines: List[str], sep: str = " "):
    """Collapse requirement lines into one string joined by ``sep``.

    Blank lines and comment lines (first non-whitespace character is ``#``) are
    dropped. Spaces inside each remaining line are removed, so
    ``"nbqa ~= 0.5.6"`` becomes ``"nbqa~=0.5.6"``.

    Args:
        req_lines: Lines of a pip requirements file.
        sep: Separator placed between the requirements in the result.

    Returns:
        The cleaned requirements joined by ``sep``; an empty string when no
        requirement lines remain.
    """
    # Strip first so indented comments and whitespace-only lines are recognised.
    stripped_lines = (line.strip() for line in req_lines)
    return sep.join(
        line.replace(" ", "")
        for line in stripped_lines
        if line and not line.startswith("#")
    )

In [None]:
# Test: determine_dependencies writes the generated requirements file into out_dir.
test_dir = Path("test").resolve()
generated_reqs_path = os.path.join(test_dir, "requirements-generated.txt")
# Start from a clean state so the existence check below is meaningful.
if os.path.exists(generated_reqs_path):
    os.remove(generated_reqs_path)
assert not os.path.exists(generated_reqs_path)
determine_dependencies(out_dir=test_dir)
assert os.path.exists(generated_reqs_path)
# Remove the generated file again so the test leaves nothing behind.
os.remove(generated_reqs_path)

# Requirement.txt Manipulation

> Read pip requirements file and convert to a structure that can be used to transform that output to a different format.

For more information see here:

https://www.python.org/dev/peps/pep-0440/#version-specifiers

In [None]:
# Sample requirements lines covering several version specifiers, a blank line
# and comment lines; used to exercise reqs_lines_to_sep_str below.
test_lines = (
    "fastcore == 1.3.19",
    "\n",
    "#",
    "nbformat >= 5.0.8",
    "# scidev/nb_lint.py: 10,11,12",
    "nbqa ~= 0.5.6",
    "nbqa <=0.5.6",
)

In [None]:
# Comments and blank lines are dropped and spaces around specifiers are removed.
assert (
    "fastcore==1.3.19 nbformat>=5.0.8 nbqa~=0.5.6 "
    "nbqa<=0.5.6" == reqs_lines_to_sep_str(test_lines)
)

In [None]:
# Generate the requirements file, read it back through reqs_file_to_sep_str,
# then remove the generated file. Relies on test_dir and generated_reqs_path
# defined in an earlier cell.
determine_dependencies(out_dir=test_dir)
reqs_str = reqs_file_to_sep_str(generated_reqs_path)
os.remove(generated_reqs_path)

In [None]:
# export


def update_requirements(
    project_dir: Path = None, output_filename: str = "settings.ini"
):
    """Write the calculated dependencies into the ``requirements`` setting.

    Reads ``settings.ini`` from ``project_dir``, sets ``requirements`` in the
    ``DEFAULT`` section to the string returned by `determine_dependencies`, and
    writes the whole configuration to ``output_filename`` in ``project_dir``.

    Args:
        project_dir: Directory holding ``settings.ini``. Defaults to the parent
            directory of the configured ``lib_path``.
        output_filename: Name of the file the updated configuration is written
            to. With the default, ``settings.ini`` is overwritten in place.
    """
    if project_dir is None:
        project_dir = Config().path("lib_path").resolve().parent

    config = ConfigParser(delimiters=["="])
    config.read(os.path.join(project_dir, "settings.ini"))

    # determine_dependencies also (re)generates the requirements file in project_dir.
    reqs_str = determine_dependencies(out_dir=project_dir)
    config.set("DEFAULT", "requirements", reqs_str)

    with open(os.path.join(project_dir, output_filename), "w") as configfile:
        config.write(configfile)

In [None]:
# Exercise update_requirements against the test project; with the default
# output_filename this rewrites settings.ini inside test_dir.
determine_dependencies(out_dir=test_dir)
update_requirements(test_dir)

In [None]:
# Re-read the settings file written by update_requirements and check that the
# generated requirements mention nbdev.
config = ConfigParser(delimiters=["="])
test_config_file = os.path.join(test_dir, "settings.ini")
config.read(test_config_file)
assert "nbdev" in config.get("DEFAULT", "requirements")

In [None]:
# Settings that must still be present in settings.ini after update_requirements
# has rewritten it (checked in the next cell).
required_keys = (
    "lib_name",
    "description",
    "version",
    "custom_sidebar",
    "license",
    "status",
    "console_scripts",
    "nbs_path",
    "lib_path",
    "title",
    "tst_flags",
)

In [None]:
# Every required key must be readable from the DEFAULT section.
# NOTE(review): ConfigParser.get presumably raises for a missing option rather
# than returning None, so a missing key would surface as an exception instead of
# a failed assertion - confirm this is the intended check.
assert all([config.get("DEFAULT", k) is not None for k in required_keys])

# Create conda build file

In [None]:
# export


def create_conda_meta_file(project_dir: Path = None, out_file: str = "meta.yaml"):
    """Write a conda build meta file describing the library and its dependencies.

    The package name and version come from the nbdev ``Config``; the ``run``
    requirements are the dependencies calculated by `determine_dependencies`.

    Args:
        project_dir: Directory in which dependencies are determined and the meta
            file is written. Defaults to the parent directory of the configured
            ``lib_path``.
        out_file: Name of the YAML file to write inside ``project_dir``.
    """
    # Read the project configuration once instead of re-creating it per lookup.
    config = Config()
    # The source path always points at the configured library's parent
    # directory, independent of project_dir.
    source_dir = config.path("lib_path").resolve().parent
    if project_dir is None:
        project_dir = source_dir

    # split() without an argument returns [] for an empty dependency string,
    # whereas split(" ") would return [""] and write a bogus empty requirement.
    run_requirements = determine_dependencies(out_dir=project_dir).split()

    meta_data = {
        "package": {
            "name": config.get("lib_name"),
            "version": config.get("version"),
        },
        "source": {"path": str(source_dir)},
        "requirements": {
            "host": ["pip", "python", "setuptools"],
            "run": run_requirements,
        },
    }
    with open(os.path.join(project_dir, out_file), "w") as conda_build_file:
        yaml.dump(meta_data, conda_build_file)

In [None]:
# Smoke test: writes the conda meta file (default name meta.yaml) into ./test.
create_conda_meta_file(Path("test"))

# Update All Project Requirements

In [None]:
# export


@call_parse
def sciflow_update_reqs():
    """Regenerate the conda meta file and the requirements entry in settings.ini.

    Calls `create_conda_meta_file` and then `update_requirements`, both with
    their default arguments, and prints a confirmation message.
    """
    create_conda_meta_file()
    update_requirements()
    print("Updated library requirements for conda & nbdev")

In [None]:
# Run the command against this project; it prints a confirmation when done.
sciflow_update_reqs()

Updated library requirements for conda & nbdev


# Prepare Artifactory Environment

> This code should be in projects not here.

In [None]:
# export


def delete_multiple_element(list_object, indices):
    """Remove the elements at the given positions from ``list_object`` in place.

    Args:
        list_object: The list to modify.
        indices: Positions to remove. Duplicates are ignored, and positions at
            or beyond the end of the list are skipped.

    Returns:
        None; ``list_object`` is mutated.
    """
    # Deduplicate so a repeated index cannot remove a second, unrelated element,
    # and pop from the highest position down so earlier pops do not shift the
    # positions that are still to be removed.
    for idx in sorted(set(indices), reverse=True):
        if idx < len(list_object):
            list_object.pop(idx)

In [None]:
# export


def read_deploy_vars():
    """Collect deployment settings from ``~/.condarc`` and the project config.

    The first entry of ``channels`` in ``~/.condarc`` is expected to be a URL of
    the form ``scheme://user:token@host/...``; user, token and host are taken
    from it.

    Returns:
        A dict with the keys ``conda_url``, ``artifactory_user``,
        ``artifactory_token``, ``artifactory_url``,
        ``artifactory_conda_channel``, ``lib_name``, ``version`` and
        ``build_number``.

    Raises:
        ValueError: If ``~/.condarc`` lists no channels, or the first channel
            does not carry user, token and host.
    """
    with open(os.path.join(Path.home(), ".condarc"), "r") as conda_rc_file:
        # NOTE(review): .condarc is plain configuration, so yaml.safe_load would
        # presumably be sufficient here - confirm before switching loaders.
        conda_rc = yaml.load(conda_rc_file, Loader=yaml.FullLoader)

    channels = (conda_rc or {}).get("channels") or []
    if not channels:
        raise ValueError("~/.condarc does not define any channels")
    conda_url = channels[0]

    # Parse once and use the parsed credential attributes instead of splitting
    # the netloc by hand, which breaks when the token contains ':' or '@'.
    parsed_url = urlparse(conda_url)
    # Host without credentials and without port, keeping the original case.
    host = parsed_url.netloc.rpartition("@")[2].partition(":")[0]
    if not (parsed_url.username and parsed_url.password and host):
        # The URL itself is left out of the message because it embeds the token.
        raise ValueError(
            "The first channel in ~/.condarc must have the form "
            "scheme://user:token@host/..."
        )

    deployment = {
        "conda_url": conda_url,
        "artifactory_user": parsed_url.username,
        "artifactory_token": parsed_url.password,
        "artifactory_url": host,
        "artifactory_conda_channel": "conda-local",
        "lib_name": Config().lib_name,
        "version": Config().version,
        "build_number": 0,
    }
    return deployment

In [None]:
# Reads the current user's ~/.condarc; the library name comes from the project config.
deploy_vars = read_deploy_vars()
assert deploy_vars["lib_name"] == "sciflow"

In [None]:
# export


def write_art_conda_envs_to_file():
    """Rewrite ``~/.profile`` so it exports the current deployment variables.

    Existing export lines managed by this function are dropped, every remaining
    line is newline-terminated, and fresh ``export`` lines built from
    `read_deploy_vars` are appended. A missing ``~/.profile`` is treated as
    empty and created.

    NOTE: the Artifactory token is written to ``~/.profile`` in plain text.

    Returns:
        The list of lines written to ``~/.profile``.
    """
    import shlex

    dep_vars = read_deploy_vars()
    profile_path = os.path.join(Path.home(), ".profile")

    # All ARTIFACTORY_* exports are replaced. The other variables are matched
    # including '=' so unrelated names such as VERSION_FOO are left untouched.
    managed_prefixes = (
        "export ARTIFACTORY_",
        "export LIB_NAME=",
        "export VERSION=",
        "export BUILD_NUMBER=",
    )

    existing_lines = []
    if os.path.exists(profile_path):
        with open(profile_path, "r") as profile_file:
            existing_lines = profile_file.readlines()

    kept_lines = [
        line if line.endswith("\n") else line + "\n"
        for line in existing_lines
        if not line.strip().startswith(managed_prefixes)
    ]

    # (environment variable name, key in dep_vars)
    exports = (
        ("ARTIFACTORY_USER", "artifactory_user"),
        ("ARTIFACTORY_PASSWORD", "artifactory_token"),
        ("ARTIFACTORY_URL", "artifactory_url"),
        ("ARTIFACTORY_CONDA_CHANNEL", "artifactory_conda_channel"),
        ("LIB_NAME", "lib_name"),
        ("VERSION", "version"),
        ("BUILD_NUMBER", "build_number"),
    )
    # shlex.quote leaves plain values unchanged and protects values containing
    # characters the shell would otherwise interpret.
    new_lines = [
        "export {}={}\n".format(env_name, shlex.quote(str(dep_vars[key])))
        for env_name, key in exports
    ]

    profile_lines = kept_lines + new_lines
    with open(profile_path, "w") as profile_file:
        profile_file.writelines(profile_lines)
    return profile_lines

In [None]:
lines = write_art_conda_envs_to_file()
# Mask the credential before displaying so the token is not saved in the
# notebook output; `lines` itself is left untouched for the checks that follow.
[
    "export ARTIFACTORY_PASSWORD=<redacted>\n"
    if line.startswith("export ARTIFACTORY_PASSWORD=")
    else line
    for line in lines
]

['eval "$(conda shell.bash hook)"\n',
 'source .bashrc 2>/dev/null\n',
 'export ARTIFACTORY_USER=nexus_eut_svc\n',
 'export ARTIFACTORY_PASSWORD=<redacted>\n',
 'export ARTIFACTORY_URL=ndartifactory.jfrog.io\n',
 'export ARTIFACTORY_CONDA_CHANNEL=conda-local\n',
 'export LIB_NAME=sciflow\n',
 'export VERSION=0.0.1\n',
 'export BUILD_NUMBER=0\n']

In [None]:
# write_art_conda_envs_to_file appends seven export lines, so at least that many must exist.
assert len(lines) >= 7

In [None]:
# export


def correct_pypirc(
    repository_url: str = "https://ndartifactory.jfrog.io/artifactory/api/pypi/pypi",
):
    """Point the ``repository:`` entry of ``~/.pypirc`` at ``repository_url``.

    If several lines start with ``repository:``, only the last one is replaced.

    Args:
        repository_url: URL written after ``repository: ``. Defaults to the
            Artifactory PyPI endpoint that was previously hard-coded.

    Raises:
        ValueError: If ``~/.pypirc`` has no line starting with ``repository:``.
            The file is left unmodified in that case.
    """
    pypirc_path = os.path.join(Path.home(), ".pypirc")
    with open(pypirc_path, "r") as pypirc_file:
        lines = pypirc_file.readlines()

    repository_idx = None
    for i, line in enumerate(lines):
        if line.startswith("repository:"):
            repository_idx = i

    # Fail with a clear message instead of an UnboundLocalError when there is
    # nothing to replace.
    if repository_idx is None:
        raise ValueError("No 'repository:' entry found in {}".format(pypirc_path))

    lines[repository_idx] = "repository: {}\n".format(repository_url)

    with open(pypirc_path, "w") as pypirc_file:
        pypirc_file.writelines(lines)

In [None]:
# Rewrites the repository entry in the current user's ~/.pypirc.
correct_pypirc()

In [None]:
# export


@call_parse
def sciflow_prepare():
    """Fix up ``~/.pypirc`` and publish the deployment settings as environment variables.

    Every key returned by `read_deploy_vars` is exported in upper case with its
    value converted to a string.
    """
    deployment = read_deploy_vars()
    correct_pypirc()

    # NOTE(review): the token is exported here as ARTIFACTORY_TOKEN, whereas
    # write_art_conda_envs_to_file writes it as ARTIFACTORY_PASSWORD - confirm
    # which name downstream tooling expects.
    os.environ.update(
        {name.upper(): str(value) for name, value in deployment.items()}
    )

In [None]:
# Make sure the variable is unset so the following cells prove that
# sciflow_prepare is what sets it.
os.environ.pop("ARTIFACTORY_CONDA_CHANNEL", None)

In [None]:
# Populates os.environ from the deployment settings (and rewrites ~/.pypirc).
sciflow_prepare()

In [None]:
# The channel name was removed from the environment above, so it must have been set by sciflow_prepare.
assert "conda-local" == os.environ["ARTIFACTORY_CONDA_CHANNEL"]