In [1]:
# default_exp packaging

# Simplified conda & pip packaging & deployment

> Package management can be challenging for Data Science workflows. Many enterprises will not be able to publish all their packages to public repositories like pypi or conda. Different stages of the Data Science lifecycle require different capabilities with respect to library management. Early stage research values flexibility and the ability to add the latest packages on the fly. Production models want reliability and known, fixed dependencies. This module aims to make it easier to meet these goals.

# 1. Determine minimal dependencies

We are explicit about the code that makes up the Data Science workflow using the sciflow steps/flows method. This helps extract the virtual environments which are needed to run the flows & allows downstream processes to have a reliable (pinned) version of dependencies that work for a workflow instance.

## 1.1 Dependency Calculation
* [pigar](https://github.com/damnever/pigar): the pigar library is used to calculate the dependencies of all modules in the `sciflow` lib.

# 2. Making It Easier

Additions to `Makefile`

```
local_release: art_pip art_conda
art_pip: deploy to private artifactory pypi repository
art_conda: deploy to private artifactory conda channel
```

## 2.1 Conda Artifactory Environment Variables needed

* export ARTIFACTORY_USER=..
* export ARTIFACTORY_PASSWORD=..
* export ARTIFACTORY_URL=ndartifactory.jfrog.io
* export ARTIFACTORY_CONDA_CHANNEL=conda-local

In [92]:
# export

import os
import subprocess
import sys
from pathlib import Path
from typing import List
from configparser import ConfigParser
from nbdev.export import Config
import yaml

In [3]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2

In [59]:
# export


def determine_dependencies(generated_pip_file_name: str = "requirements-generated.txt"):
    """Run pigar over the library source and return its requirements as a string.

    pigar is launched as ``python -m pigar`` with the project root (the parent
    directory of nbdev's ``lib_path``) as working directory. After a
    successful run the generated requirements file is read back from the
    project root.

    Args:
        generated_pip_file_name: Name of the requirements file handed to
            pigar through ``-p``; it is resolved against the project root
            when it is read back.

    Returns:
        The contents of the generated file as converted by
        ``reqs_file_to_sep_str``.

    Raises:
        EnvironmentError: If the pigar subprocess exits with a non-zero
            status (this includes pigar not being installed); the message is
            the subprocess's stderr output.
    """
    lib_path = Config().path("lib_path")
    project_root = lib_path.resolve().parent
    pigar_args = ["-p", generated_pip_file_name, "-P", lib_path]

    # Launch pigar through the interpreter that is running this code.
    result = subprocess.run(
        [sys.executable, "-m", "pigar", *(str(arg).strip() for arg in pigar_args)],
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        cwd=project_root,
        universal_newlines=True,
    )

    if result.returncode != 0:
        raise EnvironmentError(result.stderr)

    return reqs_file_to_sep_str(os.path.join(project_root, generated_pip_file_name))

In [60]:
# Display the requirements string computed for this project.
determine_dependencies()

'fastcore==1.3.19 nbdev==1.1.12 nbformat==5.0.8 nbqa==0.5.6 networkx==2.5 pandas==0.25.3'

In [61]:
# Location where the generated requirements file is expected: the parent of
# the current working directory.
generated_reqs_path = os.path.join(
    Path(".").resolve().parent, "requirements-generated.txt"
)
# Remove any leftover file so the assertions below reflect this run only.
if os.path.exists(generated_reqs_path):
    os.remove(generated_reqs_path)
assert not os.path.exists(generated_reqs_path)
determine_dependencies()
# The call above must have produced the file; delete it again afterwards.
assert os.path.exists(generated_reqs_path)
os.remove(generated_reqs_path)

# Requirements.txt Manipulation

> Read pip requirements file and convert to a structure that can be used to transform that output to a different format.

For more information see here:

https://www.python.org/dev/peps/pep-0440/#version-specifiers

In [62]:
# Sample requirements-file lines used by the assertion that follows:
# specifiers with inner spaces, a blank line, and comment lines.
test_lines = (
    "fastcore == 1.3.19",
    "\n",
    "#",
    "nbformat >= 5.0.8",
    "# scidev/nb_lint.py: 10,11,12",
    "nbqa ~= 0.5.6",
    "nbqa <=0.5.6",
)

In [63]:
def reqs_lines_to_sep_str(req_lines: List[str], sep: str = " "):
    """Collapse pip requirements lines into one separator-joined string.

    Blank lines and comment lines (first non-whitespace character is ``#``)
    are dropped. Spaces inside each remaining line are removed, so
    ``"pkg == 1.0"`` becomes ``"pkg==1.0"``.

    Args:
        req_lines: Lines as read from a requirements file; trailing newlines
            are allowed.
        sep: String placed between the resulting requirement specifiers.

    Returns:
        The cleaned specifiers joined by ``sep``; an empty string when no
        line survives the filtering.
    """
    # Strip before testing for "#" so indented comment lines are skipped too.
    stripped_lines = (line.strip() for line in req_lines)
    return sep.join(
        line.replace(" ", "")
        for line in stripped_lines
        if line and not line.startswith("#")
    )

In [64]:
# Blank and comment lines are dropped, and inner spaces are removed.
assert reqs_lines_to_sep_str(test_lines) == (
    "fastcore==1.3.19 nbformat>=5.0.8 nbqa~=0.5.6 nbqa<=0.5.6"
)

In [65]:
def reqs_file_to_sep_str(pip_reqs_path: Path) -> str:
    """Load a pip requirements file and return it as one requirements string.

    The file's lines are handed unchanged to ``reqs_lines_to_sep_str``, which
    is called with its default separator.
    """
    with open(pip_reqs_path, "r") as reqs_handle:
        return reqs_lines_to_sep_str(list(reqs_handle))

In [66]:
# Regenerate the requirements file, read it back as a single string, then
# delete it. `generated_reqs_path` is defined in an earlier cell.
determine_dependencies()
reqs_str = reqs_file_to_sep_str(generated_reqs_path)
os.remove(generated_reqs_path)

In [67]:
def update_requirements(output_filename: str = "settings.ini"):
    """Write freshly computed requirements into the ``DEFAULT`` section of settings.

    The project root is the parent directory of nbdev's ``lib_path``, the same
    directory ``determine_dependencies`` works in, so the result does not
    depend on the current working directory. ``settings.ini`` is read from
    that root, its ``requirements`` option is replaced with the string
    returned by ``determine_dependencies``, and the configuration is written
    back out.

    Args:
        output_filename: File to write the updated configuration to. A
            relative name is resolved against the project root; an absolute
            path is used as given.

    Raises:
        EnvironmentError: Propagated from ``determine_dependencies`` when the
            dependency scan fails.
    """
    generated_file_name = "requirements-generated.txt"
    project_root = Config().path("lib_path").resolve().parent

    config = ConfigParser(delimiters=["="])
    config.read(os.path.join(project_root, "settings.ini"))

    # determine_dependencies already returns the parsed file contents, so the
    # generated file only needs to be cleaned up, not read a second time.
    reqs_str = determine_dependencies(generated_file_name)
    generated_reqs_path = os.path.join(project_root, generated_file_name)
    if os.path.exists(generated_reqs_path):
        os.remove(generated_reqs_path)

    config.set("DEFAULT", "requirements", reqs_str)

    with open(os.path.join(project_root, output_filename), "w") as configfile:
        config.write(configfile)

In [69]:
# NOTE(review): the output path is the parent directory's settings.ini
# itself, so running this cell rewrites that file in place — confirm this is
# intended for a test.
test_outfile = os.path.join(Path(".").resolve().parent, "settings.ini")
update_requirements(test_outfile)

In [70]:
# Read the written settings back and confirm nbdev is among the requirements.
config = ConfigParser(delimiters=["="])
config.read(test_outfile)
assert "nbdev" in config.get("DEFAULT", "requirements")

In [71]:
# Options that must still be present in settings.ini after it is rewritten.
required_keys = (
    "lib_name",
    "description",
    "version",
    "custom_sidebar",
    "license",
    "status",
    "console_scripts",
    "nbs_path",
    "lib_path",
    "title",
    "tst_flags",
)

In [72]:
# Every required option must be readable from the rewritten settings;
# ConfigParser.get raises for an option that is missing.
assert all(
    [config.get("DEFAULT", key) is not None for key in required_keys]
)

# Create conda build file

In [91]:
# NOTE(review): `meta_data` is not assigned at notebook scope in any cell
# visible here (it only appears as a local inside `create_conda_meta_file`),
# so this cell presumably relies on leftover kernel state and would fail
# after Restart & Run All — confirm, then rebuild the dict here or drop the cell.
print(yaml.dump(meta_data))

package:
  name: sciflow
  version: 0.0.1
requirements:
  host:
  - pip
  - python
  - setuptools
  run:
  - fastcore==1.3.19
  - nbdev==1.1.12
  - nbformat==5.0.8
  - nbqa==0.5.6
  - networkx==2.5
  - pandas==0.25.3
source:
  path: /home/jovyan/git/sciflow



In [93]:
def create_conda_meta_file(out_dir: Path, out_file: str = "meta.yaml"):
    """Write a conda-build metadata file for this library.

    Package name and version come from the nbdev configuration, the source
    path is the parent directory of ``lib_path``, and the run requirements
    are the specifiers returned by ``determine_dependencies``.

    Args:
        out_dir: Directory the file is written into.
        out_file: Name of the file to create inside ``out_dir``.
    """
    nbdev_config = Config()
    # split() without an argument returns [] for an empty string, whereas
    # split(' ') would return [''] and emit a blank run requirement.
    run_requirements = determine_dependencies().split()
    meta_data = {
        "package": {
            "name": nbdev_config.get("lib_name"),
            "version": nbdev_config.get("version"),
        },
        "source": {"path": str(nbdev_config.path("lib_path").resolve().parent)},
        "requirements": {
            "host": ["pip", "python", "setuptools"],
            "run": run_requirements,
        },
    }
    with open(os.path.join(out_dir, out_file), "w") as conda_build_file:
        yaml.dump(meta_data, conda_build_file)

In [94]:
# Write meta.yaml (the default file name) into the current working directory.
create_conda_meta_file(Path('.'))

# TODO

1. Create meta.yaml from settings.ini
2. Test local_conda_release
3. Test release