In [None]:
#hide
#default_exp export

# nbdev.export
- Exporting a notebook to a library

In [None]:
#export
from nbdev.read import *

from nbdev.imports import *
from fastcore.script import *
from fastcore.imports import *

from collections import defaultdict
from pprint import pformat
import ast,contextlib

In [None]:
from fastcore.test import *
from pdb import set_trace
from importlib import reload

## NotebookProcessor -

Special comments at the start of a cell can be used to provide information to `nbdev` about how to process a cell, so we need to be able to find the location of these comments.

In [None]:
minimal = read_nb('../tests/minimal.ipynb')

In [None]:
#export
def extract_comments(ss):
    "Take leading comments from lines of code in `ss`, remove `#`, and split"
    ss = ss.splitlines()
    # Index of the first blank line or first line holding anything other than a comment.
    # `first` gives `None` when every line is a comment; `ss[:None]` then keeps them all
    # (the previous `or 0` turned that case into `ss[:0]`, dropping every comment).
    first_code = first(i for i,o in enumerate(ss) if not o.strip() or re.match(r'\s*[^#\s]', o))
    # Drop the leading `#`, split on whitespace, and discard comments with no words
    return L((s.strip()[1:]).strip().split() for s in ss[:first_code]).filter()

nbdev comments start with `#`, followed by whitespace delimited tokens, which `extract_comments` extracts from the start of a cell, up until a blank line or a line containing something other than comments:

In [None]:
exp  = "#export module\n# hide\n1+2\n#foo\n#bar"
test_eq(extract_comments(exp), [['export', 'module'],['hide']])

In [None]:
#export
class NotebookProcessor:
    "Base class for nbdev notebook processors"
    def __init__(self, path, debug=False):
        # Read the notebook and normalise the path before storing anything on `self`
        nb,nb_path = read_nb(path),Path(path)
        self.nb = nb
        self.path = nb_path
        self.debug = debug

Subclass `NotebookProcessor` to add methods to act on nbdev comments. The method names are of the form `cmd_type`, where `cmd` is the first word of the nbdev comment, and `type` is the `cell_type` of the cell (normally `code`). The methods must take at least `comment` and `code` (which receives the cell) as params, plus extra params for any additional words included in a comment. Here's an example that prints the word following a `printme` comment:

In [None]:
class _PrintExample(NotebookProcessor):
    "Example processor that reacts to `printme` comments in code cells"
    def printme_code(self, comment, code, to_print):
        "Print `to_print`, the single extra word of the comment"
        print(to_print)

We can create a processor by passing it a notebook:

In [None]:
everything_fn = '../tests/01_everything.ipynb'
proc = _PrintExample(everything_fn)

The basic functionality of a notebook processor is to read and act on nbdev comments.

In [None]:
#export
@patch
def process_comment(self:NotebookProcessor, comment, cell):
    "Call the `{cmd}_{cell_type}` method for `comment` (a list of words), if there is one accepting its words"
    import inspect
    cmd,*args = comment
    cmd = f"{cmd}_{cell.cell_type}"
    if self.debug: print(cmd, args)
    if not hasattr(self, cmd): return
    handler = getattr(self, cmd)
    # A comment whose words do not fit the handler's signature is skipped, as before.
    # Checking the binding up front (rather than catching `TypeError` around the call)
    # means a `TypeError` raised *inside* a handler is no longer silently discarded.
    try: inspect.signature(handler).bind(comment, cell, *args)
    except TypeError: return
    handler(comment, cell, *args)

Behind the scenes, `process_comment` is used to call subclass methods. You can override it in a subclass to change the behavior of a processor.

In [None]:
proc.process_comment(["printme","hello"], SimpleNamespace(cell_type="code"))

hello


In [None]:
#export
@patch
def process_cell(self:NotebookProcessor, cell):
    "Pass each leading nbdev comment of `cell` to `process_comment`; cells without comments go to `no_cmd`"
    found = extract_comments(cell.source)
    if found:
        for words in found:
            self.process_comment(words, cell)
        return cell
    # No comments: the result of the `no_cmd` hook is what gets returned
    return self.no_cmd(cell)

@patch
def no_cmd(self:NotebookProcessor, cell):
    "Hook called for cells without nbdev comments; returns `cell` unchanged"
    return cell

Override `process_cell` in a subclass to change how `process_comment` is called. By default, it calls `self.no_cmd` for any cells without comments. The return value of `process_cell` is used to replace the cell in the notebook.

In [None]:
def _make_code_cell(code, idx=0):
    "Minimal code cell holding `code` as its source (`idx` is accepted but not used)"
    return AttrDict(source=code, cell_type="code")

def _make_code_cells(*ss):
    "Build a notebook from the code strings `ss` with `dict2nb` and return its cells"
    cell_dicts = L(ss).map(_make_code_cell)
    return dict2nb({'cells':cell_dicts}).cells

proc.process_cell(_make_code_cell("#printme hello"));

In [None]:
#export
@patch
def process(self:NotebookProcessor):
    "Process all cells with `process_cell` and replace `self.nb.cells` with result"
    for idx in range_of(self.nb.cells):
        current = self.nb.cells[idx]
        # Whatever `process_cell` returns takes the cell's place
        self.nb.cells[idx] = self.process_cell(current)

In [None]:
proc.process()

testing


`NotebookProcessor.process` doesn't change a notebook or act on any comments, unless you subclass it.

In [None]:
everything = read_nb(everything_fn)
proc = NotebookProcessor(everything_fn)
proc.process()
for a_,b_ in zip(everything.cells, proc.nb.cells): test_eq(str(a_),str(b_))

## Variable helpers

These functions let us find and modify the definitions of variables in python modules.

In [None]:
#export
def find_var(lines, varname):
    "Find the line numbers where `varname` is defined in `lines`"
    # Returns `(start,end)` so that `lines[start:end]` is the definition, or `(None,None)` if not found.
    # `varname` must match as a whole identifier: looking for `a_` must not stop at `a_b = 1`.
    pat = re.compile(re.escape(varname) + r'(?!\w)')
    start = next((i for i,o in enumerate(lines) if pat.match(o)), None)
    if start is None: return None,None
    # Continuation lines are those starting with a space or tab
    empty = ' ','\t'
    if start==len(lines)-1 or lines[start+1][:1] not in empty: return start,start+1
    # First following line (blank lines included) that is not indented ends the definition
    end = next((i for i,o in enumerate(lines[start+1:]) if o[:1] not in empty), None)
    return start,len(lines) if end is None else (end+start+1)

In [None]:
t = 'a_=(1,\n  2,\n  3)\n\nb_=3'
test_eq(find_var(t.splitlines(), 'a_'), (0,3))
test_eq(find_var(t.splitlines(), 'b_'), (4,5))

In [None]:
#export
def read_var(code, varname):
    "Eval and return the value of `varname` defined in `code`"
    lines = code.splitlines()
    start,end = find_var(lines, varname)
    if start is None: return None
    stmt = '\n'.join(lines[start:end])
    # Parse the statement instead of splitting on `=`: the old `split('=')[-1]` kept only the text
    # after the *last* `=`, which broke values such as `dict(a=1)` or `[x==1]`.
    try: node = ast.parse(stmt).body[0]
    except (SyntaxError,IndexError): raise Exception(stmt) from None
    # Assignments (plain, annotated, augmented) and bare expressions all keep their value in `.value`
    val = getattr(node, 'value', None)
    if val is None: raise Exception(stmt)
    # NOTE(review): this evaluates text taken from `code`; only use it on trusted source
    return eval(compile(ast.Expression(body=val), '<read_var>', 'eval'))

In [None]:
test_eq(read_var(t, 'a_'), (1,2,3))
test_eq(read_var(t, 'b_'), 3)

In [None]:
#export
def update_var(varname, func, fn=None, code=None):
    "Update the definition of `varname` in file `fn`, by calling `func` with the current definition"
    # With `fn` the file is rewritten in place and nothing is returned;
    # otherwise `code` is used and the updated source text is returned.
    if fn:
        fn = Path(fn)
        code = fn.read_text()
    if code is None: raise ValueError("update_var needs either `fn` or `code`")
    lines = code.splitlines()
    start,end = find_var(lines, varname)
    # Fail clearly instead of the obscure `TypeError` that `lines.insert(None, ...)` used to raise
    if start is None:
        where = f" in {fn}" if fn else ""
        raise ValueError(f"No definition of `{varname}` found{where}")
    res = func(read_var(code, varname))
    # Replace the whole (possibly multi-line) definition with a single line
    lines[start:end] = [f"{varname} = {res}"]
    code = '\n'.join(lines)
    if fn: fn.write_text(code)
    else: return code

In [None]:
exec(t)
test_eq((a_,b_), ((1,2,3),3))
t2 = update_var('a_', lambda o:0, code=t)
exec(t2)
test_eq((a_,b_), (0,3))
t3 = update_var('b_', lambda o:0, code=t)
exec(t3)
test_eq((a_,b_), ((1,2,3),0))

## ModuleMaker -

In [None]:
#export
class ModuleMaker:
    "Helper class to create exported library from notebook source cells"
    def __init__(self, dest, name, nb_path, is_new=True):
        dest,nb_path = Path(dest),Path(nb_path)
        store_attr()
        # A dotted module name maps to nested folders under `dest`, e.g. `a.b` -> `dest/a/b.py`
        self.fname = dest/(name.replace('.','/') + ".py")
        if is_new: dest.mkdir(parents=True, exist_ok=True)
        else: assert self.fname.exists(), f"{self.fname} does not exist"
        # Notebook path as seen from the folder that holds the generated file; for dotted module
        # names that folder is deeper than `dest`, so `dest` itself is not the right base.
        self.dest2nb = nb_path.relpath(self.fname.parent)
        # Marker line placed in the generated file to point back at the notebook
        self.hdr = f"# %% {self.dest2nb}"

In [None]:
get_config().path('lib_path')

Path('/home/jhoward/git/nbdev/nbdev')

In order to export a notebook, we need a way to create a Python file. `ModuleMaker` fills that role. Pass in the directory where you want the module created, the name of the module, the path of the notebook source, and set `is_new` to `True` if this is a new file being created (rather than an existing file being added to). The location of the saved module will be in `fname`.

In [None]:
mm = ModuleMaker(dest='tmp', name='test.testing', nb_path=Path.cwd()/'01_export.ipynb', is_new=True)
mm.fname

# %% ../01_export.ipynb


Path('tmp/test/testing.py')

In [None]:
#export
# AST node types that define a named symbol, and those that assign to one
_def_types = ast.FunctionDef,ast.AsyncFunctionDef,ast.ClassDef
_assign_types = ast.AnnAssign, ast.Assign, ast.AugAssign

def _val_or_id(it):
    "Items of the list/tuple literal assigned in `it`: `value` if the item has one, else its `id`"
    return [getattr(o, 'value', getattr(o, 'id', None)) for o in it.value.elts]

def _all_targets(a):
    "Elements of a destructuring target (`a,b = ...`), or the target itself wrapped in an `L`"
    return L(getattr(a,'elts',a))

def _decorator_name(d):
    "Name of decorator node `d` for `@name`, `@name(...)`, `@pkg.name`; '' if it has none"
    if isinstance(d, ast.Call): d = d.func
    if isinstance(d, ast.Attribute): return d.attr
    return getattr(d, 'id', '')

def _wants(o):
    "True for function/class definitions that are not decorated with a `patch*` decorator"
    # Decorators written as calls or attributes have no `.id`; reading it directly raised `AttributeError`
    return isinstance(o,_def_types) and not any(_decorator_name(d).startswith('patch') for d in o.decorator_list)

In [None]:
#export
def retr_exports(trees):
    "Names to export from the AST nodes in `trees`: public definitions and assignments, plus anything in `_all_`"
    # `Assign` keeps a list in `targets`; `AnnAssign` and `AugAssign` keep one node in `target`
    def _targets(o): return L(getattr(o, 'targets', None) or [o.target])

    # include anything mentioned in "_all_", even if otherwise private
    # NB: "_all_" can include strings (names), or symbols, so we look for "id" or "value"
    assigns = trees.filter(risinstance(_assign_types))
    all_assigns = assigns.filter(lambda o: getattr(_targets(o)[0],'id',None)=='_all_')
    all_vals = all_assigns.map(_val_or_id).concat()
    syms = trees.filter(_wants).attrgot('name')

    # assignment targets (NB: can be multiple, e.g. "a=b=c", and/or destructuring e.g "a,b=(1,2)")
    assign_targs = L(_targets(assn).map(_all_targets).concat() for assn in assigns).concat()
    # targets without an `id` (e.g. attributes) come back as None and are dropped along with private names
    exports = (assign_targs.attrgot('id')+syms).filter(lambda o: o and o[0]!='_')
    return (exports+all_vals).unique()

In [None]:
#export
@patch
def make_all(self:ModuleMaker, cells):
    "Create `__all__` with all exports in `cells`"
    # No cells at all gives an empty string rather than a list of names
    if cells is None: return ''
    trees = cells.attrgot('parsed')
    return retr_exports(trees.concat())

We want to add an `__all__` to the top of the exported module. This method autogenerates it from all code in `cells`.

In [None]:
nb = _make_code_cells("from __future__ import print_function", "def a():...", "def b():...",
                      "c=d=1", "_f=1", "_g=1", "_all_=['_g']", "@patch\ndef h(self:ca):...")
test_eq(set(mm.make_all(nb)), set(['a','b','c','d', '_g']))

In [None]:
#export
def relative_import(name, fname, level=0):
    "Convert a module `name` to a name relative to `fname`"
    # `fname` is the package folder the importing code lives in (str or path-like).
    # Only absolute imports can be converted, hence `level` must be 0.
    assert not level
    sname = name.replace('.','/')
    # Nothing in common with `fname`: leave the import absolute
    if not(os.path.commonpath([sname,fname])): return name
    rel = os.path.relpath(sname, fname)
    if rel==".": return "."
    parts = rel.split(os.path.sep)
    # Each leading `..` climbs one package; a relative import needs one dot more than that.
    # Counting the components (instead of replacing "../" in the text) also handles a target that
    # is itself an ancestor package, where `rel` is ".." or "../.." with no trailing separator.
    n_up = 0
    while n_up<len(parts) and parts[n_up]==os.path.pardir: n_up += 1
    return "."*(n_up+1) + ".".join(parts[n_up:])

In [None]:
test_eq(relative_import('nbdev.core', "xyz"), 'nbdev.core')
test_eq(relative_import('nbdev.core', 'nbdev'), '.core')
_p = Path('fastai')
test_eq(relative_import('fastai.core', _p/'vision'), '..core')
test_eq(relative_import('fastai.core', _p/'vision/transform'), '...core')
test_eq(relative_import('fastai.vision.transform', _p/'vision'), '.transform')
test_eq(relative_import('fastai.notebook.core', _p/'data'), '..notebook.core')
test_eq(relative_import('fastai.vision', _p/'vision'), '.')

In [None]:
#export
def absolute_import(name, fname, level):
    "Unwrap a relative import of `name` at `level`, resolving it against the package folder `fname`"
    # `level` 0 means `name` is already absolute
    if not level: return name
    # `os.fspath` lets `fname` be a `Path` as well as a str
    mods = os.fspath(fname).split(os.path.sep)
    # One dot refers to `fname` itself; each extra dot drops one trailing package
    base = '.'.join(mods[:len(mods)-level+1])
    # `from .. import x` has no module name: the result is the package reached by `level`
    if not name: return base
    return base + f".{name}"

In [None]:
test_eq(absolute_import('xyz', 'nbdev', 0), 'xyz')
test_eq(absolute_import('', 'nbdev', 1), 'nbdev')
test_eq(absolute_import('core', 'nbdev', 1), 'nbdev.core')
test_eq(absolute_import('core', 'nbdev/vision', 2), 'nbdev.core')
test_eq(absolute_import('transform', 'nbdev/vision', 1), 'nbdev.vision.transform')
test_eq(absolute_import('notebook.core', 'nbdev/data', 2), 'nbdev.notebook.core')

In [None]:
#export
def update_import(source, tree, libname, f):
    "Rewrite the module of every `from ... import` in `source` with `f(module, libname, level)`"
    # Returns the updated source as a list of lines (line endings kept), or `None` when `tree`
    # contains no `from ... import` statement.
    if isinstance(tree, ast.AST): tree = [tree]
    imps = [o for o in (tree or []) if isinstance(o, ast.ImportFrom)]
    if not imps: return
    src = source.splitlines(True)
    for imp in imps:
        nmod = f(imp.module, libname, imp.level)
        lin = imp.lineno-1
        # `end_col_offset` refers to the statement's *last* line; for an import spanning several
        # lines take the rest of the first line, which is where the module name is
        end = imp.end_col_offset if imp.end_lineno==imp.lineno else None
        sec = src[lin][imp.col_offset:end]
        # Escape the dots so they match literally; `imp.module` is None for `from . import x`
        pat = "(from +)" + re.escape('.'*imp.level + (imp.module or ''))
        # A function replacement keeps `nmod` from being read as a regex template
        newsec = re.sub(pat, lambda m: f"{m.group(1)}{nmod}", sec, count=1)
        src[lin] = src[lin].replace(sec, newsec, 1)
    return src

@patch
def import2relative(cell:NbCell, libname):
    "Replace the cell source with one whose `from` imports are made relative to `libname`"
    # Cells without a (non-empty) `parsed` attribute are left alone
    if not getattr(cell,'parsed',None): return
    new_lines = update_import(cell.source, cell.parsed, libname, relative_import)
    # `update_import` gives nothing back when the cell has no `from ... import`
    if new_lines: cell.set_source(new_lines)

In [None]:
ss = "from nbdev.export import *\nfrom nbdev.a.b import *"
cell = _make_code_cells([ss])[0]
cell.import2relative('nbdev')
test_eq(cell.source, 'from .export import *\nfrom .a.b import *')

cell = _make_code_cells([ss])[0]
cell.import2relative('nbdev/a')
test_eq(cell.source, 'from ..export import *\nfrom .b import *')

In [None]:
#export
@patch
def make(self:ModuleMaker, cells, all_cells=None):
    "Write module containing `cells` with `__all__` generated from `all_cells`"
    # `all_cells=None` is the default, so it must not be iterated unconditionally
    if all_cells is not None:
        for cell in all_cells: cell.import2relative(Config().lib_name)
    # Existing files are appended to rather than rewritten
    if not self.is_new: return self._make_exists(cells, all_cells)
    self.fname.parent.mkdir(exist_ok=True, parents=True)
    _all = self.make_all(all_cells)
    trees = cells.attrgot('parsed')
    # Index just past the last cell containing a `from __future__ import`; 0 if there is none
    # (`max` of an empty sequence raises `ValueError`)
    try: last_future = max(i for i,tree in enumerate(trees) if any(
         isinstance(t,ast.ImportFrom) and t.module=='__future__' for t in tree))+1
    except ValueError: last_future=0
    with self.fname.open('w') as f:
        f.write(f"# AUTOGENERATED! DO NOT EDIT! File to edit: {self.dest2nb}.\n\n")
        # Cells up to the last `__future__` import go before the `__all__` cell, the rest after it
        export_cells(cells[:last_future], self.hdr, f, 0)
        f.write(create_all_cell(_all))
        export_cells(cells[last_future:], self.hdr, f, 1)

In [None]:
def _print_file(fname, mx=None):
    "Print the stripped text of `fname`, cut to `mx` characters (9999 when `mx` is None)"
    text = Path(fname).read_text().strip()
    limit = ifnone(mx,9999)
    print(text[:limit])

In [None]:
cells = _make_code_cells("from __future__ import print_function", "def a(): ...", "def b(): ...")
mm.make(cells, L([cells[1]]))
_print_file('tmp/test/testing.py')

Pass `all_cells=None` if you don't want any `__all__` added.

In [None]:
#export
@patch
def _update_all(self:ModuleMaker, all_cells, alls):
    "Formatted text for the existing names `alls` extended with the exports found in `all_cells`"
    combined = alls + self.make_all(all_cells)
    return pformat(combined, width=160)

@patch
def _make_exists(self:ModuleMaker, cells, all_cells=None):
    "`make` for `is_new=False`"
    if all_cells:
        # Rewrite `__all__` in the existing file, handing its current value to `_update_all`
        updater = partial(self._update_all, all_cells)
        update_var('__all__', updater, fn=self.fname)
    # The new cells are appended after what the file already contains
    with self.fname.open('a') as out:
        export_cells(cells, self.hdr, out)

If `is_new=False` then the additional definitions are added to the bottom, and any existing `__all__` is updated with the newly-added symbols.

In [None]:
c2 = _make_code_cells("def c(): ...", "def d(): ...")
mm = ModuleMaker(dest='tmp', name='test.testing', nb_path=Path.cwd()/'01_export.ipynb', is_new=False)
mm.make(c2, c2)

In [None]:
from tmp.test.testing import *
g = globals()
for s in "a c d".split(): assert s in g, s
assert 'b' not in g
assert a() is None

## ExportModuleProcessor -

In [None]:
#export
class ExportModuleProcessor(NotebookProcessor):
    "A `NotebookProcessor` which exports code to a module"
    def __init__(self, path, dest, mod_maker=ModuleMaker, debug=False):
        # `dest` is converted before `store_attr()` runs
        # presumably `store_attr()` saves the arguments as attributes (`self.dest` and `self.mod_maker` are read elsewhere) — TODO confirm
        dest = Path(dest)
        store_attr()
        # The base class reads the notebook and sets `nb`, `path` (as a `Path`) and `debug`
        super().__init__(path,debug=debug)

    def process(self):
        "Reset the collected cells, then process the notebook"
        # `modules`: cells to export, keyed by module; `in_all`: the subset recorded for `__all__`.
        # Both start empty on every call, so repeated calls do not accumulate cells.
        self.modules,self.in_all = defaultdict(L),defaultdict(L)
        super().process()

Specify `path` containing the source notebook, `dest` where the module(s) will be exported to, and optionally a class to use to create the module (`ModuleMaker`, by default).

In [None]:
proc = ExportModuleProcessor(everything_fn, 'tmp')

In [None]:
#export
@patch
def default_exp_code(self:ExportModuleProcessor, comment, cell, exp_to):
    "Handle a `default_exp` comment in a code cell by storing `exp_to` as `self.default_exp`"
    self.default_exp = exp_to

You must include a `default_exp` comment somewhere in your notebook to show what module to export to by default.

In [None]:
proc.process()
proc.default_exp

In [None]:
#export
@patch
def exporti_code(self:ExportModuleProcessor, comment, cell, exp_to=None):
    "Handle an `exporti` comment: add `cell` to `self.modules` and return the key it was stored under"
    # Cells with no explicit module are collected under '#'
    key = '#' if exp_to is None else exp_to
    self.modules[key].append(cell)
    return key

Exported cells are stored in a `dict` called `modules`, where the keys are the modules exported to. Those without an explicit module are stored in the `'#'` key, which will be exported to `default_exp`.

`exporti` comments are used to export a cell, without including the definition in `__all__`.

In [None]:
proc.process()
proc.modules['#']

In [None]:
#export
@patch
def export_code(self:ExportModuleProcessor, comment, cell, exp_to=None):
    "Handle an `export` comment: store `cell` like `exporti_code` does and also add it to `self.in_all`"
    key = self.exporti_code(comment, cell, exp_to=exp_to)
    self.in_all[key].append(cell)

# `exports` comments are dispatched to the very same function
ExportModuleProcessor.exports_code = ExportModuleProcessor.export_code

`export` and `exports` export a cell and include definitions in `__all__`. (`exports` also displays the source code in documentation).

In [None]:
#export
@patch
def create_modules(self:ExportModuleProcessor):
    "Process the notebook, then write every collected module with `self.mod_maker`"
    self.process()
    for mod,cells in self.modules.items():
        all_cells = self.in_all[mod]
        if mod=='#':
            # '#' holds the cells exported without a module name; they need a `default_exp` comment.
            # Same exception type as before, but with a message that names the actual problem.
            name = getattr(self, 'default_exp', None)
            if name is None:
                raise AttributeError(f"{self.path} exports cells without a module name, but has no `default_exp` comment")
        else: name = mod
        # Only the '#' module is created as a new file; explicitly named ones are added to existing files
        mm = self.mod_maker(dest=self.dest, name=name, nb_path=self.path, is_new=mod=='#')
        mm.make(cells, all_cells)

Let's check we can import a test file:

In [None]:
shutil.rmtree('tmp')
proc = ExportModuleProcessor('../tests/00_some.thing.ipynb', 'tmp')
proc.create_modules()

import tmp.some.thing
reload(tmp.some.thing)
test_eq(tmp.some.thing.__all__, ['a'])
test_eq(tmp.some.thing.a, 1)

We'll also check that our 'everything' file exports correctly:

In [None]:
proc = ExportModuleProcessor(everything_fn, 'tmp')
proc.create_modules()

import tmp.everything
reload(tmp.everything)
from tmp.everything import *
g = globals()
_alls = L("a b d e m n o p q".split())
for s in _alls.map("{}_y"): assert s in g, s
for s in "c_y_nall _f_y_nall g_n h_n i_n j_n k_n l_n".split(): assert s not in g, s
for s in _alls.map("{}_y") + ["c_y_nall", "_f_y_nall"]: assert hasattr(tmp.everything,s), s

That notebook should also export one extra function to `tmp.some.thing`:

In [None]:
import tmp.some.thing
reload(tmp.some.thing)
test_eq(tmp.some.thing.__all__, ['a','h_n'])
test_eq(tmp.some.thing.h_n(), None)

## Export -

In [None]:
# NOTE(review): `path` is not defined in any cell visible here — presumably the library folder; confirm it is set on a fresh kernel
(path/'export.py').unlink(missing_ok=True)
# Export this notebook itself into `nbdev`, then check the generated module
ExportModuleProcessor('01_export.ipynb', 'nbdev').create_modules()
import nbdev.export
reload(nbdev.export)
assert hasattr(nbdev.export, 'ModuleMaker')

In [None]:
#hide
def nb2dict(d, k=None):
    "Convert parsed notebook to `dict`"
    # `k` is the key `d` was stored under in its parent dict (None at the top level and for list items)
    # `source` text is split back into a list of lines, line endings kept
    if k=='source': return d.splitlines(keepends=True)
    if isinstance(d, (L,list)): return [nb2dict(o) for o in d]
    if not isinstance(d, dict): return d
    # Keys ending in `_` are left out. `endswith` is also safe for an empty key (where `key[-1]`
    # raised `IndexError`), and the loop names no longer shadow the parameter `k`.
    return {key:nb2dict(val,key) for key,val in d.items() if not key.endswith('_')}

# This returns the exact same string as saved by Jupyter.

assert minimal_txt==nb2dict(minimal)

#export
def write_nb(nb, path):
    "Write `nb` to `path`"
    as_dict = nb2dict(nb)
    with io.open(path, 'w', encoding='utf-8') as out:
        # Sorted keys, 1-space indent, non-ASCII characters kept as they are
        text = json.dumps(as_dict, sort_keys=True, indent=1, ensure_ascii=False)
        out.write(text)
        # End the file with a newline
        out.write("\n")