In [None]:
#|hide
#|default_exp processors

# processors
- Some processors for `NBProcessor`

In [None]:
#|export
import ast

from nbprocess.read import *
from nbprocess.imports import *
from nbprocess.process import *

from fastcore.imports import *
from fastcore.xtras import *

In [None]:
#|hide
from fastcore.test import *

## Helpers

In [None]:
#|hide
# Notebook that `_run_procs` passes to `NBProcessor` for the examples on this page
_test_file = '../tests/docs_test.ipynb'

On this page we'll be using this private helper to process a notebook and return the results, to simplify testing:

In [None]:
def _run_procs(procs=None, preprocs=None, postprocs=None):
    "Process `_test_file` with the given processors and return the resulting cells, stringified and newline-joined"
    processor = NBProcessor(_test_file, procs, preprocs=preprocs, postprocs=postprocs)
    processor.process()
    return '\n'.join(map(str, processor.nb.cells))

## Cell processors

In [None]:
#|export
# ESC followed by either a single byte in `@-Z`/`\-_`, or a `[`-introduced sequence
# (parameter bytes `0-?`, intermediate bytes ` -/`, one final byte `@-~`)
_re_ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

def strip_ansi(cell):
    "Strip ANSI escape sequences, in place, from the outputs of `cell` named `stdout`."
    for outp in cell.get('outputs', []):
        if outp.get('name') != 'stdout': continue
        text = outp.text
        # The notebook format allows `text` to be a single string as well as a list of strings.
        # Iterating a bare string yields single characters, which can never match a multi-character
        # escape sequence, so handle that case explicitly and keep the value a string.
        if isinstance(text, str): outp['text'] = _re_ansi_escape.sub('', text)
        else: outp['text'] = [_re_ansi_escape.sub('', o) for o in text]

Gets rid of colors that are streamed from standard out, which can interfere with static site generators:

In [None]:
# After running `strip_ansi`, the stringified notebook must not contain any ANSI escape sequence
res = _run_procs(strip_ansi)
assert not _re_ansi_escape.findall(res)

In [None]:
#|export
def hide_(nbp, cell):
    "Hide `cell` from the output by deleting its `source` (`nbp` is not used)"
    del cell['source']

In [None]:
# The sentence checked below presumably lives in a cell of the test notebook that carries the
# `hide` directive — confirm against the fixture
res = _run_procs(hide_)
assert 'you will not be able to see this cell at all either' not in res

In [None]:
#|export
_re_hideline = re.compile(r'#\|\s*hide_line\s*$', re.MULTILINE)
def hide_line(cell):
    "Drop every line of a code cell that ends with the `hide_line` directive"
    if cell.cell_type != 'code': return
    src = cell.source
    # leave the source untouched (including any trailing newline) when no line is flagged
    if _re_hideline.search(src) is None: return
    kept = (line for line in src.splitlines() if _re_hideline.search(line) is None)
    cell.source = '\n'.join(kept)

In [None]:
# Raw strings: `res` is built with `str(cell)`, so newlines inside a cell's source presumably
# appear as a literal backslash-n — confirm against the cell type's `__str__`
res = _run_procs(hide_line)
assert r"def show():\n    a = 2\n    b = 3" not in res
assert r"def show():\n    a = 2"                in res

In [None]:
#|export
def filter_stream_(nbp, cell, *words):
    "Remove output lines containing any of `words` in `cell` stream output"
    if not words: return
    # `words` are joined into a single regex alternation (so each word is treated as a regex
    # fragment, as before); compile it once instead of once per output line.
    pat = re.compile('|'.join(words))
    for outp in cell.get('outputs', []):
        if outp.output_type != 'stream': continue
        text = outp.text
        if isinstance(text, str):
            # The notebook format also allows `text` to be one string: filter it line by line
            # (iterating the string directly would filter single characters) and keep it a string.
            outp['text'] = ''.join(l for l in text.splitlines(keepends=True) if not pat.search(l))
        else: outp['text'] = [l for l in text if not pat.search(l)]

In [None]:
# The two expected lines must survive filtering; which lines (if any) are removed depends on the
# directive arguments in the test notebook, which are not visible here
res = _run_procs(filter_stream_)
exp=r"'A line\n', 'Another line.\n'"
assert exp in res

In [None]:
#|export
_magics_pattern = re.compile(r'^\s*(%%|%).*', re.MULTILINE)

def clean_magics(cell):
    "A preprocessor that removes lines starting with `%` or `%%` (magic commands) from code cells"
    if cell.cell_type != 'code': return
    without_magics = _magics_pattern.sub('', cell.source)
    # surrounding whitespace is trimmed as well, e.g. the blank line left by a leading cell magic
    cell.source = without_magics.strip()

In [None]:
# No `%%` may remain anywhere in the stringified notebook after `clean_magics` has run
res = _run_procs(clean_magics)
assert "%%" not in res

In [None]:
#|export
_langs = 'bash|html|javascript|js|latex|markdown|perl|ruby|sh|svg'
_lang_pattern = re.compile(rf'^\s*%%\s*({_langs})\s*$', flags=re.MULTILINE)

def lang_identify(cell):
    "A preprocessor that records the language of `%%bash`/`%%js`/etc code cells in `cell.metadata.language`"
    if cell.cell_type != 'code': return
    found = _lang_pattern.search(cell.source)
    if found is None: return
    lang = found.group(1)
    # `js` is the abbreviation provided by jupyter; store the full name instead
    cell.metadata.language = 'javascript' if lang == 'js' else lang

When a code cell starts with a language cell magic such as `%%bash`, we need to record that language in the cell's metadata so the code-fence can be changed from `python` to, for example, `bash`:

In [None]:
# At least one cell of the processed notebook must now show `language` set to `bash`
res = _run_procs(lang_identify)
assert "'language': 'bash'" in res

In [None]:
#|export
# NOTE(review): `rm_header_dash` below does not use this pattern; it relies on plain string checks
_re_hdr_dash = re.compile(r'^#+\s+.*\s+-\s*$', re.MULTILINE)

def rm_header_dash(cell):
    "Delete the source of markdown cells whose stripped text starts with `#` and ends with ` -`"
    stripped = cell.source.strip()
    is_markdown = cell.cell_type == 'markdown'
    if is_markdown and stripped.startswith('#') and stripped.endswith(' -'):
        del cell['source']

In [None]:
# Ordinary text must be kept, while the two headings checked below (presumably written with a
# trailing ` -` in the test notebook — confirm) must be gone
res = _run_procs(rm_header_dash)
assert 'some words' in res
assert 'A heading to Hide' not in res
assert 'Yet another heading to hide' not in res

In [None]:
#|export
_exp_dirs = {'export', 'exporti'}
_hide_dirs = _exp_dirs | {'hide', 'default_exp'}

def rm_export(cell):
    "Delete the source of cells carrying any of the export or hide directives"
    matched = cell.directives_.keys() & _hide_dirs
    if matched: del cell['source']

In [None]:
# `dontshow` presumably appears only in exported/hidden cells of the test notebook — confirm
res = _run_procs(rm_export)
assert 'dontshow' not in res

In [None]:
#|export
# Bound `search`: without `re.MULTILINE`, `^` only matches at the very start of the string, so the
# directive must be the first non-whitespace text of the source
_re_exps = re.compile(r'^\s*#\|\s*(?:export|exporti)').search

def _show_docs(trees):
    "Return the `ast.Expr` nodes in `trees` whose `value.func.id` is `show_doc`"
    def _calls_show_doc(node):
        return isinstance(node, ast.Expr) and nested_attr(node, 'value.func.id') == 'show_doc'
    return [node for node in trees if _calls_show_doc(node)]

In [None]:
#|export
_imps = {ast.Import, ast.ImportFrom}

def _do_eval(cell):
    "Whether `cell` should be executed: a code cell with parsed content that is exported, has imports, or calls `show_doc`"
    trees = cell.parsed_()
    if cell.cell_type != 'code' or not trees: return False
    exported = cell.directives_.keys() & _exp_dirs
    # same short-circuit order as three separate checks: export directive, import nodes, `show_doc` calls
    return bool(exported or filter_ex(trees, risinstance(_imps)) or _show_docs(trees))

In [None]:
#|export
class exec_show_docs:
    "Run, in one shared `NBRunner`, the cells needed for `show_doc` output (those for which `_do_eval` is true)"
    def __init__(self):
        runner = NBRunner()
        # bring `show_doc` into the runner before any cell is run
        runner('from nbprocess.showdoc import show_doc')
        self.k = runner

    def __call__(self, cell):
        if _do_eval(cell): self.k.run(cell)

In [None]:
# No assertion here: processing only has to complete without raising
res = _run_procs(exec_show_docs)

In [None]:
#|export
_re_showdoc = re.compile(r'^show_doc', re.MULTILINE)
def _is_showdoc(cell):
    "Truthy when `cell` is a code cell with at least one line starting with `show_doc`"
    return cell['cell_type'] == 'code' and _re_showdoc.search(cell.source)

def clean_show_doc(cell):
    "Mark `show_doc` input cells by prepending the `#| echo: false` directive to their source"
    if _is_showdoc(cell):
        cell.source = f'#| echo: false\n{cell.source}'

In [None]:
# Displayed rather than asserted: shows whether the `show_doc(another_func)` call text is in `res`
'show_doc(another_func)' in res

True

## Notebook preprocessors

In [None]:
#| export
def insert_warning(nb):
    "Insert an autogenerated-file warning cell into `nb` at index 1, i.e. right after the first cell."
    warning = "<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->"
    warning_cell = mk_cell(warning, False)
    nb.cells.insert(1, warning_cell)

This preprocessor inserts a warning in the markdown destination that the file is autogenerated.  This warning is inserted in the second cell so we do not interfere with front matter.

In [None]:
# With `insert_warning` as a preprocessor, the warning comment must show up in the processed notebook
res = _run_procs(preprocs=[insert_warning])
assert "<!-- WARNING: THIS FILE WAS AUTOGENERATED!" in res

In [None]:
# NOTE(review): this looks like leftover scratch code — nothing visible in this notebook uses the
# filtered list or `_tstre`; confirm and consider removing the cell
L('foo', None, 'a').filter(lambda x:x == 1)
_tstre = re.compile('a')

In [None]:
#| export
# An H1 line (`# title`) and, optionally, a `> description` line directly below it
_re_title = re.compile(r'^#\s+(.*)[\n\r](?:^>\s+(.*))?', flags=re.MULTILINE)
# `---` at the start of the text, then content with at least one non-whitespace character, then `---`
_re_fm = re.compile(r'^---.*\S+.*---', flags=re.DOTALL)

def _get_celltyp(nb, cell_type):
    "Cells of `nb` whose `cell_type` equals `cell_type`"
    return nb.cells.filter(lambda c: c.cell_type == cell_type)

def get_title(nb):
    "Get the `(title, description)` from the first H1 markdown cell of `nb` and delete that cell's source"
    for cell in _get_celltyp(nb, 'markdown'):
        if 'source' not in cell: continue
        # Use `search`, not `match`: with `re.MULTILINE` the H1 may start on any line of the cell,
        # whereas `match` only succeeds at the very start of the source and would return `None`
        # for a cell that was selected because the heading appears on a later line.
        found = _re_title.search(cell.source)
        if found is None: continue
        # the title cell is consumed: its source is removed from the notebook
        del cell['source']
        return found.groups()
    return None,None

In [None]:
# `get_title` also deletes the source of the title cell, so `_nb` is modified from here on
_nb = read_nb(_test_file)
test_eq(get_title(_nb), ('a title', 'A description'))
# a notebook in which no title is found yields `(None, None)`
test_eq(get_title(read_nb('../tests/minimal.ipynb')), (None, None))

In [None]:
#| export
def get_fm(nb):
    "Get the raw cells of `nb` whose source matches the front matter pattern `_re_fm`"
    def _has_fm(c): return 'source' in c and _re_fm.search(c.source)
    raw_cells = _get_celltyp(nb, 'raw')
    return raw_cells.filter(_has_fm)

In [None]:
# The result is falsy when no raw front matter cell is found and truthy otherwise
test_eq(bool(get_fm(_nb)), False)
test_eq(bool(get_fm(read_nb('../tests/01_everything.ipynb'))), True)

In [None]:
#| export
def insert_fm(nb):
    "Insert a raw front matter cell built from the H1 title/description, unless `nb` already has front matter"
    if get_fm(nb): return
    # NB: `get_title` also deletes the source of the markdown cell the title was taken from
    title,desc = get_title(nb)
    if not title: return
    # Values are written as YAML double-quoted scalars, in which `\` starts an escape sequence and
    # `"` ends the scalar; escape both so a title like `My "best" lib` still yields valid YAML.
    def _dq(s): return s.replace('\\', '\\\\').replace('"', '\\"')
    desc = f'description: "{_dq(desc)}"\n' if desc else ''
    content = f'---\ntitle: "{_dq(title)}"\n{desc}---'
    # the front matter goes in as the very first cell
    nb.cells.insert(0, NbCell(0, dict(cell_type='raw', metadata={}, source=content)))

In [None]:
# Compare an unprocessed run with one that uses `insert_fm` as a postprocessor
_raw_res = _run_procs()
_res = _run_procs(postprocs=[insert_fm])
# front matter is generated from the H1 title and description ...
assert r'---\ntitle: "a title"\ndescription: "A description"\n---' in _res
# ... and the original H1 text is no longer present afterwards
assert '# a title' in _raw_res and '# a title' not in _res

In [None]:
#|export
_def_types = (ast.FunctionDef,ast.AsyncFunctionDef,ast.ClassDef)
def _def_names(cell, shown):
    "Names of the function/class definitions parsed from `cell` that are not in `shown` and do not start with `_`"
    names = []
    for node in concat(cell.parsed_()):
        if not isinstance(node, _def_types): continue
        if node.name in shown or node.name[0] == '_': continue
        names.append(node.name)
    return names

In [None]:
#|export
def add_show_docs(nb):
    "Add show_doc cells after exported cells, unless they are already documented"
    exports = L(cell for cell in nb.cells if _re_exps(cell.source))
    trees = nb.cells.map(NbCell.parsed_).concat()
    # Only a bare-name first argument (`show_doc(foo)`) can refer to one of the definitions that
    # `_def_names` reports. Calls such as `show_doc(Foo.bar)` (an `ast.Attribute`, no `.id`) or
    # `show_doc()` (no positional args) are skipped instead of raising AttributeError/IndexError.
    shown_docs = {arg.id for t in _show_docs(trees) for arg in t.value.args[:1] if isinstance(arg, ast.Name)}
    # Walk the exports last-to-first — presumably so that an insertion does not shift the position
    # of export cells that are still to be handled (TODO confirm how `idx_` is maintained).
    for cell in reversed(exports):
        # NOTE(review): every name is inserted directly after `cell`, so when a cell defines
        # several names their `show_doc` cells end up in reverse definition order.
        for nm in _def_names(cell, shown_docs):
            code = f'show_doc({nm})'
            nb.cells.insert(cell.idx_+1, mk_cell(code))

In [None]:
# The trailing `'` in each expected string presumably pins the call to the end of a cell's source
# in the stringified output, i.e. to a cell added by `add_show_docs` — confirm
res = _run_procs(preprocs=add_show_docs)
assert "show_doc(some_func)'" in res
assert "show_doc(and_another)'" in res
# no such cell is expected for `another_func`
assert "show_doc(another_func)'" not in res

## Export -

In [None]:
#|skip
# Calls `nbs_export` from `nbprocess.export` with its default arguments
from nbprocess.export import nbs_export
nbs_export()