In [None]:
#|default_exp processors

# processors
> Some processors for NBProcessor

In [None]:
#|export
import ast

from nbdev.config import *
from nbdev.imports import *
from nbdev.process import *
from nbdev.showdoc import *
from nbdev.doclinks import *

from execnb.nbio import *
from execnb.shell import *
from fastcore.imports import *
from fastcore.xtras import *
import sys,yaml

In [None]:
#|hide
from fastcore.test import *

In [None]:
#|hide
# Fixture notebook that `_run_procs` processes by default
_test_file = '../../tests/docs_test.ipynb'

On this page we'll be using this private helper to process a notebook and return the results, to simplify testing:

In [None]:
def _run_procs(procs=None, return_nb=False, path=_test_file):
    "Run `procs` over the notebook at `path`; return the processed notebook, or its cells joined as text"
    processor = NBProcessor(path, procs)
    processor.process()
    processed = processor.nb
    if return_nb: return processed
    # One `str(cell)` per line makes substring assertions on the result easy
    return '\n'.join(map(str, processed.cells))

In [None]:
#|export
# Fenced front matter: `---`, a body with at least one non-whitespace character, then `---`
_re_fm = re.compile(r'^---(.*\S+.*)---', flags=re.DOTALL)

def is_frontmatter(nb):
    "Raw cells of `nb` whose source contains a `---` fenced front matter block"
    def _has_fence(cell): return _re_fm.search(cell.get('source', ''))
    raw_cells = _celltyp(nb, 'raw')
    return raw_cells.filter(_has_fence)

def yml2dict(s:str, rm_fence=True):
    "Parse the YAML-formatted string `s`, first dropping its `---` fences when `rm_fence` is set"
    body = s
    if rm_fence:
        fenced = _re_fm.search(s.strip())
        # Unfenced input falls through and is parsed as-is
        if fenced: body = fenced.group(1)
    return yaml.safe_load(body)

def _get_frontmatter(nb):
    "First front matter cell of `nb` together with its parsed contents (`{}` when there is no such cell)"
    fm_cell = first(is_frontmatter(nb))
    if not fm_cell: return fm_cell,{}
    return fm_cell,yml2dict(fm_cell.source)

In [None]:
#|export
# Cell magics that switch a code cell to another language
_langs = 'bash|html|javascript|js|latex|markdown|perl|ruby|sh|svg'
_lang_pattern = re.compile(rf'^\s*%%\s*({_langs})\s*$', flags=re.MULTILINE)

class populate_language(Processor):
    "Set cell language based on NB metadata and magics"
    def begin(self):
        # Notebook-wide language, used for cells without a language magic
        self.language = nb_lang(self.nb)

    def cell(self, cell):
        if cell.cell_type != 'code': return
        magic_langs = _lang_pattern.findall(cell.source)
        # The first matching magic wins; otherwise fall back to the notebook language
        cell.metadata.language = magic_langs[0] if magic_langs else self.language

In [None]:
#|hide
# The languages recorded on the fixture's cells must be exactly `python` and `bash`
res = _run_procs(populate_language, return_nb=True)
assert set(L(res.cells).attrgot('metadata').attrgot('language').filter()) == {'python', 'bash'}

In [None]:
#| export
class insert_warning(Processor):
    "Insert Autogenerated Warning Into Notebook after the first cell."
    content = "<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->"
    def begin(self):
        warning_cell = mk_cell(self.content, 'markdown')
        # Index 1 places the notice directly after the first cell
        self.nb.cells.insert(1, warning_cell)

This preprocessor inserts a warning into the markdown destination stating that the file is autogenerated. The warning is inserted as the second cell so that it does not interfere with front matter.

In [None]:
# The warning comment must appear in the processed notebook
res = _run_procs(insert_warning)
assert "<!-- WARNING: THIS FILE WAS AUTOGENERATED!" in res

In [None]:
# NOTE(review): looks like a leftover scratch cell — the filtered list is discarded and `_tstre`
# is not referenced anywhere else in the code visible here; consider deleting this cell.
L('foo', None, 'a').filter(lambda x:x == 1)
_tstre = re.compile('a')

In [None]:
#|export
_def_types = (ast.FunctionDef,ast.AsyncFunctionDef,ast.ClassDef)
def _def_names(cell, shown):
    cellp = cell.parsed_()
    return [showdoc_nm(o) for o in concat(cellp)
            if isinstance(o,_def_types) and o.name not in shown and o.name[0]!='_'] if cellp else []

def _get_nm(tree):
    i = tree.value.args[0]
    if hasattr(i, 'id'): val = i.id
    else: val = try_attrs(i.value, 'id', 'func', 'attr')
    return f'{val}.{i.attr}' if isinstance(i, ast.Attribute) else i.id

In [None]:
#|export
def _show_docs(trees):
    return [t for t in trees if isinstance(t,ast.Expr) and nested_attr(t, 'value.func.id')=='show_doc']

def cell_lang(cell):
    "Language stored at `cell.metadata.language`, defaulting to 'python'"
    return nested_attr(cell, 'metadata.language', 'python')
def _want_doc(c):
    return c.source and c.cell_type=='code' and (set(['export', 'exports', 'exec_doc']).intersection(c.directives_))

class add_show_docs(Processor):
    "Add show_doc cells after exported cells, unless they are already documented"
    def begin(self):
        nb = self.nb
        exports = L(cell for cell in nb.cells if _want_doc(cell))
        trees = L(nb.cells).map(NbCell.parsed_).concat()
        # Names that already have an explicit `show_doc(...)` call in the notebook
        shown_docs = {_get_nm(t) for t in _show_docs(trees)}
        # Visit exported cells last-to-first, so an insertion only shifts cells that were already handled
        for cell in reversed(exports):
            if cell_lang(cell) != 'python':  raise ValueError(f"{cell.metadata.language} can't export:\n{cell.source}")
            # Every insert targets the same index, so add the names last-to-first;
            # the generated `show_doc` cells then follow the order of the definitions in the cell
            for nm in reversed(_def_names(cell, shown_docs)): nb.cells.insert(cell.idx_+1, mk_cell(f'show_doc({nm})'))
        # Truthy when the notebook has anything to document; read later by `exec_show_docs`
        nb.has_docs_ = shown_docs or exports

In [None]:
# `some_func` and `and_another` must get a generated `show_doc` cell; `another_func` must not
res = _run_procs([populate_language, add_show_docs])
assert "show_doc(some_func)'" in res
assert "show_doc(and_another)'" in res
assert "show_doc(another_func)'" not in res

In [None]:
#|hide
# Test that `@patch` works: the method must be documented as `Foo.a_method`
_nb = _run_procs([populate_language, add_show_docs], return_nb=True, path='../../tests/showdoc_test.ipynb')
assert r'show_doc(Foo.a_method)' in L(_nb.cells).attrgot('source')

In [None]:
#|hide
_yml = """
---
foo: bar
comments:
  hypothesis: 
    theme: clean
categories: [c1, c2]
---
"""

# The fences are stripped and nested mappings / lists are parsed
test_eq(yml2dict(_yml), {'foo': 'bar', 'comments': {'hypothesis': {'theme': 'clean'}}, 'categories': ['c1', 'c2']})

In [None]:
#|export
def yaml_str(s:str):
    "Create a valid YAML string from `s`"
    # Text already wrapped in double quotes is returned untouched. Requiring two or more
    # characters keeps '' from raising IndexError and keeps a lone '"' from being
    # treated as a complete quoted string.
    if len(s)>=2 and s[0]=='"' and s[-1]=='"': return s
    # Escape backslashes first so the backslashes added for quotes are not doubled
    res = s.replace('\\', '\\\\').replace('"', r'\"')
    return f'"{res}"'

In [None]:
#| export
_re_title = re.compile(r'^#\s+(.*)[\n\r]+(?:^>\s+(.*))?', flags=re.MULTILINE)

def _celltyp(nb, cell_type): return L(nb.cells).filter(lambda c: c.cell_type == cell_type)
def _istitle(cell): 
    txt = cell.get('source', '')
    return bool(_re_title.search(txt)) if txt else False

In [None]:
#|export
def nb_fmdict(nb, remove=True):
    "Infer the front matter from a notebook's markdown formatting"
    md_cells = _celltyp(nb, 'markdown').filter(_istitle)
    if not md_cells: return {}
    cell = md_cells[0]
    # `match` anchors at the start of the cell: the title must be the first thing in it
    title_match = _re_title.match(cell.source)
    if not title_match: return {}
    title,desc = title_match.groups()
    # `- key: value` bullets become extra front matter entries
    # (raw string: `\s` is not a valid escape in a plain string literal)
    bullets = re.findall(r'^-\s+(.*)', cell.source, flags=re.MULTILINE)
    pairs = [s.split(':', 1) for s in bullets if ':' in s]
    # Later dicts win, so the heading's title/description override same-named bullets
    fm = merge({k:v for k,v in pairs if k and v},
               {'title':yaml_str(title)}, {'description':yaml_str(desc)} if desc else {})
    # The title cell has been consumed as front matter; blank it so it is not rendered as well
    if remove: cell['source'] = None
    return yml2dict('\n'.join([f"{k}: {fm[k]}" for k in fm]))

In [None]:
# Title, description and `- key: value` bullets of the fixture's title cell become front matter
_testnb = read_nb('../../tests/docs_test.ipynb')
_res = nb_fmdict(_testnb)
test_eq(_res, dict(key1='value1', key2='value2', categories=['c1', 'c2'], title='a title', description='A description'))

In [None]:
#|hide
# This fixture is expected to yield no inferred front matter
_testnb2 = read_nb('../../tests/directives.ipynb')
test_eq(nb_fmdict(_testnb2), {})

In [None]:
#|export
def _replace_fm(d:dict, # dictionary you wish to conditionally change
                k:str,  # key to check 
                val:str,# value to check if d[k] == v
                repl_dict:dict #dictionary that will be used as a replacement 
               ):
    "replace key `k` in dict `d` if d[k] == val with `repl_dict`"
    if str(d.get(k, '')).lower().strip() == str(val.lower()).strip():
        d.pop(k)
        d = merge(d, repl_dict)
    return d

def _fp_alias(d):
    "Translate fastpages front matter flags into their Quarto equivalents"
    aliases = (('search_exclude', {'search':'false'}),
               ('hide', {'draft': 'true'}))
    # Each flag is only replaced when it is set to `true`
    for key,replacement in aliases: d = _replace_fm(d, key, 'true', replacement)
    return d

In [None]:
#|hide
# fastpages `hide` / `search_exclude` flags are renamed, while unrelated keys such as `comments` are kept
_testnb3 = read_nb('../../tests/2020-09-01-fastcore.ipynb')
_fm = _fp_alias(nb_fmdict(_testnb3))
assert 'hide' not in _fm and 'draft' in _fm
assert 'search_exclude' not in _fm and 'search' in _fm
assert 'comments' in _fm

In [None]:
#|export
def _fp_image(d):
    "Correct path of fastpages images"
    prefix = 'images/copied_from_nb/'
    if d.get('image', '').startswith(prefix): d['image'] = d['image'].replace(prefix, '')
    return d

In [None]:
#|hide
# The fastpages image prefix is stripped from the `image` path
_testnb3 = read_nb('../../tests/2020-09-01-fastcore.ipynb')
_fm = _fp_image(nb_fmdict(_testnb3))
test_eq(_fm['image'], 'fastcore_imgs/td.png')

In [None]:
#|export
def filter_fm(fmdict:dict):
    "Keep only the recognised front matter keys of `fmdict` (`{}` for empty input)"
    if not fmdict: return {}
    allowed = ['title', 'description', 'author', 'image', 'categories', 'output-file', 'aliases', 'search', 'draft', 'comments']
    return filter_keys(fmdict, in_(allowed))

In [None]:
#|export
def construct_fm(fmdict:dict):
    "Dump `fmdict` as YAML between `---` fences, or return `None` when it is empty"
    if not fmdict: return None
    body = yaml.dump(fmdict)
    return f'---\n{body}\n---'

In [None]:
# Only the allowed keys survive filtering; the fenced result spans 8 lines
_testdict = nb_fmdict(read_nb('../../tests/docs_test.ipynb'))
_res = construct_fm(filter_fm(_testdict))
test_eq(len(_res.splitlines()), 8)
print(_res)

---
categories:
- c1
- c2
description: A description
title: a title

---


In [None]:
#|export
def insert_frontmatter(nb, fm_dict:dict):
    "Prepend a raw front matter cell built from `fm_dict` to `nb`; does nothing when no front matter is produced"
    fm = construct_fm(fm_dict)
    if not fm: return
    raw_cell = NbCell(0, dict(cell_type='raw', metadata={}, source=fm, directives_={}))
    nb.cells.insert(0, raw_cell)

In [None]:
# #|export
# class default_exp:
#     "Store default_exp if present"
#     def __init__(self, nb): self.nb=nb
#     def _default_exp_(self, cell, mod): self.default_exp = mod

In [None]:
#|export
# `#|default_exp <module>` directive; group 1 is the module name
_re_defaultexp = re.compile(r'^\s*#\|\s*default_exp\s+(\S+)', flags=re.MULTILINE)

def _default_exp(nb):
    "Module named by the first `default_exp` directive found in the code cells of `nb`, or `None`"
    code_cells = L(nb.cells).filter(lambda c: c.cell_type == 'code')
    # Drop empty sources before searching them
    sources = code_cells.attrgot('source').filter()
    matches = sources.map(_re_defaultexp.search).filter()
    found = first(matches)
    if not found: return None
    return found.group(1)

In [None]:
#|hide
# The fixture notebook declares `foobar` as its default export module
_testnb = read_nb('../../tests/docs_test.ipynb')
test_eq(_default_exp(_testnb), 'foobar')

In [None]:
#|export
def add_links(cell):
    "Add links to markdown cells and to the `text/markdown` outputs of a cell"
    lookup = NbdevLookup()
    if cell.cell_type == 'markdown': cell.source = lookup.linkify(cell.source)
    for out in cell.get('outputs', []):
        # Only outputs that carry rendered markdown are rewritten
        if not hasattr(out, 'data'): continue
        if not hasattr(out['data'], 'text/markdown'): continue
        md_lines = out.data['text/markdown']
        out.data['text/markdown'] = [lookup.link_line(line) for line in md_lines]

In [None]:
# Known names in backticks become links; unknown names and `<code>` spans stay unlinked
res = _run_procs(add_links)
assert "[`numpy.array`](https://numpy.org/doc/stable/reference/generated/numpy.array.html#numpy.array)" in res
assert "[`ModuleMaker`](https://nbdev.fast.ai/09_API/maker.html#modulemaker) but not a link to `foobar`." in res
assert "A link in a docstring: [`ModuleMaker`](https://nbdev.fast.ai/09_API/maker.html#modulemaker)." in res
assert "And not a link to <code>dict2nb</code>." in res

`strip_ansi` removes the ANSI color escape codes that are streamed to standard out, which can interfere with static site generators:

In [None]:
#|export
# Matches ANSI escape sequences (ESC followed by a control code or a `[...` CSI sequence)
_re_ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

def strip_ansi(cell):
    "Strip Ansi Characters."
    for out in cell.get('outputs', []):
        # Only outputs named `stdout` are cleaned
        if out.get('name') != 'stdout': continue
        out['text'] = [_re_ansi_escape.sub('', line) for line in out.text]

In [None]:
# No ANSI escape sequence may remain in the processed notebook
res = _run_procs(strip_ansi)
assert not _re_ansi_escape.findall(res)

In [None]:
#|export
def strip_hidden_metadata(cell):
    '''Strips "hidden" metadata property from code cells so it doesn't interfere with docs rendering'''
    if cell.cell_type != 'code' or 'metadata' not in cell: return
    # `pop` with a default: cells without the property are left untouched
    cell.metadata.pop('hidden',None)

In [None]:
#|export
def hide_(cell):
    "Hide cell from output"
    # The cell is hidden by dropping its `source` entry
    del cell['source']

In [None]:
# Text from the hidden cell must not survive processing
res = _run_procs(hide_)
assert 'you will not be able to see this cell at all either' not in res

In [None]:
#|export
def _re_hideline(lang=None):
    "Regex for a `hide_line` directive (the `langs[lang]` prefix followed by `|`) at the end of a line"
    return re.compile(fr'{langs[lang]}\|\s*hide_line\s*$', re.MULTILINE)

def hide_line(cell):
    "Hide lines of code in code cells with the directive `hide_line` at the end of a line of code"
    lang = cell_lang(cell)
    if cell.cell_type != 'code': return
    directive = _re_hideline(lang)
    if not directive.search(cell.source): return
    kept = [line for line in cell.source.splitlines() if not directive.search(line)]
    cell.source = '\n'.join(kept)

In [None]:
# The line flagged with `hide_line` (`b = 3`) must be gone from the function body
res = _run_procs(hide_line)
assert r"def show():\n    a = 2\n    b = 3" not in res
assert r"def show():\n    a = 2"                in res

In [None]:
#|hide
# Integration test: `hide_line` run after `populate_language` on the APL fixture notebook.
# `_run_procs` returns the joined cell text here, so `_nb` is a string.
_nb = _run_procs([populate_language,hide_line], path='../../tests/APL.ipynb')
assert 'hide_line' not in _nb

In [None]:
#|export
def filter_stream_(cell, *words):
    "Remove output lines containing any of `words` in `cell` stream output"
    if not words: return
    # `words` are joined into one alternation and applied with `re.search`
    pattern = '|'.join(words)
    for out in cell.get('outputs', []):
        if out.output_type != 'stream': continue
        out['text'] = [line for line in out.text if not re.search(pattern, line)]

In [None]:
# Only these two lines are expected to remain in the filtered stream output
res = _run_procs(filter_stream_)
exp=r"'A line\n', 'Another line.\n'"
assert exp in res

In [None]:
#|export
# Any line whose first non-whitespace character is `%` (line or cell magic)
_magics_pattern = re.compile(r'^\s*(%%|%).*', re.MULTILINE)

def clean_magics(cell):
    "A preprocessor to remove cell magic commands"
    if cell.cell_type != 'code': return
    without_magics = _magics_pattern.sub('', cell.source)
    cell.source = without_magics.strip()

In [None]:
# No cell magic marker may remain after processing
res = _run_procs(clean_magics)
assert "%%" not in res

In [None]:
#|export
# Heading line ending in ` -`; NOTE(review): not referenced by `rm_header_dash` below, which uses string checks
_re_hdr_dash = re.compile(r'^#+\s+.*\s+-\s*$', re.MULTILINE)

def rm_header_dash(cell):
    "Remove headings that end with a dash -"
    if not cell.source: return
    text = cell.source.strip()
    is_dashed_heading = text.startswith('#') and text.endswith(' -')
    if cell.cell_type == 'markdown' and is_dashed_heading: del cell['source']

In [None]:
# Ordinary text is kept, while headings ending in ` -` are removed
res = _run_procs(rm_header_dash)
assert 'some words' in res
assert 'A heading to Hide' not in res
assert 'Yet another heading to hide' not in res

In [None]:
#|export
# Directives whose cells are dropped from the rendered docs
_hide_dirs = {'export','exporti', 'hide','default_exp'}

def rm_export(cell):
    "Remove cells that are exported or hidden"
    directives = cell.directives_
    if not directives: return
    if any(name in _hide_dirs for name in directives): del cell['source']

In [None]:
# Content of exported / hidden cells must not appear in the result
res = _run_procs(rm_export)
assert 'dontshow' not in res

In [None]:
#|export
# A line that starts with `show_doc`
_re_showdoc = re.compile(r'^show_doc', re.MULTILINE)
def _is_showdoc(cell):
    "Truthy for code cells that have a line starting with `show_doc`"
    return cell['cell_type'] == 'code' and _re_showdoc.search(cell.source)

def clean_show_doc(cell):
    "Hide the input of `show_doc` cells by prepending `output: asis` and `echo: false` directives"
    if _is_showdoc(cell):
        directives = '#|output: asis\n#| echo: false\n'
        cell.source = directives + cell.source

In [None]:
#|export
def _ast_contains(trees, types):
    for tree in trees:
        for node in ast.walk(tree):
            if isinstance(node, types): return True

def _do_eval(cell):
    "Return `True` when `cell` should be executed by `exec_show_docs`; every skip path returns `None`"
    # Only Python cells are candidates
    if cell_lang(cell) != 'python': return
    # Skip empty cells and cells that call `nbdev_export()`. The needle is assembled by
    # concatenation — presumably so this definition does not itself contain the literal call; TODO confirm
    if not cell.source or 'nbdev_export'+'()' in cell.source: return
    trees = cell.parsed_()
    if cell.cell_type != 'code' or not trees: return
    # An `eval:` directive whose first argument is `false` (any case) opts the cell out
    if cell.directives_.get('eval:', [''])[0].lower() == 'false': return

    # Cells carrying one of these directives are always executed
    _show_dirs = {'export','exports','exporti','exec_doc'}
    if cell.directives_.keys() & _show_dirs: return True
    # Cells containing imports are executed too; warn when they also hold expressions or assignments
    if _ast_contains(trees, (ast.Import, ast.ImportFrom)):
        if _ast_contains(trees, (ast.Expr, ast.Assign)):
            warn(f'Found a cell containing mix of imports and computations. Please use separate cells. See nbdev FAQ.\n---\n{cell.source}\n---\n')
        return True
    # Finally, cells with a top-level `show_doc(...)` call are executed
    if _show_docs(trees): return True

In [None]:
#|export
class exec_show_docs(Processor):
    "Execute cells needed for `show_docs` output, including exported cells and imports"
    def begin(self):
        # A shell is only created for Python notebooks; `__call__` skips every cell when `k` is missing
        if nb_lang(self.nb) == 'python':
            self.k = CaptureShell()
            self.k.run_cell('from nbdev.showdoc import show_doc')

    def __call__(self, cell):
        if not (self.nb.has_docs_ and hasattr(self, 'k')): return
        frontmatter = getattr(self.nb, 'frontmatter_', {})
        # `skip_showdoc` in the front matter switches execution off for the notebook
        if str2bool(frontmatter.get('skip_showdoc', False)): return
        if _do_eval(cell): self.k.cell(cell)
        if not self.k.exc: return
        # Re-raise the shell's exception with the notebook title (when known) and the offending cell
        title = frontmatter.get('title', '')
        where = ' in notebook: '+title if title else ''
        raise Exception(f"Error{where} in cell {cell.idx_} :\n{cell.source}") from self.k.exc[1]

In [None]:
# Smoke test: the fixture runs through both processors without raising and yields a non-empty result
res = _run_procs([add_show_docs, exec_show_docs])
assert res

## Export -

In [None]:
#|hide
# Run nbdev's export step from inside the notebook
import nbdev; nbdev.nbdev_export()