In [None]:
#hide
#default_exp read

# nbdev.read
- Reading a notebook, and initial bootstrapping for notebook exporting

In [None]:
#export
from fastcore.imports import *
from fastcore.foundation import *
from fastcore.utils import *
from fastcore.test import *
from fastcore.script import *
from fastcore.xtras import *

import json,ast,functools

In [None]:
import time,nbclient,tempfile
from IPython.display import Markdown
from pprint import pformat,pprint

## Reading, executing, and writing notebooks

A notebook is just a json file:

In [None]:
# Sample notebook used by the examples in this section
minimal_fn = Path('../tests/minimal.ipynb')
# NOTE(review): `read_json` is not a stdlib `Path` method — presumably added by fastcore; confirm.
# The result is the notebook's parsed JSON (a plain `dict`, as displayed below).
minimal_txt = minimal_fn.read_json()

We'll create a function that lets us display JSON in a more compact form, so we can take a look at this file:

In [None]:
def display_json(d):
    "Render `d` as a compact, pretty-printed Python code block wrapped in `Markdown`"
    # A wide line limit plus `compact=True` packs several items per line; `sort_dicts=False` keeps key order as given
    body = pformat(d, indent=2, width=120, compact=True, sort_dicts=False)
    fenced = "```python\n" + body + "\n```"
    return Markdown(fenced)

In [None]:
# Show the whole notebook dict; as the cell's last expression, the returned `Markdown` is what gets rendered
display_json(minimal_txt)

```python
{ 'cells': [ {'cell_type': 'markdown', 'metadata': {}, 'source': ['# A minimal notebook']},
             { 'cell_type': 'code',
               'execution_count': None,
               'metadata': {'foo': 'bar'},
               'outputs': [],
               'source': ['# Do some arithmetic\n', '1+1']}],
  'metadata': { 'kernelspec': {'display_name': 'Python 3', 'language': 'python', 'name': 'python3'},
                'language_info': { 'codemirror_mode': {'name': 'ipython', 'version': 3},
                                   'file_extension': '.py',
                                   'mimetype': 'text/x-python',
                                   'name': 'python',
                                   'nbconvert_exporter': 'python',
                                   'pygments_lexer': 'ipython3',
                                   'version': '3.8.3'}},
  'nbformat': 4,
  'nbformat_minor': 4}
```

The important bit for us is the `cells`:

In [None]:
# Only the `cells` list: one markdown cell followed by one (not yet executed) code cell
display_json(minimal_txt['cells'])

```python
[ {'cell_type': 'markdown', 'metadata': {}, 'source': ['# A minimal notebook']},
  { 'cell_type': 'code',
    'execution_count': None,
    'metadata': {'foo': 'bar'},
    'outputs': [],
    'source': ['# Do some arithmetic\n', '1+1']}]
```

The second cell here is a `code` cell, however it contains no outputs, because it hasn't been executed yet. To execute a notebook, we first need to convert it into a format suitable for `nbclient` (which expects some `dict` keys to be available as attrs, and some available as regular `dict` keys). Normally, `nbformat` is used for this step, but it's rather slow and inflexible, so we'll write our own function based on `fastcore`'s handy `dict2obj`, which makes all keys available as both attrs *and* keys.

In [None]:
#export
class NbCell(AttrDict):
    "`AttrDict` for a single notebook cell, recording its index as `idx_` and parsing its source on demand"
    def __init__(self, idx, cell):
        "Wrap the `cell` dict, store its position `idx` as `idx_`, and join any `source` into a single string"
        super().__init__(cell)
        self.idx_ = idx
        if 'source' in self: self.set_source(self.source)

    # The repr is the raw source text, so printing a cell (or formatting it into an f-string) shows its content
    def __repr__(self): return self.source

    def set_source(self, source):
        "Set the cell source from a string or an iterable of strings, dropping any previously cached parse"
        self.source = ''.join(source)
        if '_parsed_' in self: del(self['_parsed_'])

    def parsed_(self):
        "AST body of a code cell; `None` for non-code cells and cells starting with a `%` magic or `!` shell escape"
        # Magics and shell escapes are IPython syntax rather than Python, so `ast.parse` would raise `SyntaxError`
        # on them. Leading whitespace is skipped first so a magic preceded by blank lines or spaces is detected too.
        if self.cell_type!='code' or self.source.lstrip()[:1] in ('%','!'): return
        if '_parsed_' not in self: self._parsed_ = ast.parse(self.source).body
        return self._parsed_

We use an `AttrDict` subclass which has some basic functionality for accessing notebook cells.

In [None]:
#export
def dict2nb(js):
    "Convert notebook dict `js` to an `AttrDict` whose `cells` are `NbCell`s that know their index"
    nb = dict2obj(js)
    # Pair each cell with its position, then build an `NbCell` from every `(idx, cell)` pair
    indexed = nb.cells.enumerate()
    nb.cells = indexed.starmap(NbCell)
    return nb

We can now convert our JSON into this `nbclient`-compatible format...

In [None]:
# Convert the raw JSON dict; cells become `NbCell`s usable both as dicts and via attribute access
minimal = dict2nb(minimal_txt)

...and execute it:

In [None]:
# Run the notebook; the outputs are added back to `minimal` (inspected in the next cell).
# The trailing `;` suppresses display of the call's return value.
nbclient.execute(minimal);

One nice feature of the output of `dict2nb` is that we can still use it as a `dict`, so `display_json` still works as before. We can see that the cell has been executed, and the output added back to the `nb`:

In [None]:
# Index 1 is the code cell (`1+1`); after execution it carries an `outputs` list
cell = minimal.cells[1]
display_json(cell.outputs)

```python
[{'output_type': 'execute_result', 'metadata': {}, 'data': {'text/plain': '2'}, 'execution_count': 1}]
```

The abstract syntax tree of a code cell's source is available by calling its `parsed_` method:

In [None]:
# The first statement in the cell is the expression `1+1`: `.value` is its binary-operation node, `.op` the operator
cell.parsed_()[0].value.op

<_ast.Add at 0x7fd6ae4eba30>

Since loading JSON and converting to an NB is something we'll do a lot, we'll create a shortcut function for it:

In [None]:
#export
def read_nb(path):
    "Load the notebook JSON file at `path` and convert it with `dict2nb`"
    js = Path(path).read_json()
    return dict2nb(js)

In [None]:
# Re-read the notebook from disk (this replaces the executed copy bound to `minimal` above)
minimal = read_nb(minimal_fn)
# Printing an `NbCell` shows its source text, since `NbCell.__repr__` returns `source`
print(minimal.cells[0])

# A minimal notebook


## Config

nbdev uses a `settings.ini` file in the root of the project to store all configuration details. This file is in `ConfigParser` format, and can be read and written conveniently using fastcore's `Config` class.

In [None]:
#export
@call_parse
def nbdev_create_config(
    user:str, # Repo username
    host:str='github', # Repo hostname
    lib_name:str=None, # Name of library
    path:str='.', # Path to create config file
    cfg_name:str='settings.ini', # Name of config file to create
    branch:str='master', # Repo branch
    git_url:str="https://github.com/%(user)s/%(lib_name)s/tree/%(branch)s/", # Repo URL
    custom_sidebar:bool_arg=False, # Create custom sidebar?
    nbs_path:str='.', # Name of folder containing notebooks
    lib_path:str='%(lib_name)s', # Folder name of root module
    doc_path:str='docs', # Folder name containing docs
    tst_flags:str='', # Test flags
    version:str='0.0.1', # Version number
    **kwargs
):
    "Creates a new config file for `lib_name` and `user` and saves it."
    if lib_name is None:
        # Infer the library name from the directory *containing* the working directory, going one level
        # further up when that directory is itself named `nbs`.
        # NOTE(review): this inspects `Path.cwd()`, not `path` — confirm that is the intended behaviour.
        base = Path.cwd().parent
        if base.name=='nbs': lib_name = base.parent.name
        else: lib_name = base.name
    # The settings written to the file, in this order; any extra `kwargs` are combined in with `merge`
    config = dict(host=host, lib_name=lib_name, user=user, branch=branch, git_url=git_url, lib_path=lib_path,
                  nbs_path=nbs_path, doc_path=doc_path, tst_flags=tst_flags, version=version,
                  custom_sidebar=custom_sidebar)
    config = merge(config, kwargs)
    save_config_file(Path(path)/cfg_name, config)

This is a wrapper for `fastcore`'s `save_config_file` which sets some `nbdev` defaults. It is also installed as a CLI command.

In [None]:
#export
@functools.lru_cache(maxsize=None)
def get_config(cfg_name='settings.ini', path=None):
    "`Config` for the ini file `cfg_name` found in `path` (a `str` or `Path`, defaults to `cwd`) or its nearest ancestor"
    # Results are memoized per call arguments with no size limit, so the lookup for a given
    # `(cfg_name, path)` is only done once; a later change of working directory does not refresh
    # the `path=None` entry.
    # An explicit `path` is converted to a resolved, absolute `Path`: a plain `str` has no `.parent`, and a
    # relative path such as `Path('.')` is its own parent, which would end the upward search immediately.
    cfg_path = Path.cwd() if path is None else Path(path).resolve()
    # Walk up until a directory containing `cfg_name` is found, or the root (which is its own parent) is reached
    while cfg_path != cfg_path.parent and not (cfg_path/cfg_name).exists(): cfg_path = cfg_path.parent
    return Config(cfg_path, cfg_name=cfg_name)

`get_config` searches for `settings.ini` in the current directory, and then in all parent directories, stopping when it is found.

In [None]:
# Write a throwaway config file one directory up; `lib_name` is omitted so that its inference is exercised
nbdev_create_config('fastai', path='..', nbs_path='nbs', tst_flags='tst', cfg_name='test_settings.ini')
# No `path` given: the search starts in the working directory and finds the file in the parent
cfg = get_config('test_settings.ini')
test_eq(cfg.lib_name, 'nbdev')
# The `%(user)s`, `%(lib_name)s` and `%(branch)s` placeholders of the default `git_url` come back filled in
test_eq(cfg.git_url, "https://github.com/fastai/nbdev/tree/master/")
# These checks assume the notebook is run from the project's `nbs` folder
cwd = Path.cwd()
test_eq(cfg.config_path, cwd.parent.absolute())
test_eq(cfg.path('lib_path'), cwd.parent/'nbdev')
test_eq(cfg.path('nbs_path'), cwd)
test_eq(cfg.path('doc_path'), cwd.parent/'docs')

##  Exporting a basic module

In [None]:
#export
_init = '__init__.py'

def _has_py(fs):
    "Whether any of the file names in `fs` ends with `.py`"
    return any(f.endswith('.py') for f in fs)

def add_init(path):
    "Add `__init__.py` in all subdirs of `path` containing python files if it's not there already"
    path = Path(path)
    # `parents=True` so a not-yet-existing nested destination can be created in one go
    path.mkdir(parents=True, exist_ok=True)
    # The root always gets an `__init__.py`, whether or not it holds python files yet
    if not (path/_init).exists(): (path/_init).touch()
    # we add the lowest-level `__init__.py` files first, which ensures _has_py succeeds for parent modules
    for r,ds,fs in os.walk(path, topdown=False):
        r = Path(r)
        # Never touch an existing `__init__.py` (this would needlessly update its modification time)
        if (r/_init).exists(): continue
        # A dir is a module if it holds a python file itself, or if any direct child dir does
        if _has_py(fs) or any(_has_py(os.listdir(r/d)) for d in ds): (r/_init).touch()

Python modules require an `__init__.py` file in all directories that are modules. We assume that every directory containing a python file (including in subdirectories of any depth) is a module, and therefore add an `__init__.py` to each.

In [None]:
# Build a small tree in a temp dir: `a/b` holds a python file, `a/c` is empty
with tempfile.TemporaryDirectory() as d:
    d = Path(d)
    (d/'a/b').mkdir(parents=True)
    (d/'a/b/f.py').touch()
    (d/'a/c').mkdir()
    add_init(d)
    assert not (d/'a/c'/_init).exists(), "Should not add init to dir without py file"
    # `a` has no python file of its own, but gets an init because its child `a/b` does
    for e in [d, d/'a', d/'a/b']: assert (e/_init).exists(),f"Missing init in {e}"

In [None]:
#export
def export_cells(cells, hdr, file, offset=0):
    "Write every cell in `cells` to `file`, each preceded by a `hdr` line that ends with the cell's `idx_`"
    # NB: `offset` is accepted but not used here
    for c in cells:
        marker = f'{hdr} {c.idx_}'
        # marker line, the cell formatted as text, then blank lines separating it from the next cell
        file.write(f'{marker}\n{c}\n\n\n')

## Export -

In [None]:
#export
def basic_export_nb(fname, dest, lib_path=None):
    "Basic exporter to bootstrap nbdev: writes the `#export` code cells of notebook `fname` to `dest/read.py`"
    # `lib_path` is only used to make the notebook path in the cell markers relative; it defaults to the
    # `lib_path` setting of the project config.
    # NOTE(review): the output file name `read.py` is hard-coded — this exporter only bootstraps this module.
    if lib_path is None: lib_path = get_config().lib_path
    fname,dest = Path(fname),Path(dest)
    nb = read_nb(fname)

    # grab the source from all the *code* cells that start with an `export` comment; other cell types are
    # skipped so that e.g. a markdown cell beginning with "# export..." can't end up in the module
    cells = L(cell for cell in nb.cells if cell.cell_type=='code' and re.match(r'#\s*export', cell.source))

    # find all the exported top-level functions and classes, to create `__all__`
    # (names starting with `_` are left out)
    trees = cells.map(NbCell.parsed_).concat()
    funcs = trees.filter(risinstance((ast.FunctionDef,ast.ClassDef))).attrgot('name')
    exp_funcs = [f for f in funcs if f[0]!='_']

    # write out the file
    dest.mkdir(exist_ok=True)
    hdr = f"# %% {fname.relpath(lib_path)}"
    # Python reads source files as UTF-8 by default, so write UTF-8 rather than the platform's default encoding
    with (dest/'read.py').open('w', encoding='utf-8') as f:
        f.write(f"# %% auto 0\n__all__ = {exp_funcs}\n\n\n")
        export_cells(cells, hdr, f)

In [None]:
# Bootstrap: export this very notebook into the `nbdev` package, starting from a clean slate
path = Path('../nbdev')
(path/'read.py').unlink(missing_ok=True)
add_init(path)
basic_export_nb("00_read.ipynb", path)

# The freshly written module must be importable and list the public names in `__all__`
from nbdev import read
assert read.export_cells
assert 'basic_export_nb' in read.__all__