# Parsing Jupyter Notebooks

In [5]:
import nbformat
import io

# Reading Notebooks

In [3]:
def read_nb(nb):
    """Load a notebook file from disk.

    nb: path of the notebook file to open. The file is read as UTF-8 and
        parsed with nbformat.read using nbformat.NO_CONVERT.

    Returns whatever nbformat.read produces for that file.
    """
    with io.open(nb, 'r', encoding='utf8') as handle:
        return nbformat.read(handle, nbformat.NO_CONVERT)

def write_nb(nb, fn):
    """Save a notebook object to disk as UTF-8.

    nb: notebook object handed to nbformat.write.
    fn: destination file name; '.ipynb' is appended when the name does not
        already end with it.
    """
    target = fn if fn.endswith('.ipynb') else fn + '.ipynb'
    with io.open(target, 'w', encoding='utf8') as handle:
        nbformat.write(nb, handle, nbformat.NO_CONVERT)

def dump_nb(nb, cells=5, lines=5):
    """Print a short preview of the first cells of a notebook.

    nb:    object exposing a `cells` list whose items can be indexed by
           'cell_type' and 'source'.
    cells: maximum number of cells to preview. Notebooks with fewer cells
           are previewed in full instead of raising IndexError.
    lines: maximum number of source lines printed per cell; longer cells are
           truncated and followed by a "..." line.
    """
    # Slicing (rather than indexing 0..cells-1) stops at the end of the
    # notebook, and avoids the Python 2-only xrange.
    for cell in nb.cells[:max(cells, 0)]:
        print("====== " + cell['cell_type'] + " ======")
        src = cell['source'].splitlines()
        if len(src) > lines:
            print('\n'.join(src[0:lines]))
            print("...")
        else:
            print(cell['source'])

## Types of cells in this notebook

In [4]:
from collections import defaultdict
def get_structure(cells):
    """Group cell positions by cell type.

    cells: iterable of objects that expose a `cell_type` attribute.

    Returns a defaultdict(list) mapping each cell type to the list of
    positions (0-based, in iteration order) at which that type occurs.
    """
    positions_by_type = defaultdict(list)
    for position, entry in enumerate(cells):
        kind = entry.cell_type
        positions_by_type[kind].append(position)
    return positions_by_type
            

## Turning Output Cells OFF

In [6]:
def remove_outputs(nb):
    """Empty the `outputs` attribute of every code cell in `nb`.

    The notebook is modified in place; cells whose `cell_type` is not
    'code' are left untouched. Nothing is returned.
    """
    for entry in nb.cells:
        if entry.cell_type != 'code':
            continue
        # A fresh list per cell, so cells never share one outputs object.
        entry.outputs = []

def clear_notebook(old_ipynb, new_ipynb):
    """Write a copy of a notebook with all code-cell outputs removed.

    old_ipynb: path of the notebook file to read.
    new_ipynb: path of the file to write; used exactly as given (no
               '.ipynb' suffix is added).
    """
    # Read explicitly as UTF-8 -- the same encoding the copy is written in --
    # instead of depending on the platform's default text encoding.
    with io.open(old_ipynb, 'r', encoding='utf8') as f:
        nb = nbformat.read(f, nbformat.NO_CONVERT)

    remove_outputs(nb)

    with io.open(new_ipynb, 'w', encoding='utf8') as f:
        nbformat.write(nb, f, nbformat.NO_CONVERT)

# Write an output-free copy of the source notebook to nout.ipynb.
source_nb = 'atoms/visualization/choropleth_classification.ipynb'
new_nb = 'nout.ipynb'

clear_notebook(old_ipynb=source_nb, new_ipynb=new_nb)

## Importing by Type

```
@include {
    'resource' = 'http://geopyter.org/atoms/fundamentals/lists.ipynb',
    'select' = 'h1.Understanding Lists'
}
```

In [7]:
# Parse the source notebook into `nb`.
source_nb = 'atoms/visualization/choropleth_classification.ipynb'
nb = read_nb(nb=source_nb)

In [8]:
import re
def transHeader(matchobj):
    """Turn a matched header marker into its '#' form.

    matchobj: a regex match whose group 1 is the text to keep in front and
              whose group 2 is a string of digits giving the number of '#'
              characters to emit.

    Returns group 1 followed by that many '#' characters.
    """
    prefix = matchobj.group(1)
    depth = int(matchobj.group(2))
    hashes = '#' * depth
    return prefix + hashes

def parseSelect(s):
    """Parse one select statement into (marker, title) pairs.

    s: a string such as 'h2.Layout of a Function h3.Function Definition'.
       It is split on tokens of the form 'hN.' or '-hN.'; the text after
       each token (up to the next token) is that token's title.

    Each marker is rewritten so that 'hN' becomes N '#' characters, keeping
    a leading '-' if present. One line per pair, '<marker> <title>', is
    printed, and the list of (marker, title) tuples is returned. A string
    without any marker token yields an empty list.
    """
    def to_hashes(matchobj):
        # 'h3' -> '###', '-h3' -> '-###'
        return matchobj.group(1) + '#' * int(matchobj.group(2))

    # With one capture group, re.split returns
    # [text-before, marker, title, marker, title, ...]: markers sit at the
    # odd positions and each is followed by its title.
    tokens = re.split(r'\s*(\-?h\d+)\.', s)

    pairs = []
    for i in range(1, len(tokens) - 1, 2):
        # Only the marker is translated; title text is left untouched so a
        # title that happens to contain e.g. 'h2' is not corrupted.
        marker = re.sub(r'(-?)h(\d+)', to_hashes, tokens[i], flags=re.IGNORECASE)
        pairs.append((marker, tokens[i + 1]))

    for marker, title in pairs:
        print(' '.join([marker, title]))
    return pairs
    
def includeContent(nb, select=None, ctype=None, clear=True):
    """Import content from another notebook.

    * nb: path of the notebook file to read (passed to read_nb)
    * select: specification of content within the notebook to
      include/exclude, written as ';'-separated statements. Each statement
      is currently only handed to parseSelect; no cells are filtered by it
      yet.
    * ctype: code/markdown; keep only cells of this type and drop every
      other type. None keeps all cells.
    * clear: boolean; clear code-cell outputs while importing?

    Not yet implemented: choosing a particular version of a notebook to use
    (via Git/GitHub). Other params?

    Returns the notebook object read from `nb` after the steps above.
    """

    # Read the notebook into a composition
    cnb = read_nb(nb)

    # If we only want to *keep* the specified
    # cell type and delete all others...
    if ctype is not None:
        print("Ctype: " + str(ctype))
        # Rebuild the list in one pass. Deleting by index, one cell type at a
        # time, goes wrong as soon as more than one type has to be removed:
        # the positions recorded for the second type are stale once the
        # first type's cells are gone.
        cnb.cells = [c for c in cnb.cells if c.cell_type == ctype]

    # Remove any code output from the included
    # notebook as part of the process
    if clear:
        print("Clear: " + str(clear))
        remove_outputs(cnb)

    # Now the 'magic' (I'm probably not writing
    # great Python code here though...)
    if select is not None:
        # Split on semi-colons in the select statement,
        # then break each statement into sub-selects
        for s in select.split(";"):
            parseSelect(s)

        # TODO: use the parsed selections to filter cnb.cells. The data
        # structure for that still needs some thought (a dictionary of
        # dictionaries was the working idea).

    return cnb

# Keep only the markdown cells of the Functions atom (outputs are not
# cleared) and save the result; write_nb adds the '.ipynb' suffix.
foo = includeContent(
    'atoms/foundations/Functions.ipynb',
    select='h2.Layout of a Function h3.Function Definition',
    ctype='markdown',
    clear=False,
)
write_nb(foo, fn='test')

Ctype: markdown
## Layout of a Function
### Function Definition
