-
Notifications
You must be signed in to change notification settings - Fork 22
/
convert_notebooks_to_html_partial.py
233 lines (184 loc) · 6.62 KB
/
convert_notebooks_to_html_partial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
"""Usage: convert_notebooks_to_html_partial.py [NOTEBOOKS ...]
This script takes the .ipynb files in the notebooks/ folder converts them to
HTML partials in the ch/ folder, recreating the folder structure in notebooks/.
It also adds the prev_page and next_page frontmatter keys to each HTML page
using _data/toc.yml to find the previous and next page in the book.
For reference:
https://nbconvert.readthedocs.org/en/latest/nbconvert_library.html
http://nbconvert.readthedocs.org/en/latest/nbconvert_library.html#using-different-preprocessors
Arguments:
NOTEBOOKS Notebook files to convert. By default, will convert all
notebooks in the notebooks/ folder.
"""
import glob
import os
import yaml
import toolz.curried as t
from textwrap import dedent
from docopt import docopt
from subprocess import run
from itertools import chain
import nbformat
from nbinteract import InteractExporter
from traitlets.config import Config
# Template for every generated page. The HTML file needs to start with Jekyll
# front-matter and be wrapped in a raw tag so that Jekyll won't process double
# curly braces in the HTML.
#
# The template is filled in with str.format, so the Liquid tags are written
# with doubled braces ({{% raw %}}) in order to come out as single braces.
# The trailing .strip() removes the newlines next to the triple quotes so the
# result begins directly with the front-matter delimiter.
wrapper = """
---
prev_page: {prev_page}
next_page: {next_page}
---
{{% raw %}}
<div id="ipython-notebook">
<div class="buttons">
<button class="interact-button js-nbinteract-widget">
Show Widgets
</button>
</div>
{html}
</div>
{{% endraw %}}
""".strip()
# Exporter used for every notebook conversion in this script.
html_exporter = InteractExporter(
    config=Config({
        'InteractExporter': {
            # Use ExtractOutputPreprocessor to extract images to separate
            # files
            'preprocessors': [
                'nbconvert.preprocessors.ExtractOutputPreprocessor',
            ],
            'template_file': 'plain',
            'button_at_top': False,
        },
    })
)
# Directory that receives the images extracted from notebook outputs. The same
# name, prefixed with '/', is used as the image URL prefix in the rendered
# HTML.
NOTEBOOK_IMAGE_DIR = 'notebooks-images'

# YAML file containing textbook table of contents
TOC_PATH = '_data/toc.yml'
def convert_notebooks_to_html_partial(notebook_paths, url_map):
    """
    Render each notebook in notebook_paths as a Jekyll-ready HTML partial.

    For every notebook this writes <chapter>/<name>.html (or just <name>.html
    when the notebook's directory contains no path separator) and saves the
    images extracted from its outputs into NOTEBOOK_IMAGE_DIR.

    url_map maps an output path to a dict with 'prev' and 'next' entries that
    are placed in the page's front-matter. A page that is missing from url_map
    gets 'false' for both and a warning is printed.
    """
    for notebook_path in notebook_paths:
        # notebooks/01/<name>.ipynb -> 'notebooks/01' and '<name>.ipynb'
        notebook_dir = os.path.dirname(notebook_path)
        notebook_file = os.path.basename(notebook_path)

        # notebooks/examples -> 'examples'; a directory without a separator
        # means the page is not placed in a chapter folder
        if os.sep in notebook_dir:
            chapter = os.path.basename(notebook_dir)
        else:
            chapter = ''

        # <name>.ipynb -> '<name>'. It doubles as the unique key for images,
        # which results in names like AB_5_1.png for a notebook called
        # AB.ipynb
        stem = os.path.splitext(notebook_file)[0]

        # Path to output final HTML file
        outfile_path = os.path.join(chapter, stem + '.html')
        if chapter:
            os.makedirs(chapter, exist_ok=True)

        notebook = nbformat.read(notebook_path, 4)
        notebook.cells.insert(0, _preamble_cell(notebook_dir))

        html, resources = html_exporter.from_notebook_node(
            notebook,
            resources={
                'unique_key': stem,
                # This sets the img tag URL in the rendered HTML.
                'output_files_dir': '/' + NOTEBOOK_IMAGE_DIR,
            },
        )

        if outfile_path not in url_map:
            warning = (
                '[Warning]: {} not found in _data/toc.yml. This page will '
                'not appear in the textbook table of contents.'
            )
            print(warning.format(outfile_path))

        neighbours = url_map.get(outfile_path, {})
        final_output = wrapper.format(
            html=html,
            prev_page=neighbours.get('prev', 'false'),
            next_page=neighbours.get('next', 'false'),
        )

        # Write out HTML
        with open(outfile_path, 'w', encoding='utf-8') as html_file:
            html_file.write(final_output)

        # Write out images, flattened into NOTEBOOK_IMAGE_DIR by file name
        for image_key, image_bytes in resources['outputs'].items():
            image_path = os.path.join(
                NOTEBOOK_IMAGE_DIR, os.path.basename(image_key)
            )
            with open(image_path, 'wb') as image_file:
                image_file.write(image_bytes)

        print(outfile_path + " written.")
def _preamble_cell(path):
    """
    Build the hidden code cell that is inserted at the start of each notebook.

    The cell only runs `%reset -f` to clear previously defined variables; it
    does not change the working directory.

    path is kept so existing callers continue to work, but it is not used:
    the cell source has no placeholder that could receive it.
    """
    code = dedent(
        '''
        # HIDDEN
        # Clear previously defined variables
        %reset -f
        '''
    )
    return nbformat.v4.new_code_cell(source=code)
def convert_notebooks_to_markdown(notebook_paths):
    """
    Produce a Markdown copy of every notebook in notebook_paths.

    Notebooks are difficult to use for code reviewing, so this runs
    `jupyter nbconvert --to markdown` on them to give reviewers plain text
    versions.
    """
    command = ['jupyter', 'nbconvert', '--to', 'markdown']
    command.extend(notebook_paths)
    run(command)
#
# Build the mapping between entry URLs and their prev / next pages.
#


@t.curry
def flatmap(f, items):
    """Apply f to every item and chain the resulting iterables together."""
    return chain.from_iterable(map(f, items))
def _not_internal_link(entry):
return not entry.get('url', '').startswith('/')
def _flatten_sections(entry):
sections = entry.get('sections', [])
return [t.dissoc(entry, 'sections')] + sections
def _sliding_three(entries):
return ([(None, entries[0], entries[1])] +
list(t.sliding_window(3, entries)) +
[(entries[-2], entries[-1], None)])
wrap_url = "'{}'".format
def _adj_pages(triplet):
prev, cur, nex = triplet
return {
cur.strip('/'): {
'prev': wrap_url(prev) if prev is not None else 'false',
'next': wrap_url(nex) if nex is not None else 'false',
}
}
def generate_url_map(yaml_path=TOC_PATH) -> dict:
    """
    Generates mapping from each URL to its previous and next URLs in the
    textbook.

    Keys are page URLs with leading and trailing slashes stripped. Each
    'prev' / 'next' value is the neighbouring URL wrapped in single quotes,
    or the string 'false' for the first / last page. The dictionary looks
    like:

        {
            'ch/10/some_page.html' : {
                'prev': "'/ch/09/foo.html'",
                'next': "'/ch/10/bar.html'",
            },
            ...
        }
    """
    # safe_load is used because the TOC is plain data, and calling yaml.load
    # without an explicit Loader is deprecated since PyYAML 5.1 and rejected
    # by PyYAML 6. The encoding is pinned so the result does not depend on
    # the platform default.
    with open(yaml_path, encoding='utf-8') as f:
        data = yaml.safe_load(f)

    pipeline = [
        # Drop top-level entries whose URL does not start with '/'
        t.remove(_not_internal_link),
        # Put each entry's sections right after the entry itself
        flatmap(_flatten_sections),
        t.map(t.get('url')), list, _sliding_three,
        t.map(_adj_pages),
        t.merge()
    ]
    return t.pipe(data, *pipeline)
if __name__ == '__main__':
    arguments = docopt(__doc__)

    # Use the notebooks named on the command line, or fall back to every
    # notebook under notebooks/ (in sorted order) when none were given
    notebooks = arguments['NOTEBOOKS']
    if not notebooks:
        notebooks = sorted(
            glob.glob('notebooks/**/*.ipynb', recursive=True)
        )

    # The image directory must exist before any notebook is converted
    os.makedirs(NOTEBOOK_IMAGE_DIR, exist_ok=True)

    url_map = generate_url_map()
    convert_notebooks_to_html_partial(notebooks, url_map)
    # Markdown export is currently disabled:
    # convert_notebooks_to_markdown(notebooks)