@@ -53,6 +53,22 @@ async def html_to_pdf(html_file, pdf_file, pyppeteer_args=None):
53
53
width = dimensions ["width" ]
54
54
height = dimensions ["height" ]
55
55
56
+ await page .evaluate (
57
+ """
58
+ function getOffset( el ) {
59
+ var _x = 0;
60
+ var _y = 0;
61
+ while( el && !isNaN( el.offsetLeft ) && !isNaN( el.offsetTop ) ) {
62
+ _x += el.offsetLeft - el.scrollLeft;
63
+ _y += el.offsetTop - el.scrollTop;
64
+ el = el.offsetParent;
65
+ }
66
+ return { top: _y, left: _x };
67
+ }
68
+ """ ,
69
+ force_expr = True ,
70
+ )
71
+
56
72
await page .addStyleTag (
57
73
{
58
74
"content" : """
@@ -73,6 +89,17 @@ async def html_to_pdf(html_file, pdf_file, pyppeteer_args=None):
73
89
}
74
90
)
75
91
92
+ h1s = await page .evaluate (
93
+ """() => {
94
+ var vals = []
95
+ for (const elem of document.getElementsByTagName("h1")) {
96
+ //console.log(elem, getOffset(elem).top, elem.innerText)
97
+ vals.push({ top: getOffset(elem).top, text: elem.innerText })
98
+ }
99
+ return vals
100
+ }"""
101
+ )
102
+
76
103
await page .pdf (
77
104
{
78
105
"path" : pdf_file ,
@@ -87,12 +114,47 @@ async def html_to_pdf(html_file, pdf_file, pyppeteer_args=None):
87
114
88
115
await browser .close ()
89
116
117
+ return h1s
118
+
119
+
120
def finish_pdf(pdf_in, pdf_out, notebook, headings):
    """Add finishing touches to the PDF file.

    To make the PDF nicer we:

    * attach the original notebook to the PDF for reference
    * add bookmarks pointing to the headers in a notebook

    Args:
        pdf_in: PDF to read the pages from (handed to ``PyPDF2.PdfFileReader``).
        pdf_out: Path the finished PDF is written to.
        notebook: Mapping with the keys ``"file_name"`` and ``"contents"``
            describing the attachment to embed.
        headings: Iterable of mappings with the keys ``"top"`` (vertical
            offset of the heading in the rendered document) and ``"text"``
            (bookmark title). ``None`` or an empty iterable adds no bookmarks.
    """
    # NOTE(review): 200 * 72 is presumably the maximum height of a single
    # rendered PDF page (200 inches at 72 units per inch) -- confirm against
    # the page.pdf() call in html_to_pdf.
    page_span = 200 * 72

    pdf = PyPDF2.PdfFileWriter()
    # The second positional parameter of PdfFileReader is `strict`, not a
    # file mode. The "rb" that used to be passed here only ever acted as a
    # truthy flag, so spell that out explicitly.
    pdf.appendPagesFromReader(PyPDF2.PdfFileReader(pdf_in, strict=True))
    pdf.addAttachment(notebook["file_name"], notebook["contents"])

    page_count = pdf.getNumPages()
    if not page_count:
        # nothing to point a bookmark at
        headings = ()

    for heading in headings or ():
        # Offsets measured in the browser can be fractional; page indices
        # must be integers and the page box coordinate below is a Decimal
        # that cannot be mixed with floats.
        top = round(heading["top"])

        # Clamp so that a heading reported outside the rendered pages still
        # gets a bookmark instead of aborting the whole export.
        page_num = min(max(top // page_span, 0), page_count - 1)

        page_top = pdf.getPage(page_num).artBox[-1]

        # position on the page as measured from the bottom of the page
        # with a bit of leeway so that clicking the bookmark doesn't put
        # the heading right at the border
        on_page_pos = page_top - (top % page_span) + 20

        # there is no nice way of passing the "zoom arguments" at the very
        # end of the function call without explicitly listing all the
        # parameters of the function. We can't use keyword arguments :(
        pdf.addBookmark(
            heading["text"],
            page_num,
            None,
            None,
            False,
            False,
            "/XYZ",
            0,
            on_page_pos,
            None,
        )

    with open(pdf_out, "wb") as fp:
        pdf.write(fp)
98
160
@@ -116,7 +178,9 @@ async def notebook_to_pdf(
116
178
with tempfile .NamedTemporaryFile (suffix = ".html" ) as f :
117
179
f .write (exported_html .encode ())
118
180
f .flush ()
119
- await html_to_pdf (f .name , pdf_path , pyppeteer_args )
181
+ heading_positions = await html_to_pdf (f .name , pdf_path , pyppeteer_args )
182
+
183
+ return heading_positions
120
184
121
185
122
186
class PDFExporter (Exporter ):
@@ -161,7 +225,7 @@ def from_notebook_node(self, notebook, resources=None, **kwargs):
161
225
pdf_fname2 = os .path .join (name , "output-with-attachment.pdf" )
162
226
pyppeteer_args = ["--no-sandbox" ] if self .no_sandbox else None
163
227
164
- self .pool .submit (
228
+ heading_positions = self .pool .submit (
165
229
asyncio .run ,
166
230
notebook_to_pdf (
167
231
notebook ,
@@ -174,13 +238,14 @@ def from_notebook_node(self, notebook, resources=None, **kwargs):
174
238
).result ()
175
239
resources ["output_extension" ] = ".pdf"
176
240
177
- attach_notebook (
241
+ finish_pdf (
178
242
pdf_fname ,
179
243
pdf_fname2 ,
180
244
{
181
245
"file_name" : f"{ resources ['metadata' ]['name' ]} .ipynb" ,
182
246
"contents" : nbformat .writes (notebook ).encode ("utf-8" ),
183
247
},
248
+ heading_positions ,
184
249
)
185
250
186
251
with open (pdf_fname2 , "rb" ) as f :
0 commit comments