forked from tensorflow/tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
md2ipynb.py
58 lines (47 loc) · 1.7 KB
/
md2ipynb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import re
import os
import sys
import shutil
import subprocess
from HTMLParser import HTMLParser
# Verbose-mode pattern for inline Markdown links whose target ends in '.md',
# e.g. "[text](page.md)" or "[text](page.md#fragment)".
#   prefix -> everything from the opening '[' up to (not including) '.md'
#   suffix -> the optional '#fragment' plus the closing ')'
# Substituting r'\g<prefix>.ipynb\g<suffix>' therefore swaps only the extension.
_MD_LINKS_PATTERN = r'''
(?P<prefix>
\[ #find a pair of square brackets
[^\]]*?\] #that do not contain a closing square bracket
\( #followed by parens
[^\)]*? #that do not contain a closing paren
)\.md #with a '.md' extension
(?P<suffix>
(\#[^\)]*?)? #and optional frgment
\) #and closing paren
)'''
MD_LINKS = re.compile(_MD_LINKS_PATTERN, flags=re.MULTILINE | re.VERBOSE)
# DANGER: HTML cannot be parsed reliably with a regex; there are a huge number of ways this can go wrong.
#[issue on notedown]()
# Verbose-mode pattern for a single HTML tag that carries an 'href=' whose
# value contains '.md', e.g. '<a href="page.md">'.
#   prefix -> from '<' through the link text that precedes '.md'
#   suffix -> whatever follows '.md' inside the tag, up to and including '>'
# Note that nothing forces '.md' to be the end of the link, so this is a
# best-effort match rather than real HTML parsing.
_HTML_LINKS_PATTERN = r'''
(?P<prefix>
<[^<>]*?href=[^\s<>]*? #containing a literal 'href=' and link text
)\.md #followed my a .md extension
(?P<suffix>
[^<>]*?> #and the remainder of the tag
)
'''
HTML_LINKS = re.compile(_HTML_LINKS_PATTERN, flags=re.MULTILINE | re.VERBOSE)
def convert(filepath):
    """Convert one Markdown file into a Jupyter notebook written next to it.

    Runs the external ``notedown`` command on *filepath*, rewrites Markdown
    and HTML links that point at ``.md`` targets so they point at the
    corresponding ``.ipynb`` targets, and writes the result to *filepath*
    with its extension replaced by ``.ipynb``.

    Args:
        filepath: Path of the Markdown file to convert.

    Raises:
        OSError: If the ``notedown`` executable cannot be started.
        subprocess.CalledProcessError: If ``notedown`` exits with a
            non-zero status.
        UnicodeDecodeError: If the ``notedown`` output is not valid UTF-8.
    """
    # Local import: io.open accepts ``encoding`` on both Python 2 and 3.
    import io

    print(filepath)
    raw = subprocess.check_output(['notedown', filepath])
    # check_output returns bytes on Python 3, but MD_LINKS / HTML_LINKS are
    # text patterns, so decode before substituting.
    # NOTE(review): assumes notedown emits UTF-8 notebook JSON -- confirm.
    ipynb = raw.decode('utf-8')

    # Raw string: '\g' is not a valid Python string escape, so the non-raw
    # spelling triggers an invalid-escape warning on newer interpreters.
    swap_extension = r'\g<prefix>.ipynb\g<suffix>'
    ipynb = MD_LINKS.sub(swap_extension, ipynb)
    ipynb = HTML_LINKS.sub(swap_extension, ipynb)

    outfilepath = os.path.splitext(filepath)[0] + ".ipynb"
    # Write with an explicit encoding instead of the platform default.
    with io.open(outfilepath, 'w', encoding='utf-8') as out:
        out.write(ipynb)
def main(root = os.path.split(__file__)[0]):
    """Convert every ``.md`` file found under *root* with ``convert``.

    Args:
        root: Directory to search recursively. ``~`` is expanded. Defaults
            to the directory component of this script's path. An empty
            value means the current working directory.
    """
    # When the script is started by bare name its directory component is '',
    # and os.walk('') silently yields nothing; fall back to the current
    # directory so the files beside the script are still converted.
    root = os.path.expanduser(root) or os.curdir
    for dirpath, _, filenames in os.walk(root):
        for filename in filenames:
            if filename.endswith('.md'):
                convert(os.path.join(dirpath, filename))
# Script entry point: forward the command-line arguments (if any) to main()
# positionally; with no arguments main() falls back to its default root.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(*cli_args)