Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 449 lines (350 sloc) 13.5 KB
#!/usr/bin/env python
import os
import re
import sys
import shutil
import datetime
import subprocess
from os import path
from glob import iglob
from pprint import pprint
from argparse import ArgumentParser
from collections import OrderedDict
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
import unidecode
import houdini as h
from tornado import template
from misaka import Markdown, HtmlRenderer, HtmlTocRenderer, SmartyPants, \
EXT_NO_INTRA_EMPHASIS, EXT_TABLES, EXT_FENCED_CODE, EXT_AUTOLINK, \
EXT_STRIKETHROUGH, EXT_LAX_SPACING, EXT_SPACE_HEADERS, \
EXT_SUPERSCRIPT, \
HTML_SKIP_HTML, HTML_SKIP_STYLE, HTML_SKIP_IMAGES, HTML_SKIP_LINKS, \
HTML_EXPAND_TABS, HTML_SAFELINK, HTML_TOC, HTML_HARD_WRAP, \
HTML_USE_XHTML, HTML_ESCAPE
from yaml import load as load_yaml
try:
from yaml import CLoader as YamlLoader
except ImportError:
from yaml import Loader as YamlLoader
# Names usable in the ``misaka_extensions`` list of config.yml, mapped to the
# misaka EXT_* bit flag each one switches on.
misaka_extensions = {
    'no_intra_emphasis': EXT_NO_INTRA_EMPHASIS,
    'tables': EXT_TABLES,
    'fenced_code': EXT_FENCED_CODE,
    'autolink': EXT_AUTOLINK,
    'strikethrough': EXT_STRIKETHROUGH,
    'lax_spacing': EXT_LAX_SPACING,
    'space_headers': EXT_SPACE_HEADERS,
    'superscript': EXT_SUPERSCRIPT,
}
# Names usable in the ``misaka_html_flags`` list of config.yml, mapped to the
# misaka HTML_* render flag each one switches on.
misaka_html_flags = {
    'skip_html': HTML_SKIP_HTML,
    'skip_style': HTML_SKIP_STYLE,
    'skip_images': HTML_SKIP_IMAGES,
    'skip_links': HTML_SKIP_LINKS,
    'expand_tabs': HTML_EXPAND_TABS,
    'safelink': HTML_SAFELINK,
    'toc': HTML_TOC,
    'hard_wrap': HTML_HARD_WRAP,
    'use_xhtml': HTML_USE_XHTML,
    'escape': HTML_ESCAPE,
}
# One or more consecutive characters that are not word characters.
RE_NON_WORD = re.compile(r'\W+')


def slugify(s):
    """Return a slug for ``s``.

    The text is passed through ``unidecode.unidecode``, lower-cased, and every
    run of non-word characters is replaced by a single ``-``.  A ``/`` is
    treated as a separator: each segment is slugified on its own and the
    segments are joined with ``/`` again.
    """
    if '/' not in s:
        lowered = unidecode.unidecode(s).lower()
        return RE_NON_WORD.sub('-', lowered)
    return '/'.join(slugify(segment) for segment in s.split('/'))
def list_files(dir_path):
    """Yield the path of every non-directory entry found below ``dir_path``.

    Entries are visited in ``os.listdir`` order; a sub-directory is descended
    into at the position where it is listed.
    """
    for entry in os.listdir(dir_path):
        candidate = path.join(dir_path, entry)
        if not path.isdir(candidate):
            yield candidate
            continue
        # Directory: hand over to the recursive walk of that directory.
        yield from list_files(candidate)
def parse_yaml(data):
    """Parse the YAML text ``data`` and return the result as an ExtendedDict.

    An empty document, for which the loader returns ``None``, yields an empty
    ExtendedDict instead of raising ``TypeError``.
    """
    # NOTE(review): YamlLoader is the full (non-safe) PyYAML loader, which can
    # construct arbitrary Python objects.  Only feed it trusted site sources.
    parsed = load_yaml(data, Loader=YamlLoader)
    if parsed is None:
        parsed = {}
    return ExtendedDict(parsed)
def split_frontmatter(data):
    """Split a document into its YAML front matter and the remaining body.

    The document has to start with a ``---`` line, followed by the front
    matter and a closing ``---``.  Returns ``(parsed_front_matter, body)``,
    or ``None`` when ``data`` does not have that shape.
    """
    found = re.match(r'\A---\s+^(.+?)$\s+---\s*(.*)\Z', data, re.M | re.S)
    if not found:
        return None
    header, body = found.groups()
    return (parse_yaml(header), body)
# Format codes: http://docs.python.org/library/datetime.html#strftime-strptime-behavior
def format_date(format, date=None):
    """Return ``date`` rendered with the strftime pattern ``format``.

    When ``date`` is omitted (or ``None``) the current UTC time is used.
    """
    moment = datetime.datetime.utcnow() if date is None else date
    return moment.strftime(format)
class ExtendedDict(dict):
    """A dict whose items can also be read and written as attributes.

    Reading an attribute that is not a key gives ``None`` rather than raising
    ``AttributeError``; assigning an attribute stores it as an item.
    """

    def __getattr__(self, name):
        # Only called when regular attribute lookup fails, so dict methods
        # keep working; missing keys fall through to None.
        return self.get(name)

    def __setattr__(self, name, value):
        self.__setitem__(name, value)
class CodeHtmlFormatter(HtmlFormatter):
    """HtmlFormatter that wraps highlighted code in
    ``<pre class="highlight"><code>`` ... ``</code></pre>``."""

    def wrap(self, source, outfile=None):
        """Wrap the token stream ``source``.

        ``outfile`` is ignored.  It is optional so the override works both
        when the caller passes ``wrap(source, outfile)`` and when it passes
        ``wrap(source)`` only.
        NOTE(review): newer Pygments releases appear to use the one-argument
        form -- confirm against the installed version.
        """
        return self._wrap_code(source)

    def _wrap_code(self, source):
        """Yield the opening tags, every item of ``source``, then the closing tags."""
        # Wrapper markup is yielded with a leading 0, the same shape the
        # original override used for non-source lines.
        yield 0, '<pre class="highlight"><code>'
        for is_code, fragment in source:
            yield is_code, fragment
        yield 0, '</code></pre>'
# [[Title]] or [[Title | target]]; a preceding backslash prevents the match.
RE_WIKILINK = re.compile(r'(?<!\\)\[\[(.*?)(?:\s*\|\s*(.*?))?\]\]')


class ElyseRenderer(HtmlRenderer, SmartyPants):
    """misaka HTML renderer with wiki links and Pygments code highlighting.

    The Markdown source is passed through ``_render_str`` before rendering,
    ``[[...]]`` wiki links are expanded after rendering, and fenced code
    blocks with a language are highlighted with Pygments.
    """

    # Owning Elyse instance; must be assigned before rendering wiki links,
    # because _make_wikilink calls ``self.elyse._get_url``.
    elyse = None

    def _render_str(self, text):
        """Hook applied to the raw document; the default returns it unchanged."""
        return text

    def preprocess(self, document):
        """Run the parent preprocessing, then the ``_render_str`` hook."""
        document = super().preprocess(document)
        return self._render_str(document)

    def _make_wikilink(self, match):
        """Build the ``<a>`` element for one RE_WIKILINK match.

        The link target is the part after ``|`` when present, otherwise the
        title itself.  An empty title produces the literal text 'NO TITLE'.
        """
        title, ident = match.groups()
        if not title:
            return 'NO TITLE'
        return '<a href="%s">%s</a>' % (self.elyse._get_url(ident or title), title)

    def postprocess(self, document):
        """Run the parent postprocessing, then expand wiki links."""
        document = super().postprocess(document)
        return RE_WIKILINK.sub(self._make_wikilink, document)

    def block_code(self, text, lang):
        """Render a code block.

        With a language that Pygments knows, the code is highlighted.  Without
        a language, or with one Pygments has no lexer for, the code is emitted
        HTML-escaped in a plain ``<pre class="highlight"><code>`` block, so a
        misspelled fence language does not abort the whole build.
        """
        if lang:
            # Local import: keeps this block self-contained.
            from pygments.util import ClassNotFound
            try:
                lexer = get_lexer_by_name(lang, stripall=True)
            except ClassNotFound:
                print('.. Unknown code language: %s' % lang)
            else:
                return highlight(text, lexer, CodeHtmlFormatter())
        return '\n<pre class="highlight"><code>%s</code></pre>\n' % h.escape_html(text.strip())
class Elyse(object):
    """Static blog generator.

    Reads ``config.yml``, ``templates/``, ``assets/``, ``posts/``, ``pages/``
    and ``scripts/`` from the source directory and writes the rendered site
    into the destination directory.
    """

    def __init__(self, opts=None):
        """Load the configuration and set up template and Markdown rendering.

        opts -- mapping with the keys 'src' (source directory) and 'dest'
                (output directory).  Missing keys fall back to '.' and
                'build'.  The mapping is copied, so neither a shared default
                nor the caller's object is modified.
        """
        self.opts = dict(opts or {})
        self.opts['src'] = path.abspath(self.opts.get('src', '.'))
        self.opts['dest'] = path.abspath(self.opts.get('dest', 'build'))
        self.config = ExtendedDict()

        # Check for configuration file
        config_path = path.join(self.opts['src'], 'config.yml')
        if path.exists(config_path):
            print('>> Load config.yml')
            with open(config_path, 'r') as fd:
                self.config = parse_yaml(fd.read())
        else:
            print('>> config.yml not found')

        # Data collected by _posts() and handed to page/tag/archive templates
        self._data = ExtendedDict({
            'posts': [],
            'archive': OrderedDict(),
            'tags': ExtendedDict()
        })

        # Template loader
        self._loader = template.Loader(path.join(self.opts['src'], 'templates'), autoescape=None)

        # Markdown: fold the configured names into misaka bit masks
        extensions = 0
        for name in self.config.misaka_extensions or ():
            extensions |= misaka_extensions[name]
        flags = 0
        for name in self.config.misaka_html_flags or ():
            flags |= misaka_html_flags[name]

        self._markdown = Markdown(self._make_renderer(flags), extensions)
        # The TOC variants always render with HTML_TOC switched on.
        self._toc = Markdown(HtmlTocRenderer(), extensions)
        self._markdown_toc = Markdown(self._make_renderer(flags | HTML_TOC), extensions)

        # Default template variables
        self._defaults = {
            'site': self.config,
            'url': self._get_url,
            'asset': self._get_asset,
            'markdown': self._render_md,
            'date': format_date
        }

    def _make_renderer(self, flags):
        """Create an ElyseRenderer wired to this instance."""
        renderer = ElyseRenderer(flags)
        renderer.elyse = self
        renderer._render_str = self._render_str
        return renderer

    def _assets(self):
        """Copy ``<src>/assets`` to ``<dest>/assets`` when it exists."""
        src = path.join(self.opts['src'], 'assets')
        dest = path.join(self.opts['dest'], 'assets')
        if not path.exists(src):
            return
        print('>> Assets')
        shutil.copytree(src, dest)

    def _posts(self):
        """Render every post in ``<src>/posts`` and record it in ``self._data``.

        Posts are processed in reverse filename order and written to
        ``<dest>/<year>/<month>/<day>/<slug>/index.html``.
        """
        src = path.join(self.opts['src'], 'posts')
        dest = self.opts['dest']
        if not path.exists(src):
            return
        print('>> Posts')
        for fn in sorted(os.listdir(src), reverse=True):
            # Match: "YYYY-MM-DD <title>.md" (whitespace separates date and title)
            match = re.match(r'([0-9]{4})-([0-9]{2})-([0-9]{2})\s+(.*?)\.md', fn)
            if not match:
                continue
            year, month, day, title = match.groups()
            src_fn = path.join(src, fn)
            dest_dir = path.join(dest, year, month, day, slugify(title))
            dest_fn = path.join(dest_dir, 'index.html')

            # Render post
            with open(src_fn, 'r') as fd:
                raw_data = fd.read()
            data = split_frontmatter(raw_data)
            if data is None:
                print('.. Invalid post: %s' % src_fn)
                continue
            post, content = data
            if 'layout' not in post:
                post.layout = 'post.html'
            # Last modification date of the post
            post.mdate = datetime.datetime.fromtimestamp(os.stat(src_fn).st_mtime)
            post.url = '%s' % '/'.join(match.groups())
            post.date = datetime.date(int(year), int(month), int(day))
            post.content = self._render_md(content)
            output = self._render_tpl(post.layout, post=post)

            # Write
            os.makedirs(dest_dir)
            with open(dest_fn, 'wb') as fd:
                fd.write(output)

            # Add to posts
            self._data.posts.append(post)

            # Add to archive
            if 'archive_layout' in self.config:
                if post.date.year not in self._data.archive:
                    self._data.archive[post.date.year] = []
                self._data.archive[post.date.year].append(post)

            # Add to tags; a post without a ``tags`` entry simply has none
            if 'tag_layout' in self.config:
                for tag in post.tags or ():
                    if tag not in self._data.tags:
                        self._data.tags[tag] = ExtendedDict({
                            'title': tag,
                            'posts': []
                        })
                    self._data.tags[tag].posts.append(post)

    def _pages(self):
        """Render every file below ``<src>/pages``.

        ``.md`` files need front matter with a ``layout`` and are rendered
        through that layout; every other file is rendered as a template
        string.  Names listed in the ``root`` config entry are written
        directly into the destination; all others become
        ``<dest>/<name>/index.html``.
        """
        src = path.join(self.opts['src'], 'pages')
        dest = self.opts['dest']
        if not path.exists(src):
            return
        print('>> Pages')
        # ``root`` is optional in config.yml; treat a missing entry as empty.
        root_names = self.config.root or ()
        for fn in list_files(src):
            name, ext = path.splitext(fn.replace(src, '', 1).strip('/\\'))
            name = slugify(name)
            with open(fn, 'r') as fd:
                raw_data = fd.read()
            if ext == '.md':
                data = split_frontmatter(raw_data)
                if data is None:
                    print('.. Invalid page: %s%s' % (name, ext))
                    continue
                page, content = data
                if 'layout' not in page:
                    print('.. No layout specified: %s%s' % (name, ext))
                    continue
                if page.toc is True:
                    page.toc, page.content = self._render_md_toc(content)
                else:
                    page.content = self._render_md(content)
                output = self._render_tpl(page.layout, page=page,
                                          posts=self._data.posts,
                                          archive=self._data.archive,
                                          tags=self._data.tags)
            else:
                output = self._render_str(raw_data,
                                          posts=self._data.posts,
                                          archive=self._data.archive,
                                          tags=self._data.tags)

            # Check if files need to placed in the root
            if name in root_names:
                if ext == '.md':
                    ext = '.html'
                name = '%s%s' % (name, ext)
                dest_fn = path.join(dest, name)
            else:
                if not path.exists(path.join(dest, name)):
                    os.makedirs(path.join(dest, name))
                dest_fn = path.join(dest, name, 'index.html')

            # Write page
            with open(dest_fn, 'wb') as fd:
                fd.write(output)

    def _tags(self):
        """Write ``<dest>/tags/<tag>/index.html`` for every collected tag."""
        if not self._data.tags or 'tag_layout' not in self.config:
            return
        print('>> Tags')
        dest = path.join(self.opts['dest'], 'tags')
        for tag, data in self._data.tags.items():
            output = self._render_tpl(self.config.tag_layout, tag=data)
            os.makedirs(path.join(dest, tag))
            with open(path.join(dest, tag, 'index.html'), 'wb') as fd:
                fd.write(output)

    def _archive(self):
        """Write ``<dest>/archive/index.html`` when ``archive_layout`` is set."""
        if 'archive_layout' not in self.config:
            return
        print('>> Archive')
        output = self._render_tpl(self.config.archive_layout,
                                  archive=self._data.archive)
        archive_dir = path.join(self.opts['dest'], 'archive')
        os.makedirs(archive_dir)
        with open(path.join(archive_dir, 'index.html'), 'wb') as fd:
            fd.write(output)

    def _run_scripts(self, config_key, label):
        """Run the scripts listed under ``config_key``, one after another.

        Each entry is resolved against ``<src>/scripts`` and executed through
        the shell with the destination directory as working directory.  Every
        script is waited for before the next step starts; a non-zero exit
        status is reported but does not stop the build.
        """
        if config_key not in self.config:
            return
        print(label)
        for script in self.config[config_key] or ():
            status = subprocess.call(path.join(self.opts['src'], 'scripts', script),
                                     cwd=self.opts['dest'], shell=True)
            if status != 0:
                print('.. Script failed (exit %s): %s' % (status, script))

    def _pre_scripts(self):
        """Run the ``pre_scripts`` from the configuration."""
        self._run_scripts('pre_scripts', '>> Pre-scripts')

    def _post_scripts(self):
        """Run the ``post_scripts`` from the configuration."""
        self._run_scripts('post_scripts', '>> Post-scripts')

    def _get_asset(self, path):
        """Return the URL of the asset ``path`` below ``base_url``."""
        return '%s/assets/%s' % (self.config.base_url, path)

    def _get_url(self, path=''):
        """Return the URL of ``path`` below ``base_url``; the path is slugified."""
        return '%s/%s' % (self.config.base_url, slugify(path))

    def _render_md(self, data):
        """Render Markdown text to HTML."""
        return self._markdown.render(data)

    def _render_md_toc(self, data):
        """Return ``(table_of_contents, html)`` for Markdown text."""
        return (
            self._toc.render(data),
            self._markdown_toc.render(data)
        )

    def _render_toc(self, data):
        """Render Markdown text with the TOC-enabled renderer."""
        return self._markdown_toc.render(data)

    def _context(self, extra):
        """Return the default template variables overlaid with ``extra``.

        A new dict is built for every call so the variables of one render
        (``post``, ``page``, ...) never leak into a later render.
        """
        context = dict(self._defaults)
        context.update(extra)
        return context

    def _render_str(self, template_string, **kwargs):
        """Render ``template_string`` as a template with the default variables plus ``kwargs``."""
        t = template.Template(template_string, loader=self._loader, autoescape=None)
        return t.generate(**self._context(kwargs))

    def _render_tpl(self, template, **kwargs):
        """Render the template file named ``template`` with the default variables plus ``kwargs``."""
        return self._loader.load(template).generate(**self._context(kwargs))

    def generate(self):
        """Empty the destination directory and build the whole site."""
        dest = self.opts['dest']
        # Cleanup before writing
        if path.exists(dest):
            for entry in os.listdir(dest):
                entry = path.join(dest, entry)
                if path.isdir(entry):
                    shutil.rmtree(entry)
                else:
                    os.remove(entry)
        else:
            # Scripts use the destination as working directory, so it has to
            # exist before the first step runs.
            os.makedirs(dest)

        # Generate
        self._pre_scripts()
        self._assets()
        self._posts()
        self._pages()
        self._tags()
        self._archive()
        self._post_scripts()
def main(args):
    """Parse the command line ``args`` and generate the site.

    Two optional positionals are accepted: the source directory (default
    '.') and the destination directory (default 'build').
    """
    cli = ArgumentParser(description='A static blog generator.')
    cli.add_argument(
        'src', nargs='?', default='.', metavar='source',
        help='the location %(prog)s looks for source files.')
    cli.add_argument(
        'dest', nargs='?', default='build', metavar='destination',
        help='the location %(prog)s outputs to.')
    namespace = cli.parse_args(args)
    site = Elyse(vars(namespace))
    site.generate()


if __name__ == '__main__':
    main(sys.argv[1:])