Skip to content

Commit

Permalink
fully refactored feed.xml generation
Browse files Browse the repository at this point in the history
feed.xml will for now contain entries corresponding to all
blog posts. Closes #29.

Both feed.xml and the various atoms/*.xml files are believed
to be reasonably compliant Atom. Closes #33.
  • Loading branch information
BartMassey committed Aug 26, 2022
1 parent a07c748 commit ce33615
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 109 deletions.
10 changes: 5 additions & 5 deletions gitatom/__main__.py
Expand Up @@ -30,11 +30,11 @@ def on_commit(mds):
files = []
for md in mds:
# make feed entry file
outname, feedfile = build.atomify("file", md=md)
outfile = open('./atoms/' + outname, 'w')
outfile.write(feedfile)
outfile.close()
files.append('atoms/' + outname)
feedfile = build.gen_feedfile(md)
outname = 'atoms/' + build.filepart(md) + '.xml'
with open(outname, 'w') as outfile:
outfile.write(feedfile)
files.append(outname)

html = build.build_it()
for f in html:
Expand Down
211 changes: 107 additions & 104 deletions gitatom/build.py
Expand Up @@ -67,6 +67,10 @@ def split_content(content):
body = fields[2]
return title, body

# get the non-extension part of the basename of p
def filepart(p):
return path.splitext(path.basename(p))[0]

RAW_TEXT_RE = re.compile("<[^>]*>([^<]*)<[^>]*>\n", flags=re.M)

# extract text from ElementTree leaf preserving entities.
Expand All @@ -78,134 +82,120 @@ def raw_text(node):
assert match is not None, f"internal error: failed match on '{text}'"
return match[1]


# Set up a from-scratch configuration from the config data.
def get_atom_config():
# Provides: site_url, now
def new_config():
cfg = config.load_into_dict()
if 'site_url' not in cfg:
cfg['site_url'] = f"http://{feed_id}"
cfg['site_url'] = f"http://{cfg['feed_id']}"
cfg['now'] = f"{datetime.utcnow().isoformat(timespec='seconds')}Z"
return cfg

def cfg_from_atom(cfg, md):
# Update a config from an atom file if possible; otherwise
# from the source md file.
# Requires: feed_id, entry_filename, now
# Provides: entry_id, entry_published, entry_updated
def update_cfg_from_atom(cfg):
cfg['entry_id'] = cfg['feed_id'] + '/' + cfg['entry_filename']

# Check for a matching xml file
atompath = './atoms/'
atom_filename = './atoms/' + cfg['entry_filename'] + '.xml'
# should only ever return 0-1 matches
exists = glob.glob(atompath + outname)
exists = Path(atom_filename).is_file()
if exists:
# atom file already exists so update instead of create new
# overwrite existing file
s = slice(len(atompath), len(exists[0])) # use path length to slice
# check slice path against existing file name
assert outname == exists[0][s], "weird match fail"
# retain existing publish date
tree = ET.parse(atompath + outname)
# atom file already exists so retain existing
# publish date
tree = ET.parse(atom_filename)
root = tree.getroot()
entry = root.find('{*}entry')
assert entry is not None, f"{outname}: no entry: {list(root)}"
published = entry.find('{*}published')
assert published is not None, f"{outname}: no published: {list(entry)}"
entry_published = published.text
entry_updated = now
cfg['entry_published'] = published.text
else:
# atom file didnt exist yet
# use current time
entry_published = now
entry_updated = entry_published
feed_updated = entry_updated
cfg['entry_published'] = cfg['now']
cfg['entry_updated'] = cfg['now']
return cfg

# Generate an Atom Feed Entry
# Requires: entry_title, entry_author, entry_id,
# entry_published, entry_updated, entry_link,
# entry_content_tag, entry_content
def gen_entry(cfg):
# Create entry
entry = '<entry>\n'
entry += '<title>' + cfg['entry_title'] + '</title>\n'
entry += '<author><name>' + cfg['entry_author'] + '</name></author>\n'
entry += '<id>' + cfg['entry_id'] + '</id>\n'
entry += '<published>' + cfg['entry_published'] + '</published>\n'
entry += '<updated>' + cfg['entry_updated'] + '</updated>\n'
if 'entry_link' in cfg:
entry += cfg['entry_link'] + '\n'
entry += cfg['entry_content_tag'] + '\n'
entry += cfg['entry_content']
entry += '</content>\n'
entry += '</entry>\n'
return entry

# generate a feed header
def feed_header(cfg):
# generate atom feed header
# Requires: feed_title, feed_id, now
def gen_atom_header(cfg):
header = '<?xml version="1.0" encoding="utf-8"?>\n'
header += '<feed xmlns="http://www.w3.org/2005/Atom">\n'
header += f'<title>{cfg["feed_title"]}</title>\n'
header += f'<id>{cfg["feed_id"]}</id>\n'
header += '<link rel="self" type="application/atom+xml" '
header += f'href="{cfg["site_url"]}/feed.atom"/>\n'
header += '<title>' + cfg["feed_title"] + '</title>\n'
header += '<id>' + cfg["feed_id"] + '</id>\n'
header += '<updated>' + cfg["now"] + '</updated>\n'
header += '<generator uri="https://github.com/GitAtom-PDX/GitAtom">'
header += 'GitAtom</generator>\n'
header += f'<updated>{cfg["now"]}</updated>\n'
return header

# Generate an Atom Feed File or Feed File header or Feed File entry.
def xatomify(outtype, md=None):
assert outtype in {"file", "header", "entry"}, \
f"internal error: unknown atom output type {outtype}"
def gen_atom_footer(cfg):
return "</feed>\n"



content_title, content_body = split_content(content)

# Create entry
entry = '<entry>\n'
if outtype == "file":
entry += '<title>' + entry_filename + '</title>\n'
elif outtype == "entry":
entry += '<title>' + content_title + '</title>\n'
else:
assert False, f"internal error: unknown atom content_type {outtype}"
entry += '<author><name>' + author + '</name></author>\n'
entry += '<id>' + entry_id + '</id>\n'
entry += '<published>' + entry_published + '</published>\n'
entry += '<updated>' + entry_updated + '</updated>\n'
# https://stackoverflow.com/a/66029848/364875
if outtype == "file":
entry += '<content type="text/markdown; charset=UTF-8; variant=GFM">'
entry += content
entry += '</content>\n'
elif outtype == "entry":
entry += f'<link href="{site_url}/posts/{entry_filename}.html">\n'
entry += f'<content type="xhtml" xml:base="{site_url}">'
html_content = cmarkgfm.markdown_to_html(
content_body,
options = cmarkgfm_options.CMARK_OPT_UNSAFE,
)
entry += html_content
entry += '</content>\n'
else:
assert False, f"internal error: unknown atom content type {outtype}"
entry += '</entry>\n'

if outtype == "entry":
return entry

if outtype == "file":

assert False, f"internal error: unknown atom type {outtype}"

# HERE

def gen_feed_file(cfg):
feed = '<?xml version="1.0" encoding="UTF-8"?>\n'
feed += '<feed xmlns="http://www.w3.org/2005/Atom">\n'
feed += '<title>' + cfg['feed_title'] + '</title>\n'
feed += '<updated>' + cfg['feed_updated'] + '</updated>\n'
feed += '<id>' + cfg['feed_id'] + '</id>\n'
feed += entry
feed += '</feed>\n'
return outname, feed

def atomify(md):
# generate Atom Feed content string
def gen_feedfile(md):
# Check for invalid filetype
assert md.endswith('.md'), f"Non .md file {md}"

# Get a base config
cfg = new_config()

# Read and escape content.
with open (md,'r') as f:
cfg['entry_filename'] = filepart(md)
with open(md, 'r') as f:
raw_content = f.read()
content = escape.escape(raw_content)
cfg['entry_content'] = content

# Update cfg for entry.
cfg = update_cfg_from_atom(cfg)
cfg['entry_title'] = cfg['entry_filename']
cfg['entry_author'] = cfg['author']
site_url = cfg['site_url']
entry_filename = cfg['entry_filename']
cfg['entry_link'] = f'<link href="{site_url}/posts/{entry_filename}.html"/>'
cfg['entry_content_tag'] = \
'<content type="text/markdown; charset=UTF-8; variant=GFM">'

# Generate feed file contents.
feed = gen_atom_header(cfg)
feed += gen_entry(cfg)
feed += gen_atom_footer(cfg)

return feed

# generate Atom feed header
# Requires: gen_atom_header(), site_url
def gen_feed_header(cfg):
header = gen_atom_header(cfg)
header += '<link rel="self" type="application/atom+xml" '
header += f'href="{cfg["site_url"]}/feed.atom"/>\n'
return header

cfg = get_atom_config()
cfg['content'] = content

# Get path info.
# TODO make os-agnostic
cfg['entry_filename'] = path.splitext(path.basename(md))[0]
cfg['outname'] = cfg['entry_filename'] + '.xml'
outname, feed = gen_feed_file(cfg)
return outname, feed
def gen_feed_footer(cfg):
return gen_atom_footer(cfg)

# scan, render and write blog content
def build_it():
Expand Down Expand Up @@ -234,26 +224,25 @@ def build_it():
# atoms to be scanned and rendered
atoms = list(atoms_dir.glob('*.xml'))
# posts to be sorted and rendered on blog template
# and in Atom feed
posts = list()
# posts to be sorted and rendered on archive template
archives = list()
# generated html files to be returned
files = list()
# markdown bodies to be added to feed.
contents = list()

# read atoms and render individual entries, add to lists
for atom in atoms:
# extract relevant information from atom feed file
tree = ET.parse(atom)
root = tree.getroot()
entry = root.find('{*}entry')
atom_id = entry.find('{*}id')
atom_content_raw = raw_text(entry.find('{*}content'))
atom_content = escape.unescape(atom_content_raw)
atom_updated = entry.find('{*}updated')
atom_published = entry.find('{*}published')

contents.append(atom_content)
atom_filename = filepart(atom)

post_title, post_markdown = split_content(atom_content)
post_body = cmarkgfm.markdown_to_html(
Expand Down Expand Up @@ -285,6 +274,8 @@ def build_it():
'title' : post_title,
'body' : post_body,
'link' : post_link,
'id' : atom_id.text,
'filename' : atom_filename,
}
posts.append(post)

Expand Down Expand Up @@ -348,13 +339,25 @@ def build_it():

# write feed.xml
with open("./site/feed.xml", 'w') as feed:
atomheader = atomify("header")
feed.write(atomheader)
for md in contents:
# add entry to feed file
atomentry = atomify("entry", md=md)
feed.write(atomentry)
feed.write('</feed>\n')
cfg = new_config()
feed_header = gen_feed_header(cfg)
feed.write(feed_header)
for post in posts:
cfg['entry_title'] = post['title']
cfg['entry_author'] = cfg['author']
cfg['entry_id'] = post['id']
cfg['entry_published'] = post['published']
cfg['entry_updated'] = post['updated']
site_url = cfg['site_url']
link_url = site_url + '/' + post['filename'] + '.html'
cfg['entry_link'] = link_url
cfg['entry_content'] = post['body']
cfg['entry_content_tag'] = \
f'<content type="xhtml" xml:base="{site_url}">'
feed_entry = gen_entry(cfg)
feed.write(feed_entry)
feed_footer = gen_feed_footer(cfg)
feed.write(feed_footer)
files.append('site/feed.xml')

# write blog and archive html files, build files list
Expand Down

0 comments on commit ce33615

Please sign in to comment.