Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ETag and Last-Modified #130

Merged
merged 5 commits into from Oct 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
88 changes: 82 additions & 6 deletions publ/caching.py
@@ -1,10 +1,19 @@
# caching.py
""" Useful caching functions """

import os
import hashlib
import arrow

from flask_caching import Cache
from flask import request
from pony import orm

from . import config
from . import index
from . import utils
from . import queries
from . import model

cache = Cache() # pylint: disable=invalid-name

Expand All @@ -14,11 +23,78 @@ def init_app(app):
cache.init_app(app, config=config.cache)


def make_category_key():
    """ Build the cache key for a category view from the request's full_path """
    full_path = request.full_path
    return 'category/' + full_path
def do_not_cache():
    """ Return True if the current page render must bypass the cache """

    # Never cache while the site is being reindexed
    reindexing = index.in_progress()
    if reindexing:
        return True

    # A conditional request might be answered with a 304 NOT MODIFIED, and
    # that response must not be cached as the result for *all* client
    # requests to this URI
    conditional = request.if_none_match or request.if_modified_since
    return bool(conditional)


def get_cache_tag(file, mtime):
    """ Build the (ETag, Last-Modified) pair for a file

    Arguments:

    file -- passed to utils.file_fingerprint; the ETag is derived from the result
    mtime -- modification time, returned unchanged as the Last-Modified value
    """
    fingerprint = utils.file_fingerprint(file)
    digest = hashlib.md5(fingerprint.encode('utf-8')).hexdigest()
    # Only the first 16 hex digits of the digest are used as the ETag
    return digest[:16], mtime


def get_view_cache_tag(template, entry=None):
    """ Get a pessimistic cache tag for a view

    The tag is taken from whichever of these has the greatest timestamp: the
    most recently indexed file, the template, the given entry's file (if any),
    and the most recently-published entry.

    Arguments:

    template -- the template file being used to render
    entry -- the entry being rendered, if any

    Returns (etag,last-modified)
    """

    # (timestamp, file path) pairs; the greatest one determines the tag
    stamps = []

    # the most recently indexed file, if one has been recorded
    if index.last_modified.file:
        stamps.append(index.last_modified())

    # the template file
    stamps.append((template.mtime, template.file_path))

    # the entry's own file, when an entry was given
    if entry:
        path = entry.file_path
        stamps.append((os.stat(path).st_mtime, path))

    # the most recently-published entry (even on entry views, since this
    # can affect prev/next links)
    with orm.db_session:
        newest = queries.build_query({}).order_by(
            orm.desc(model.Entry.utc_date))[:1]
        if newest:
            # use the publish time here, not the file modification time
            published = newest[0]
            stamps.append(
                (arrow.get(published.utc_date).timestamp, published.file_path))

    newest_time, newest_file = max(stamps)
    return get_cache_tag(newest_file, newest_time)


def not_modified(etag, mtime):
    """ Return True if the request indicates that the client's cache is valid

    Arguments:

    etag -- the current ETag for the response
    mtime -- the current modification time, as a UNIX timestamp
    """

    if request.if_none_match:
        # RFC 7232 section 3.3: If-Modified-Since must be ignored whenever
        # If-None-Match is present, so the ETag comparison alone decides.
        # Falling through on a mismatch could wrongly report "not modified".
        return request.if_none_match.contains(etag)

    if request.if_modified_since:
        # HTTP dates have one-second resolution, so drop the fraction
        mod_time = arrow.get(int(mtime))
        # Normalize the header value to an Arrow first: a naive datetime
        # cannot be ordered against a timezone-aware one
        if arrow.get(request.if_modified_since) >= mod_time:
            return True

    return False


def make_entry_key():
    """ Key generator for entries """
    return 'entry/' + request.path
12 changes: 12 additions & 0 deletions publ/index.py
Expand Up @@ -27,6 +27,13 @@
# Work queue of the indexer's thread pool, or None when THREAD_POOL has no
# `_work_queue` attribute (it is a private attribute, hence the getattr)
WORK_QUEUE = getattr(THREAD_POOL, '_work_queue', None)


def last_modified():
    """ Return the (mtime, file) pair stored for the most recently scanned file """
    return (last_modified.mtime, last_modified.file)


# The state lives on the function object itself; both values start out as None
last_modified.mtime = last_modified.file = None


def queue_length():
    """ Approximate size of the indexer work queue, or None if there is no queue """
    if not WORK_QUEUE:
        return None
    return WORK_QUEUE.qsize()
Expand Down Expand Up @@ -76,6 +83,11 @@ def do_scan():
elif result:
set_fingerprint(fullpath)

mtime = os.stat(fullpath).st_mtime
if not last_modified.mtime or mtime > last_modified.mtime:
last_modified.mtime = mtime
last_modified.file = fullpath


@orm.db_session
def get_last_fingerprint(fullpath):
Expand Down
22 changes: 18 additions & 4 deletions publ/rendering.py
Expand Up @@ -6,6 +6,7 @@
import os
import logging
import base64
import email.utils

import flask
from flask import request, redirect, render_template, url_for
Expand Down Expand Up @@ -196,7 +197,7 @@ def render_path_alias(path):
return redir


@cache.cached(key_prefix=caching.make_category_key, unless=index.in_progress)
@cache.cached(key_prefix="category/%s", query_string=True, unless=caching.do_not_cache)
@orm.db_session
def render_category(category='', template=None):
""" Render a category page.
Expand Down Expand Up @@ -241,6 +242,11 @@ def render_category(category='', template=None):
# nope, we just don't know what this is
raise http_error.NotFound("No such view")

# We now know what's going to be rendered, let's get a caching tag for it
etag, last_modified = caching.get_view_cache_tag(tmpl)
if caching.not_modified(etag, last_modified):
return 'Not modified', 304

view_spec = {'category': category}
if 'date' in request.args:
view_spec['date'] = request.args['date']
Expand All @@ -251,10 +257,12 @@ def render_category(category='', template=None):
return render_publ_template(
tmpl,
category=Category(category),
view=view_obj), {'Content-Type': mime_type(tmpl)}
view=view_obj), {'Content-Type': mime_type(tmpl),
'ETag': etag,
'Last-Modified': email.utils.formatdate(last_modified)}


@cache.cached(key_prefix=caching.make_entry_key, unless=index.in_progress)
@cache.cached(key_prefix="entry/%s", query_string=True, unless=caching.do_not_cache)
@orm.db_session
def render_entry(entry_id, slug_text='', category=''):
""" Render an entry page.
Expand Down Expand Up @@ -331,10 +339,16 @@ def render_entry(entry_id, slug_text='', category=''):
if not tmpl:
raise http_error.BadRequest("Missing entry template")

etag, last_modified = caching.get_view_cache_tag(tmpl, record)
if caching.not_modified(etag, last_modified):
return 'Not modified', 304

return render_publ_template(
tmpl,
entry=entry_obj,
category=Category(category)), {'Content-Type': mime_type(tmpl)}
category=Category(category)), {'Content-Type': mime_type(tmpl),
'ETag': etag,
'Last-Modified': email.utils.formatdate(last_modified)}


def render_transparent_chit():
Expand Down
4 changes: 3 additions & 1 deletion publ/template.py
Expand Up @@ -20,7 +20,9 @@ def __init__(self, name, filename, file_path):
"""
self.name = name
self.filename = filename
self.last_modified = arrow.get(os.stat(file_path).st_mtime)
self.file_path = file_path
self.mtime = os.stat(file_path).st_mtime
self.last_modified = arrow.get(self.mtime)

def __str__(self):
return self.name