Skip to content

Commit

Permalink
Initial implementation of issue #167
Browse files Browse the repository at this point in the history
-Untested
-Other source files have not been updated
  • Loading branch information
Lucretiel committed Apr 2, 2013
1 parent 433bf59 commit 5955e35
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 150 deletions.
109 changes: 61 additions & 48 deletions aspen/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,60 +32,70 @@
import re
import functools

SPLITTER = '----+'

def memoizing(func):
    '''Decorator to make functions cache their return values.

    Results are keyed on the positional-argument tuple, so decorated
    functions must be called with hashable positional arguments only.
    '''
    results = {}
    @functools.wraps(func)
    def wrapper(*args):
        # EAFP: a cache hit is the common case, so try the lookup first.
        try:
            return results[args]
        except KeyError:
            value = results[args] = func(*args)
            return value
    return wrapper

@memoizing
def suffixed(splitter):
    # Append '.*\n' so the resulting pattern consumes the remainder of the
    # splitter line (any trailing text plus the newline), not just the dashes.
    return splitter + '.*\n'

@memoizing
def escaped(splitter):
    # Pattern for an *escaped* splitter: a run of slashes immediately before
    # the splitter line. MULTILINE so ^ anchors at every line start.
    return re.compile('^(/*)/(%s)' % suffixed(splitter), re.MULTILINE)

@memoizing
def splitting(splitter):
    # Pattern that matches a whole splitter line, used to split raw content
    # into pages. MULTILINE so ^ anchors at every line start.
    return re.compile('^' + suffixed(splitter), re.MULTILINE)

def split(raw, splitter=SPLITTER):
'''Pure split method. This function defines the plain logic to split a
string into a list of strings via a splitter.
#Paginate methods.
#=================

# A page break: a line starting with [----] (four or more dashes); anything
# after the brackets up to the newline is captured as the page's header.
SPLITTER = r'^\[----+\](?P<header>.*?)\n'
# An escaped page break: one or more slashes immediately before the brackets.
ESCAPED_SPLITTER = r'^/(/*)(\[----+\].*?\n)'
# A specline: "media/type" optionally followed by "via renderer".
SPECLINE = r'^(.*?)\s*(?:via\s*(.*?))?$'

# Raw strings above: '\[' and '\s' in plain string literals are invalid
# escape sequences (a DeprecationWarning, and an error in future Pythons).
SPLITTER = re.compile(SPLITTER, re.MULTILINE)
ESCAPED_SPLITTER = re.compile(ESCAPED_SPLITTER, re.MULTILINE)
SPECLINE = re.compile(SPECLINE, re.MULTILINE)

class Page(object):
    """One page of a multi-page resource.

    content -- the raw text of the page
    header  -- text captured from the splitter line that opened the page
    padding -- number of blank lines to prepend so that compiled code
               reports line numbers relative to the original source file
    """
    __slots__ = ('header', 'content', 'padding')

    def __init__(self, content='', header='', padding=0):
        # content defaults to '' so blank filler pages can be created with
        # no arguments (e.g. when padding a short page list).
        self.content = content
        self.header = header
        self.padding = padding

    @property
    def padded_content(self):
        """Return content preceded by `padding` newlines."""
        return ('\n' * self.padding) + self.content

def split(raw):
    '''Pure split generator. Yield a Page for each section of raw delimited
    by the SPLITTER pattern. The text before the first splitter becomes the
    first page (with an empty header); a string with no splitters yields a
    single page containing the whole string.
    '''
    current_index = 0
    header = ''

    for page_break in SPLITTER.finditer(raw):
        yield Page(raw[current_index:page_break.start()], header)
        header = page_break.group('header').strip()
        current_index = page_break.end()

    # Always yield the trailing section. The previous "current_index == 0"
    # check only covered the no-splitter case and silently dropped all
    # content after the final splitter line.
    yield Page(raw[current_index:], header)

def escape(content):
    '''Pure escape method. This function defines the logic to properly convert
    escaped splitter patterns in a string.
    '''
    # r'\1\2' -- the replacement must be a raw string. The original '\1\2'
    # was the control characters \x01\x02, not backreferences, so escaped
    # splitters were replaced with garbage instead of being unescaped.
    return ESCAPED_SPLITTER.sub(r'\1\2', content)

def split_and_escape(raw):
    '''Generator: split raw into Pages and unescape each page's content.

    Yields the same pages as split(raw), with escaped splitter patterns in
    each page's content converted back to literal splitter lines.
    '''
    for page in split(raw):
        page.content = escape(page.content)
        yield page

def parse_specline(header):
    '''Attempt to parse the header in a page returned from split(...) as a
    specline. Returns a tuple (content_type, renderer)
    '''
    # groups('') substitutes '' for unmatched groups, so a header without a
    # "via renderer" clause yields an empty renderer string rather than None.
    # NOTE(review): assumes SPECLINE always matches (its groups are lazy and
    # optional, so match() should never return None) -- confirm.
    return SPECLINE.match(header).groups('')

def can_split(raw, splitter=SPLITTER):
    '''Determine if a text block would be split by a splitter.

    splitter is a compiled pattern, defaulting to the module-level SPLITTER.
    '''
    # Search with the compiled pattern directly; the removed splitting()
    # helper (string-pattern cache) no longer exists in this module.
    return bool(splitter.search(raw))

from aspen.exceptions import LoadError
from aspen.resources.json_resource import JSONResource
Expand Down Expand Up @@ -168,9 +178,12 @@ def get_resource_class(filename, raw, media_type):
# and I've actually seen, in the wild, a file with exactly two ^Ls (form
# feeds; the control character appears lost in transcription here). So
# we sniff the first few bytes.

def s(x):
return raw.startswith(x)
is_dynamic = s('"""') or s('import') or s('from')
#def s(x):
# return raw.startswith(x)
#is_dynamic = s('"""') or s('import') or s('from')

#Testing for a regex match should be reliable enough, even in a binary
is_dynamic = can_split(raw)

if not is_dynamic:
Class = StaticResource
Expand Down
85 changes: 36 additions & 49 deletions aspen/resources/dynamic_resource.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from aspen import Response
from aspen.resources import split_and_escape
from aspen.resources import split_and_escape, Page
from aspen.resources.resource import Resource


Expand Down Expand Up @@ -80,8 +80,9 @@ def parse_into_pages(self, raw):
"""

pages = split_and_escape(raw)
pages = list(split_and_escape(raw))
npages = len(pages)


# Check for too few pages. This is a sanity check as get_resource_class
# should guarantee this. Bug if it fails.
Expand All @@ -101,88 +102,74 @@ def parse_into_pages(self, raw):
return pages

def compile_pages(self, pages):
"""Given a list of bytestrings, replace the bytestrings with objects.
"""Given a list of pages, replace the pages with objects.
All dynamic resources compile the first two pages the same way. It's
the third and following pages that differ, so we require subclasses to
supply a method for that: compile_page.
"""

# Standardize newlines.
# =====================
# compile requires \n, and doing it now makes the next line easier. In
# general it's nice to standardize this, I think. XXX Should we be
# going back to \r\n for the wire? That's HTTP, right?

pages = [page.replace('\r\n', '\n') for page in pages]

one = pages[0]
two = pages[1]


# Compute paddings and pad the second and third pages.
# Compute paddings
# ====================================================
# This is so we get accurate tracebacks. We pass padding to the
# compile_page hook; the SocketResource subclass uses it, since it has
# an additional logic page that it wants to pad. We don't simply pad
# all pages because then for content pages the user would view source
# in their browser and see nothing but whitespace until they scroll way
# down.

paddings = self._compute_paddings(pages)
two = paddings[1] + two

# This is so we get accurate tracebacks.
for page, padding in zip(pages, self._compute_paddings(pages)):
page.padding = padding

# Exec the first page and compile the second.
# ===========================================

one = pages[0]
two = pages[1]

context = dict()
context['__file__'] = self.fs
context['website'] = self.website

one = compile(one, self.fs, 'exec')
one = compile(one.padded_content, self.fs, 'exec')
exec one in context # mutate context
one = context # store it

two = compile(two, self.fs, 'exec')
two = compile(two.padded_content, self.fs, 'exec')

pages[0] = one
pages[1] = two


# Subclasses are responsible for the rest.
# ========================================

for i, page in enumerate(pages[2:]):
i += 2 # no start kw to enumerate in Python 2.5
pages[i] = self.compile_page(page, paddings[i])

pages[2:] = []

pages[2:] = (self.compile_page(page) for page in pages[2:])

return pages


@staticmethod
def _compute_paddings(pages):
    """Given an iterable of Pages, generate one padding per page.

    Each padding is the number of source lines preceding the page in the
    original file, so that prepending that many newlines to the page's
    content makes its line numbers correct for tracebacks.
    """
    lines = 0
    for page in pages:
        yield lines
        # +1 accounts for the splitter line that followed this page in the
        # original source.
        lines += page.content.count('\n') + 1

@staticmethod
def _prepend_empty_pages(pages, min_length):
    """Given a list of pages and a min length, prepend blank pages to the
    list until it is at least as long as min_length. Mutates pages in place.
    """
    num_extra_pages = min_length - len(pages)
    # Note that range(x) returns an empty sequence if x < 1, so this is a
    # no-op when the list is already long enough. Page('') rather than
    # Page(): Page.__init__ requires a content argument.
    pages[0:0] = (Page('') for _ in range(num_extra_pages))

# Hooks
# =====

def compile_page(self, *a):
    """Subclass hook: given a page, return a compiled object.

    Concrete dynamic-resource subclasses must override this.
    """
    raise NotImplementedError

Expand Down
6 changes: 0 additions & 6 deletions aspen/resources/json_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@ class JSONResource(DynamicResource):
min_pages = 2
max_pages = 2

def compile_page(self, page, padding):
"""Given None, return None. JSON resources have no third page.
"""
assert page is None, page # sanity check
return None

def process_raised_response(self, response):
"""Given a response, mutate it as needed.
"""
Expand Down
50 changes: 15 additions & 35 deletions aspen/resources/negotiated_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@

from aspen import Response
import mimeparse
from aspen.resources import PAGE_BREAK
from aspen.resources.dynamic_resource import DynamicResource
from aspen.utils import typecheck
from aspen.resources import parse_specline


# A renderer name: lowercase alphanumerics, dots and dashes. The former
# "#!" hashbang prefix is no longer part of the name.
renderer_re = re.compile(r'[a-z0-9.-]+')
# A media type: type/subtype, each made of alphanumerics and . + * -
media_type_re = re.compile(r'[A-Za-z0-9.+*-]+/[A-Za-z0-9.+*-]+')


Expand All @@ -46,17 +46,11 @@ def __init__(self, *a, **kw):
DynamicResource.__init__(self, *a, **kw)


def compile_page(self, page, __ignored):
"""Given a bytestring, return a (renderer, media type) pair.
def compile_page(self, page):
"""Given a bytestring, return a (render, media type) pair.
"""
if '\n' in page:
specline, raw = page.split('\n', 1)
else:
specline = ''
raw = page
specline = specline.strip(PAGE_BREAK + ' \n')
make_renderer, media_type = self._parse_specline(specline)
render = make_renderer(self.fs, raw)
make_render, media_type = self._parse_specline(page.header)
render = make_render(self.fs, page.content)
if media_type in self.renderers:
raise SyntaxError("Two content pages defined for %s." % media_type)

Expand All @@ -66,7 +60,6 @@ def compile_page(self, page, __ignored):

return (render, media_type) # back to parent class


def get_response(self, context):
"""Given a context dict, return a response object.
"""
Expand Down Expand Up @@ -107,13 +100,12 @@ def get_response(self, context):

return response


def _parse_specline(self, specline):
"""Given a bytestring, return a two-tuple.
The incoming string is expected to be of the form:
^L #!renderer media/type
media_type via renderer
The renderer is optional. It will be computed based on media type if
absent. The return two-tuple contains a render function and a media
Expand All @@ -123,26 +115,14 @@ def _parse_specline(self, specline):
"""
typecheck(specline, str)
if specline == "":
raise SyntaxError("Content pages in negotiated resources must "
"have a specline.")

# Parse into one or two parts.
parts = specline.split()
nparts = len(parts)
if nparts not in (1, 2):
raise SyntaxError("A negotiated resource specline must have one "
"or two parts: #!renderer media/type. Yours is: "
"%s." % specline)

# Assign parts.
if nparts == 1:
media_type = parts[0]

# Parse into parts
parts = parse_specline(specline)

#Assign parts
media_type, renderer = parts
if renderer == '':
renderer = self.website.default_renderers_by_media_type[media_type]
renderer = "#!" + renderer
else:
assert nparts == 2, nparts
renderer, media_type = parts

# Validate media type.
if media_type_re.match(media_type) is None:
Expand All @@ -167,7 +147,7 @@ def _get_renderer_factory(self, media_type, renderer):
msg = ("Malformed renderer %s. It must match %s. Possible "
"renderers (might need third-party libs): %s.")
raise SyntaxError(msg % (renderer, renderer_re.pattern, possible))
renderer = renderer[2:] # strip off the hashbang

renderer = renderer.decode('US-ASCII')

factories = self.website.renderer_factories
Expand Down
Loading

0 comments on commit 5955e35

Please sign in to comment.