Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stricter handling of input #5

Merged
merged 5 commits into from May 6, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.txt
Expand Up @@ -4,6 +4,8 @@ Changes
Next release
------------

- New exception type: ``ParseError``, raised when the input is malformed.

- Peppercorn will no longer run under Python 2.4. 2.5 or better is required.

- Python 3.2 compatibility.
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Expand Up @@ -103,3 +103,4 @@ Contributors

- Chris McDonough, 2011/02/16
- Atsushi Odagiri, 2012/02/05
- Valentin Ungureanu, 2012/02/06
2 changes: 2 additions & 0 deletions docs/api.rst
Expand Up @@ -4,3 +4,5 @@ API Documentation
.. automodule:: peppercorn

.. autofunction:: parse

.. autoexception:: ParseError
16 changes: 16 additions & 0 deletions docs/index.rst
Expand Up @@ -144,6 +144,22 @@ also be used as a source of information:

peppercorn.parse(fields)

It is possible to hand-craft input that cannot be parsed into a data
structure. In such cases :func:`peppercorn.parse` will raise
:exc:`peppercorn.ParseError`:

.. code-block:: python
:linenos:
fields = [
('name', 'project1'),
('__start__', 'bogus:marker'),
('title', 'Cool project'),
]
try:
pstruct = peppercorn.parse(fields)
except peppercorn.ParseError:
raise HTTPBadRequest()

.. toctree::
:maxdepth: 2

Expand Down
121 changes: 78 additions & 43 deletions peppercorn/__init__.py
@@ -1,55 +1,90 @@
import functools
from peppercorn.compat import next

def data_type(value):
if ':' in value:
return [ x.strip() for x in value.rsplit(':', 1) ]
return ('', value.strip())

START = '__start__'
END = '__end__'
SEQUENCE = 'sequence'
MAPPING = 'mapping'
RENAME = 'rename'

def stream(next_token_gen, token):

class ParseError(Exception):
"""
thanks to the effbot for
http://effbot.org/zone/simple-iterator-parser.htm
An exception raised when the input is malformed.
"""
op, data = token
if op == START:
name, typ = data_type(data)
out = []
if typ in (SEQUENCE, MAPPING, RENAME):
if typ in (SEQUENCE, RENAME):
out = []
add = lambda x, y: out.append(y)
else:
out = {}
add = out.__setitem__
token = next_token_gen()
op, data = token
while op != END:
key, val = stream(next_token_gen, token)
add(key, val)
token = next_token_gen()
op, data = token
if typ == RENAME:
if out:
out = out[0]
else:
out = ''
return name, out
else:
raise ValueError('Unknown stream start marker %s' % repr(token))


class RenameList(list): pass


_COLLECTION_TYPES = {
SEQUENCE: list,
MAPPING: dict,
RENAME: RenameList,
}


def data_type(marker):
"""Extract the name and the data type from a start marker.

Return the name and a collection instance.
"""
if ':' in marker:
name, typ = [ x.strip() for x in marker.rsplit(':', 1) ]
else:
return op, data
name = ''
typ = marker.strip()
try:
collection = _COLLECTION_TYPES[typ]()
except KeyError:
raise ParseError('Unknown stream start marker %s' % marker)
return name, collection


def parse(fields):
""" Infer a data structure from the ordered set of fields and
return it."""
fields = [(START, MAPPING)] + list(fields) + [(END,'')]
src = iter(fields)
result = stream(functools.partial(next, src), next(src))[1]
return result
return it.

A :exc:`ParseError` is raised if a data structure can't be inferred.
"""
stack = [{}]

def add_item(name, value):
"""Add an item to the last collection in the stack"""
current = stack[-1]
if isinstance(current, dict):
current[name] = value
else:
current.append(value)

for op, data in fields:
if op == START:
name, collection = data_type(data)
add_item(name, collection)
# Make future calls to `add_item` work on this collection:
stack.append(collection)
elif op == END:
# A start marker should have been encountered by now.
# Each start marker grows the stack.
# If this is not the case, then the end marker is illegal.
if len(stack) < 2:
raise ParseError('Spurious stream end marker')
collection = stack.pop()
# Replace all instances of RenameList with their first item.
# Note that the stack still contains a reference to the same
# collection object, so the value present in the stack is
# updated by mutating this collection.
if isinstance(collection, dict):
items = collection.items()
else:
items = enumerate(collection)
rename_info = []
for key, value in items:
if isinstance(value, RenameList):
rename_info.append((key, value[0] if value else ''))
for key, value in rename_info:
collection[key] = value
else:
add_item(op, data)

if len(stack) > 1:
raise ParseError('Unclosed sequence')

return stack[0]
8 changes: 0 additions & 8 deletions peppercorn/compat.py
@@ -1,10 +1,2 @@
import sys
PY3 = sys.version_info[0] == 3

try:
next = next
except NameError: # pragma: no cover
# for Python 2.5
def next(gen):
return gen.next()

72 changes: 69 additions & 3 deletions peppercorn/tests.py
Expand Up @@ -85,12 +85,12 @@ def test_fieldstorage(self):
self._assertFieldsResult(result)

def test_bad_start_marker(self):
from peppercorn import START
from peppercorn import START, ParseError
fields = [
(START, 'something:unknown'),
]

self.assertRaises(ValueError, self._callFUT, fields)
self.assertRaises(ParseError, self._callFUT, fields)

def test_unnamed_start_marker(self):
from peppercorn import START, END, MAPPING
Expand Down Expand Up @@ -129,7 +129,73 @@ def test_rename_no_subelements(self):

result = self._callFUT(fields)
self.assertEqual(result, {'': {'name':''}})


def test_unclosed_sequence(self):
from peppercorn import START, MAPPING, ParseError
fields = [
('name', 'fred'),
(START, 'series:%s' % MAPPING),
]
self.assertRaises(ParseError, self._callFUT, fields)

def test_deep_nesting(self):
import sys
from peppercorn import START, END, MAPPING
depth = sys.getrecursionlimit()
# Create a valid input nested deeper than the recursion limit:
fields = [(START, 'x:' + MAPPING)] * depth + [(END, '')] * depth
result = self._callFUT(fields)

temp = data = {}
for _ in range(depth):
temp['x'] = temp = {}

# Don't do self.assertEqual(result, data) here as
# this will exceed the recursion limit:
deep_ok = repr(result) == repr(data)
self.assertTrue(deep_ok, 'deep nesting failed')

def test_spurios_initial_end(self):
from peppercorn import END, ParseError
fields = [
(END, ''),
('name', 'fred'),
]
self.assertRaises(ParseError, self._callFUT, fields)

def test_spurious_intermediary_end(self):
from peppercorn import START, SEQUENCE, END, ParseError
fields = [
(START, 'names:%s' % SEQUENCE),
('foo', 'fred'),
(END, ''),
(END, ''),
('bar', 'joe'),
('year', '2012'),
]
self.assertRaises(ParseError, self._callFUT, fields)

def test_spurious_nested_end(self):
from peppercorn import END, ParseError
fields = self._getFields()
index = fields.index(('month', '12'))
self.assertEqual(index, 7)
fields.insert(7, (END, ''))
fields.insert(7, (END, ''))
fields.insert(7, (END, ''))
self.assertRaises(ParseError, self._callFUT, fields)

def test_spurious_final_end(self):
from peppercorn import START, RENAME, END, ParseError
fields = [
(START, 'names:%s' % RENAME),
('foo', 'fred'),
('bar', 'joe'),
(END, ''),
(END, ''),
]
self.assertRaises(ParseError, self._callFUT, fields)


def encode_multipart_formdata(fields):
BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
Expand Down