Defines a new syntax for fileinputs

OneGov · May 29, 2015 · 96270e5 · 96270e5
1 parent 57b0dd3
commit 96270e5
Show file tree

Hide file tree

Showing 8 changed files with 140 additions and 10 deletions.
diff --git a/onegov/form/core.py b/onegov/form/core.py
@@ -1,12 +1,29 @@
 import inspect
+import magic
 import weakref
 
 from collections import OrderedDict
 from itertools import groupby
+from onegov.form.errors import InvalidMimeType
 from operator import itemgetter
+from mimetypes import types_map
 from wtforms import Form as BaseForm
 
 
+# Mimetypes accepted by Form.load_file when no other whitelist is passed.
+default_whitelist = {
+    'application/excel',
+    'application/vnd.ms-excel',
+    'application/msword',
+    'application/pdf',
+    'application/zip',
+    'image/gif',
+    'image/jpeg',
+    'image/png',
+    'image/x-ms-bmp',
+    'text/plain',
+}
+
+
 class Form(BaseForm):
     """ Extends wtforms.Form with useful methods and integrations needed in
     OneGov applications.
@@ -77,6 +94,27 @@ def submitted(self, request):
         """ Returns true if the given request is a successful post request. """
         return request.POST and self.validate()
 
+    def load_file(self, request, field_id, whitelist=default_whitelist):
+        """ Loads the given input field from the request, making sure its
+        mimetype matches the extension and is found in the mimetype whitelist.
+
+        :param request: The request holding the upload; the raw data is
+            read from ``request.FILES[field.name]``.
+        :param field_id: The attribute name of the file field on this form.
+        :param whitelist: The mimetypes that may be loaded. Defaults to
+            ``default_whitelist``.
+        :return: The content read from the uploaded file.
+        :raises InvalidMimeType: If the mimetype derived from the extension
+            differs from the one found by inspecting the content, or if the
+            mimetype is not in the whitelist.
+
+        """
+
+        field = getattr(self, field_id)
+
+        # types_map only knows lowercase extensions ('.pdf', not '.PDF')
+        file_ext = '.' + field.data.split('.')[-1].lower()
+        file_data = request.FILES[field.name].read()
+
+        # unknown extensions map to a value that never equals a mimetype
+        mimetype_by_extension = types_map.get(file_ext, '0xdeadbeef')
+
+        # without mime=True, magic returns a textual description of the
+        # content instead of a mimetype, which could never match types_map
+        mimetype_by_introspection = magic.from_buffer(file_data, mime=True)
+
+        if mimetype_by_extension != mimetype_by_introspection:
+            raise InvalidMimeType()
+
+        if mimetype_by_introspection not in whitelist:
+            raise InvalidMimeType()
+
+        return file_data
+
 
 class Fieldset(object):
     """ Defines a fieldset with a list of fields. """

diff --git a/onegov/form/errors.py b/onegov/form/errors.py
@@ -2,10 +2,14 @@ class FormError(Exception):
     pass
 
 
-class DuplicateLabelError(Exception):
+class DuplicateLabelError(FormError):
+    # A FormError which keeps the label it was raised for in ``label``.
 
     def __init__(self, label):
         self.label = label
 
     def __repr__(self):
         return "DuplicateLabelError(label='{}')".format(self.label)
+
+
+class InvalidMimeType(FormError):
+    # Raised by Form.load_file (onegov.form.core) when the mimetype of an
+    # upload does not match its extension or is not in the whitelist.
+    pass
diff --git a/onegov/form/parser/core.py b/onegov/form/parser/core.py
@@ -243,6 +243,7 @@
     email,
     field_identifier,
     fieldset_title,
+    fileinput,
     password,
     radio,
     stdnum,
@@ -251,8 +252,9 @@
     time,
 )
 from onegov.form.utils import label_to_field_id
-from onegov.form.validators import Stdnum
+from onegov.form.validators import Stdnum, ExpectedExtensions
 from wtforms import (
+    FileField,
     PasswordField,
     RadioField,
     StringField,
@@ -276,6 +278,7 @@
 elements.datetime = datetime()
 elements.date = date()
 elements.time = time()
+elements.fileinput = fileinput()
 elements.radio = radio()
 elements.checkbox = checkbox()
 elements.boxes = elements.checkbox | elements.radio
@@ -287,7 +290,8 @@
     elements.stdnum,
     elements.datetime,
     elements.date,
-    elements.time
+    elements.time,
+    elements.fileinput
 ])
 
 
@@ -351,6 +355,11 @@ def construct_checkbox(loader, node):
     return elements.checkbox.parseString(node.value)
 
 
+@constructor('!fileinput')
+def construct_fileinput(loader, node):
+    """ Parses the value of a ``!fileinput`` node with the fileinput parser.
+    """
+    parser = elements.fileinput
+    return parser.parseString(node.value)
+
+
 def parse_form(text):
     """ Takes the given form text, parses it and returns a WTForms form
     class (not an instance of it).
@@ -468,6 +477,14 @@ def handle_block(builder, block, dependency=None):
             dependency=dependency,
             required=identifier.required
         )
+    elif field.type == 'fileinput':
+        field_id = builder.add_field(
+            field_class=FileField,
+            label=identifier.label,
+            dependency=dependency,
+            required=identifier.required,
+            validators=[ExpectedExtensions(field.extensions)]
+        )
     elif field.type == 'radio':
         choices = [(c.label, c.label) for c in field.choices]
         checked = [c.label for c in field.choices if c.checked]

diff --git a/onegov/form/parser/grammar.py b/onegov/form/parser/grammar.py
@@ -1,8 +1,9 @@
 # -*- coding: utf-8 -*-
 from onegov.form.compat import unicode_characters
 from pyparsing import (
-    col,
+    alphanums,
     Combine,
+    Group,
     Literal,
     MatchFirst,
     nums,
@@ -209,14 +210,29 @@ def stdnum():
     return parser
 
 
-class Stack(list):
-    length_of_marker_box = 3
+def fileinput():
+    """ Returns a fileinput parser.
 
-    def init(self, string, line, tokens):
-        column = col(line, string) + self.length_of_marker_box
+    For all kinds of files::
+        *.*
 
-        if len(self) == 0 or self[0] < column:
-            self[:] = [column]
+    For specific files::
+        *.pdf|*.doc
     """
+    # '*.*' is suppressed entirely, which leaves the group below empty
+    any_extension = Suppress('*.*')
+    # one '*.ext' entry, optionally followed by the '|' separator
+    some_extension = Suppress('*.') + Word(alphanums) + Optional(Suppress('|'))
+
+    def extract_file_types(tokens):
+        # tokens[0] is the group: empty for '*.*', else the extensions
+        tokens['type'] = 'fileinput'
+        if len(tokens[0]) == 0:
+            tokens['extensions'] = ['*']
+        else:
+            tokens['extensions'] = [ext.lower() for ext in tokens[0].asList()]
+
+    parser = Group(any_extension | OneOrMore(some_extension))
+    parser.setParseAction(extract_file_types)
+
+    return parser
 
 
 def marker_box(characters):

diff --git a/onegov/form/tests/test_grammar.py b/onegov/form/tests/test_grammar.py
@@ -5,6 +5,7 @@
     datetime,
     email,
     field_identifier,
+    fileinput,
     password,
     radio,
     stdnum,
@@ -160,3 +161,24 @@ def test_checkbox():
     assert f.type == 'checkbox'
     assert f.label == 'Swiss German'
     assert not f.checked
+
+
+def test_fileinput():
+    """ Checks the type and the extensions the fileinput parser extracts. """
+
+    parser = fileinput()
+
+    expectations = (
+        ("*.*", ['*']),
+        ("*.pdf", ['pdf']),
+        ("*.bat", ['bat']),
+        ("*.png|*.jpg|*.gif", ['png', 'jpg', 'gif']),
+    )
+
+    for text, extensions in expectations:
+        result = parser.parseString(text)
+        assert result.type == 'fileinput'
+        assert result.extensions == extensions
diff --git a/onegov/form/tests/test_parser.py b/onegov/form/tests/test_parser.py
@@ -6,6 +6,7 @@
 from onegov.form.parser import parse_form
 from textwrap import dedent
 from webob.multidict import MultiDict
+from wtforms import FileField
 from wtforms.fields.html5 import (
     DateField,
     DateTimeLocalField,
@@ -169,6 +170,13 @@ def test_parse_time():
     assert isinstance(form.time, TimeField)
 
 
+def test_parse_fileinput():
+    """ A fileinput definition is turned into a labeled FileField. """
+    form_class = parse_form("File = *.pdf|*.doc")
+    form = form_class()
+
+    label = form.file.label
+    assert label.text == 'File'
+    assert isinstance(form.file, FileField)
+
+
 def test_parse_radio():
 
     text = dedent("""

diff --git a/onegov/form/validators.py b/onegov/form/validators.py
@@ -24,3 +24,27 @@ def __call__(self, form, field):
             self.format.validate(field.data)
         except StdnumValidationError:
             raise ValidationError(field.gettext(u'Invalid input.'))
+
+
+class ExpectedExtensions(object):
+    """ Makes sure an uploaded file has one of the expected extensions.
+
+    That doesn't necessarily mean the file is really what it claims to be.
+    But that's not the concern of this validator. That is the job of
+    :meth:`onegov.form.core.Form.load_file`.
+
+    The extensions are given as a list, with or without leading dot, and
+    are compared case-insensitively. The extension ``*`` accepts any file.
+
+    Usage::
+
+        ExpectedExtensions(['*'])  # no check, really
+        ExpectedExtensions(['pdf'])  # makes sure the given file is a pdf
+        ExpectedExtensions(['png', 'jpg'])  # either a png or a jpg
+    """
+
+    def __init__(self, extensions):
+        # a tuple, because str.endswith accepts a tuple but not a list
+        self.extensions = tuple(
+            '.' + ext.lstrip('.').lower() for ext in extensions
+        )
+
+    def __call__(self, form, field):
+        """ Raises a ValidationError if the field holds a filename that ends
+        with none of the expected extensions. Empty fields are not checked.
+        """
+        if not field.data:
+            return
+
+        # the wildcard '*' is stored as '.*' by __init__
+        if '.*' in self.extensions:
+            return
+
+        if not field.data.lower().endswith(self.extensions):
+            raise ValidationError(field.gettext(u'Invalid input.'))
diff --git a/setup.py b/setup.py
@@ -38,6 +38,7 @@ def get_long_description():
         'onegov.core>=0.3.0',
         'pyparsing',
         'pyyaml',
+        'python-magic',
         'python-stdnum',
         'wtforms',
         'wtforms-components[color]',