From c384ef11f5a03e527a393028b89e0dc5849e31ea Mon Sep 17 00:00:00 2001 From: Fitz Elliott Date: Tue, 27 Mar 2018 15:31:22 -0400 Subject: [PATCH 1/3] tabular: document sheet data structure, log sizes * Add more documentation for the `sheets` data structure. Rename or create some derived variables to make code clearer. * Log the number of columns and rows when one or the other exceeds the maximum allowed number. --- mfr/extensions/tabular/exceptions.py | 9 +++++++-- mfr/extensions/tabular/render.py | 21 +++++++++++++++------ mfr/extensions/tabular/settings.py | 2 +- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/mfr/extensions/tabular/exceptions.py b/mfr/extensions/tabular/exceptions.py index 36b6532fc..28f3d0787 100644 --- a/mfr/extensions/tabular/exceptions.py +++ b/mfr/extensions/tabular/exceptions.py @@ -29,9 +29,14 @@ class TableTooBigError(TabularRendererError): __TYPE = 'tabular_table_too_big' - def __init__(self, message, *args, code: int=400, **kwargs): + def __init__(self, message, *args, code: int=400, nbr_cols: int=0, nbr_rows: int=0, **kwargs): super().__init__(message, *args, code=code, **kwargs) - self.attr_stack.append([self.__TYPE, {}]) + self.nbr_cols = nbr_cols + self.nbr_rows = nbr_rows + self.attr_stack.append([self.__TYPE, { + 'nbr_cols': self.nbr_cols, + 'nbr_rows': self.nbr_rows + }]) class UnexpectedFormattingError(TabularRendererError): diff --git a/mfr/extensions/tabular/render.py b/mfr/extensions/tabular/render.py index d0a806909..d190b5fdf 100644 --- a/mfr/extensions/tabular/render.py +++ b/mfr/extensions/tabular/render.py @@ -46,14 +46,23 @@ def _render_grid(self, fp, ext, *args, **kwargs): # assets_path, ext): size = settings.SMALL_TABLE self._renderer_tabular_metrics['size'] = 'small' self._renderer_tabular_metrics['nbr_sheets'] = len(sheets) - for sheet in sheets: - sheet = sheets[sheet] # Sheets are stored in key-value pairs of the form {sheet: (col, row)} - if len(sheet[0]) > 9: # Check the number of columns + for 
sheet_title in sheets: + sheet = sheets[sheet_title] + + # sheet is a two-element list. sheet[0] is a list of dicts containing metadata about + # the column headers. Each dict contains four keys: `field`, `name`, `sortable`, `id`. + # sheet[1] is a list of dicts where each dict contains the row data. The keys are the + # fields the data belongs to and the values are the data values. + + nbr_cols = len(sheet[0]) + if nbr_cols > 9: size = settings.BIG_TABLE self._renderer_tabular_metrics['size'] = 'big' - if len(sheet[0]) > settings.MAX_SIZE or len(sheet[1]) > settings.MAX_SIZE: - raise exceptions.TableTooBigError('Table is too large to render.', extension=ext) + nbr_rows = len(sheet[1]) + if nbr_cols > settings.MAX_SIZE or nbr_rows > settings.MAX_SIZE: + raise exceptions.TableTooBigError('Table is too large to render.', extension=ext, + nbr_cols=nbr_cols, nbr_rows=nbr_rows) return sheets, size @@ -61,7 +70,7 @@ def _populate_data(self, fp, ext): """Determine the appropriate library and use it to populate rows and columns :param fp: file pointer :param ext: file extension - :return: tuple of column headers and row data + :return: a dict mapping sheet titles to tuples of column headers and row data """ function_preference = settings.LIBS.get(ext.lower()) diff --git a/mfr/extensions/tabular/settings.py b/mfr/extensions/tabular/settings.py index 258bafc03..efee6a3d8 100644 --- a/mfr/extensions/tabular/settings.py +++ b/mfr/extensions/tabular/settings.py @@ -4,7 +4,7 @@ config = settings.child('TABULAR_EXTENSION_CONFIG') -MAX_SIZE = int(config.get('MAX_SIZE', 10000)) +MAX_SIZE = int(config.get('MAX_SIZE', 10000)) # max number of rows or columns allowed. 
TABLE_WIDTH = int(config.get('TABLE_WIDTH', 700)) TABLE_HEIGHT = int(config.get('TABLE_HEIGHT', 600)) From 6e587e640cb7eeb3b7d708c2da32c6e752fd2a91 Mon Sep 17 00:00:00 2001 From: Fitz Elliott Date: Tue, 27 Mar 2018 15:36:02 -0400 Subject: [PATCH 2/3] tabular: refuse to render files larger than 10MB * Tabular data has to be slurped into memory to be displayed, meaning large files can cause resource shortages or out-of-memory (OOM) errors. Throw an informative error if the user tries to render a file larger than `MAX_FILE_SIZE` bytes. * Fix order of imports, remove old commented-out code. --- mfr/extensions/tabular/exceptions.py | 17 +++++++++++++++++ mfr/extensions/tabular/render.py | 20 +++++++++++++++++--- mfr/extensions/tabular/settings.py | 1 + 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/mfr/extensions/tabular/exceptions.py b/mfr/extensions/tabular/exceptions.py index 28f3d0787..59f461eb6 100644 --- a/mfr/extensions/tabular/exceptions.py +++ b/mfr/extensions/tabular/exceptions.py @@ -47,3 +47,20 @@ def __init__(self, message, *args, code: int=500, formatting_function: str='', * super().__init__(message, *args, code=code, **kwargs) self.formatting_function = formatting_function self.attr_stack.append([self.__TYPE, {'formatting_function': self.formatting_function}]) + + +class FileTooLargeError(TabularRendererError): + + __TYPE = 'tabular_file_too_large' + + def __init__(self, message, *args, code: int=400, file_size: int=None, max_size: int=None, + **kwargs): + super().__init__(message, *args, code=code, **kwargs) + + self.file_size = file_size + self.max_size = max_size + + self.attr_stack.append([self.__TYPE, { + 'file_size': self.file_size, + 'max_size': self.max_size + }]) diff --git a/mfr/extensions/tabular/render.py b/mfr/extensions/tabular/render.py index d190b5fdf..b9d3cb1a6 100644 --- a/mfr/extensions/tabular/render.py +++ b/mfr/extensions/tabular/render.py @@ -1,12 +1,16 @@ -import os import json +import logging +import os +from humanfriendly import 
format_size from mako.lookup import TemplateLookup -from mfr.core import extension +from mfr.core import extension from mfr.extensions.tabular import settings from mfr.extensions.tabular import exceptions +logger = logging.getLogger(__name__) + class TabularRenderer(extension.BaseRenderer): @@ -16,6 +20,16 @@ class TabularRenderer(extension.BaseRenderer): ]).get_template('viewer.mako') def render(self): + file_size = os.path.getsize(self.file_path) + if file_size > settings.MAX_FILE_SIZE: + raise exceptions.FileTooLargeError( + 'Tabular files larger than {} are not rendered. Please download ' + 'the file to view.'.format(format_size(settings.MAX_FILE_SIZE, binary=True)), + file_size=file_size, + max_size=settings.MAX_FILE_SIZE, + extension=self.metadata.ext, + ) + with open(self.file_path, errors='replace') as fp: sheets, size = self._render_grid(fp, self.metadata.ext) return self.TEMPLATE.render( @@ -34,7 +48,7 @@ def file_required(self): def cache_result(self): return True - def _render_grid(self, fp, ext, *args, **kwargs): # assets_path, ext): + def _render_grid(self, fp, ext, *args, **kwargs): """Render a tabular file to html :param fp: file pointer object :return: RenderResult object containing html and assets diff --git a/mfr/extensions/tabular/settings.py b/mfr/extensions/tabular/settings.py index efee6a3d8..c085bab26 100644 --- a/mfr/extensions/tabular/settings.py +++ b/mfr/extensions/tabular/settings.py @@ -4,6 +4,7 @@ config = settings.child('TABULAR_EXTENSION_CONFIG') +MAX_FILE_SIZE = int(config.get('MAX_FILE_SIZE', 10 * 1024 * 1024)) # 10Mb MAX_SIZE = int(config.get('MAX_SIZE', 10000)) # max number of rows or columns allowed. 
TABLE_WIDTH = int(config.get('TABLE_WIDTH', 700)) TABLE_HEIGHT = int(config.get('TABLE_HEIGHT', 600)) From b540713fa5738cbf1f84e9f434b676f5612c1c99 Mon Sep 17 00:00:00 2001 From: Fitz Elliott Date: Tue, 27 Mar 2018 16:05:10 -0400 Subject: [PATCH 3/3] tabular: log col and row count for too big xlsx files --- mfr/extensions/tabular/libs/xlrd_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mfr/extensions/tabular/libs/xlrd_tools.py b/mfr/extensions/tabular/libs/xlrd_tools.py index fe4b19742..7afbd145d 100644 --- a/mfr/extensions/tabular/libs/xlrd_tools.py +++ b/mfr/extensions/tabular/libs/xlrd_tools.py @@ -19,7 +19,8 @@ def xlsx_xlrd(fp): for sheet in wb.sheets(): if sheet.ncols > max_size or sheet.nrows > max_size: - raise TableTooBigError('Table is too large to render.', '.xlsx') + raise TableTooBigError('Table is too large to render.', '.xlsx', + nbr_cols=sheet.ncols, nbr_rows=sheet.nrows) if sheet.ncols < 1 or sheet.nrows < 1: sheets[sheet.name] = ([], [])