Skip to content

Commit

Permalink
Merge branch 'feature/hypothesis-ids-for-pdfs' into develop
Browse files Browse the repository at this point in the history
 * Start exporting pdfs as pdfs so that MFR can add stable annotation
   ids to them.

 [SVCS-846]
  • Loading branch information
felliott committed Jun 28, 2018
2 parents 997e5a3 + e8e55d5 commit 5cca53e
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 29 deletions.
12 changes: 12 additions & 0 deletions mfr/extensions/pdf/export.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import os
import imghdr
import logging
from http import HTTPStatus

from PIL import Image, TiffImagePlugin
from pdfrw import PdfReader, PdfWriter
from reportlab.pdfgen import canvas

from mfr.core import extension
from mfr.extensions.pdf import exceptions
from mfr.extensions.pdf.settings import EXPORT_MAX_PAGES

logger = logging.getLogger(__name__)


class PdfExporter(extension.BaseExporter):

Expand Down Expand Up @@ -63,6 +67,14 @@ def tiff_to_pdf(self, tiff_img, max_size):
c.save()

def export(self):
if self.ext.lower() == '.pdf':
pdf = PdfReader(self.source_file_path)
pdf.ID[0] = self.metadata.stable_id
pdf.ID[1] = self.metadata.unique_key
PdfWriter(self.output_file_path, trailer=pdf).write()
return

logger.debug('pdf-export: format::{}'.format(self.format))
parts = self.format.split('.')
export_type = parts[-1].lower()
max_size = [int(x) for x in parts[0].split('x')] if len(parts) == 2 else None
Expand Down
24 changes: 8 additions & 16 deletions mfr/extensions/pdf/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,17 @@ def render(self):

logger.debug('Extension found in supported list!')
exported_url = furl.furl(self.export_url)
if settings.EXPORT_TYPE:
if settings.EXPORT_MAXIMUM_SIZE:
exported_url.args['format'] = '{}.{}'.format(settings.EXPORT_MAXIMUM_SIZE,
settings.EXPORT_TYPE)
else:
exported_url.args['format'] = settings.EXPORT_TYPE

self.metrics.add('needs_export', True)
return self.TEMPLATE.render(
base=self.assets_url,
url=escape_url_for_template(exported_url.url),
enable_hypothesis=settings.ENABLE_HYPOTHESIS
)
if self.metadata.ext.lower() in settings.EXPORT_NEEDS_SCALING:
exported_url.args['format'] = '{}.{}'.format(settings.EXPORT_MAXIMUM_SIZE,
settings.EXPORT_TYPE)
else:
exported_url.args['format'] = 'export.{}'.format(settings.EXPORT_TYPE)

# TODO: is this dead code? ``settings.EXPORT_TYPE`` is never None or empty
self.metrics.add('needs_export', True)
return self.TEMPLATE.render(
base=self.assets_url,
url=escape_url_for_template(download_url.geturl()),
enable_hypothesis=settings.ENABLE_HYPOTHESIS,
url=escape_url_for_template(exported_url.url),
enable_hypothesis=settings.ENABLE_HYPOTHESIS
)

@property
Expand Down
11 changes: 9 additions & 2 deletions mfr/extensions/pdf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,17 @@
config = settings.child('PDF_EXTENSION_CONFIG')

EXPORT_TYPE = config.get('EXPORT_TYPE', 'pdf')
EXPORT_MAXIMUM_SIZE = config.get('EXPORT_MAXIMUM_SIZE', '1200x1200')
assert EXPORT_TYPE # mandatory config

ENABLE_HYPOTHESIS = config.get_bool('ENABLE_HYPOTHESIS', False)

# supports multiple file extensions, given as a single space-separated string
EXPORT_SUPPORTED = config.get('EXPORT_SUPPORTED', '.tiff .tif').split(' ')
EXPORT_SUPPORTED = config.get('EXPORT_SUPPORTED', '.pdf .tiff .tif').split(' ')
EXPORT_NEEDS_SCALING = config.get('EXPORT_NEEDS_SCALING', '.tiff .tif').split(' ')
EXPORT_MAX_PAGES = int(config.get('EXPORT_MAX_PAGES', 40))
EXPORT_MAXIMUM_SIZE = config.get('EXPORT_MAXIMUM_SIZE', '1200x1200')

# scaling requires page and size limits
if EXPORT_NEEDS_SCALING:
assert EXPORT_MAX_PAGES
assert EXPORT_MAXIMUM_SIZE
2 changes: 2 additions & 0 deletions mfr/providers/osf/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ async def metadata(self):
differently.
"""
download_url = await self._fetch_download_url()
logger.debug('download_url::{}'.format(download_url))
if '/file?' in download_url:
# URL is for WaterButler v0 API
# TODO Remove this when API v0 is officially deprecated
Expand Down Expand Up @@ -177,6 +178,7 @@ async def _fetch_download_url(self):
)
await request.release()

logger.debug('osf-download-resolver: request.status::{}'.format(request.status))
if request.status != 302:
raise exceptions.MetadataError(
request.reason,
Expand Down
5 changes: 5 additions & 0 deletions mfr/server/handlers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import abc
import uuid
import asyncio
import logging
import pkg_resources

import tornado.web
Expand Down Expand Up @@ -31,6 +32,8 @@
'Content-Encoding',
]

logger = logging.getLogger(__name__)


class CorsMixin:

Expand Down Expand Up @@ -110,6 +113,7 @@ async def prepare(self):
provider=settings.PROVIDER_NAME,
code=400,
)
logging.debug('target_url::{}'.format(self.url))

self.provider = utils.make_provider(
settings.PROVIDER_NAME,
Expand All @@ -120,6 +124,7 @@ async def prepare(self):

self.metadata = await self.provider.metadata()
self.extension_metrics.add('ext', self.metadata.ext)
logging.debug('extension::{}'.format(self.metadata.ext))

self.cache_provider = waterbutler.core.utils.make_provider(
settings.CACHE_PROVIDER_NAME,
Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,16 @@ def parse_requirements(requirements):
'.bmp = mfr.extensions.image:ImageExporter',
'.gif = mfr.extensions.image:ImageExporter',
'.psd = mfr.extensions.image:ImageExporter',
'.tif = mfr.extensions.pdf:PdfExporter',
'.tiff = mfr.extensions.pdf:PdfExporter',

# jsc3d
'.stp = mfr.extensions.jsc3d:JSC3DExporter',
'.step = mfr.extensions.jsc3d:JSC3DExporter',

# pdf
'.pdf = mfr.extensions.pdf:PdfExporter',
'.tif = mfr.extensions.pdf:PdfExporter',
'.tiff = mfr.extensions.pdf:PdfExporter',

# unoconv
# '.bib = mfr.extensions.unoconv:UnoconvExporter',
# '.bmp = mfr.extensions.unoconv:UnoconvExporter',
Expand Down
22 changes: 13 additions & 9 deletions tests/extensions/pdf/test_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def assets_url():

@pytest.fixture
def export_url():
return 'http://mfr.osf.io/export?url=http://osf.io/file/test.pdf'
return 'http://mfr.osf.io/export?url=http://osf.io/file/test.pdf&format=export.pdf'


@pytest.fixture
Expand All @@ -60,18 +60,22 @@ def tif_renderer(tif_metadata, tif_file_path, tif_url, assets_url, export_url):

class TestPdfRenderer:

def test_render_pdf(self, renderer, metadata, assets_url):
def test_render_pdf(self, renderer, assets_url, export_url):
body = renderer.render()
assert '<base href="{}/{}/web/" target="_blank">'.format(assets_url, 'pdf') in body
assert '<div id="viewer" class="pdfViewer"></div>' in body
assert 'DEFAULT_URL = \'{}\''.format(metadata.download_url) in body
assert 'DEFAULT_URL = \'{}\''.format(export_url) in body

def test_render_pdf_with_single_quote_in_name(self, assets_url):

download_url = 'http://wb.osf.io/file/te\'st.pdf?token=1234'
safe_download_url = 'http://wb.osf.io/file/te%27st.pdf?token=1234'
bad_download_url = 'http://osf.io/file/te\'st.pdf'
safe_download_url = 'http://osf.io/file/te%27st.pdf'

metadata = ProviderMetadata('te\'st', '.pdf', 'text/plain', '1234', download_url)
base_export_url = 'http://mfr.osf.io/export?url={}&format=export.pdf'
bad_export_url = base_export_url.format(bad_download_url)
safe_export_url = base_export_url.format(safe_download_url)

metadata = ProviderMetadata('te\'st', '.pdf', 'text/plain', '1234', bad_download_url)
renderer = PdfRenderer(metadata, '/tmp/te\'st.pdf', 'http://osf.io/file/te\'st.pdf',
assets_url,
'http://mfr.osf.io/export?url=http://osf.io/file/te\'st.pdf')
Expand All @@ -80,13 +84,13 @@ def test_render_pdf_with_single_quote_in_name(self, assets_url):

assert '<base href="{}/{}/web/" target="_blank">'.format(assets_url, 'pdf') in body
assert '<div id="viewer" class="pdfViewer"></div>' in body
assert 'DEFAULT_URL = \'{}\''.format(download_url) not in body
assert 'DEFAULT_URL = \'{}\''.format(safe_download_url) in body
assert 'DEFAULT_URL = \'{}\''.format(bad_export_url) not in body
assert 'DEFAULT_URL = \'{}\''.format(safe_export_url) in body

def test_render_tif(self, tif_renderer, assets_url):
exported_url = furl.furl(tif_renderer.export_url)
exported_url.args['format'] = '{}.{}'.format(settings.EXPORT_MAXIMUM_SIZE,
settings.EXPORT_TYPE)
settings.EXPORT_TYPE)

body = tif_renderer.render()
assert '<base href="{}/{}/web/" target="_blank">'.format(assets_url, 'pdf') in body
Expand Down

0 comments on commit 5cca53e

Please sign in to comment.