Skip to content

Commit

Permalink
Merge branch 'feature/pdf-stable-ids' into develop
Browse files — browse the repository at this point in the history
  • Loading branch information
felliott committed May 22, 2018
2 parents 46df6fb + a0cf879 commit 784eb69
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 4 deletions.
4 changes: 3 additions & 1 deletion mfr/core/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,13 @@ def download(self):

class ProviderMetadata:

- def __init__(self, name, ext, content_type, unique_key, download_url):
+ def __init__(self, name, ext, content_type, unique_key, download_url, stable_id=None):
  self.name = name
  self.ext = ext
  self.content_type = content_type
  self.unique_key = unique_key
  self.download_url = download_url
+ self.stable_id = stable_id

def serialize(self):
return {
Expand All @@ -62,4 +63,5 @@ def serialize(self):
'content_type': self.content_type,
'unique_key': str(self.unique_key),
'download_url': str(self.download_url),
+ 'stable_id': None if self.stable_id is None else str(self.stable_id),
}
2 changes: 1 addition & 1 deletion mfr/extensions/unoconv/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ def export(self):
)

pdf = PdfReader(self.output_file_path)
- pdf.ID[0] = self.metadata.unique_key
+ pdf.ID[0] = self.metadata.stable_id
  pdf.ID[1] = self.metadata.unique_key
PdfWriter(self.output_file_path, trailer=pdf).write()
7 changes: 5 additions & 2 deletions mfr/providers/osf/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,11 @@ async def metadata(self):
for unneeded in OsfProvider.UNNEEDED_URL_PARAMS:
cleaned_url.args.pop(unneeded, None)
self.metrics.add('metadata.clean_url_args', str(cleaned_url))
- unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest()
- return provider.ProviderMetadata(name, ext, content_type, unique_key, download_url)
+ meta = metadata['data']
+ unique_key = hashlib.sha256((meta['etag'] + cleaned_url.url).encode('utf-8')).hexdigest()
+ stable_id = hashlib.sha256('/{}/{}/{}'.format(meta['resource'], meta['provider'], meta['path'])
+ .encode('utf-8')).hexdigest()
+ return provider.ProviderMetadata(name, ext, content_type, unique_key, download_url, stable_id)

async def download(self):
"""Download file from WaterButler, returning stream."""
Expand Down

0 comments on commit 784eb69

Please sign in to comment.