Commit
Merge pull request #112 from MichaelAquilina/use_gzip_s3
Use gzip compression by default for s3 indexes
MichaelAquilina committed Apr 13, 2018
2 parents 53c224c + 874ddc9 commit cd78333
Showing 5 changed files with 44 additions and 15 deletions.
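Background for the change: a raw zlib stream carries only a weak two-byte header that some libmagic builds report as application/octet-stream, while gzip output begins with the magic bytes 1f 8b that libmagic identifies reliably. A minimal sketch of the difference (illustration only, not part of the commit; python-magic is already the detection library load_index relies on):

import gzip
import json
import zlib

import magic  # python-magic, the detection library load_index relies on

payload = json.dumps({'hello': {'remote_timestamp': 1234}}).encode('utf-8')

print(gzip.compress(payload)[:2].hex())  # '1f8b' -- the gzip magic bytes
print(zlib.compress(payload)[:2].hex())  # '789c' -- default zlib header, easy to misread

# Depending on the libmagic version, the gzip buffer is reported as
# 'application/gzip' or 'application/x-gzip'; the zlib buffer may come back
# as 'application/zlib' or only 'application/octet-stream'.
print(magic.from_buffer(gzip.compress(payload), mime=True))
print(magic.from_buffer(zlib.compress(payload), mime=True))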
1 change: 1 addition & 0 deletions .travis.yml
@@ -4,6 +4,7 @@ python:
 - "3.4"
 - "3.5"
 - "3.6"
+
 install: pip install tox-travis codecov flake8
 script:
 - tox
2 changes: 1 addition & 1 deletion s4/__init__.py
@@ -1 +1 @@
-VERSION = '0.2.11'
+VERSION = '0.2.12'
19 changes: 16 additions & 3 deletions s4/clients/s3.py
@@ -2,6 +2,7 @@
 import collections
 import copy
 import fnmatch
+import gzip
 import json
 import logging
 import os
@@ -136,13 +137,25 @@ def load_index(self):
         )
         body = resp['Body'].read()
         content_type = magic.from_buffer(body, mime=True)
+
         if content_type == 'text/plain':
             logger.debug('Detected plain text encoding for index')
             return json.loads(body.decode('utf-8'))
-        elif content_type == 'application/zlib':
+
+        # the magic/file command reports gzip differently depending on its version
+        elif content_type in ('application/x-gzip', 'application/gzip'):
+            logger.debug('Detected gzip encoding for index')
+            body = gzip.decompress(body)
+            return json.loads(body.decode('utf-8'))
+
+        # Older versions of Ubuntu cannot detect zlib files,
+        # so assume octet-stream is zlib.
+        # If it isn't, the decompress function will blow up anyway
+        elif content_type in ('application/zlib', 'application/octet-stream'):
             logger.debug('Detected zlib encoding for index')
             body = zlib.decompress(body)
             return json.loads(body.decode('utf-8'))
+
         elif content_type == 'application/x-empty':
             return {}
         else:
@@ -156,8 +169,8 @@ def reload_index(self):
     def flush_index(self, compressed=True):
         data = json.dumps(self.index).encode('utf-8')
         if compressed:
-            logger.debug('Using zlib encoding for writing index')
-            data = zlib.compress(data)
+            logger.debug('Using gzip encoding for writing index')
+            data = gzip.compress(data)
         else:
             logger.debug('Using plain text encoding for writing index')

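The comments above note that feeding the wrong stream to a decompressor simply blows up; a simplified sketch of that fallback idea, written with exceptions instead of the MIME-type dispatch the project actually uses:

import gzip
import json
import zlib


def decode_index(body):
    # Try gzip first, then zlib; each decompressor rejects foreign input
    # immediately (gzip.decompress raises OSError on a bad magic number,
    # zlib.decompress raises zlib.error on a bad header).
    for decompress in (gzip.decompress, zlib.decompress):
        try:
            return json.loads(decompress(body).decode('utf-8'))
        except (OSError, zlib.error):
            continue
    # Neither codec accepted the body, so treat it as a plain-text index.
    return json.loads(body.decode('utf-8'))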
24 changes: 15 additions & 9 deletions tests/clients/test_s3.py
@@ -17,6 +17,7 @@
 
 from s4.clients import SyncObject, s3
 from s4.utils import to_timestamp
+
 from tests import utils


@@ -211,17 +212,22 @@ def test_get_index_keys(self, s3_client):
         })
         assert sorted(s3_client.get_index_keys()) == sorted(['cow', 'chicken'])
 
-    def test_get_index_timestamps(self, s3_client):
+    @pytest.mark.parametrize('compression', [None, 'gzip', 'zlib'])
+    def test_get_index_timestamps(self, s3_client, compression):
         # given
-        utils.set_s3_index(s3_client, {
-            'hello': {
-                'remote_timestamp': 1234,
-                'local_timestamp': 1200,
+        utils.set_s3_index(
+            s3_client,
+            {
+                'hello': {
+                    'remote_timestamp': 1234,
+                    'local_timestamp': 1200,
+                },
+                'world': {
+                    'remote_timestamp': 5000,
+                },
             },
-            'world': {
-                'remote_timestamp': 5000,
-            }
-        })
+            compression=compression,
+        )
 
         # then
         assert s3_client.get_remote_timestamp('hello') == 1234
13 changes: 11 additions & 2 deletions tests/utils.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 import datetime
+import gzip
 import json
 import logging
 import os
+import zlib
 
 import freezegun

@@ -95,11 +97,18 @@ def set_s3_contents(s3_client, key, timestamp=None, data=''):
     write_s3(s3_client.boto, s3_client.bucket, os.path.join(s3_client.prefix, key), data)
 
 
-def set_s3_index(s3_client, data):
+def set_s3_index(s3_client, data, compression=None):
+    body = json.dumps(data).encode('utf8')
+
+    if compression == 'gzip':
+        body = gzip.compress(body)
+    elif compression == 'zlib':
+        body = zlib.compress(body)
+
     s3_client.boto.put_object(
         Bucket=s3_client.bucket,
         Key=os.path.join(s3_client.prefix, '.index'),
-        Body=json.dumps(data),
+        Body=body,
     )
     s3_client.reload_index()

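A hypothetical call site for the extended helper, assuming the s3_client fixture used throughout tests/clients/test_s3.py:

from tests import utils

# Write a zlib-compressed index; load_index should decode it transparently.
utils.set_s3_index(
    s3_client,
    {'hello': {'remote_timestamp': 1234}},
    compression='zlib',  # or 'gzip', or None for a plain-text index
)
assert s3_client.get_remote_timestamp('hello') == 1234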
