Merge pull request #110 from mikegrima/schema
Added a marshmallow durable schema object
mikegrima committed Sep 27, 2018
2 parents 5a7b38d + 5f6649e commit 6041ecb
Showing 4 changed files with 96 additions and 2 deletions.
2 changes: 1 addition & 1 deletion historical/__about__.py
@@ -9,7 +9,7 @@
__summary__ = ("Historical tracking of AWS configuration data.")
__uri__ = "https://github.com/Netflix-Skunkworks/historical"

__version__ = "0.4.2"
__version__ = "0.4.3"

__author__ = "The Historical developers"
__email__ = "security@netflix.com"
2 changes: 1 addition & 1 deletion historical/constants.py
@@ -35,4 +35,4 @@ def extract_log_level_from_environment(k, default):
REGION_ATTR = os.environ.get('REGION_ATTR', 'Region')
SIMPLE_DURABLE_PROXY = os.environ.get('SIMPLE_DURABLE_PROXY', False)
LOGGING_LEVEL = extract_log_level_from_environment('LOGGING_LEVEL', logging.INFO)
EVENT_TOO_BIG_FLAG = "event_too_big"
EVENT_TOO_BIG_FLAG = 'event_too_big'
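
For context, here is a minimal sketch (not part of this commit) of how a downstream consumer of the simple durable proxy might honor this flag. It assumes the message was produced by the SimpleDurableSchema added below; the fetch_full_item callable is purely illustrative and does not exist in this repository.

# Hypothetical consumer sketch: if the proxied record was too big to ship inline,
# fall back to retrieving the full item some other way (fetch_full_item is illustrative only).
import json

EVENT_TOO_BIG_FLAG = 'event_too_big'


def handle_simple_durable_message(message_body, fetch_full_item):
    record = json.loads(message_body)

    if record.get(EVENT_TOO_BIG_FLAG):
        # The item was omitted by the proxy, so look it up from the durable table instead.
        return fetch_full_item(record['arn'])

    return record['item']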
30 changes: 30 additions & 0 deletions historical/models.py
@@ -117,3 +117,33 @@ def serialize_me(self, account_id, region, next_token=None):
            payload['next_token'] = next_token

        return self.dumps(payload).data


class SimpleDurableSchema(Schema):
    arn = fields.Str(required=True)
    event_time = fields.Str(required=True, default=default_event_time)
    tech = fields.Str(required=True)
    event_too_big = fields.Boolean(required=False)
    item = fields.Dict(required=False)

    def serialize_me(self, arn, event_time, tech, item=None):
        """Dumps the proper JSON for the schema. If the event is too big, then don't include the item.

        :param arn: ARN of the resource the event describes
        :param event_time: ISO 8601 timestamp of the event
        :param tech: the technology that produced the record (e.g. 's3')
        :param item: the full durable item; omit it when the event is too large to ship inline
        :return: the serialized JSON string
        """
        payload = {
            'arn': arn,
            'event_time': event_time,
            'tech': tech
        }

        if item:
            payload['item'] = item
        else:
            payload['event_too_big'] = True

        # Strip the literal '<empty>' placeholder from the serialized output.
        return self.dumps(payload).data.replace('<empty>', '')
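
As a quick illustration (not from this commit), the new schema could be exercised as follows. This is a sketch assuming marshmallow 2.x semantics (strict=True, .dumps()/.loads() returning results with a .data attribute), which matches the tests below; the ARN and timestamp values are made up.

# Usage sketch for SimpleDurableSchema (assumes marshmallow 2.x, as used elsewhere in this project).
from historical.models import SimpleDurableSchema

sds = SimpleDurableSchema(strict=True)

# Small event: the full item fits, so it ships inline.
small = sds.serialize_me('arn:aws:s3:::example-bucket', '2018-09-27T00:00:00Z', 's3',
                         item={'configuration': {'Name': 'example-bucket'}})

# Oversized event: leave the item out and the schema flags it instead.
big = sds.serialize_me('arn:aws:s3:::example-bucket', '2018-09-27T00:00:00Z', 's3')

# Either payload round-trips back into a dict:
record = sds.loads(big).data
assert record['event_too_big'] is True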
64 changes: 64 additions & 0 deletions historical/tests/test_proxy.py
@@ -338,6 +338,70 @@ def test_make_proper_simple_record():
    historical.common.proxy.HISTORICAL_TECHNOLOGY = old_tech


def test_simple_schema():
    import historical.common.proxy

    old_tech = historical.common.proxy.HISTORICAL_TECHNOLOGY
    historical.common.proxy.HISTORICAL_TECHNOLOGY = 's3'

    from historical.common.proxy import make_proper_simple_record
    from historical.models import SimpleDurableSchema

    # Small object
    new_bucket = S3_BUCKET.copy()
    new_bucket['eventTime'] = datetime(year=2017, month=5, day=12, hour=10, minute=30, second=0).isoformat() + 'Z'
    del new_bucket['eventSource']
    ddb_record = DynamoDBRecordFactory(
        dynamodb=DynamoDBDataFactory(
            NewImage=new_bucket,
            Keys={
                'arn': new_bucket['arn']
            },
            OldImage=new_bucket),
        eventName='INSERT')
    new_item = DynamoDBRecordsFactory(records=[ddb_record])
    data = json.loads(json.dumps(new_item, default=serialize))['Records'][0]

    test_blob = make_proper_simple_record(data)

    # Test loading from the schema:
    sds = SimpleDurableSchema(strict=True)

    result = sds.loads(test_blob).data
    test_blob = json.loads(test_blob)

    assert json.dumps(result, sort_keys=True) == json.dumps(test_blob, sort_keys=True)
    assert json.dumps(json.loads(sds.dumps(result).data), sort_keys=True) == json.dumps(test_blob, sort_keys=True)
    serialized = sds.serialize_me(test_blob['arn'], test_blob['event_time'], test_blob['tech'], item=test_blob['item'])
    assert json.dumps(json.loads(serialized), sort_keys=True) == json.dumps(result, sort_keys=True)

    # Big object:
    new_bucket['configuration'] = new_bucket['configuration'].copy()
    new_bucket['configuration']['VeryLargeConfigItem'] = 'a' * 262144
    ddb_record = DynamoDBRecordFactory(
        dynamodb=DynamoDBDataFactory(
            NewImage=new_bucket,
            Keys={
                'arn': new_bucket['arn']
            },
            OldImage=new_bucket),
        eventName='INSERT')
    new_item = DynamoDBRecordsFactory(records=[ddb_record])
    data = json.loads(json.dumps(new_item, default=serialize))['Records'][0]
    assert math.ceil(sys.getsizeof(json.dumps(data)) / 1024) >= 200

    test_blob = make_proper_simple_record(data)
    result = sds.loads(test_blob).data
    test_blob = json.loads(test_blob)
    assert json.dumps(result, sort_keys=True) == json.dumps(test_blob, sort_keys=True)
    assert json.dumps(json.loads(sds.dumps(result).data), sort_keys=True) == json.dumps(test_blob, sort_keys=True)
    serialized = sds.serialize_me(test_blob['arn'], test_blob['event_time'], test_blob['tech'])
    assert json.dumps(json.loads(serialized), sort_keys=True) == json.dumps(result, sort_keys=True)

    # Unmock:
    historical.common.proxy.HISTORICAL_TECHNOLOGY = old_tech


def test_proxy_dynamodb_differ(historical_role, current_s3_table, durable_s3_table, mock_lambda_environment,
                               buckets):
    """This mostly checks that the differ is able to properly load the reduced dataset from the Proxy."""
